import IMP
import subprocess
import random
import IMP.domino
import IMP.core
import IMP.rmf
import RMF
import time
import IMP.algebra
import types
import re
import sys
import operator
import os
import resource
import atomicDominoUtilities


class AtomicDomino:

    def __init__(self, model, protein, parameterFileName):
        self.model = model
        self.protein = protein
        self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(protein)
        self.readParameters(parameterFileName)
        self.wroteNativeProtein = 0
        self.maxMem = 0

    def readParameters(self, parameterFileName):
        self.parameters = atomicDominoUtilities.readParameters(parameterFileName)

    def logMemory(self):
        currentMem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        currentMem *= 10 ** -6
        print "log memory: %s" % currentMem
        if (currentMem > self.maxMem):
            self.maxMem = currentMem

    # Get a parameter value. Raises a KeyError if the parameter is not found.
    def getParam(self, paramName):
        paramValue = self.parameters[paramName]
        return paramValue

    def loadDominoHelpers(self):
        self.subsetStateScores = {}
        self.subsetRestraintScores = {}
        self.subsetStateFailures = {}
        self.nodesToAssignmentCount = {}
        self.totalAssignments = 0
        outputDir = self.getParam("output_directory")
        mtreeCytoscapeAssignmentFile = self.getParam("mtree_cytoscape_assignment_file")
        self.mtreeCytoscapeAssignmentFh = open(os.path.join(outputDir, mtreeCytoscapeAssignmentFile), 'w')
        self.mtreeCytoscapeAssignmentFh.write("Assignments\n")
        self.mtNamesToIndices = {}
        self.subsetNamesToAssignmentFiles = {}

    # Simple way to calculate run-time for certain methods and procedures.
    # When reset is 0, compares the previously saved time to the current one
    # and returns the difference (saving the current time too).
    # When reset is 1, just saves the current time.
    def logTimePoint(self, reset):
        newTime = time.time()
        if (reset == 0):
            timeDifference = newTime - self.timePoint
            timeDifference = round(timeDifference, 0)
            self.timePoint = newTime
            return timeDifference
        else:
            self.timePoint = newTime
            return newTime

    def createParticleStatesTable(self):
        for particleName in self.particleInfo.keys():
            statesToCenters = {}
            dataArray = self.particleInfo[particleName]
            for i in range(len(dataArray)):
                [state, center] = dataArray[i]
                statesToCenters[state] = center
            statesList = []
            for stateIndex in sorted(statesToCenters.keys()):
                vector3d = IMP.algebra.Vector3D(statesToCenters[stateIndex])
                statesList.append(vector3d)
                # print "appending particle %s state %s center %s" % (particleName, stateIndex, vector3d)
            xyzStates = IMP.domino.XYZStates(statesList)
            self.dominoPst.set_particle_states(self.namesToParticles[particleName], xyzStates)

    def quickParticleName(self, particle):
        return atomicDominoUtilities.quickParticleName(particle)

    def filterAssignments(self, assignments, subset, nodeIndex, rssft):
        filteredSubsets = []
        restraintList = []
        # make dependency graph for stats
        for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
            restraintList.append(r)
        dg = IMP.get_dependency_graph(restraintList)
        stateCounter = 0
        passedCounter = 0
        # create the filter for this subset
        sFilter = rssft.get_subset_filter(subset, filteredSubsets)
        # check each unique state to see if it passes the filter
        filteredAssignments = []
        for assignment in assignments:
            if (sFilter is None or sFilter.get_is_ok(assignment)):
                # keep the assignment if it passes
                passedCounter += 1
                filteredAssignments.append(assignment)
            stateCounter += 1
        fraction = (passedCounter * 1.0) / (stateCounter * 1.0)
        print "%s states passed out of %s total for this subset (fraction %s)" % (passedCounter, stateCounter, fraction)
        if (passedCounter == 0):
            print "subset %s had 0 assignments (out of %s) pass. Exiting..." % (subset, stateCounter)
            sys.exit()
        return filteredAssignments

    # Convert the name of the subset to something more readable.
    # Currently returns the name as a sorted list of atoms.
    def quickSubsetName(self, subset):
        cleanName = self.cleanVertexName(subset)
        atomNameList = cleanName.split(" ")
        sortedList = sorted(atomNameList)
        name = " ".join(sortedList)
        return name

    def getNodeIndexList(self):
        return self.mt.get_vertices()

    def getLeafNodeIndexList(self):
        leafNodeIndexList = []
        for subset in self.subsets:
            index = self.getMtIndexForSubset(self.quickSubsetName(subset))
            leafNodeIndexList.append(index)
        return leafNodeIndexList

    # Create the Domino sampler and give it all the other Domino objects it needs
    def createSampler(self):
        s = IMP.domino.DominoSampler(self.model, self.dominoPst)
        s.set_merge_tree(self.mt)
        filterTables = []
        filterTables.append(self.rssft)
        s.set_subset_filter_tables(filterTables)
        s.set_assignments_table(self.lat)
        if (self.getParam("cross_subset_filtering") == 1):
            s.set_use_cross_subset_filtering(1)
        self.sampler = s

    # Use the model restraint set to get the interaction graph, junction tree, and
    # merge tree; also get the subsets from the junction tree and return them.
    def createSubsets(self):
        self.initializeParticleStatesTable()
        ig = IMP.domino.get_interaction_graph([self.model.get_root_restraint_set()], self.dominoPst)
        print "interaction graph:"
        ig.show()
        jt = IMP.domino.get_junction_tree(ig)
        print "junction tree:"
        jt.show()
        self.subsetNamesToSubsets = {}
        mt = IMP.domino.get_balanced_merge_tree(jt)

        # map vertex indices to the atoms in each subset
        for index in mt.get_vertices():
            subset = mt.get_vertex_name(index)
            subsetName = self.cleanVertexName(subset)
            self.mtNamesToIndices[subsetName] = index

        subsets = IMP.domino.get_subsets(jt)
        for subset in subsets:
            print "created subset %s" % subset
            subsetName = self.quickSubsetName(subset)
            self.subsetNamesToSubsets[subsetName] = subset

        print "merge tree:"
        mt.show()

        self.ig = ig
        self.jt = jt
        self.mt = mt
        self.subsets = subsets

        self.parentSiblingMap = {}
        self.parentSiblingMap[self.mt.get_vertices()[-1]] = {}
        self.createSiblingMap(self.mt.get_vertices()[-1])

    def getMtIndexForSubset(self, subsetName):
        for index in self.mt.get_vertices():
            mtNode = self.mt.get_vertex_name(index)
            mtName = self.cleanVertexName(mtNode)
            mtName = mtName.rstrip()
            mtName = mtName.lstrip()
            mtNameList = mtName.split(" ")
            subsetNameList = subsetName.split(" ")
            mtNameListSorted = sorted(mtNameList)
            subsetNameListSorted = sorted(subsetNameList)
            if (" ".join(subsetNameListSorted) == " ".join(mtNameListSorted)):
                return index
        print "did not find merge tree index for subset name %s" % subsetName
        sys.exit()

    def getDominoParticleNames(self):
        particles = self.dominoPst.get_particles()
        particleNameList = []
        for particle in particles:
            pName = self.quickParticleName(particle)
            particleNameList.append(pName)
        return particleNameList

    def getSubsetNameList(self):
        subsetNameList = []
        for subset in self.subsets:
            name = self.quickSubsetName(subset)
            subsetNameList.append(name)
        return subsetNameList

    def getMtIndexToParticles(self):
        mtIndexToParticles = {}
        allVertices = self.mt.get_vertices()
        for nodeIndex in allVertices:
            subset = self.mt.get_vertex_name(nodeIndex)
            particleList = self.quickSubsetName(subset)
            mtIndexToParticles[nodeIndex] = particleList
            print "getMtIndexToParticles: index %s is particleList %s" % (nodeIndex, particleList)
        return mtIndexToParticles

    def readTrajectoryFile(self, atomList, rh, frames, scoreOutputFile, skipDomino, flexibleAtoms):
        bestScore = 100000
        bestRmsd = 10000
        bestScoreFrame = 0
        bestRmsdFrame = 0
        scoreOutputFh = open(scoreOutputFile, 'w')
        for i in frames:
            try:
                IMP.rmf.load_frame(rh, i, self.protein)
            except Exception:
                print "exception in loading frame %s" % i
                continue
            score = self.model.evaluate(False)
            leaves = IMP.atom.get_leaves(self.protein)
            # for leaf in leaves:
            #     xyzD = IMP.core.XYZ.decorate_particle(leaf)
            #     print "read trajectory file: coordinates for frame %s particle %s are %s" % (i, self.quickParticleName(leaf), xyzD.get_coordinates())
            rmsd = self.calculateNativeRmsd(flexibleAtoms)
            scoreOutputFh.write("%s\t%s\t%s\n" % (i, score, rmsd))
            if (score < bestScore):
                bestScore = score
                bestScoreFrame = i
            if (rmsd < bestRmsd):
                bestRmsd = rmsd
                bestRmsdFrame = i
            if (skipDomino == 0):
                for atomName in atomList:
                    particle = self.namesToParticles[atomName]

                    # get grid index and coordinates
                    gridIndex = self.snapToGrid(particle)
                    center = self.grid.get_center(gridIndex)
                    pythonCenter = []
                    for coordinate in center:
                        pythonCenter.append(coordinate)

                    # grid indices are mapped to coordinate states; check if we have seen this grid index
                    if (gridIndex not in self.particleStatesSeen[atomName]):
                        # set this particle state index to the size of the dictionary,
                        # which effectively increments the index with each new state
                        currentSize = len(self.particleStatesSeen[atomName].keys())
                        self.particleStatesSeen[atomName][gridIndex] = currentSize
                    state = self.particleStatesSeen[atomName][gridIndex]
                    self.particleInfo[atomName].append([state, pythonCenter])
            else:
                print "didn't add domino states due to skip parameters"
        return [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame]

    # Get the grid index for the given particle. Returns an integer that can be used
    # to get the center of the grid space for discretizing the particle.
    def getParticleGridIndex(self, leaf):
        xyzDecorator = IMP.core.XYZ.decorate_particle(leaf)
        coordinates = xyzDecorator.get_coordinates()
        extendedIndex = 0
        extendedIndex = self.grid.get_extended_index(coordinates)
        if (self.grid.get_has_index(extendedIndex) == 0):
            if (self.getParam("grid_type") == "sparse"):
                # mostly working with sparse grids now
                self.grid.add_voxel(extendedIndex, 1)
            else:
                self.grid.add_voxel(extendedIndex)
        index = self.grid.get_index(extendedIndex)
        return index

    # Set the coordinates of this atom to the center of the grid space containing it
    # (effectively discretizing the system).
    def snapToGrid(self, particle):
        index = self.getParticleGridIndex(particle)
        center = self.grid.get_center(index)
        xyzDecorator = IMP.core.XYZ.decorate_particle(particle)
        xyzDecorator.set_coordinates(center)
        return index

    # Take the initial protein and snap every atom to the center of its grid point
    def discretizeNativeProtein(self):
        outputDir = self.getParam("output_directory")
        nativeSnappedFile = self.getParam("native_protein_snapped_output_file")
        leaves = IMP.atom.get_leaves(self.protein)
        for leaf in leaves:
            self.snapToGrid(leaf)
        IMP.atom.write_pdb(self.protein, os.path.join(outputDir, nativeSnappedFile))

    # Get the particle representing the alpha carbon at the center of the peptide
    def getPeptideCa(self):
        # get all residue indices in the peptide
        residues = IMP.atom.get_by_type(self.protein, IMP.atom.RESIDUE_TYPE)
        peptideIndicesToResidues = {}
        for residueH in residues:
            chain = IMP.atom.get_chain(residueH)
            chainId = chain.get_id()
            residue = residueH.get_as_residue()
            if (chainId == self.getParam("peptide_chain")):
                peptideIndicesToResidues[residue.get_index()] = residue

        # use the min and max residue indices to get the residue in the middle (rounding down)
        minPeptide = min(sorted(peptideIndicesToResidues.keys()))
        maxPeptide = max(sorted(peptideIndicesToResidues.keys()))
        centerPeptide = round(((maxPeptide - minPeptide) / 2 + minPeptide), 0)

        # get the particle corresponding to the CA atom at the middle residue and return it
        centerName = atomicDominoUtilities.makeParticleName(self.getParam("peptide_chain"), int(centerPeptide), "CA")
        centerParticle = self.namesToParticles[centerName]
        return centerParticle

    # Create the grid object that will be used to create discrete states for each particle
    def createGrid(self):
        protBb = IMP.atom.get_bounding_box(self.protein)
        gridSpacing = float(self.getParam("grid_spacing"))
        bufferSpace = float(self.getParam("grid_buffer_space"))
        protBb += bufferSpace  # add buffer around grid
        g = 0
        if (self.getParam("grid_type") == "sparse"):
            ca = self.getPeptideCa()
            xyzCa = IMP.core.XYZ.decorate_particle(ca)
            g = IMP.algebra.SparseUnboundedIntGrid3D(gridSpacing, xyzCa.get_coordinates())
        else:
            g = IMP.algebra.DenseDoubleGrid3D(gridSpacing, protBb)
        self.grid = g

    # Create the particle states table; for each particle in the system, add XYZStates
    # with that particle's initial location.
    def initializeParticleStatesTable(self):
        dominoPst = IMP.domino.ParticleStatesTable()
        restrainedParticles = atomicDominoUtilities.getRestrainedParticles(self.protein, self.model, self.namesToParticles)
        for p in restrainedParticles:
            xyzD = IMP.core.XYZ.decorate_particle(p)
            xyz = IMP.core.XYZ(p).get_coordinates()
            xyzStates = IMP.domino.XYZStates([xyz])
            dominoPst.set_particle_states(p, xyzStates)
        self.dominoPst = dominoPst

    def createUniqueLeafAssignments(self, particleNameList, particleInfo):
        size = len(particleInfo[particleNameList[0]])
        allAssignments = []
        for i in range(size):
            nextAssignment = []
            for particleName in particleNameList:
                dataArray = particleInfo[particleName]
                [state, center] = dataArray[i]
                nextAssignment.append(state)
            allAssignments.append(nextAssignment)

        # make assignments unique to avoid duplicates
        uniqueAssignments = {}
        for assignment in allAssignments:
            stateString = ""
            for index in assignment:
                stateString = stateString + str(index) + "_"
            uniqueAssignments[stateString] = assignment

        # add all unique assignments to the final list
        finalAssignments = []
        for stateString in uniqueAssignments.keys():
            assignmentList = uniqueAssignments[stateString]
            assignment = IMP.domino.Assignment(assignmentList)
            finalAssignments.append(assignment)
        return finalAssignments

    # Read the MD trajectory; for each particle, save all unique states, and for each
    # subset, save all assignments.
    def readMdTrajectory(self, atomList, flexibleAtoms):
        # open trajectory file
        outputDir = self.getParam("output_directory")
        trajectoryFile = self.getParam("md_trajectory_output_file")
        fullFile = os.path.join(outputDir, trajectoryFile)
        rh = RMF.open_rmf_file(fullFile)
        IMP.rmf.set_hierarchies(rh, [self.protein])
        framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam("md_interval")), self.protein)
        print "preparing to read md frames %s" % framesToRead

        # prepare data structures for tracking:
        # particleInfo: for each particle, a list where each entry corresponds to an MD
        #   step, with value [domino state, coordinates]
        # particleStatesSeen: for each particle, a dictionary keyed by grid index whose
        #   value is the domino state
        particleInfo = {}
        particleStatesSeen = {}
        for atomName in atomList:
            particle = self.namesToParticles[atomName]
            particleInfo[atomName] = []
            particleStatesSeen[atomName] = {}
        self.particleStatesSeen = particleStatesSeen
        self.particleInfo = particleInfo

        # read the trajectory file
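        # readTrajectoryFile returns [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame];
        # since skipDomino is passed as 0 here, it also fills self.particleInfo and
        # self.particleStatesSeen with the discretized grid states seen along the trajectory.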
        mdScoreOutputFile = os.path.join(outputDir, "%s" % self.getParam("md_score_output_file"))
        [bestMdScore, bestScoreFrame, bestRmsd, bestRmsdFrame] = self.readTrajectoryFile(atomList, rh, framesToRead, mdScoreOutputFile, 0, flexibleAtoms)

        # output best score information
        # print "try loading bad frame"
        self.singlePdbResults(fullFile, bestScoreFrame, self.getParam("best_md_score_output_file"))
        # self.singlePdbResults(fullFile, 10000, self.getParam("best_md_score_output_file"))
        self.singlePdbResults(fullFile, bestRmsdFrame, self.getParam("best_md_rmsd_output_file"))
        self.singlePdbResults(fullFile, -1, self.getParam("final_md_frame_output_file"))
        self.bestMdScore = round(bestMdScore, 2)
        self.bestMdRmsd = round(bestRmsd, 2)
        self.bestMdScoreFrame = bestScoreFrame
        self.bestMdRmsdFrame = bestRmsdFrame

    def readCgTrajectories(self, atomList, flexibleAtoms):
        cgFileName = self.getParam("cg_output_file")
        bestCgScore = 10000000
        bestCgScoreFile = ""
        bestCgRmsd = 10000000
        bestCgRmsdFile = ""
        outputDir = self.getParam("output_directory")
        trajectoryFile = self.getParam("md_trajectory_output_file")
        fullFile = os.path.join(outputDir, trajectoryFile)
        rh = RMF.open_rmf_file(fullFile)
        IMP.rmf.set_hierarchies(rh, [self.protein])
        framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam("cg_interval")), self.protein)
        skipCgDomino = int(self.getParam("skip_cg_domino"))
        if (len(framesToRead) > 0):
            for cgNumber in framesToRead:
                # open the next CG trajectory
                outputDir = self.getParam("output_directory")
                fullCgFileName = os.path.join(outputDir, "%s%s" % (cgFileName, cgNumber))
                rh = RMF.open_rmf_file(fullCgFileName)
                IMP.rmf.set_hierarchies(rh, [self.protein])

                # only look at the last 20 frames
                frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
                cgFrames = []
                startFrameCount = 0
                if (frameCount > 20):
                    startFrameCount = frameCount - 20
                for i in range(startFrameCount, frameCount):
                    cgFrames.append(i)

                # process the trajectory
                cgScoreOutputFile = os.path.join(outputDir, "%s%s" % (self.getParam("cg_score_output_file"), cgNumber))
                [cgScore, cgScoreFrame, cgRmsd, cgRmsdFrame] = self.readTrajectoryFile(atomList, rh, cgFrames, cgScoreOutputFile, skipCgDomino, flexibleAtoms)
                print "cg number %s rmsd %s score %s" % (cgNumber, cgRmsd, cgScore)

                # update best score
                if (cgScore < bestCgScore):
                    bestCgScore = cgScore
                    bestCgScoreFile = fullCgFileName
                if (cgRmsd < bestCgRmsd):
                    bestCgRmsd = cgRmsd
                    bestCgRmsdFile = fullCgFileName

            # output best score information
            self.singlePdbResults(bestCgScoreFile, -1, self.getParam("best_cg_score_output_file"))
            self.singlePdbResults(bestCgRmsdFile, -1, self.getParam("best_cg_rmsd_output_file"))
            self.singlePdbResults("%s%s" % (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
            finalCgRmsd = self.calculateNativeRmsd(flexibleAtoms)
            print "final cg rmsd is %s " % finalCgRmsd
        self.bestCgScore = round(bestCgScore, 2)
        self.bestCgRmsd = round(bestCgRmsd, 2)
        self.bestCgScoreFile = bestCgScoreFile
        self.bestCgRmsdFile = bestCgRmsdFile

    def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
        fullTrajectoryFile = os.path.join(self.getParam("output_directory"), trajectoryFile)
        fullOutputFile = os.path.join(self.getParam("output_directory"), outputPdbFile)
        rh = RMF.open_rmf_file(fullTrajectoryFile)
        IMP.rmf.set_hierarchies(rh, [self.protein])
        if (frame == -1):
            frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
        IMP.rmf.load_frame(rh, frame, self.protein)
        IMP.atom.write_pdb(self.protein, fullOutputFile)

    def calculateRmsd(self, otherProtein, flexibleAtoms):
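        # Compare the flexible atoms of otherProtein against the same atoms of
        # self.protein (matched by particle name) and return the RMSD between the
        # two coordinate sets.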
        otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(otherProtein)
        otherVector = []
        modelVector = []
        for pName in otherNamesToParticles.keys():
            if (pName not in flexibleAtoms):
                continue
            otherParticle = otherNamesToParticles[pName]
            modelParticle = self.namesToParticles[pName]
            otherVector.append(IMP.core.XYZ.decorate_particle(otherParticle).get_coordinates())
            modelVector.append(IMP.core.XYZ.decorate_particle(modelParticle).get_coordinates())
        rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
        return rmsd

    def calculateNativeRmsd(self, flexibleAtoms):
        if (self.wroteNativeProtein == 0):
            pdbName = self.getParam("native_pdb_input_file")
            self.nativeModel = IMP.Model()
            self.nativeProtein = IMP.atom.read_pdb(pdbName, self.nativeModel, IMP.atom.ATOMPDBSelector())
            self.wroteNativeProtein = 1
        return self.calculateRmsd(self.nativeProtein, flexibleAtoms)

    def calculateTrajectoryRmsd(self, trajectoryFile, trajectoryFrame, flexibleAtoms):
        pdbName = self.getParam("native_pdb_input_file")
        otherModel = IMP.Model()
        # read the PDB into its own model (previously self.nativeModel was passed here by mistake)
        otherProtein = IMP.atom.read_pdb(pdbName, otherModel, IMP.atom.ATOMPDBSelector())
        outputDir = self.getParam("output_directory")
        fullFile = os.path.join(outputDir, trajectoryFile)
        print "open calculate traj rmf %s" % fullFile
        rh = RMF.open_rmf_file(fullFile)
        IMP.rmf.set_hierarchies(rh, [otherProtein])
        if (trajectoryFrame == -1):
            trajectoryFrame = IMP.rmf.get_number_of_frames(rh, otherProtein) - 1
        IMP.rmf.load_frame(rh, trajectoryFrame, otherProtein)
        return self.calculateRmsd(otherProtein, flexibleAtoms)

    def createAllSubsetAssignments(self):
        lat = IMP.domino.ListAssignmentsTable()
        rssft = IMP.domino.RestraintScoreSubsetFilterTable(self.model.get_root_restraint_set(), self.dominoPst)
        leafNodeIndexList = self.getLeafNodeIndexList()
        for nodeIndex in leafNodeIndexList:
            # get the subset for this leaf
            subset = self.mt.get_vertex_name(nodeIndex)
            particleNameList = []
            for particle in subset:
                particleNameList.append(self.quickParticleName(particle))
            print "creating initial assignments for leaf %s" % nodeIndex

            # use particleInfo to create assignments and filter them
            assignments = self.createUniqueLeafAssignments(particleNameList, self.particleInfo)
            filteredAssignments = self.filterAssignments(assignments, subset, nodeIndex, rssft)

            # add assignments to the container and the list assignments table
            packedAssignmentContainer = IMP.domino.PackedAssignmentContainer()
            for assignment in filteredAssignments:
                packedAssignmentContainer.add_assignment(assignment)
            lat.set_assignments(subset, packedAssignmentContainer)
        self.lat = lat
        self.rssft = rssft

    def runDomino(self):
        root = self.mt.get_vertices()[-1]
        completeAc = self.loadAssignments(root)
        self.completeAc = completeAc

    def loadAssignments(self, nodeIndex):
        children = self.mt.get_out_neighbors(nodeIndex)
        subset = self.mt.get_vertex_name(nodeIndex)
        heapCount = int(self.getParam("heap_count"))
        mine = IMP.domino.HeapAssignmentContainer(heapCount, self.rssft.get_subset_filter(subset, []))
        if len(children) == 0:
            print "computing assignments for leaf %s" % nodeIndex
            self.sampler.load_vertex_assignments(nodeIndex, mine)
            print "leaf node %s has %s leaf assignments" % (nodeIndex, mine.get_number_of_assignments())
        else:
            if (children[0] > children[1]):
                children = [children[1], children[0]]
            # recurse on the two children
            firstAc = self.loadAssignments(children[0])
            secondAc = self.loadAssignments(children[1])
            self.logTimePoint(1)
            self.sampler.load_vertex_assignments(nodeIndex, firstAc, secondAc, mine)
            timeDifference = self.logTimePoint(0)
            print "Done Parent %s Assignments %s first child %s second child %s time %s" % (nodeIndex, mine.get_number_of_assignments(), firstAc.get_number_of_assignments(), secondAc.get_number_of_assignments(), timeDifference)
        self.totalAssignments += mine.get_number_of_assignments()
        self.logMemory()
        return mine

    def writeOutput(self, flexibleAtoms, startTime):
        bestDominoScore = 100000
        bestAssignment = 0
        finalAssignments = self.completeAc.get_assignments()
        for assignment in finalAssignments:
            IMP.domino.load_particle_states(self.dominoPst.get_subset(), assignment, self.dominoPst)
            score = self.model.evaluate(False)
            if (score < bestDominoScore):
                bestAssignment = assignment
                bestDominoScore = round(score, 2)
        print "best domino score is %s " % bestDominoScore
        print "best md score is %s" % self.bestMdScore
        print "best md rmsd is %s" % self.bestMdRmsd
        print "best cg score is %s" % self.bestCgScore
        print "best cg rmsd is %s" % self.bestCgRmsd
        print "merge tree contained %s total assignments" % self.totalAssignments

        IMP.domino.load_particle_states(self.dominoPst.get_subset(), bestAssignment, self.dominoPst)
        dominoVsMdRmsd = round(self.calculateTrajectoryRmsd(self.getParam("md_trajectory_output_file"), self.bestMdScoreFrame, flexibleAtoms), 2)
        cg = IMP.core.ConjugateGradients(self.model)
        cg.optimize(100)
        IMP.atom.write_pdb(self.protein, os.path.join(self.getParam("output_directory"), self.getParam("minimum_domino_score_pdb")))
        dominoVsCgRmsd = round(self.calculateTrajectoryRmsd(self.bestCgScoreFile, -1, flexibleAtoms), 2)
        dominoMinimizedScore = round(self.model.evaluate(False), 2)
        dominoRmsd = round(self.calculateNativeRmsd(flexibleAtoms), 2)
        runTime = round(time.time() - startTime, 2)
        print "final domino score (after cg): %s" % dominoMinimizedScore
        print "final domino rmsd: %s" % dominoRmsd
        print "best domino rmsd with best md score: %s" % dominoVsMdRmsd
        print "domino rmsd with best cg score: %s" % dominoVsCgRmsd
        print "Final Results\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.bestMdScore, self.bestMdRmsd, self.bestCgScore, self.bestCgRmsd, bestDominoScore, dominoRmsd, dominoMinimizedScore, dominoVsCgRmsd, self.totalAssignments, self.maxMem, runTime)

    ####################
    # Parallel methods
    ####################

    def createSubsetFromParticles(self, particleNames):
        particleNameList = particleNames.split(" ")
        particleList = []
        for particleName in particleNameList:
            particle = self.namesToParticles[particleName]
            particleList.append(particle)
        subset = IMP.domino.Subset(particleList)
        return [subset, particleList]

    def createHdf5AssignmentContainer(self, index, particleNames, read):
        root = self.getAssignmentContainerRoot(index, read)
        print "got root for index %s" % index
        dataset = 0
        if (read == 1):
            dataset = root.get_child_index_data_set_2d(str(index))
        else:
            dataset = root.add_child_index_data_set_2d(str(index))
        print "added child index dataset"
        # firstDataset.set_size([
        [subset, particleOrder] = self.createSubsetFromParticles(particleNames)
        print "created subset for index %s" % index
        hdf5Ac = IMP.domino.create_assignments_container(dataset, subset, particleOrder)
        print "returning from create"
        order = IMP.domino.get_order(subset, particleOrder)
        for nextInt in order:
            print "next int is %s" % nextInt
        return [subset, hdf5Ac]

    def loadAssignmentsParallel(self, nodeIndex, particleInfo, mtIndexToNodeInfo, mtIndexToSubsetOrder, mtIndexToParticles):
        IMP.base.set_log_level(IMP.WARNING)
        if ("firstChild" not in mtIndexToNodeInfo[nodeIndex]):
            print "writing file for leaf index %s" % nodeIndex
            return self.createAssignmentsParallel(particleInfo, nodeIndex, mtIndexToParticles)
        else:
            beginTime = self.logTimePoint(1)
            firstChildIndex = mtIndexToNodeInfo[nodeIndex]["firstChild"]
            [firstSubset, firstAc] = self.createHdf5AssignmentContainer(firstChildIndex, mtIndexToSubsetOrder[firstChildIndex], 1)
            firstAcCreateTime = self.logTimePoint(0)
            secondChildIndex = mtIndexToNodeInfo[nodeIndex]["secondChild"]
            [secondSubset, secondAc] = self.createHdf5AssignmentContainer(secondChildIndex, mtIndexToSubsetOrder[secondChildIndex], 1)
            secondAcCreateTime = self.logTimePoint(0)
            print "getting assignments for nodeIndex %s first child %s second child %s" % (nodeIndex, firstChildIndex, secondChildIndex)
            firstChildParticles = mtIndexToSubsetOrder[firstChildIndex]
            secondChildParticles = mtIndexToSubsetOrder[secondChildIndex]
            myParticles = mtIndexToParticles[nodeIndex]
            print "first child particles %s\nsecond child particles %s\nmy particles: %s\n" % (firstChildParticles, secondChildParticles, myParticles)
            for p in firstSubset:
                print "next particle in first subset: %s" % self.quickParticleName(p)
            for p in secondSubset:
                print "next particle in second subset: %s" % self.quickParticleName(p)
            for assignment in firstAc.get_assignments():
                print "next assignment for first child %s: %s" % (firstChildIndex, assignment)
            for assignment in secondAc.get_assignments():
                print "next assignment for second child %s: %s" % (secondChildIndex, assignment)

            [mySubset, myAc] = self.createHdf5AssignmentContainer(nodeIndex, mtIndexToParticles[nodeIndex], 0)
            print "done creating hdf5"
            prepTime = self.logTimePoint(0)
            rssft = IMP.domino.RestraintScoreSubsetFilterTable(self.model.get_root_restraint_set(), self.dominoPst)
            rssf = rssft.get_subset_filter(mySubset, [])
            # heapAc = IMP.domino.HeapAssignmentContainer(1000, rssf)
            IMP.domino.load_merged_assignments(firstSubset, firstAc, secondSubset, secondAc, [rssft], myAc)
            heapTime = self.logTimePoint(0)
            # myAc.add_assignments(heapAc.get_assignments())
            addTime = self.logTimePoint(0)
            for assignment in myAc.get_assignments():
                print "loadAssignmentsParallel next assignment for %s: %s" % (nodeIndex, assignment)
            doneTime = self.logTimePoint(0)
            firstChildCount = firstAc.get_number_of_assignments()
            secondChildCount = secondAc.get_number_of_assignments()
            print "first count: %s second count: %s begin: %s firstAc: %s secondAc: %s prep: %s heap: %s add: %s done: %s" % (firstChildCount, secondChildCount, beginTime, firstAcCreateTime, secondAcCreateTime, prepTime, heapTime, addTime, doneTime)
            subsetOrder = self.getSubsetOrderList(mySubset)
            return subsetOrder

    def createAssignmentsParallel(self, particleInfo, nodeIndex, mtIndexToParticles):
        subsetName = mtIndexToParticles[nodeIndex]
        print "starting assignments parallel leaf index %s subset name %s" % (nodeIndex, subsetName)
        [subset, particleList] = self.createSubsetFromParticles(subsetName)
        particleNameList = subsetName.split(" ")

        # create assignments by reading states
        finalAssignments = self.createUniqueLeafAssignments(particleNameList, particleInfo)
        # lat = IMP.domino.ListAssignmentsTable()
        # lat.set_assignments(subset, finalAssignmentContainer)
        # self.sampler.set_assignments_table(lat)
        # hdf5AssignmentContainer = IMP.domino.HDF5AssignmentContainer(dataset, subset, self.dominoPst.get_particles(), subsetName)
        rssft = IMP.domino.RestraintScoreSubsetFilterTable(self.model.get_root_restraint_set(), self.dominoPst)
        filteredAssignments = self.filterAssignments(finalAssignments, subset, nodeIndex, rssft)
        root = self.getAssignmentContainerRoot(nodeIndex, 0)
        dataset = root.add_child_index_data_set_2d(str(nodeIndex))
        dataset.set_size([0, len(subset)])
        # particleList (returned by createSubsetFromParticles above) supplies the particle order
        hdf5AssignmentContainer = IMP.domino.create_assignments_container(dataset, subset, particleList)
        for assignment in filteredAssignments:
            hdf5AssignmentContainer.add_assignment(assignment)
        for assignment in hdf5AssignmentContainer.get_assignments():
            print "hdf5 assignment container node %s next assignment %s" % (nodeIndex, assignment)

        # self.checkAssignments(subset, nodeIndex, particleOrder)
        subsetOrder = self.getSubsetOrderList(subset)
        print "leaf node returning with order %s" % subsetOrder
        return subsetOrder

    # Write PyMOL session files for the interactions across atoms and all subsets
    def writePymolData(self):
        outputDir = self.getParam("output_directory")
        geometry = IMP.domino.get_interaction_graph_geometry(self.ig)
        pymolInteractions = self.getParam("pymol_interactions_file")
        w = IMP.display.PymolWriter(os.path.join(outputDir, pymolInteractions))
        for gg in geometry:
            w.add_geometry(gg)

        pymolSubsets = self.getParam("pymol_subsets_file")
        geometry = IMP.domino.get_subset_graph_geometry(self.jt)
        w = IMP.display.PymolWriter(os.path.join(outputDir, pymolSubsets))
        for gg in geometry:
            w.add_geometry(gg)

    # Clean the default name of the vertex (in brackets and with each atom contained
    # in quotes) and return a string with [] and " removed.
    def cleanVertexName(self, vertexName):
        nodeRe = re.compile('Subset\(\[(.*?)\s*\]')  # not sure if any vertices still have a Subset prefix, but keeping anyway
        secondNodeRe = re.compile('\[(.*?)\s*\]')  # atom names
        node = nodeRe.search(str(vertexName))
        secondNode = secondNodeRe.search(str(vertexName))
        vertexNameFinal = ""
        foundName = 0
        if node:
            foundName = node.group(1)
        if secondNode:
            foundName = secondNode.group(1)
        vertexNameFinal = foundName.replace('"', '')
        return vertexNameFinal

    def getSubsetOrderList(self, subset):
        subsetOrderList = []
        for particle in subset:
            name = self.quickParticleName(particle)
            subsetOrderList.append(name)
        subsetOrder = " ".join(subsetOrderList)
        return subsetOrder

    def checkAssignments(self, subset, nodeIndex, particleOrder):
        print "reading back in assignments for leaf index %s" % nodeIndex
        root = self.getAssignmentContainerRoot(nodeIndex, 1)
        dataset = root.get_child_index_data_set_2d(str(nodeIndex))
        hdf5 = IMP.domino.create_assignments_container(dataset, subset, particleOrder)
        for assignment in hdf5.get_assignments():
            print "leaf index %s read back in %s" % (nodeIndex, assignment)

    def createSamplerLite(self):
        s = IMP.domino.DominoSampler(self.model, self.dominoPst)
        if (self.getParam("cross_subset_filtering") == 1):
            s.set_use_cross_subset_filtering(1)
        self.sampler = s

    def createSiblingMap(self, parentIndex):
        children = self.mt.get_out_neighbors(parentIndex)
        if (len(children) > 0):
            firstChild = children[0]
            secondChild = children[1]
            self.parentSiblingMap[firstChild] = {}
            self.parentSiblingMap[firstChild]["sibling"] = secondChild
            self.parentSiblingMap[firstChild]["parent"] = parentIndex
            self.parentSiblingMap[secondChild] = {}
            self.parentSiblingMap[secondChild]["sibling"] = firstChild
            self.parentSiblingMap[secondChild]["parent"] = parentIndex
            print "created map for parent %s first child %s second child %s" % (parentIndex, firstChild, secondChild)
            self.parentSiblingMap[parentIndex]["firstChild"] = firstChild
            self.parentSiblingMap[parentIndex]["secondChild"] = secondChild
            self.createSiblingMap(firstChild)
            self.createSiblingMap(secondChild)

    def getMtIndexToNodeInfo(self):
        return self.parentSiblingMap

    def getLeafParentNodeIndexList(self):
        leafParentNodeIndexList = {}
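        # collect the merge-tree indices of the parents of all leaf nodes;
        # the dictionary is used as a set (all values are 1)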
        leafNodeIndexList = self.getLeafNodeIndexList()
        for leafIndex in leafNodeIndexList:
            parent = self.parentSiblingMap[leafIndex]["parent"]
            leafParentNodeIndexList[parent] = 1
        return leafParentNodeIndexList

    def getMtIndexToNameList(self):
        mtIndexToNames = {}
        for index in self.mt.get_vertices():
            name = self.mt.get_vertex_name(index)
            mtIndexToNames[index] = name
        return mtIndexToNames

    def getAssignmentContainerRoot(self, subsetIndex, read):
        outputDir = self.getParam("output_directory")
        filePrefix = self.getParam("subset_assignment_output_file")
        assignmentFileName = os.path.join(outputDir, "%s%s" % (filePrefix, subsetIndex))
        print "creating hdf5 file with name %s" % assignmentFileName
        root = 0
        if (read == 1):
            root = RMF.open_hdf5_file(assignmentFileName)
        else:
            root = RMF.create_hdf5_file(assignmentFileName)
        return root

    ##########
    # Begin Cytoscape Methods
    ##########

    def writeVisualization(self):
        self.writeCytoscapeIgInput()
        self.writeCytoscapeJtInput()
        self.writeCytoscapeMtInput()
        self.writeCytoscapeScripts()
        self.writePymolData()

    def writeCytoscapeScripts(self):
        outputDir = self.getParam("output_directory")
        mTreeCytoscapeInput = self.getParam("mtree_cytoscape_input_file")
        mTreeCytoscapeAssignments = self.getParam("mtree_cytoscape_assignment_file")
        mTreeCytoscapeAtomChains = self.getParam("mtree_cytoscape_atom_chain_file")
        mTreeCytoscapeAtomSummary = self.getParam("mtree_cytoscape_atom_summary_file")
        mTreeCytoscapeScript = self.getParam("mtree_cytoscape_script")
        mTreeFh = open(os.path.join(outputDir, mTreeCytoscapeScript), 'w')
        mTreeFh.write("network import file=%s\n" % os.path.join(outputDir, mTreeCytoscapeInput))
        mTreeFh.write("node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAssignments))
        mTreeFh.write("node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAtomSummary))
        mTreeFh.write("node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAtomChains))
        mTreeFh.write("layout jgraph-tree\n")

    def getGraphStructure(self, graph, fileName, separator):
        # write the graph to a file
        graphLogWrite = open(fileName, 'w')
        graph.show(graphLogWrite)
        graphLogWrite.close()

        # read the file back and parse nodes and edges
        graphLogRead = open(fileName, 'r')
        nodeRe = re.compile('^(\d+)\[label\=\"\[*(.*?)\s*\]*\"')  # atom names
        separatorEscape = "\\" + separator
        edgeString = "^(\d+)\-%s(\d+)" % separatorEscape
        edgeRe = re.compile(edgeString)
        nodesToNodes = {}  # keys: source node; values: target nodes (tracks edges)
        nodesToNames = {}  # keys: node number; values: string parsed from the file
        for line in graphLogRead:
            # search for nodes
            node = nodeRe.search(line)
            if node:
                nodeNumber = node.group(1)
                atomString = node.group(2)
                nodesToNames[nodeNumber] = atomString
                continue
            # search for edges
            edge = edgeRe.search(line)
            if edge:
                firstNode = edge.group(1)
                secondNode = edge.group(2)
                firstNodeDict = {}
                if (firstNode in nodesToNodes):
                    firstNodeDict = nodesToNodes[firstNode]
                firstNodeDict[secondNode] = 1
                nodesToNodes[firstNode] = firstNodeDict
        return [nodesToNames, nodesToNodes]

    def writeEdgeFile(self, nodesToNodes, edgeFileName):
        # write the edge file
        outputDir = self.getParam("output_directory")
        graphInputFile = open(os.path.join(outputDir, edgeFileName), 'w')
        for firstNode in nodesToNodes.keys():
            nodeDict = nodesToNodes[firstNode]
            for secondNode in nodeDict.keys():
                graphInputFile.write("%s ttt %s\n" % (firstNode, secondNode))
        graphInputFile.close()

    def writeCytoscapeIgInput(self):
        outputDir = self.getParam("output_directory")
        igOutputFile = self.getParam("ig_output_file")
        [nodesToNames, nodesToNodes] = self.getGraphStructure(self.ig, os.path.join(outputDir, igOutputFile), "-")
        self.writeEdgeFile(nodesToNodes, self.getParam("ig_cytoscape_input_file"))

        # write residue numbers for each node
        igResiduesFile = self.getParam("ig_cytoscape_residues_file")
        peptideChain = self.getParam("peptide_chain")
        subsetResidueFile = open(os.path.join(outputDir, igResiduesFile), 'w')
        subsetResidueFile.write("ResidueNumber\n")
        for nodeNumber in nodesToNames.keys():
            nodeName = nodesToNames[nodeNumber]
            [nodeChain, residueNumber, nodeAtom] = atomicDominoUtilities.getAtomInfoFromName(nodeName)
            if (nodeChain == peptideChain):
                # peptide atom
                subsetResidueFile.write("%s = %s\n" % (nodeNumber, residueNumber))
            else:
                # for now just write an arbitrary number to designate a protein atom
                subsetResidueFile.write("%s = 100\n" % nodeNumber)

    def writeCytoscapeMtInput(self):
        outputDir = self.getParam("output_directory")
        mTreeOutputFile = self.getParam("mtree_output_file")
        [nodesToNames, nodesToNodes] = self.getGraphStructure(self.mt, os.path.join(outputDir, mTreeOutputFile), ">")
        self.writeEdgeFile(nodesToNodes, self.getParam("mtree_cytoscape_input_file"))
        self.writeNodeNameAttributes(nodesToNames, self.getParam("mtree_cytoscape_atom_name_file"), self.getParam("mtree_cytoscape_atom_summary_file"), self.getParam("mtree_cytoscape_atom_chain_file"))

    def writeCytoscapeJtInput(self):
        outputDir = self.getParam("output_directory")
        jTreeOutputFile = self.getParam("jtree_output_file")
        [nodesToNames, nodesToNodes] = self.getGraphStructure(self.jt, os.path.join(outputDir, jTreeOutputFile), "-")
        self.writeEdgeFile(nodesToNodes, self.getParam("jtree_cytoscape_input_file"))
        self.writeNodeNameAttributes(nodesToNames, self.getParam("jtree_cytoscape_atom_name_file"), self.getParam("jtree_cytoscape_atom_summary_file"), self.getParam("jtree_cytoscape_atom_chain_file"))

        # write edge weight file -- weights are the number of shared particles across nodes
        jtreeCytoscapeEdgeFile = self.getParam("jtree_cytoscape_edge_file")
        edgeWeightFile = open(os.path.join(outputDir, jtreeCytoscapeEdgeFile), 'w')
        edgeWeightFile.write("SubsetOverlap (class=Integer)\n")
        for firstNode in nodesToNodes.keys():
            nodeDict = nodesToNodes[firstNode]
            for secondNode in nodeDict.keys():
                firstNodeAtoms = nodesToNames[firstNode].split(" ")
                secondNodeAtoms = nodesToNames[secondNode].split(" ")
                intersection = [val for val in firstNodeAtoms if val in secondNodeAtoms]
                edgeWeightFile.write("%s (pp) %s = %s\n" % (firstNode, secondNode, len(intersection)))
        edgeWeightFile.close()

    def getAtomTypeCounts(self, atomNames):
        atomNames = atomNames.lstrip('[')
        atomNames = atomNames.rstrip(']')
        atomNames = atomNames.rstrip()
        atomList = atomNames.split(" ")  # atom names
        peptideAtomCount = 0
        proteinAtomCount = 0
        for atom in atomList:
            [chain, residue, atom] = atomicDominoUtilities.getAtomInfoFromName(atom)
            if (chain == self.getParam("peptide_chain")):
                peptideAtomCount += 1
            else:
                proteinAtomCount += 1
        return [peptideAtomCount, proteinAtomCount]

    def writeNodeNameAttributes(self, nodesToNames, atomNameFile, atomSummaryFile, atomChainFile):
        # write the attribute files (atom names for each node)
        outputDir = self.getParam("output_directory")
        subsetAtomNameFile = open(os.path.join(outputDir, atomNameFile), 'w')
        subsetAtomSummaryFile = open(os.path.join(outputDir, atomSummaryFile), 'w')
        subsetAtomChainFile = open(os.path.join(outputDir, atomChainFile), 'w')
        subsetAtomNameFile.write("Atom names\n")
        subsetAtomSummaryFile.write("Atom Summary\n")
        subsetAtomChainFile.write("Atom chain\n")
        for node in nodesToNames.keys():
            atomNames = nodesToNames[node]
            subsetAtomNameFile.write("%s = %s\n" % (node, atomNames))

            # number of protein and peptide atoms in each subset
            [peptideAtomCount, proteinAtomCount] = self.getAtomTypeCounts(atomNames)
            subsetAtomSummaryFile.write("%s = %sp %sl\n" % (node, proteinAtomCount, peptideAtomCount))

            # whether each subset has protein atoms, peptide atoms, or a mix of the two
            if (proteinAtomCount == 0):
                subsetAtomChainFile.write("%s = 1\n" % node)
            elif (peptideAtomCount == 0):
                subsetAtomChainFile.write("%s = 2\n" % node)
            else:
                subsetAtomChainFile.write("%s = 3\n" % node)

        subsetAtomChainFile.close()
        subsetAtomSummaryFile.close()
        subsetAtomNameFile.close()

    ##########
    # End Cytoscape Methods
    ##########
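
# ---------------------------------------------------------------------------
# Rough usage sketch (an assumption, not part of the original module): a driver
# script is expected to build the model and restraints elsewhere and then call
# into AtomicDomino in roughly this order. The file names, atomList,
# flexibleAtoms and startTime below are hypothetical placeholders.
#
#   import IMP
#   import IMP.atom
#   import atomicDomino
#
#   model = IMP.Model()
#   protein = IMP.atom.read_pdb("complex.pdb", model, IMP.atom.ATOMPDBSelector())
#   ad = atomicDomino.AtomicDomino(model, protein, "parameters.txt")
#   ad.loadDominoHelpers()
#   ad.createGrid()
#   # ... restraints plus MD/CG sampling are set up and run elsewhere ...
#   ad.readMdTrajectory(atomList, flexibleAtoms)
#   ad.readCgTrajectories(atomList, flexibleAtoms)
#   ad.createSubsets()
#   ad.createParticleStatesTable()
#   ad.createAllSubsetAssignments()
#   ad.createSampler()
#   ad.runDomino()
#   ad.writeOutput(flexibleAtoms, startTime)
# ---------------------------------------------------------------------------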