import sys import time import csv import logging log = logging.getLogger("DominoModel") import IMP import IMP.domino as domino import IMP.core as core import IMP.container as container import IMP.display as display import IMP.atom as atom import IMP.algebra as alg import IMP.em2d as em2d import IMP.base import IMP.multifit as multifit import IMP.em2d.imp_general.comparisons as comparisons import IMP.em2d.imp_general.alignments as alignments import IMP.em2d.imp_general.io as io import IMP.em2d.imp_general.representation as representation import IMP.em2d.restraints as restraints import IMP.em2d.buildxlinks as buildxlinks import IMP.em2d.solutions_io as solutions_io import IMP.em2d.csv_related as csv_related field_delim = "," # separate fields in the output text file unit_delim = "/" # separate units within a field (eg, reference frames). # It is used in output text files and in databse files class DominoModel: """ Management of a model using DOMINO """ def __init__(self, name="my model"): self.model = IMP.Model() self.model.set_name(name) self.configuration_sampling_done = False self.assignments_sampling_done = False self.rb_states_table = domino.ParticleStatesTable() self.merge_tree = None # measure_models is True only when doing benchmark self.measure_models = False # The first time that create_coarse is found and its value is True # a coarse version of the assembly is built self.create_coarse = True self.restraints = dict() self.xlinks_dict = buildxlinks.XlinksDict() def add_restraint(self, r, name, weight, max_score=False): """ Adds a restraint to the model @param r An IMP.Restraint object @param name Name for the restraint @param weight Weight for the restraint @param max_score Maximum score allowed for the restraint. If max_score is False, there is no limit for the value of the restraint """ log.info("Adding restraint %s weight %s max_score %s", name, weight, max_score) if "-" in name: raise ValueError("SQL database does not accept restraint names "\ "containing -") r.set_name(name) if max_score != None and max_score != False: r.set_maximum_score(max_score) r.set_weight(weight) self.restraints[name] = r self.model.add_restraint(r) def align_rigid_bodies_states(self): """ Aligns the set of structures considered for DOMINO sampling. The function: 1) reads the possible reference frames that the rb_states_table stores for each rigid body. This table must be filled before using this function. Usually it is filled with the results from a previous Monte Carlo sampling. If the solutions from Monte Carlo seem to have the same structure but they are not aligned to each other, this function can help setting better starting positions to use with DOMINO. 2) Gets the first state for each of the rigid bodies and sets a reference structure using such states. 3) Aligns all the rest of the structures respect to the reference 4) Replaces the values of the reference frames stored in the rb_states_table with the new values obtained from the alignments. It does it for all states of a rigid body. @note: If this function is applied, the parameters "anchor" and "fixed" are ignored, as they are superseded by the use of the aligments calculated here. """ log.debug("Align the configurations read from the database before " \ "running DOMINO") rb_states = [] n_states = [] for rb in self.components_rbs: ps = self.rb_states_table.get_particle_states(rb) n = ps.get_number_of_particle_states() if n == 0: raise ValueError("There are no particle states for %s" % rb.get_name()) n_states.append(n) rb_states.append(ps) # coordinates of the first configuration (there is at least one for all the rbs) for rb, states in zip(self.components_rbs, rb_states): states.load_particle_state(0, rb) reference_coords = get_coordinates(self.components_rbs) aligned_transformations = [[] for rb in self.components_rbs] for i in range(1, max(n_states)): log.debug("Aligning rigid bodies configuration %s" % i) # load the configuration for j in range(len(self.components_rbs)): # if there are no more particle states for this rigid body, use the last state_index = min(i,n_states[j] - 1) rb_states[j].load_particle_state(state_index, self.components_rbs[j]) coords = get_coordinates(self.components_rbs) T = alg.get_transformation_aligning_first_to_second(coords, reference_coords) for j, rb in enumerate(self.components_rbs): t = rb.get_reference_frame().get_transformation_to() new_t = alg.compose(T, t) aligned_transformations[j].append(new_t) # set the new possible transformations for i, rb in enumerate(self.components_rbs): self.set_rb_states(rb, aligned_transformations[i]) def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2, distance, weight, stddev, max_score=False): """ Set a restraint on the maximum distance between 2 residues @param id1 Name of the first component @param residue1 Residue number for the aminoacid in the first component.The number is the number in the PDB file, not the number relative to the beginning of the chain @param id2 Name of the second component @param residue2 Residue number for the aminoacid in the second component. @param distance Maximum distance tolerated @param weight Weight of the restraint @param stddev Standard deviation used to approximate the HarmonicUpperBound function to a Gaussian @param max_score See help for add_restraint(). If none is given, the maximum score is set to allow a maximum distance of 10 Angstrom greater than the parameter "distance". """ xlink = buildxlinks.Xlink(id1, chain1, residue1, id2, chain2, residue2, distance) log.info("Setting cross-linking restraint ") log.info("%s", xlink.show()) self.xlinks_dict.add(xlink) # setup restraint A = representation.get_component(self.assembly, xlink.first_id) s1=IMP.atom.Selection(A, chain=xlink.first_chain, residue_index=xlink.first_residue) p1 = s1.get_selected_particles()[0] B = representation.get_component(self.assembly, xlink.second_id) s2=IMP.atom.Selection(B, chain=xlink.second_chain, residue_index=xlink.second_residue) p2 = s2.get_selected_particles()[0] k = core.Harmonic.get_k_from_standard_deviation(stddev) score = core.HarmonicUpperBound(xlink.distance, k) pair_score = IMP.core.DistancePairScore(score) r = IMP.core.PairRestraint(pair_score, IMP.ParticlePair(p1, p2)) if not max_score: error_distance_allowed = 100 max_score = weight * score.evaluate(distance + error_distance_allowed) log.info("%s, max_score %s", xlink.show(), max_score) self.add_restraint(r, xlink.get_name(), weight, max_score) def set_complementarity_restraint(self, name1, name2, rname, max_sep_distance, max_penetration, weight,max_score=1e10): """ Set a restraint for geometric complementarity between 2 components @param name1 name of @param name2 - The restraint is applied to this components - rname - The name given to the restraint - max_sep_distance - maximum distance between molecules tolerated by the restraint - max_penetration - Maximum penetration allowd (angstrom) """ log.info("Setting geometric complementarity restraint %s: %s - %s", rname, name1, name2) A = representation.get_component(self.assembly, name1) B = representation.get_component(self.assembly, name2) restraint = multifit.ComplementarityRestraint(atom.get_leaves(A), atom.get_leaves(B), rname) restraint.set_maximum_separation(max_sep_distance) restraint.set_maximum_penetration(max_penetration) log.debug("Maximum separation: %s Maximum penetration score: %s", max_sep_distance, max_penetration) self.add_restraint(restraint, rname, weight, max_score) def create_coarse_assembly(self, n_residues, write=False): """ Simplify the assembly with a coarse representation @param n_residues Number of residues used for a coarse particle @param write If True, write the coarse assembly to a format that Chimera can display """ if self.create_coarse: log.info("Creating simplified assembly") self.coarse_assembly = \ representation.create_simplified_assembly(self.assembly, self.components_rbs, n_residues) if write: io.write_hierarchy_to_chimera(self.assembly, "coarse_assembly.py") self.create_coarse = False def do_sampling(self, mode="assignments_heap_container", params=None): """ Do Domino sampling @param mode The mode used to recover solutions from domino. It can have the value "configuration", "assignments", or "assignments_heap_container". The mode "configuration" recovers all possible configurations (takes a while to generate them). The mode "assignments" only provides the assignments (state numbers) of the solutions. It is faster but requires generating the solutions afterwards The mode "assignments_heap_container" selects the best solutions after ecah merging in DOMINO, discarding the rest. In practice I used the mode "assignments_heap_container" """ t0 = time.time() if mode == "configuration": self.configuration_set = self.sampler.get_sample() tf = time.time() log.info("found %s configurations. Time %s", self.configuration_set.get_number_of_configurations(), tf-t0) self.configuration_sampling_done = True elif mode == "assignments": subset = self.rb_states_table.get_subset() log.info("Do sampling with assignments. Subset has %s elements: %s", len(subset), subset) self.solution_assignments = \ self.sampler.get_sample_assignments(subset) tf = time.time() log.info("found %s assignments. Time %s", len(self.solution_assignments), tf-t0) self.assignments_sampling_done = True elif mode == "assignments_heap_container": subset = self.rb_states_table.get_subset() log.info("DOMINO sampling an assignments_heap_container. "\ "Subset has %s elements: %s", len(subset), subset) # last vertex is the root of the merge tree root = self.merge_tree.get_vertices()[-1] container = self.get_assignments_with_heap(root, params.heap_solutions) self.solution_assignments = container.get_assignments() tf = time.time() log.info("found %s assignments. Time %s", len(self.solution_assignments), tf-t0) self.assignments_sampling_done = True else: raise ValueError("Requested wrong mode for the DOMINO sampling") def get_assignment_text(self, assignment, subset): """ Get the formatted text for an assignment @param subset Subset of components of the assembly @param assignment Assignment class with the states for the subset @note: The order in assignment and subset is not always the order of the rigid bodies in self.components_rbs. The function reorders the terms in the assignment so there is correspondence. """ ordered_assignment = [] for rb in self.components_rbs: for i, particle in enumerate(subset): if rb.get_particle() == particle: ordered_assignment.append( assignment[i]) text = "|".join([str(i) for i in ordered_assignment]) return text def get_assignment_and_RFs(self, assignment, subset): """ Return a line with texts for an assignment and the the reference frames of the RigidBodies in the subset. @param subset Subset of components of the assembly @param assignment Assignment class with the states for the subset @note: see the get_assignment_assignment_text() note. """ ordered_assignment = [] RFs = [] for rb in self.components_rbs: for i, particle in enumerate(subset): if rb.get_particle() == particle: j = assignment[i] ordered_assignment.append(j) pstates = self.rb_states_table.get_particle_states(rb) pstates.load_particle_state(j, rb.get_particle()) RFs.append(rb.get_reference_frame()) RFs_text = unit_delim.join( [io.ReferenceFrameToText(ref).get_text() for ref in RFs]) assignment_numbers = "|".join([str(i) for i in ordered_assignment]) return [assignment_numbers, RFs_text] def get_assignment_info(self, assignment, subset): """ Info related to an assignment. Returns a list with text for the assignment and all the scores for the restraints @param subset Subset of components of the assembly @param assignment Assignment class with the states for the subset """ text = self.get_assignment_and_RFs(assignment, subset) scores, total_score = self.get_assignment_scores(assignment, subset) info = text + [total_score] + scores return info def get_assignment_scores(self, assignment, subset): """ Returns a list with the values for the restraints on a subset of the components of the assembly @param subset Subset of components of the assembly @param assignment Assignment class with the states for the subset """ restraints_to_evaluate = \ self.restraint_cache.get_restraints(subset, []) scores = [self.restraint_cache.get_score(r, subset, assignment) for r in restraints_to_evaluate] total_score = sum(scores) return scores, total_score def load_state(self, assignment): """ Load the positions of the components given by the assignment @param assignment Assignment class """ subset = self.rb_states_table.get_subset() domino.load_particle_states(subset, assignment, self.rb_states_table) def get_assignments_with_heap(self, vertex, k=0): """ Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the best k scores for each vertex of the tree. @param vertex Vertex with the root of the current merge tree. This function is recursive. """ if(self.sampler.get_number_of_subset_filter_tables() == 0 ): raise ValueError("No subset filter tables") if(self.merge_tree == None): raise ValueError("No merge tree") # In the merge tree, the names of the vertices are the subsets. # The type of the vertex name is a domino.Subset subset = self.merge_tree.get_vertex_name(vertex) log.info("computing assignments for vertex %s",subset) t0 = time.time() assignments_container = domino.HeapAssignmentContainer(subset, k, self.restraint_cache, "my_heap_assignments_container" ) neighbors = self.merge_tree.get_out_neighbors(vertex) if len(neighbors) == 0: # A leaf # Fill the container with the assignments for the leaf self.sampler.load_vertex_assignments(vertex, assignments_container) else: if neighbors[0] > neighbors[1]: # get_vertex_assignments() methods in domino # expects the children in sorted order neighbors =[neighbors[1], neighbors[0]] # recurse on the two children container_child0 = self.get_assignments_with_heap(neighbors[0], k) container_child1 = self.get_assignments_with_heap(neighbors[1], k) self.sampler.load_vertex_assignments(vertex, container_child0, container_child1, assignments_container) tf = time.time() - t0 log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset, assignments_container.get_number_of_assignments(), tf) return assignments_container def get_restraint_value_for_assignment(self, assignment, name): """ Recover the value of a restraint @param name of the restraint @param assignment Assignment class containing the assignment for the solution desired """ if not self.assignments_sampling_done: raise ValueError("DominoModel: sampling not done") log.debug("Computing restraint value for assignment %s",assignment) self.load_state(assignment) for rb in self.components_rbs: log.debug("rb - %s", rb.get_reference_frame().get_transformation_to()) val = self.restraints[name].evaulate(False) log.debug("restraint %s = %s",restraint.get_name(), val) return val def set_native_assembly_for_benchmark(self, params): """ Sets the native model for benchmark, by reading the native structure and set the rigid bodies. @param fn_pdb_native PDB with the native structure @param anchored List of True/False values indicating if the components of the assembly are anchored. The function sets the FIRST anchored component in the (0,0,0) coordinate and moves the other components with the same translation """ self.measure_models = True self.native_model = IMP.Model() if hasattr(params.benchmark, "fn_pdb_native"): self.native_assembly = \ representation.create_assembly_from_pdb(self.native_model, params.benchmark.fn_pdb_native, params.names) elif hasattr(params.benchmark, "fn_pdbs_native"): self.native_assembly = \ representation.create_assembly(self.native_model, params.benchmark.fn_pdbs_native, params.names) else: raise ValueError("set_native_assembly_for_benchmark: Requested " \ "to set a native assembly for benchmark but did not provide " \ "its pdb, or the pdbs of the components" ) self.native_rbs = representation.create_rigid_bodies(self.native_assembly) anchor_assembly(self.native_rbs, params.anchor) def set_rb_states(self, rb, transformations): """ Add a set of reference frames as possible states for a rigid body @param rb The rigid body @param transformations Transformations are used to build the reference frames for the rigid body. """ log.info("Set rigid body states for %s",rb.get_name()) RFs = [ alg.ReferenceFrame3D(T) for T in transformations ] for st in RFs: log.debug("%s",st) rb_states = domino.RigidBodyStates(RFs) self.rb_states_table.set_particle_states(rb, rb_states) st = self.rb_states_table.get_particle_states(rb) log.info("RigidBodyStates created %s", st.get_number_of_particle_states()) def add_restraint_score_filter_table(self): """ Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score worse than the thresholds for the restraints """ log.info("Adding RestraintScoreSubsetFilterTable") # Restraint Cache is a centralized container of the restraints and # their scores self.restraint_cache = domino.RestraintCache(self.rb_states_table) self.restraint_cache.add_restraints(self.model.get_restraints()) rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache) rssft.set_name('myRestraintScoreSubsetFilterTable') self.sampler.add_subset_filter_table(rssft) def set_assembly_components(self, fn_pdbs, names): """ Sets the components of an assembly, each one given as a separate PDB file. @param fn_pdbs List with the names of the pdb files @param names List with the names of the components of the assembly. """ if len(fn_pdbs) != len(names): raise ValueError("Each PDB file needs a name") self.names = names self.assembly = representation.create_assembly(self.model, fn_pdbs, names) self.components_rbs = representation.create_rigid_bodies(self.assembly) self.not_optimized_rbs = [] def set_assembly(self, fn_pdb, names): """ Sets an assembly from a single PDB file. The function assumes that the components of the assembly are the chains of the PDB file. @param fn_pdb PDB with the file for the asembly @param names Names for each of the components (chains) of the asembly """ self.assembly = representation.create_assembly_from_pdb(self.model, fn_pdb, names) self.components_rbs = representation.create_rigid_bodies(self.assembly) self.not_optimized_rbs = [] def add_branch_and_bound_assignments_table(self): """ BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the subset filter tables """ log.info("Setting BranchAndBoundAssignmentsTable") fts = [] for i in range(self.sampler.get_number_of_subset_filter_tables()): ft = self.sampler.get_subset_filter_table(i) fts.append(ft) at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts) self.sampler.set_assignments_table(at) self.assignments_table = at def add_exclusion_filter_table(self): """ ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time """ log.info("Setting ExclusionSubsetFilterTable") ft = domino.ExclusionSubsetFilterTable(self.rb_states_table) self.sampler.add_subset_filter_table(ft) def setup_domino_sampler(self): """ Creates a DOMINO sampler and adds the required filter tables """ if self.merge_tree == None: raise ValueError("Merge tree for DOMINO not set. It is required " \ "to setup the sampler") log.info("Domino sampler") self.sampler = domino.DominoSampler(self.model, self.rb_states_table) self.sampler.set_log_level(IMP.base.TERSE) self.sampler.set_merge_tree(self.merge_tree) self.add_exclusion_filter_table() self.add_restraint_score_filter_table() self.add_branch_and_bound_assignments_table() def read_merge_tree(self, fn): """ Read and create a merge tree from a file. The format of the file is the format written by write merge_tree() It does not matter the order of the indices in the file, as they are sorted by this function. The only condition is that the index for the vertex that is the root of the tree MUST be the largest. This is guaranteed when creating a merge tree with create_merge_tree() @param fn File containing the merge tree """ log.info("Reading merge tree from %s", fn) ps = self.rb_states_table.get_particles() log.debug("particles") for p in ps: log.debug("%s", p.get_name()) rows = csv_related.read_csv(fn, delimiter = field_delim) for i in range(len(rows)): row = rows[i] rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]] # Sort rows by vertice index rows.sort() subsets = [] # build subsets from the rows text for row in rows: names = row[3].split(unit_delim) log.debug("row %s names %s", row, names) # get particles in the subset particles = [] for name in names: l = filter(lambda p: p.get_name() == name, ps) if (len(l) > 1): ValueError("More than one particle with same name" % names) particles.extend(l) s = domino.Subset(particles) subset_names = [p.get_name() for p in particles] log.debug("Merge tree Subset %s. Particles %s ",s, subset_names) subsets.append(s) # The vertex with the largest index is the root. # trick: get the merge tree object from a tree with only one node ... jt = domino.SubsetGraph() jt.add_vertex(subsets[0]) mt = domino.get_merge_tree(jt) # ... and add the rest of the vertices for i in range(1,len(subsets)): mt.add_vertex(subsets[i]) # the name of the vertex is a subset # set edges for row in rows: vid = row[0] child_left = row[1] child_right = row[2] if child_left != -1: mt.add_edge(vid, child_left) if child_right != -1: mt.add_edge(vid, child_right) self.merge_tree = mt log.info("%s",self.merge_tree.show_graphviz() ) def create_merge_tree(self): """ Creates a merge tree from the restraints that are set already """ rs = self.model.get_restraints() ig = domino.get_interaction_graph(rs, self.rb_states_table) # pruned_dep = IMP.get_pruned_dependency_graph(self.model) # IMP.base.show_graphviz(pruned_dep) # IMP.base.show_graphviz(ig) jt = domino.get_junction_tree(ig) # IMP.base.show_graphviz(jt) self.merge_tree = domino.get_balanced_merge_tree(jt) # IMP.base.show_graphviz(self.merge_tree) log.info("Balanced merge tree created") log.info("%s",self.merge_tree.show_graphviz() ) def write_merge_tree(self, fn): """ Writes the current merge tree to file. The format is: parent | child_left | child_right | labels @param fn File for storing the merge tree """ log.info("Writing merge to %s", fn) if self.merge_tree == None: raise ValueError("No merge tree") f = open(fn, "w") f.write( "# Vertex index | Child0 | Child1 | label (names of the particles)\n") w = csv.writer(f, delimiter = field_delim) root = self.merge_tree.get_vertices()[-1] self.write_subset(root, w) f.close() def write_subset(self, v, w): """ Writes lines describing a subset (vertex of a merge tree) @param w A csv.writer @param v Vertex index """ no_child_index = -1 subset = self.merge_tree.get_vertex_name(v) names = [p.get_name() for p in subset] reps = filter(lambda x: names.count(x) > 1, names) # repeated names if(len(reps)): raise ValueError("The names of the "\ "particles in the subset %s are not unique" % subset) names = unit_delim.join(names) neighbors = self.merge_tree.get_out_neighbors(v) if len(neighbors) == 0: # A leaf # Fill the container with the assignments for the leaf w.writerow([v,no_child_index,no_child_index, names]) else: if neighbors[0] > neighbors[1]: neighbors =[neighbors[1], neighbors[0]] w.writerow([v,neighbors[0],neighbors[1], names]) self.write_subset(neighbors[0], w) self.write_subset(neighbors[1], w) def set_connectivity_restraint(self, names, rname, distance=10, weight=1.0, max_score=None, n_pairs=1, stddev=2): """ Set a connectivity restraint between the elements of the assembly @param names List with all the elements to be connected @param distance Maximum distance tolerated by the restraint @param rname Name for the restraint @param weight Weight for the restraint @param max_score Maximum score for the restraint @param n_pairs Number of pairs of particles used in the KClosePairScore of the connecitivity restraint @param stddev Standard deviation used to approximate the HarmonicUpperBound function to a Gaussian """ components = [] for name in names: c = representation.get_component(self.coarse_assembly,name) components.append(c) log.info("Connectivity restraint for %s",components) spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev) if max_score == None: max_score = weight * spring_constant * n_pairs * (stddev)**2 r = restraints.get_connectivity_restraint(components, distance, n_pairs, spring_constant) self.add_restraint(r, rname, weight, max_score) def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution, n_projections, weight, max_score=False, mode="fast", n_optimized=1): """ Set a Restraint computing the em2d score. @param name Name for the restraint @param fn_images_sel Selection file with the names of the images used for the restraint @param pixel_size - pixel size in the images @param resolution - resolution used to downsample the projections of the model when projecting @param weight Weight of the restraint @param max_score - Maximum value tolerated for the restraint @param n_projections - Number of projections to generate when projecting the model to do the coarse alignments @param mode - Mode used for computing the restraints. @param n_optimized - number of results from the coarse alignment that are refined with the Simplex algorithm to get a more accurate value for the restraint """ log.info("Adding a em2D restraint: %s", fn_images_sel) restraint_params = em2d.Em2DRestraintParameters(pixel_size, resolution, n_projections) r = restraints.get_em2d_restraint( self.assembly, fn_images_sel, restraint_params, mode, n_optimized) self.add_restraint(r, name, weight, max_score) def set_not_optimized(self, name): """ Set a part of the model as not optimized (it does not move during the model optimization) @param Name of the component to optimized """ if name not in self.names: raise ValueError("DominoModel: There is not component " \ "in the assembly with this name") rb_name = representation.get_rb_name(name) self.not_optimized_rbs.append(rb_name) def set_close_pairs_excluded_volume_restraint(self, distance=10, weight=1.0, ratio=0.05, stddev=2, max_score=False ): """ Creates an excluded volume restraint between all the components of the coarse assembly.See help for @param distance Maximum distance tolerated between particles @param weight Weight for the restraint @param max_score Maximum value for the restraint. If the parameter is None, an automatic value is computed (using the ratio). @param ratio Fraction of the number of possible pairs of particles that are tolerated to overlap. The maximum score is modified according to this ratio. I have found that ratios of 0.05-0.1 work well allowing for some interpenetration @param stddev Standard deviation used to approximate the HarmonicUpperBound function to a Gaussian """ k = core.Harmonic.get_k_from_standard_deviation(stddev) for i, c1 in enumerate(self.coarse_assembly.get_children()): for j, c2 in enumerate(self.coarse_assembly.get_children()): if j > i: name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name()) ls1 = atom.get_leaves(c1) ls2 = atom.get_leaves(c2) possible_pairs = len(ls1) * len(ls2) n_pairs = possible_pairs * ratio marker1 = IMP.Particle(self.model, "marker1 " + name) marker2 = IMP.Particle(self.model, "marker2 " + name) table_refiner = core.TableRefiner() table_refiner.add_particle(marker1, ls1) table_refiner.add_particle(marker2, ls2) score = core.HarmonicLowerBound(distance, k) pair_score = core.SphereDistancePairScore(score) close_pair_score = core.ClosePairsPairScore(pair_score, table_refiner, distance) r = core.PairRestraint(close_pair_score, IMP.ParticlePair(marker1,marker2)) if not max_score: minimum_distance_allowed = 0 max_score = weight * n_pairs * \ score.evaluate(minimum_distance_allowed) log.debug("Setting close_pairs_excluded_volume_restraint(), " \ "max_score %s", max_score) self.add_restraint(r, name, weight, max_score) def set_pair_score_restraint(self, name1, name2, restraint_name, distance=10, weight=1.0, n_pairs = 1, stddev=2, max_score=None): """ Set a pair_score restraint between the coarse representation of two components @param name1 Name of the first component @param name2 Name of the second component @param restraint_name Name for the restraint @param distance Maximum distance tolerated between particles @param weight. Weight of the restraint @param max_score Maximum value tolerated for the restraint @param stddev Standard deviation used to approximate the HarmonicUpperBound function to a Gaussian """ table_refiner = core.TableRefiner() # The particles in A are attached to a marker particle via a refiner. # When the refiner gets a request for marker1, it returns the attached # particles A = representation.get_component(self.coarse_assembly, name1) marker1 = IMP.Particle(self.model, "marker1 "+restraint_name) table_refiner.add_particle(marker1, atom.get_leaves(A)) # same for B B = representation.get_component(self.coarse_assembly, name2) marker2 = IMP.Particle(self.model, "marker2 "+restraint_name) table_refiner.add_particle(marker2, atom.get_leaves(B)) k = core.Harmonic.get_k_from_standard_deviation(stddev) score = core.HarmonicUpperBoundSphereDistancePairScore(distance, k) # The score is set for the n_pairs closest pairs of particles pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs) # When KClosePairsPairScore is called, the refiner will provide the # particles for A and B if not max_score: # Build a maximum score based on the function type that is used, # an HarmonicUpperBound temp_score = core.HarmonicUpperBound(distance, k) error_distance_allowed = 10 max_score = weight * n_pairs * \ temp_score.evaluate(distance + error_distance_allowed) log.info("Setting pair score restraint for %s %s. k = %s, max_score " \ "= %s, stddev %s", name1, name2, k, max_score,stddev) r = core.PairRestraint(pair_score, IMP.ParticlePair( marker1, marker2 ) ) self.add_restraint(r, restraint_name, weight, max_score) def write_solutions_database(self, fn_database, max_number=None): """ Write the results of the DOMINO sampling to a SQLite database. @param max_number Maximum number of results to write """ log.info("Creating the database of solutions") if self.measure_models == True and not hasattr(self, "native_model"): raise ValueError("The native model has not been set") db = solutions_io.ResultsDB() db.create(fn_database, overwrite=True) db.connect(fn_database) subset = self.rb_states_table.get_subset() restraints_names = self.get_restraints_names_used(subset) db.add_results_table(restraints_names, self.measure_models) n = len(self.solution_assignments) if max_number != None: n = min(n, max_number) for sol_id, assignment in enumerate(self.solution_assignments[0:n]): txt = self.get_assignment_text(assignment, subset) # load the positions of the components in the state self.load_state(assignment) RFs = [rb.get_reference_frame() for rb in self.components_rbs] scores, total_score = self.get_assignment_scores(assignment, subset) measures = None if(self.measure_models): measures = measure_model(self.assembly, self.native_assembly, self.components_rbs, self.native_rbs) db.add_record(sol_id, txt, RFs, total_score, scores, measures) # Add native state if doing benchmarking if self.measure_models: assignment = "native" RFs = [rb.get_reference_frame() for rb in self.native_rbs] scores, total_score = self.get_restraints_for_native( restraints_names) db.add_native_record(assignment, RFs, total_score, scores) db.save_records() db.close() def get_restraints_names_used(self, subset): """ Get the names of the restraints used for a subset @param subset Subset of components of the assembly """ return [r.get_name() for r in self.restraint_cache.get_restraints(subset, [])] def get_restraints_for_native(self, restraints_names): """ Get values of the restraints for the native structure @param restraints_names Names of the restraints requested @return a list with the values of all scores, and the total score """ saved = [rb.get_reference_frame() for rb in self.components_rbs] # Set the state of the model to native for rb,rn in zip(self.components_rbs, self.native_rbs): # remove sub-rigid bodies rb_members = filter( lambda m: not core.RigidBody.particle_is_instance( m.get_particle()), rb.get_members()) rn_members = filter( lambda m: not core.RigidBody.particle_is_instance( m.get_particle()), rn.get_members()) rb_coords = [m.get_coordinates() for m in rb_members] rn_coords = [m.get_coordinates() for m in rn_members] # align and put rb in the position of rn if len(rn_coords) != len(rb_coords) : raise ValueError("Mismatch in the number of members. " "Reference %d Aligned %d " % (len(rn_coords), len(rb_coords))) T = alg.get_transformation_aligning_first_to_second(rb_coords, rn_coords) t = rb.get_reference_frame().get_transformation_to() new_t = alg.compose(T, t) rb.set_reference_frame(alg.ReferenceFrame3D(new_t)) scores = [] for name in restraints_names: scores.append( self.restraints[name].evaluate(False)) total_score = sum(scores) representation.set_reference_frames(self.components_rbs, saved) return scores, total_score def write_pdb_for_assignment(self, assignment, fn_pdb): """ Write the solution in the assignment @param assignment Assignment class with the states for the subset @param fn_pdb File to write the model """ if not self.assignments_sampling_done: raise ValueError("DominoModel: sampling not done") log.info("Writting PDB %s for assignment %s",fn_pdb, assignment) self.load_state(assignment) atom.write_pdb(self.assembly, fn_pdb) def write_pdb_for_configuration(self, n, fn_pdb): """ Write a file with a configuration for the model (configuration here means a configuration in DOMINO) @param n Index of the configuration desired. """ if not self.configuration_sampling_done: raise ValueError("DominoModel: sampling not done") log.debug("Writting PDB %s for configuration %s", fn_pdb, n) configuration_set.load_configuration(n) atom.write_pdb(self.assembly, fn_pdb) def write_pdb_for_reference_frames(self,RFs, fn_pdb): """ Write a PDB file with a solution given by a set of reference frames @param RFs Reference frames for the elements of the complex @param fn_pdb File to write the solution """ log.debug("Writting PDB %s for reference frames %s",fn_pdb, RFs) representation.set_reference_frames(self.components_rbs,RFs) atom.write_pdb(self.assembly, fn_pdb) def write_pdbs_for_reference_frames(self,RFs, fn_base): """ Write a separate PDB for each of the elements @param RFs Reference frames for the elements of the complex @param fn_base base string to buid the names of the PDBs files """ log.debug("Writting PDBs with basename %s",fn_base) representation.set_reference_frames(self.components_rbs,RFs) for i,ch in enumerate(self.assembly.get_children()): atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i ) def write_pdb_for_component(self, component_index, ref, fn_pdb): """ Write one component of the assembly @param component_index Position of the component in the assembly @param ref Reference frame of the component @param fn_pdb Output PDB """ ("Writting PDB %s for component %s",fn_pdb, component_index) self.components_rbs[component_index].set_reference_frame(ref) hierarchy_component = self.assembly.get_child(component_index) atom.write_pdb(hierarchy_component, fn_pdb) def write_monte_carlo_solution(self, fn_database): """ Write the solution of a MonteCarlo run @param fn_database Database file with the solution. The database will contain only one record """ total_score = 0 rnames = [] scores = [] for i in range(self.model.get_number_of_restraints()): r = self.model.get_restraint(i) score = r.evaluate(False) rnames.append(r.get_name()) scores.append(score) total_score += score db = solutions_io.ResultsDB() db.create(fn_database, overwrite=True) db.connect(fn_database) db.add_results_table(rnames, self.measure_models) RFs = [ rb.get_reference_frame() for rb in self.components_rbs] measures = None if(self.measure_models): measures = measure_model(self.assembly, self.native_assembly, self.components_rbs, self.native_rbs) solution_id = 0 db.add_record(solution_id, "", RFs, total_score, scores, measures) db.save_records() db.close() def print_restraints_values(model): print "Restraints: Name, weight, value, maximum_value" total_score = 0 for i in range(model.get_number_of_restraints()): r = model.get_restraint(i) score = r.evaluate(False) # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(), # score, r.get_maximum_score()) print "%20s %18f %18f" % (r.get_name(), r.get_weight(), score) total_score += score print "total_score:",total_score def anchor_assembly(components_rbs, anchored): """ "Anchor" a set of rigid bodies, by setting the position of one of them at the origin (0,0,0). @param components_rbs Rigid bodies of the components @param anchored - List of True/False values indicating if the components of the assembly are anchored. The function sets the FIRST anchored component in the (0,0,0) coordinate and moves the other components with the same translation. If all the values are False, the function does not do anything """ if True in anchored: anchored_rb = None for a, rb in zip(anchored, components_rbs) : if a: anchored_rb = rb break center = anchored_rb.get_coordinates() log.info("Anchoring assembly at (0,0,0)") for rb in components_rbs: T = rb.get_reference_frame().get_transformation_to() t = T.get_translation() R = T.get_rotation() log.debug("%s: Rerefence frame BEFORE %s", rb.get_name(),rb.get_reference_frame()) T2 = alg.Transformation3D(R, t - center) rb.set_reference_frame(alg.ReferenceFrame3D(T2)) log.debug("%s Rerefence frame AFTER %s", rb.get_name(),rb.get_reference_frame()) def measure_model(assembly, native_assembly, rbs, native_rbs): """ Get the drms, cdrms and crmsd for a model @param assembly Hierachy for an assembly @param native_assembly Hierarchy of the native structure @param rbs Rigid bodies for the components of the assembly @param native_rbs Rigid bodies for the components of the native structure @return cdrms, cdrms, crmsd - drms - typical drms measure - cdrms - drms for the centroids of the rigid bodies of the components of the assembly - crmsd - rmsd for the centroids """ log.debug("Measure model") drms = comparisons.get_drms_for_backbone(assembly, native_assembly) RFs = [rb.get_reference_frame() for rb in rbs] vs = [r.get_transformation_to().get_translation() for r in RFs] nRFs = [rb.get_reference_frame() for rb in native_rbs] nvs = [r.get_transformation_to().get_translation() for r in nRFs] cdrms = atom.get_drms(vs, nvs) crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs) log.debug("drms %s cdrms %s crmsd %s",drms,cdrms,crmsd) return [drms, cdrms, crmsd] def get_coordinates(rigid_bodies): """ Return a list of the coordinates of all the members of the rigid bodies """ if len(rigid_bodies) == 0: raise ValueError("get_coordinates: There are not rigid bodies to get coordinates from") coords = [] for rb in rigid_bodies: # remove sub-rigid bodies rb_members = filter( lambda m: not core.RigidBody.particle_is_instance( m.get_particle()), rb.get_members()) coords.extend([m.get_coordinates() for m in rb_members]) return coords