/** * \file proteomics_em_alignment_atomic.cpp * \brief align proteomics graph to em density map * * Copyright 2007-2013 IMP Inventors. All rights reserved. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include IMPMULTIFIT_BEGIN_NAMESPACE namespace { #if 0 Restraint * add_core_ev_restraint(atom::Hierarchy mh1, atom::Hierarchy mh2, const AlignmentParams ¶ms) { Model *mdl=mh1->get_model(); IMP_NEW(container::ListSingletonContainer,lsc,(mdl)); atom::AtomTypes atom_types; atom_types.push_back(atom::AtomType("CA")); //make a container of ca atoms of the first molecule atom::Hierarchies mh1_res=atom::get_by_type(mh1,atom::RESIDUE_TYPE); atom::Selection s1(mh1_res); s1.set_atom_types(atom_types); lsc->add_particles(s1.get_selected_particles()); //make a container of ca atoms of the second molecule atom::Hierarchies mh2_res=atom::get_by_type(mh2,atom::RESIDUE_TYPE); atom::Selection s2(mh2_res); s2.set_atom_types(atom_types); lsc->add_particles(s2.get_selected_particles()); float slack=params.get_ev_params().pair_slack_; float k=params.get_ev_params().hlb_k_; IMP_NEW(core::ExcludedVolumeRestraint,evr,(lsc,k,slack)); return evr.release(); } #endif #if 0 Restraint * add_ev_restraint(atom::Hierarchy mh1, atom::Hierarchy mh2, const AlignmentParams ¶ms) { Model *mdl=mh1->get_model(); atom::AtomTypes atom_types; atom_types.push_back(atom::AtomType("CA")); //make a container of ca atoms of the first molecule atom::Hierarchies mh1_res=atom::get_by_type(mh1,atom::RESIDUE_TYPE); atom::Selection s1(mh1_res); s1.set_atom_types(atom_types); IMP_NEW(container::ListSingletonContainer,lsc1,(mdl)); lsc1->add_particles(s1.get_selected_particles()); //make a container of ca atoms of the second molecule atom::Hierarchies mh2_res=atom::get_by_type(mh2,atom::RESIDUE_TYPE); atom::Selection s2(mh2_res); s2.set_atom_types(atom_types); IMP_NEW(container::ListSingletonContainer,lsc2,(mdl)); lsc2->add_particles(s2.get_selected_particles()); std::cout<<"Number of particles for "<get_name()<<"is " <get_name()<<"is " <add_particles(s1.get_selected_particles()); lsc3->add_particles(s2.get_selected_particles()); IMP_NEW(container::ClosePairContainer,nbl, (lsc3,distance, slack)); //====== float mean=params.get_ev_params().hlb_mean_; float k=params.get_ev_params().hlb_k_; /*the score value is: distancemean: 0.5*k*(distance-mean)*(distance-mean) */ IMP_NEW(core::HarmonicLowerBound,h,(mean,k)); //distance is defined as the distance between the surfaces of the spheres IMP_NEW(core::SphereDistancePairScore,sd,(h)); //use the lower bound on the inter-sphere distance to push the spheres apart IMP_NEW(container::PairsRestraint,nbr,(sd,nbl)); return nbr.release(); } #endif std::string get_pair_key(int ind1,int ind2) { std::stringstream ss; ss<::adjacency_iterator NeighborIterator; std::cout<<"======== 1"<::const_type SubsetMap; std::cout<<"======== 2"< be = boost::adjacent_vertices(vertex_ind, jt); std::cout<<"======== 4"<load_vertex_assignments(vertex_ind,hac); } else { int firsti=*be.first; int secondi=*(++be.first); if (firsti>secondi) { int temp=firsti; firsti=secondi; secondi=temp; } //recurse on the two children Pointer a0= get_assignments(jt,firsti,ds,k,rc,rssft); Pointer a1= get_assignments(jt,secondi,ds,k,rc,rssft); if ((a0->get_number_of_assignments()==0) || (a1->get_number_of_assignments()==0)) { std::cout<<"========== For vertex "<< vertex_ind <<" one of the children has 0 assignments, returning " <<"empty container"<load_vertex_assignments(vertex_ind, a0, a1, hac); } std::cout<<"========== For vertex "<< vertex_ind <<" number of assignments "<get_number_of_assignments() <get_name()<get_number_of_proteins();i++) { IMP_LOG_TERSE("going to load molecule "<< asmb_data_->get_component_header(i)->get_filename()<<"\n"); // prot_data_.get_protein_filename(i)<<"|\n"; atom::Hierarchy mh = atom::read_pdb(asmb_data_->get_component_header(i)->get_filename(), mdl_,sel); mh->set_name(asmb_data_->get_component_header(i)->get_name()); mh->set_was_used(true); mhs_.push_back(mh); std::cout<<"create pdb"<set_name(mh->get_name()); rbs_[rbs_.size()-1]->add_attribute(fit_state_key_,-1); rbs_[rbs_.size()-1]->add_attribute(order_key_,i); } } } domino::ParticleStatesTable* ProteomicsEMAlignmentAtomic::set_particle_states_table( domino::SubsetFilterTables &/*filters*/) { IMP_NEW(domino::ParticleStatesTable,pst,()); for(int i=0;iget_number_of_proteins();i++){ IMP_LOG_TERSE( "working on protein:"<get_protein_name(i)<get_component_header(i) ->get_transformations_fn().c_str()); IMP_LOG_VERBOSE( "number of fitting solutions:"<get_protein_name(i)); IMP_LOG_VERBOSE( "number of relevant fits found:"<get_protein_name(i)<<" is "<< fit_inds.size() <set_particle_states(rbs_[i],rb_states); } return pst.release(); } RestraintsTemp ProteomicsEMAlignmentAtomic::get_alignment_restraints() const { RestraintsTemp ret; /* domino::Subset s(pst_->get_particles()); domino::Subsets ss; domino::RestraintScoreSubsetFilter *rssf= all_rs_filt_->get_subset_filter(s,ss); RestraintsTemp rs = rssf->get_restraints(); for(int i=0;i<(int)rs.size();i++){ RestraintSet *rset=dynamic_cast(rs[i]); if (rset) { for(Restraints::iterator it = rset->restraints_begin(); it != rset->restraints_end();it++) { ret.push_back(*it); } } else { ret.push_back(rs[i]); } }*/ return ret; } void ProteomicsEMAlignmentAtomic::show_scores_header(std::ostream& out) const { RestraintsTemp rs = get_alignment_restraints(); for(int i=0;i<(int)rs.size();i++){ out<get_name()<<"|"; } out<get_particles()); domino::Subsets ss; domino::RestraintScoreSubsetFilter *rssf =all_rs_filt_->get_subset_filter(s,ss); RestraintsTemp rs=get_alignment_restraints(); Floats scores = rssf->get_scores(a); for(int i=0;i<(int)scores.size();i++){ out<rs[i]->get_maximum_score()) { num_violations+=1; } } out<<"|"<set_was_used(true); std::cout<<"=============4"<set_merge_tree(mt);//remove for non interative ds->set_subset_filter_tables(filters_); std::cout<<"Number of filters:"<get_particles()); std::cout<<"Particles in subset:"<get_name()<DiscreteSampler::get_sample_assignments(s);//pst_->get_particles()); all_rs_filt_ = new domino::RestraintScoreSubsetFilterTable(mdl_,pst_); std::cout<<"=======2"<set_use_caching(true); std::cout<<"============3"<set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); std::cout<<"before get assignments"<get_number_of_restraints();i++) { mdl_rs.push_back(mdl_->get_restraint(i)); } rc_->add_restraints(mdl_rs); Pointer all= get_assignments( mt,boost::num_vertices(mt)-1, ds,params_.get_domino_params().heap_size_,rc_,all_rs_filt_); all->set_was_used(true); // not in the correct order domino::Assignments sampled_assignments_temp=all->get_assignments(); std::cout<<"number of found assignments:"<get_value(order_key_)]=sampled_assignments_temp[i][j]; } sampled_assignments_.push_back(domino::Assignment(vals)); } std::cout<<"Number of found assignments :"< prot_ind_to_particle_map; for(int i=0;iget_number_of_proteins();i++) { prot_ind_to_particle_map[ prot_data_->find(prot_data_->get_protein_name(i))]=mhs_[i]; } IMP_LOG_VERBOSE("going to set the states\n"); //set the states pst_ = set_particle_states_table(filters_); rc_=new domino::RestraintCache(pst_); IMP_LOG_VERBOSE("number pf particles in table:" <get_particles().size()<set_maximum_number_of_states(params_.get_domino_params() // .max_num_states_for_subset_); // s->set_log_level(IMP::VERBOSE); //set the restraints that will be used to generate the //subset graph //filters IMP_LOG_VERBOSE("settings filters\n"); // two particles cannot // be in the same state if they have the same ParticleStates, IMP_NEW(domino::ExclusionSubsetFilterTable,dist_filt,(pst_)); filters_.push_back(dist_filt); // IMP_NEW(domino::RestraintScoreSubsetFilterTable,rs_filt,(mdl_,pst_)); // filters_.push_back(rs_filt); states_set_=true;filters_set_=true; } void ProteomicsEMAlignmentAtomic::add_all_restraints(){ IMP_USAGE_CHECK(params_.get_fragments_params().frag_len_==1, "In atomic mode the fragment length can not be higher than one!\n"); //====== initialize the two restraint sets conn_rs_=new RestraintSet("connectivity"); mdl_->add_restraint(conn_rs_); conn_rs_with_filter_=new RestraintSet("connectivity_filtered"); mdl_->add_restraint(conn_rs_with_filter_); xlink_rs_=new RestraintSet("xlinks"); mdl_->add_restraint(xlink_rs_); xlink_rs_with_filter_=new RestraintSet("xlinks_filtered"); mdl_->add_restraint(xlink_rs_with_filter_); em_rs_=new RestraintSet("em"); mdl_->add_restraint(em_rs_); ev_rs_=new RestraintSet("ev"); mdl_->add_restraint(ev_rs_); dummy_rs_=new RestraintSet("dummy"); mdl_->add_restraint(dummy_rs_); //====== set proteins map std::map prot_ind_to_particle_map; for(int i=0;iget_number_of_proteins();i++) { prot_ind_to_particle_map[ prot_data_->find(prot_data_->get_protein_name(i))]=mhs_[i]; } //====== set the merge tree builder MergeTreeBuilder mtb(mhs_); //add connectivity restraints IMP_LOG_VERBOSE("setting connectivity restraints\n"); std::cout<<"Number of interactions:" <get_number_of_interactions()<get_number_of_interactions();i++) { //get all of the relevant rigid bodies Ints prot_inds=prot_data_->get_interaction(i); IMP_IF_LOG(VERBOSE) { IMP_LOG_VERBOSE("creating interaction bewteen:\n"); for( int ii=0;ii<(int)prot_inds.size();ii++){ IMP_LOG_VERBOSE(prot_inds[i]<<" "); } IMP_LOG_VERBOSE(std::endl); } std::cout<<"creating interaction bewteen:\n"; for( int ii=0;ii<(int)prot_inds.size();ii++){ std::cout<get_name(); } int k=1;//todo - make this a parameter!! Restraint *r = atom::create_connectivity_restraint(sel,k); if (r != NULL){ r->set_name(ss.str()); //only allow the particles to penetrate or separate by 1 angstrom std::cout<<"Max Score for the restraint:"<set_maximum_score( r, params_.get_connectivity_params().max_conn_rest_val_); if (prot_data_->get_interaction_part_of_filter(i)){ std::cout<<"Adding restraint "<get_name()<<" to conn with filter" <add_restraint(r); } else { std::cout<<"Adding restraint "<get_name() <<" to conn without filter"<add_restraint(r); } //add pairs to merge tree builder for(unsigned int k1=0;k1get_number_of_cross_links() <get_number_of_cross_links();i++) { Pointer rx; //get all of the relevant rigid bodies std::pair xpair=prot_data_->get_cross_link(i); std::stringstream ss1; Particles pairx(2); ss1<<"xlink"; std::cout<<"First key:"<get_name()<<"."<get_name()<<"."<=mh1_end_res_ind) within_range=false; if (xpair.second.second=mh2_end_res_ind) within_range=false; if (!within_range) { std::cout<<"XLINK"<=mh1_end_res_ind) { extra_len+=multifit::get_approximated_radius( xpair.first.second-mh1_end_res_ind+1); xpair.first.second=mh1_end_res_ind; } if (xpair.second.second=mh2_end_res_ind) { extra_len+=multifit::get_approximated_radius( xpair.second.second-mh2_end_res_ind+1); xpair.second.second=mh2_end_res_ind; } } atom::Hierarchy h1,h2;bool found=false; //find CA of the right residue ind. //can not simply do mh1_res[xpair.first.second-mh1_start_res_ind //because some residues may be missing. for (unsigned int kk=0;kkget_name()<<" and " << pairx[1]->get_name()<get_cross_link_length(i)+extra_len, params_.get_xlink_params().k_)); rx = IMP::create_restraint(hub_updated.get(), ParticlePair(pairx[0],pairx[1])); } else { //treat as a connectivity restraint std::cout<<"treat as connectivity restraint"<get_cross_link_length(i), params_.get_xlink_params().k_); } if (rx != nullptr){ if (prot_data_->get_cross_link_part_of_filter(i)){ std::cout<<"Adding restraint "<get_name()<<" of length: " << prot_data_->get_cross_link_length(i) <<" to xlinks with filter"<add_restraint(rx); } else { std::cout<<"Adding restraint "<get_name()<<" of length: " << prot_data_->get_cross_link_length(i) <<" to xlinks without filter"<add_restraint(rx); } rx->set_name(ss1.str()); //only allow the particles to penetrate or separate by 1 angstrom std::cout<<"Max Score for the restraint:"<set_maximum_score(rx, params_.get_xlink_params().max_xlink_val_); // if (prot_data_.get_xlink_used_to_build_jt(i)){ // jt_rs_.push_back(rx); // } //add pairs to merge tree builder mtb.increase_edge(mh1,mh2); }else {std::cout<<"restraint is NULL"<0) { IMP_USAGE_CHECK(params_.get_fragments_params().subunit_rigid_, "Logic error, EV operates on rigid bodies\n"); IMP_LOG_VERBOSE("Add excluded volume restraint"<get_number_of_proteins();i++) { prot_names.push_back(asmb_data_->get_component_header(i)->get_filename()); } std::cout<<"END initialize docking surfaces"< pairs_map; for(int i=0;iget_number_of_ev_pairs();i++) { IntPair ev_pair=prot_data_->get_ev_pair(i); int ind1=ev_pair.first; int ind2=ev_pair.second; pairs_map[get_pair_key(ind1,ind2)]=ev_pair; } for(unsigned int i=0;iget_name()<<"_"<get_name(); evr->set_name(name.str()); ComplementarityParams comp_param=params_.get_complementarity_params(); evr->set_boundary_coefficient(comp_param.boundary_coef_); evr->set_complementarity_coefficient(comp_param.comp_coef_); evr->set_penetration_coefficient(comp_param.penetration_coef_); evr->set_interior_layer_thickness(comp_param.interior_layer_thickness_); float voxel_size=evr->get_voxel_size(); float max_penetration=comp_param.max_penetration_; evr->set_maximum_penetration_score( max_penetration*voxel_size*voxel_size*voxel_size); mdl_->set_maximum_score(evr,comp_param.max_score_); ev_rs_->add_restraint(evr); mtb.increase_edge(mhs_[i],mhs_[j]); } }//end adding all wev restraints } else { std::cout<<"No EV restraints will be added"<get_header()->show(); std::cout<<"================="<get_header()))); full_sampled_map->set_particles(all_leaves); //move all proteins to the center of the map algebra::Transformation3Ds cen_ts; for(unsigned int i=0;iget_centroid()- core::get_centroid(core::XYZs(core::get_leaves(mhs_[i])))); core::transform(rbs_[i],cen_t); cen_ts.push_back(cen_t); } full_sampled_map->resample(); for(unsigned int i=0;icalcRMS(); dmap_->calcRMS(); double upper=(dmap_->get_number_of_voxels() *dmap_->get_header()->dmean *full_sampled_map->get_header()->dmean)/mhs_.size(); double lower=dmap_->get_number_of_voxels()*dmap_->calcRMS() *full_sampled_map->calcRMS(); FloatPair norm_factors=std::make_pair(upper,lower); std::cout<<"norm factors:"<set_scale_factor(scale);//TODO - make parameter std::cout<<"EM fit scale "<set_name("fitting"); em_rs_->add_restraint(fitr); } double max_em_val=(params_.get_fitting_params().max_asmb_fit_score_ +(mhs_.size()-1))*scale; em_rs_->set_maximum_score(max_em_val); std::cout<<"SET MAX score on fitting restraint set:"<set_maximum_score(params_.get_domino_params().max_value_threshold_); } else { /* std::cout<<"creating fit restraint with "<< all_ca.size() <<" CA atoms "<add_restraint(fitr); // float scale=100*mhs_.size()*(mhs_.size()-1); float scale=1.; std::cout<<"using regular em restraint with maximum score of " <set_maximum_score( params_.get_domino_params().em_max_value_threshold_*scale); fitr->set_scale_factor(scale);//TODO - make parameter fitr->set_name("fitting"); */ std::cout<<"creating pcafit restraint with "<< all_ca.size() <<" CA atoms "<get_assembly_header()->get_threshold(), params_.get_fitting_params().pca_max_size_diff_, params_.get_fitting_params().pca_max_angle_diff_, params_.get_fitting_params().pca_max_cent_dist_diff_)); std::cout<<"end add restraint"<add_restraint(fitr); // float scale=100*mhs_.size()*(mhs_.size()-1); fitr->set_maximum_score(0.01); fitr->set_name("fitting"); } } restraints_set_=true; //add all filters std::cout<<"before adding filters"<get_number_of_restraints();ii++) { conn_rst.push_back(conn_rs_with_filter_->get_restraint(ii)); } std::cout<<"========1"<set_name("minimum_conn_filter"); // min_conn_filter->set_use_caching(true); // min_conn_filter->set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); filters_.push_back(min_conn_filter); std::cout<<"Create filter on connectivity restraints, number of " <<"allowed violations is:" <get_maximum_number_of_violated_restraints() <set_name("conn_filter"); // conn_filter->set_use_caching(true); // conn_filter->set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); filters_.push_back(conn_filter); std::cout<<"========2"<get_number_of_restraints();ii++) { xlink_rst.push_back(xlink_rs_with_filter_->get_restraint(ii)); } IMP_NEW(domino::MinimumRestraintScoreSubsetFilterTable,min_xlink_filter, (xlink_rst,rc_,params_.get_filters_params().max_num_violated_xlink_)); min_xlink_filter->set_name("minimum_xlink_filter"); // min_xlink_filter->set_use_caching(true); // min_xlink_filter->set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); filters_.push_back(min_xlink_filter); std::cout<<"Create filter for xlink restraints, number of allowed " <<"violations is:" <get_maximum_number_of_violated_restraints() <set_name("xlink_filter"); std::cout<<"========3"<set_use_caching(true); // xlink_filter->set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); filters_.push_back(xlink_filter); RestraintsTemp ev_rst; for(unsigned ii=0;iiget_number_of_restraints();ii++) { ev_rst.push_back(ev_rs_->get_restraint(ii)); } IMP_NEW(domino::MinimumRestraintScoreSubsetFilterTable,ev_filter, (ev_rst,rc_,params_.get_filters_params().max_num_violated_ev_)); ev_filter->set_name("ev_filter"); // ev_filter->set_use_caching(true); // ev_filter->set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); filters_.push_back(ev_filter); std::cout<<"Create filter on ev restraints, number of allowed violations is:" <get_maximum_number_of_violated_restraints()<set_use_caching(true); // em_filter->set_maximum_number_of_cache_entries( // params_.get_domino_params().cache_size_); filters_.push_back(em_filter); //=====create jt_rs: std::cout<<"========4"<get_name()<<"."<<(*it)[1]->get_name(); std::cout<<"Adding dummy restraint: "<< name.str()<set_name(name.str()); jt_rs_.push_back(dr); dummy_rs_->add_restraint(dr); } IMP_NEW(domino::RestraintScoreSubsetFilterTable,dummy_filter, (dummy_rs_,pst_)); std::cout<<"========5"<get_number_of_configurations() // <<" configurations\n"); // for(int i=0;i<(int)cg_->get_number_of_configurations();i++) { // cg_->load_configuration(i); // cg_sorted_.push_back(std::pair(i, // conn_rs_->evaluate(NULL)+ // rog_rs_->evaluate(NULL))); // } // std::sort(cg_sorted_.begin(),cg_sorted_.end(), // configuration_sorter); // } domino::Assignments ProteomicsEMAlignmentAtomic::get_combinations(bool /*uniques*/) const { return sampled_assignments_; /* //set the right order of the assignment. IntsList ret(sampled_assignments_.size()); std::map mps; for (core::RigidBodies::const_iterator it = rbs_.begin(); it != rbs_.end();it++) { mps[*it]=FittingStates::get_from(pst_->get_particle_states(*it)); } std::cout<<"Get sampled assignments for particles:"<get_particles(); std::map ps_order_map; std::cout<<"Correct order:"<get_name()<<"|"; }std::cout<get_name()<<"|"; }std::cout<<"Order:"<get_name() // <get_has_particle(mhs_[i].get_particle()), "Particle "<get_name() <<" does not have states\n"); pst_->get_particle_states(mhs_[i])->load_particle_state( state4particles[i],mhs_[i].get_particle()); } } IMPMULTIFIT_END_NAMESPACE