/** * \file protein_anchors_mapping_reader.cpp * \brief handles reading matches between a protein and its anchors * * Copyright 2007-2013 IMP Inventors. All rights reserved. * */ #include #include #include #include #include #include #include #include #include #include "boost/tuple/tuple.hpp" IMPMULTIFIT_BEGIN_NAMESPACE namespace { #if 0 bool is_anchors_line(const std::string &line) { typedef boost::split_iterator string_split_iterator; IMP_USAGE_CHECK(line.size() > 0,"no data to parse for anchors line:" < line_split; boost::split(line_split, line, boost::is_any_of("|")); //split returns zero length entires as well line_split.erase( std::remove_if(line_split.begin(),line_split.end(), boost::bind( &std::string::empty, _1 ) ),line_split.end() ); if (boost::lexical_cast(line_split[0]) != "anchors") { return false; } return true; } #endif bool is_protein_line(const std::string &line) { typedef boost::split_iterator string_split_iterator; IMP_LOG_VERBOSE("going to parse:"< line_split; boost::split(line_split, line, boost::is_any_of("|")); //split returns zero length entires as well line_split.erase( std::remove_if(line_split.begin(),line_split.end(), boost::bind( &std::string::empty, _1 ) ),line_split.end() ); if (boost::lexical_cast(line_split[0]) != "protein") { return false; } return true; } std::string parse_anchors_line(const std::string &line) { typedef boost::split_iterator string_split_iterator; IMP_USAGE_CHECK(line.size() > 0,"no data to parse"< line_split; boost::split(line_split, line, boost::is_any_of("|")); //split returns zero length entires as well line_split.erase( std::remove_if(line_split.begin(),line_split.end(), boost::bind( &std::string::empty, _1 ) ),line_split.end() ); return boost::lexical_cast(line_split[1]); } boost::tuple parse_protein_line(const std::string &config, const std::string &line, int max_paths) { typedef boost::split_iterator string_split_iterator; IMP_USAGE_CHECK(line.size() > 0,"no data to parse for protein line:" < line_split; boost::split(line_split, line, boost::is_any_of("|")); //split returns zero length entires as well line_split.erase( std::remove_if(line_split.begin(),line_split.end(), boost::bind( &std::string::empty, _1 ) ),line_split.end() ); //allow no precalculated paths IMP_USAGE_CHECK(line_split.size() > 1, "wrong format, should look like |protein|| or " "|protein|paths|"<(line_split[1]) <2) { paths_fn = base::get_relative_path(config, line_split[2]); std::cout<<"PATH FN:"<(line_split[1]), paths_fn, paths); } } ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths) { ProteinsAnchorsSamplingSpace ret(prots); std::fstream in; std::cout<<"FN:"< prot_data = parse_protein_line(anchors_prot_map_fn, line, max_paths); ret.set_paths_for_protein(boost::get<0>(prot_data), boost::get<2>(prot_data)); ret.set_paths_filename_for_protein(boost::get<0>(prot_data), boost::get<1>(prot_data)); } return ret; } void ProteinsAnchorsSamplingSpace::show(std::ostream &s) const { for(std::map::const_iterator it = paths_map_.begin(); it != paths_map_.end();it++) { IntsList inds=it->second; s<first<<" "< data_map; //store all components for (int i=0;i<(int)prots_sd->get_number_of_component_headers();i++) { data_map[prots_sd->get_component_header(i)->get_name()] =prots_sd->get_component_header(i); } //get just the relevant components for (Strings::const_iterator it = prot_names.begin(); it != prot_names.end(); it++) { IMP_INTERNAL_CHECK(data_map.find(*it) != data_map.end(), "Protein:"<<*it<<" was not found\n"); ret->add_component_header(data_map[*it]); } return ret.release(); } void write_protein_anchors_mapping( const std::string &anchors_prot_map_fn, const ProteinsAnchorsSamplingSpace &pa, const Strings &prot_names) { std::ofstream out; out.open(anchors_prot_map_fn.c_str(),std::ios::out); out<<"|anchors|"< > &prot_paths) { std::ofstream out; out.open(anchors_prot_map_fn.c_str(),std::ios::out); out<<"|anchors|"< >::const_iterator it = prot_paths.begin(); it != prot_paths.end(); it++) { out<<"|protein|"<first<<"|"<second<<"|"<