/** * \file VQClustering.cpp * \brief Vector quantization clustering. Based on Wriggers et at, JMB 1998 * * Copyright 2007-2013 IMP Inventors. All rights reserved. * */ #include #include #include #include #include #include #include IMPSTATISTICS_BEGIN_INTERNAL_NAMESPACE using namespace algebra::internal; typedef std::pair ValInd; typedef std::vector ValInds; namespace { bool comp_first_smaller_than_second(const ValInd& a, const ValInd& b) { return a.first < b.first; } } //! help function for sorting centers by their distance to a data point class CenterSorter { public: CenterSorter(){} CenterSorter(Array1DD *p,Array1DD_VEC *cs):p_(p),cs_(cs){} void set_point(Array1DD *p) {p_=p;} void set_centers(Array1DD_VEC *cs) {cs_=cs;} bool operator() (int c_i,int c_j) { double d_i=TNT::dot_product(*p_-(*cs_)[c_i],*p_-(*cs_)[c_i]); double d_j=TNT::dot_product(*p_-(*cs_)[c_j],*p_-(*cs_)[c_j]); if (d_iget_data(); IMP_INTERNAL_CHECK(data_->size()>0,"no data points to cluster"); dim_ = (*data_)[0].dim1(); par_=VQClusteringParameters(dim_,k_); is_set_ = false; } void VQClustering::sample_data_point(Array1DD &p) { p = full_data_->sample(); } void VQClustering::center_sampling(Array1DD_VEC *centers_sample) { for(int i=0;ipush_back(p_new); } } void VQClustering::sampling(Array1DD_VEC *tracking) { double log_ef_ei=log(par_.ef_/par_.ei_); double log_lf_li=log(par_.lf_/par_.li_); double epsilon;//neuron plasticity double lambda;//approximated width //order_track and order_centers_indexes are data structures used to sort //the centers //according to their distance from a randomly selected data point std::vector order_track; std::vector order_centers_indexes; order_track.insert(order_track.end(),k_,0); order_centers_indexes.insert(order_centers_indexes.end(),k_,0); CenterSorter sorter; int show_number_of_runs=0; if (show_status_bar_) { show_number_of_runs=par_.number_of_runs_; } boost::progress_display show_progress(show_number_of_runs); Array1DD_VEC centers_sample; for (int run_ind=0; run_indpush_back(centers_sample[i]); } }//end run iterations if (par_.eq_clusters_) { Array1DD_VEC centers_sample_eq; get_eq_centers(¢ers_sample,¢ers_sample_eq); std::cout<<"Updates centers:"< > closest_center; double min_dist, curr_dist; int closest_center_ind=0; Array1DD att_sum(dim_,0.); double wdiff = INT_MAX; Array1DD_VEC last_centers; for(int i=0;i()); } // for each sampling center in tracking find the closest center // in current centers. This information is stored in closest_center vector for (unsigned int j=0;jsize();j++) { min_dist = 1e20; for (int cen_ind=0;cen_ind0) { //check if center with cen_ind is going to be update for(int d=0;d 1e-5); } //Return eq size version of the current centers void VQClustering::get_eq_centers(Array1DD_VEC *centers, Array1DD_VEC *eq_centers) { const Array1DD_VEC * data = full_data_->get_data(); int num_points = full_data_->get_number_of_data_points(); unsigned int eq_size = num_points/k_; std::cout<<"Number of points:"< assignments(k_); int centers_full=0; int points_visited=0; bool not_done=true; //make a heap of the array std::make_heap(distances.begin(),distances.end(), comp_first_smaller_than_second); std::sort_heap(distances.begin(),distances.end(), comp_first_smaller_than_second); unsigned int heap_size=distances.size(); while (not_done) { int min_ind=distances[0].second; int center_ind=min_ind/num_points; int point_ind=min_ind-center_ind*num_points; assignments[center_ind].push_back(point_ind); std::pop_heap(distances.begin(),distances.begin()+heap_size, comp_first_smaller_than_second); --heap_size; std::sort_heap(distances.begin(),distances.begin()+heap_size, comp_first_smaller_than_second); if (assignments[center_ind].size()==eq_size) { std::cout<<"remove points for center:"<=center_ind*num_points) && (((int)distances[i].second)<(center_ind+1)*num_points)) { distances[i].first=INT_MAX; counter++; } heap_size-=counter; } //order is now wrong // std::make_heap(distances.begin(),distances.end(), // comp_first_smaller_than_second); std::sort_heap(distances.begin(),distances.begin()+heap_size, comp_first_smaller_than_second); ++centers_full; } ++points_visited; if (centers_full==k_) { std::cout<<"END eq loop: centers assignments are full"<push_back(v); } std::cout<<"before returning"<, nn,(all_cen)); assignment_.clear(); assignment_.insert(assignment_.end(),data_->size(),0); for(unsigned int j=0;jsize();j++) { algebra::Vector3D point((*data_)[j][0],(*data_)[j][1],(*data_)[j][2]); int closest_cen = nn->get_nearest_neighbor(point); //----- //debug int min_ind=0; double min_dist=999999; for(unsigned int kk=0;kkalgebra::get_distance(point,all_cen[kk])) { min_dist=algebra::get_distance(point,all_cen[kk]); min_ind=kk; } } if (min_ind!=closest_cen) { std::cerr<<"Center for "<100) { std::cerr<<"Outlier for cneter "<get_number_of_data_points();i++) { centers_.push_back((*(starting_centers->get_data()))[i]); } //rest are sampled for(int i=starting_centers->get_number_of_data_points();i