// Review notes for the patch below. It is git format-patch output (subject
// "changing the example-partitioning") that creates examples/centroid.h and
// examples/splitter.h.
//
// NOTE(review): the text as stored here looks damaged. The hunks declare 73 and
// 158 added lines, yet everything arrives on a handful of physical lines, and
// angle-bracket content appears to be missing (`#include` with no target,
// `template` with no parameter list, bare `std::pair`, `boost::function)>`).
// Confirm against the original commit before applying or editing this patch.
//
// examples/centroid.h
//  - Centroid stores mz_ (double), sn_ (unsigned int) and rt_ (double).
//  - CentroidBoxGenerator::get<0> returns the pair
//      mzOffset_ + mz_ * (1 - mzWindowPpm_ * 1E-6),
//      mzOffset_ + mz_ * (1 + mzWindowPpm_ * 1E-6);
//    get<1> returns the pair (sn_ - snWindow_, sn_ + snWindow_).
//  - The two-argument constructor initializes only mzOffset_, mzWindowPpm_ and
//    snWindow_; rtOffset_ and rtWindow_ are left uninitialized by it.
//  - get<1> subtracts two unsigned ints, so the lower bound wraps around
//    whenever sn_ < snWindow_.
//  - No include guard is visible in the header.
//
// examples/splitter.h
//  - parseProgramOptions registers help/inputfile/outputfile/segments/overlap,
//    maps positional arguments to "inputfile", returns 0 after printing help or
//    when no input file was given, and 1 otherwise; a missing output name
//    defaults to the input name plus ".out".
//  - The "segments" option is bound to options.segments_, but ProgramOptions
//    declares segmentsize_ and no segments_ member.
//  - SNSplitter::setOptions is declared bool but returns the result of
//    assigning a ProgramOptions; no conversion to bool is visible here.
//  - SNSplitter::parseString reads from `ifs` and appends to `breakpoints`,
//    neither of which is declared in view (the member is breakpoints_); it
//    passes its const `str` parameter to getline, overwrites its `sn`
//    parameter with 1, scans the int variables numentries/intensity with %u,
//    and has no return statement although it declares a return value.
//  - SNSplitter::filterAdjList opens `try {` with no catch handler in view and
//    its braces do not balance, so findOverlaps ends up nested inside it and
//    the struct is never closed. It also runs findConnectedComponents on the
//    still-empty filteredAdjList rather than on adjList, ignores lowerLimit,
//    and refers to tempResult and rightoverlap_, which are not declared in view.
//  - SNSplitter::findOverlaps uses `options` (the member is options_),
//    `overlap`, `counter`, `labels` and joinAdjLists, none declared in view;
//    it calls parseString with three arguments and expects an unsigned int
//    back, and passes `labels` where filterAdjList declares an unsigned int.
//  - NOTE(review): presumably this is work in progress that was never
//    compiled; verify intent with the author before repairing.
From 11fb7f96cb64251fa346a7713ef028ea582d7682 Mon Sep 17 00:00:00 2001 From: Buote Xu Date: Sat, 23 Jun 2012 15:39:09 +0200 Subject: [PATCH] changing the example-partitioning --- examples/centroid.h | 73 ++++++++++++++++++++ examples/splitter.h | 158 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+) create mode 100644 examples/centroid.h create mode 100644 examples/splitter.h diff --git a/examples/centroid.h b/examples/centroid.h new file mode 100644 index 0000000..661ad4c --- /dev/null +++ b/examples/centroid.h @@ -0,0 +1,73 @@ +#include "fbi/tuple.h" +#include "fbi/tuplegenerator.h" + +struct Centroid +{ + double mz_; + unsigned int sn_; + double rt_; + Centroid(const double& mz, const unsigned int & sn, const double & rt) + : mz_(mz), sn_(sn), rt_(rt){} +}; + +namespace fbi { + + template<> + struct Traits : mpl::TraitsGenerator {}; + +} //end namespace fbi + +struct CentroidBoxGenerator +{ + template + typename fbi::tuple_element::key_type>::type + get(const Centroid&) const; + + double mzOffset_; + double mzWindowPpm_; + double rtOffset_; + double rtWindow_; + unsigned int snWindow_; + + CentroidBoxGenerator(double mzWindowPpm, unsigned int snWindow) + : mzOffset_(0.0), mzWindowPpm_(mzWindowPpm), snWindow_(snWindow) + {} + CentroidBoxGenerator(double mzWindowPpm, double rtWindow, unsigned int snWindow) + : mzOffset_(0.0), mzWindowPpm_(mzWindowPpm), + rtOffset_(0.0), rtWindow_(rtWindow), snWindow_(snWindow) + {} + + CentroidBoxGenerator(double mzOffset, double mzWindowPpm, + double rtOffset, double rtWindow, unsigned int snWindow) + : mzOffset_(mzOffset), mzWindowPpm_(mzWindowPpm), + rtOffset_(rtOffset), rtWindow_(rtWindow), snWindow_(snWindow) + {} + + +}; + + +template <> +std::pair +CentroidBoxGenerator::get<0>(const Centroid & centroid) const +{ + return std::make_pair( + mzOffset_ + centroid.mz_* (1 - mzWindowPpm_ * 1E-6), + mzOffset_ + centroid.mz_* (1 + mzWindowPpm_ * 1E-6)); +} + + +template <> +std::pair 
+CentroidBoxGenerator::get<1>(const Centroid & centroid) const +{ + return std::make_pair( + centroid.sn_ - snWindow_, + centroid.sn_ + snWindow_); +} + + + + + diff --git a/examples/splitter.h b/examples/splitter.h new file mode 100644 index 0000000..81fcdee --- /dev/null +++ b/examples/splitter.h @@ -0,0 +1,158 @@ +#include +#include +#include +#include +#include +#include "centroid.h" + +struct ProgramOptions +{ + std::string inputfileName_; + std::string outputfileName_; + unsigned int segmentsize_; + unsigned int overlap_; +}; + +int parseProgramOptions(int argc, char* argv[], ProgramOptions& options) +{ + namespace po = boost::program_options; + po::options_description generic("Generic options"); + generic.add_options() + ("help", "Display this help message") + ("inputfile,i", po::value(&options.inputfileName_), "input file") + ("outputfile,o", po::value(&options.outputfileName_), "output file") + ; + + po::options_description config("Allowed options"); + config.add_options() + ("segments", po::value(&options.segments_)->default_value( + 1), + "Number of segments the data should be partitioned in before using libfbi") + ("overlap", po::value(&options.overlap_)->default_value( + 0), + "Overlap in time-dimension taken into account to not have jumps"); + + po::options_description cmdline_options("Options available via command line"); + cmdline_options.add(generic).add(config); + + po::options_description visible("Allowed options"); + visible.add(generic).add(config); + + po::positional_options_description p; + p.add("inputfile", -1); + + po::variables_map vm; + + po::store(po::command_line_parser(argc, argv).options( + cmdline_options).positional(p).run(), vm); + po::notify(vm); + + if (vm.count("help")) { + std::cout << visible << "\n"; + return 0; + } + + if (!vm.count("inputfile")) { + std::cerr << "InputFile needed" << '\n'; + std::cout << visible << '\n'; + return 0; + } + + if (!vm.count("outputfile")) { + options.outputfileName_ = options.inputfileName_ + 
std::string(".out"); + } + return 1; +} + + + +template +struct +SNSplitter { + private: + ProgramOptions options_; + std::vector breakpoints_; + public: + bool + setOptions(const ProgramOptions & options) { + return (options_ = options); + } + + std::vector + parseString(const std::string & str, unsigned int sn) { + std::vector centroids; + float mz, massrange_lo, massrange_hi, rt; + char pol; + char mode[101]; + char mslevel[101]; + char line[101]; + int unknown, numentries, intensity; + typedef boost::tokenizer > Tokenizer; + boost::char_separator sep(","); + sn = 1; + breakpoints.push_back(0); + while(getline(ifs, str)) { + + Tokenizer tokens(str, sep); + Tokenizer::iterator it = tokens.begin(); + if (sscanf(str.c_str(), "%f,%c,%100[^,],%100[^,],%d,%100[^,],%f-%f,%u,", &rt, &pol, &mode, &mslevel, &unknown, &line, &massrange_lo, &massrange_hi, &numentries) != 9) { + continue; + } + for (int i = 0; i < 8; ++i) ++it; + while (it != tokens.end()) { + std::string mz_int_pair(*(it++)); + if (sscanf(mz_int_pair.c_str(), "%f %u", &mz, &intensity) == 2) { + centroids.push_back(Centroid(mz, sn, rt)); + } + } + ++sn; + } + } + ResultType + filterAdjList(const ResultType & adjList, unsigned int lowerLimit) { + ResultType filteredAdjList; + try { + std::vector labels; + LabelType nComponents = findConnectedComponents(filteredAdjList, labels); + std::vector counter(*std::max_element(labels.begin(), labels.end()), 0); + for (std::vector::size_type i = 0; i < labels.size(); ++i) { + ++counter[labels[i] - 1]; + } + filteredAdjList.resize(adjList.size()); + for (typename ResultType::size_type i = 0; i < tempResult.size(); ++i) { + if (counter[labels[i]-1] > rightoverlap_) { + std::copy(adjList[i].begin(), adjList[i].end(), std::back_inserter(filteredAdjList[i])); + } + } + + + + return filteredAdjList; + } + ResultType + findOverlaps(boost::function)> intersectFunctor) { + + boost::iostreams::stream ifs(options.inputfileName_); + std::string str; + std::deque centroids; + 
unsigned int segmentCounter = 0; + unsigned int centroidCounter = 0; + ResultType fullAdjList; + while(std::getline(ifs, str)) { + unsigned int breakpoint = parseString(str, centroids, counter); + segmentCounter++; + if (segmentCounter % options.segmentsize_ == 0) { + centroidCounter = centroids.size(); + } + if (segmentCounter % options.segmentsize_ == overlap && segmentCounter > options.segmentsize_) { + ResultType shortAdjList = intersectFunctor(centroids); + ResultType filteredAdjList = filterAdjList(shortAdjList, labels); + joinAdjLists(filteredAdjList, fullAdjList, centroidCounter); + } + } + return fullAdjList; + + + } + +};