diff --git a/R/RcppExports.R b/R/RcppExports.R index b55423c..bf7a34c 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,14 +1,6 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -WritePhylipFile <- function(xPosition, yPosition, data, cutoff, countTable, saveLocation) { - invisible(.Call('_clustur_WritePhylipFile', PACKAGE = 'clustur', xPosition, yPosition, data, cutoff, countTable, saveLocation)) -} - -WriteColumnFile <- function(xPosition, yPosition, data, cutoff, countTable, saveLocation) { - invisible(.Call('_clustur_WriteColumnFile', PACKAGE = 'clustur', xPosition, yPosition, data, cutoff, countTable, saveLocation)) -} - DetermineIfPhylipOrColumnFile <- function(filePath) { .Call('_clustur_DetermineIfPhylipOrColumnFile', PACKAGE = 'clustur', filePath) } @@ -37,7 +29,7 @@ OptiCluster <- function(DistanceData, featureColumnName, binColumnName, cutoff) .Call('_clustur_OptiCluster', PACKAGE = 'clustur', DistanceData, featureColumnName, binColumnName, cutoff) } -CreateDataFrameFromSparse <- function(countTable) { - .Call('_clustur_CreateDataFrameFromSparse', PACKAGE = 'clustur', countTable) +CreateDataFrameFromSparseCountTable <- function(countTable) { + .Call('_clustur_CreateDataFrameFromSparseCountTable', PACKAGE = 'clustur', countTable) } diff --git a/R/cluster.R b/R/cluster.R index 491481f..00ad1b8 100644 --- a/R/cluster.R +++ b/R/cluster.R @@ -193,7 +193,8 @@ read_count <- function(count_table_path) { if (grepl("#", test_read[1, 1], fixed = TRUE)) { count_table_sparse <- read.delim(count_table_path, sep = "\t", skip = 2) count_table_sparse <- lapply(count_table_sparse, as.character) - return(validate_count_table(CreateDataFrameFromSparse(count_table_sparse))) + ct <- CreateDataFrameFromSparseCountTable(count_table_sparse) + return(validate_count_table(ct)) } return(validate_count_table(read.delim(count_table_path, sep = "\t"))) } diff --git a/README.Rmd b/README.Rmd index 862ca01..64013bc 100644 --- a/README.Rmd +++ b/README.Rmd @@ -63,4 +63,3 @@ To get started, look at the ["Getting started"](www.schlosslab.org/clustur/) pag For feature request please visit the issue page in github and use the feature tag. If you would like to contribute pull request are welcomed! - diff --git a/README.md b/README.md index 9f8c006..cb89746 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ library(clustur) ## Getting Started To get started, look at the [“Getting -started”](https:://www.schlosslab.org/clustur/) page. +started”](www.schlosslab.org/clustur/) page. ## Contributions diff --git a/src/Adapters/CountTableAdapter.h b/src/Adapters/CountTableAdapter.h index 70051be..62435fc 100644 --- a/src/Adapters/CountTableAdapter.h +++ b/src/Adapters/CountTableAdapter.h @@ -12,7 +12,7 @@ #include #include -#include "../RowData.h" + class CountTableAdapter { public: @@ -29,7 +29,6 @@ class CountTableAdapter { std::vector GetGroups() const; Rcpp::DataFrame GetCountTable() const {return countTable;} Rcpp::DataFrame ReCreateDataFrame() const; - std::set CheckDistanceFileOnlyHasNamesInCount(const std::vector&) const; private: void CreateNameToIndex(); struct IndexAbundancePair { diff --git a/src/Adapters/DistanceFileReader.h b/src/Adapters/DistanceFileReader.h index 90289a7..c108426 100644 --- a/src/Adapters/DistanceFileReader.h +++ b/src/Adapters/DistanceFileReader.h @@ -9,7 +9,6 @@ #include "CountTableAdapter.h" #include "../MothurDependencies/ListVector.h" -#include "../RowData.h" #include "../MothurDependencies/SparseDistanceMatrix.h" class DistanceFileReader { @@ -18,12 +17,11 @@ class DistanceFileReader { // We need to deduce type, the easy way to do that is to see if there is a number for the first item read. // Phylip files have a number of sequences located at the top. We can use that to our advantage. virtual bool Read(const std::string& filePath) {return false;} - DistanceFileReader(SparseDistanceMatrix*, ListVector*, bool); + DistanceFileReader(SparseDistanceMatrix*, ListVector*, double, bool); DistanceFileReader() = default; // Phylip files do not need a count table SparseDistanceMatrix* GetSparseMatrix() const {return new SparseDistanceMatrix(*sparseMatrix);} ListVector* GetListVector() const {return new ListVector(*list);} - void SetRowDataMatrix(const std::vector& data); void SetCountTable(CountTableAdapter data); Rcpp::DataFrame SparseMatrixToDataFrame() const; Rcpp::DataFrame GetCountTable() const; diff --git a/src/Adapters/MatrixAdapter.h b/src/Adapters/MatrixAdapter.h index cdeb1b6..4b7c1f6 100644 --- a/src/Adapters/MatrixAdapter.h +++ b/src/Adapters/MatrixAdapter.h @@ -15,8 +15,6 @@ class MatrixAdapter { MatrixAdapter(const std::vector &iIndexes, const std::vector &jIndexes, const std::vector &dataValues, double cutOff, bool isSimularity, CountTableAdapter table); ~MatrixAdapter() = default; - bool CreatePhylipFile(const std::string &saveFileLocation); - bool CreateColumnDataFile(const std::string &saveFileLocation); SparseDistanceMatrix CreateSparseMatrix(); ListVector CreateListVector() const; diff --git a/src/Adapters/OptimatrixAdapter.h b/src/Adapters/OptimatrixAdapter.h index 2ef742d..9e9b9be 100644 --- a/src/Adapters/OptimatrixAdapter.h +++ b/src/Adapters/OptimatrixAdapter.h @@ -15,9 +15,6 @@ #include #include -#include "../RowData.h" - - class OptimatrixAdapter { ///Closeness is the data represntation, it is a vector of sets that tells us which values are closes to it -> /// So, close[0], if Namemap[0] = A, then close[0] is the values that are within the cutoff to A. @@ -28,14 +25,8 @@ class OptimatrixAdapter { explicit OptimatrixAdapter(const double cutoff) { this->cutoff = cutoff; } - OptiMatrix* ConvertToOptimatrix(const std::vector - &xPosition, const std::vector& yPosition, const std::vector& data); - OptiMatrix* ConvertToOptimatrix(const std::vector&, bool); OptiMatrix* ConvertToOptimatrix(const SparseDistanceMatrix* matrixData, const ListVector* listVector, bool sim); - std::vector> GetCloseness() {return {};} - std::vector GetNameList() {return {};} - std::vector GetSingletons() {return {};} private: double cutoff; //TODO Get rid of values inside of the adapter, it should not care about them diff --git a/src/ClusterCommand.cpp b/src/ClusterCommand.cpp index fb7f5db..281d401 100644 --- a/src/ClusterCommand.cpp +++ b/src/ClusterCommand.cpp @@ -84,10 +84,7 @@ ClusterExport* ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix, const doub } util.AddRowToDataFrameMap(dataframeMapClusterMetrics, clusterMetrics, clusterMetricsHeaders); std::chrono::time_point start, end; - //m->mothurOutEndLine(); - // Stable Metric -> Keep the data stable, to prevent errors (rounding errors) - // The difference between what the current and last metric (delta) - // MaxIters -> is an exit condition + while ((delta > stableMetric) && (iters < maxIters)) { //long start = std::time(nullptr); double oldMetric = listVectorMetric; @@ -126,7 +123,6 @@ ClusterExport* ClusterCommand::runOptiCluster(OptiMatrix *optiMatrix, const doub clusterInformation.clusterBins = list->print(listFile); data->AddToData(clusterInformation); data->SetListVector(*list, std::to_string(cutoff)); - // clusterMatrixOutput = list->print(listFile); stats = cluster.getStats(tp, tn, fp, fn); sensFile += std::to_string(cutoff) + ',' + std::to_string(cutoff) + ',' + std::to_string(tp) + ',' + @@ -157,7 +153,7 @@ ClusterExport* ClusterCommand::runMothurCluster(const std::string &clusterMethod float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldList = *list; - bool printHeaders = false; + constexpr bool printHeaders = false; std::string clusterResult; double highestDistLabel = -1; std::string binResults; diff --git a/src/ClusterCommandTestFixture.cpp b/src/ClusterCommandTestFixture.cpp deleted file mode 100644 index eee8db0..0000000 --- a/src/ClusterCommandTestFixture.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Created by Gregory Johnson on 5/1/24. -// - -#include "Tests/ClusterCommandTestFixture.h" - -bool ClusterCommandTestFixture::TestSetIterationsWorksCorrectly(const int iterations, const bool expectResult) { - Setup(); - const bool result = clusterCommand->SetMaxIterations(iterations); - TearDown(); - return result == expectResult; -} - -void ClusterCommandTestFixture::Setup() { - clusterCommand = new ClusterCommand(); -} - -void ClusterCommandTestFixture::TearDown() { - delete(clusterCommand); -} diff --git a/src/CountTableAdapter.cpp b/src/CountTableAdapter.cpp index 41d7939..0b4690f 100644 --- a/src/CountTableAdapter.cpp +++ b/src/CountTableAdapter.cpp @@ -147,16 +147,6 @@ Rcpp::DataFrame CountTableAdapter::ReCreateDataFrame() const { return countTable; } -std::set CountTableAdapter::CheckDistanceFileOnlyHasNamesInCount(const std::vector& data) const { - std::set names; - for(const auto& value : data) { - if(nameToRowIndex.find(value.name) == nameToRowIndex.end()) { - names.insert(value.name); - } - } - return names; -} - void CountTableAdapter::CreateNameToIndex() { for(size_t i = 0; i < sampleNames.size(); i++) { diff --git a/src/DistanceFileReader.cpp b/src/DistanceFileReader.cpp index a049c97..a1c1493 100644 --- a/src/DistanceFileReader.cpp +++ b/src/DistanceFileReader.cpp @@ -6,7 +6,8 @@ DistanceFileReader::DistanceFileReader(SparseDistanceMatrix *sparseDistanceMatrix, - ListVector *listVector, const bool isSim):sparseMatrix(sparseDistanceMatrix), list(listVector), sim(isSim){} + ListVector *listVector, const double cutoff, const bool isSim):sparseMatrix(sparseDistanceMatrix), +list(listVector), cutoff(cutoff), sim(isSim){} Rcpp::DataFrame DistanceFileReader::SparseMatrixToDataFrame() const { const size_t size = sparseMatrix->seqVec.size(); diff --git a/src/Makevars.txt b/src/Makevars.txt deleted file mode 100644 index 1440734..0000000 --- a/src/Makevars.txt +++ /dev/null @@ -1,2 +0,0 @@ -# Makevars -CXX_STD = CXX11 diff --git a/src/MatrixAdapter.cpp b/src/MatrixAdapter.cpp index 825b243..dd3cb3c 100644 --- a/src/MatrixAdapter.cpp +++ b/src/MatrixAdapter.cpp @@ -11,52 +11,6 @@ MatrixAdapter::MatrixAdapter(const std::vector &iIndexes, const std::vector data(dataValues) { } - -bool MatrixAdapter::CreatePhylipFile(const std::string &saveFileLocation) { - // if(saveFileLocation.empty()) - // return false; - // const auto matrix = DistanceMatrixToSquareMatrix(); - // int count = 0; - // const size_t size = matrixNames.size(); - // std::string distanceString = "\t" + std::to_string(size) + "\n"; - // for (const auto &cells: matrix) { - // distanceString += matrixNames[count]; - // for (int i = 0; i < count; i++) { - // distanceString += "\t" + std::to_string(cells.rowValues[i]); - // } - // distanceString += "\n"; - // count++; - // } - // std::ofstream writeOut(saveFileLocation); - // if (!writeOut.is_open()) { - // TestHelper::Print("Failed to open: \n"); - // } - // writeOut << distanceString; - // writeOut.close(); - return true; -} - -bool MatrixAdapter::CreateColumnDataFile(const std::string &saveFileLocation) { - if(saveFileLocation.empty()) - return false; - const auto matrix = CreateSparseMatrix(); - std::string data; - for (size_t i = 0; i < matrix.seqVec.size(); i++) { - std::string firstCellName = matrixNames[i]; - for(size_t j = 0; j < matrixNames[i].size(); j++) { - std::string otherCell = matrixNames[matrix.seqVec[i][j].index]; - data += firstCellName + "\t" + otherCell + "\t" + std::to_string(matrix.seqVec[i][j].dist) + "\n"; - } - } - std::ofstream writeOut(saveFileLocation); - if (!writeOut.is_open()) { - Rcpp::Rcout << "Failed to open: \n"; - } - writeOut << data; - writeOut.close(); - return true; -} - SparseDistanceMatrix MatrixAdapter::CreateSparseMatrix() { // The indexes are +1, i need to push them back so that 1 -> 0, 2-> 1, etc (name map maybe?) std::set names; diff --git a/src/MatrixAdapterTestFixture.cpp b/src/MatrixAdapterTestFixture.cpp index ec19fac..2aad848 100644 --- a/src/MatrixAdapterTestFixture.cpp +++ b/src/MatrixAdapterTestFixture.cpp @@ -11,13 +11,6 @@ bool MatrixAdapterTestFixture::TestCreateSparseMatrix(const size_t expectedResul return result == expectedResult; } -bool MatrixAdapterTestFixture::TestCreatePhylipFile(const bool expectedResult) { - Setup(); - const auto result = matrixAdapter->CreatePhylipFile(""); - TearDown(); - return result == expectedResult; -} - bool MatrixAdapterTestFixture::TestGetListVector(const bool createSparseFirst, const bool expectedResult) { Setup(); if(createSparseFirst) matrixAdapter->CreateSparseMatrix(); diff --git a/src/MothurDependencies/ColumnDistanceMatrixReader.h b/src/MothurDependencies/ColumnDistanceMatrixReader.h index 8cdaf88..6effef7 100644 --- a/src/MothurDependencies/ColumnDistanceMatrixReader.h +++ b/src/MothurDependencies/ColumnDistanceMatrixReader.h @@ -10,7 +10,6 @@ #include #include "ListVector.h" #include "SparseDistanceMatrix.h" -#include "../RowData.h" #include "../Adapters/CountTableAdapter.h" #include "../Adapters/DistanceFileReader.h" diff --git a/src/MothurDependencies/OptiMatrix.h b/src/MothurDependencies/OptiMatrix.h index a569929..eb83de0 100644 --- a/src/MothurDependencies/OptiMatrix.h +++ b/src/MothurDependencies/OptiMatrix.h @@ -15,13 +15,13 @@ class OptiMatrix final : public OptiData{ public: OptiMatrix(const std::vector>& close, const std::vector& name, - const std::vector& single, const double c): OptiData(c) + const std::vector& singleton, const double c): OptiData(c) { closeness = close; nameMap = name; - singletons = single; + singletons = singleton; }//closeness, namemap, singleton, cutoff - ~OptiMatrix(){} + ~OptiMatrix() override = default; std::vector> GetCloseness() {return closeness;} std::vector GetNameList() {return nameMap;} std::vector GetSingletons() {return singletons;} diff --git a/src/MothurDependencies/RSparseMatrix.h b/src/MothurDependencies/RSparseMatrix.h deleted file mode 100644 index d2baf35..0000000 --- a/src/MothurDependencies/RSparseMatrix.h +++ /dev/null @@ -1,31 +0,0 @@ -// -// Created by Gregory Johnson on 7/31/24. -// - -#ifndef RSPARSEMATRIX_H -#define RSPARSEMATRIX_H -#include - - -struct RSparseData { - RSparseData(const int i, const int j, const double value) { - iIndex = i; - jIndex = j; - data = value; - } - int iIndex; - int jIndex; - double data; -}; - -struct RSpraseMatrix { - RSpraseMatrix(const std::vector &iIndex, const std::vector &jIndex, const std::vector &data){ - for(size_t i = 0; i < data.size(); i++) { - sparseValues.emplace_back(iIndex[i], jIndex[i], data[i]); - } - } - std::vector sparseValues; -}; - - -#endif //RSPARSEMATRIX_H diff --git a/src/MothurDependencies/ReadPhylipMatrix.h b/src/MothurDependencies/ReadPhylipMatrix.h index 109b3b7..808d8e5 100644 --- a/src/MothurDependencies/ReadPhylipMatrix.h +++ b/src/MothurDependencies/ReadPhylipMatrix.h @@ -8,7 +8,6 @@ #include #include #include -#include "../RowData.h" #include #include "ListVector.h" #include "SparseDistanceMatrix.h" diff --git a/src/MothurDependencies/SharedFileBuilder.h b/src/MothurDependencies/SharedFileBuilder.h index bb7bd62..22880cd 100644 --- a/src/MothurDependencies/SharedFileBuilder.h +++ b/src/MothurDependencies/SharedFileBuilder.h @@ -15,7 +15,6 @@ #include "ClusterExport.h" #include "ListVector.h" -#include "RSparseMatrix.h" #include "SharedFile.h" #include "OtuAbundancePair.h" #include "../Adapters/CountTableAdapter.h" diff --git a/src/MothurDependencies/SparseDistanceMatrix.h b/src/MothurDependencies/SparseDistanceMatrix.h index aa0602b..c89f825 100644 --- a/src/MothurDependencies/SparseDistanceMatrix.h +++ b/src/MothurDependencies/SparseDistanceMatrix.h @@ -8,7 +8,7 @@ #include "PDistCell.h" #include - +#include #include "Utils.h" @@ -44,6 +44,7 @@ class SparseDistanceMatrix { int addCellSorted(unsigned long, PDistCell); std::vector > seqVec; bool print() const; + void FilterSparseMatrix(float cutoff); private: PDistCell smallCell; //The cell with the smallest distance diff --git a/src/OptiClusterTestFixture.cpp b/src/OptiClusterTestFixture.cpp index ccde2a5..eb36f71 100644 --- a/src/OptiClusterTestFixture.cpp +++ b/src/OptiClusterTestFixture.cpp @@ -66,12 +66,26 @@ bool OptiClusterTestFixture::OptiClusterGetsTheCorrectNumberOfBins(const long lo void OptiClusterTestFixture::Setup() { - const auto xVals = std::vector{0,0,0,1,1,2,3}; - const auto yVals = std::vector{1,2,4,2,4,4,4}; - const auto data = std::vector{0.02,0.04,0.025,0.01,0.028,0.045,0.05}; + const auto xVals = std::vector{0,0,0,1,1}; + const auto yVals = std::vector{1,2,4,2,4}; + const auto data = std::vector{0.02,0.04,0.025,0.01,0.028}; + SparseDistanceMatrix sparse; + ListVector vec; + vec.push_back("0"); + vec.push_back("1"); + vec.push_back("2"); + vec.push_back("3"); + vec.push_back("4"); + const size_t size = xVals.size(); + sparse.resize(size); + for(size_t i = 0; i < size; i++) { + sparse.addCell(xVals[i], PDistCell(yVals[i], data[i])); + } OptimatrixAdapter adapter(0.03); - OptiData *matrix = adapter.ConvertToOptimatrix(xVals, yVals, data); + OptiData *matrix = adapter.ConvertToOptimatrix(&sparse, &vec, false); optiCluster = new OptiCluster(matrix, new MCC(), 0);; + double value = 0; + optiCluster->initialize(value, false, ""); } void OptiClusterTestFixture::TearDown() { diff --git a/src/OptiDataTestFixture.cpp b/src/OptiDataTestFixture.cpp index 06669be..f8c314d 100644 --- a/src/OptiDataTestFixture.cpp +++ b/src/OptiDataTestFixture.cpp @@ -78,11 +78,23 @@ bool OptiDataTestFixture::TestIsCloseFitReturnsCorrectData(const long long index } void OptiDataTestFixture::Setup() { - const auto xVals = std::vector{0,0,0,1,1,2,3}; - const auto yVals = std::vector{1,2,4,2,4,4,4}; - const auto data = std::vector{0.02,0.04,0.025,0.01,0.028,0.045,0.05}; + const auto xVals = std::vector{0,0,0,1,1}; + const auto yVals = std::vector{1,2,4,2,4}; + const auto data = std::vector{0.02,0.04,0.025,0.01,0.028}; + SparseDistanceMatrix sparse; + ListVector vec; + vec.push_back("0"); + vec.push_back("1"); + vec.push_back("2"); + vec.push_back("3"); + vec.push_back("4"); + const size_t size = xVals.size(); + sparse.resize(size); + for(size_t i = 0; i < size; i++) { + sparse.addCell(xVals[i], PDistCell(yVals[i], data[i])); + } OptimatrixAdapter adapter(0.03); - optiData = adapter.ConvertToOptimatrix(xVals, yVals, data); + optiData = adapter.ConvertToOptimatrix(&sparse, &vec, false); } void OptiDataTestFixture::TearDown() { diff --git a/src/OptimatrixAdapter.cpp b/src/OptimatrixAdapter.cpp index 857a162..11a8f82 100644 --- a/src/OptimatrixAdapter.cpp +++ b/src/OptimatrixAdapter.cpp @@ -7,142 +7,8 @@ #include "Adapters/OptimatrixAdapter.h" #include "Adapters/CountTableAdapter.h" -#include "MothurDependencies/CountTable.h" #include "MothurDependencies/SparseDistanceMatrix.h" -//TODO: We have to create a full matrix, and add the values as from the sparse matrix inside the new matrix. -//TODO: This will create the full matrix, then and only then, will we be able to create a list of closeness, -//TODO: singletons, and names properly. -//Ensure we are not using a square matrix -//Fix the singletonIssue -// -// Hard code for matrix -// WE are going to add test cases in -//DONE Add TESTING SUITE FOR EASY DEBUGGING AND TESTING IN PURE CPP -OptiMatrix *OptimatrixAdapter::ConvertToOptimatrix(const std::vector &xPosition, - const std::vector &yPosition, - const std::vector &data) { - // The closeness map does not contain sequences that have 0 values.... - //If there are no values within the range of it, and its empty, REMOVE IT - // All the values that are correlated to this also belong in the nameMap... - // This was the issue! - // Create the singleton vector - // Create the closeness vector - // Create the names vector - // Creating these will allow us to have a prototype opticluster sparse matrix. - const size_t count = data.size(); - // Get unique names - // TYPE CONVERSIONS TO STRINGS ARE SLOW, but for a one-to-one concept, we are converting to strings - // As per the OptiMatrix class - std::map> closenessMap; - std::vector singletonList; - // Shouldn't be duplicates, but there can be duplicates. There is a way to deal with it - std::unordered_set singletonCandidates; // If your names is not included in this set you are an singleton - std::set names; - for (size_t i = 0; i < count; i++) { - int currentXPos = xPosition[i]; - int currentYPos = yPosition[i]; - if (data[i] > cutoff) // its 1 - score, so the higher your score, the closer to the cutoff you are - { - // You are a singleton, so we will add you to the list - //singletonList.emplace_back(std::to_string(xPosition[i])); - // At this position, I dont cluster with the current x,y Position, so I am a singleton - //List of possible singletons - //I am using the index not the actually position, the xPos is the position to the other. - singletonCandidates.insert(currentXPos); - singletonCandidates.insert(currentYPos); - continue; - } - // xPosition[i] is the name in this context - // Linked list? - closenessMap[currentXPos].emplace(currentYPos); - closenessMap[currentYPos].emplace(currentXPos); - } - //TODO Change this back into a vector, we do not need to delete values - std::vector > adjustedClosenessList; - for (const auto &closenessValues: closenessMap) { - adjustedClosenessList.emplace_back(closenessValues.second.begin(), closenessValues.second.end()); - names.insert(closenessValues.first); - } - //TODO Work on creating singletons properly! - for (const auto &key: singletonCandidates) { - if(names.find(key) != names.end()) //If its not found, its a singleton - continue; - singletonList.emplace_back(std::to_string(key)); - } - int counter = 0; - //Convert the unique singletons over to a list - std::vector namesVector(names.size()); - for (const auto &name: names) { - namesVector[counter++] = std::to_string(name); - } - std::vector > closeness = adjustedClosenessList; - std::vector nameList = namesVector; - std::vector singletons = singletonList; - return new OptiMatrix{adjustedClosenessList, namesVector, singletonList, cutoff}; -} -// TODO Change RowData to sparse matix -OptiMatrix* OptimatrixAdapter::ConvertToOptimatrix(const std::vector& matrixData, const bool sim) { - const auto size = static_cast(matrixData.size()); - std::vector> closeness; - std::vector nameList; - std::vector singletons; - Utils util; - std::vector singletonList(size, true); - nameList.resize(size); - std::unordered_map singletonIndexSwap; - for(long long i = 0; i < size; i++) { - nameList[i] = matrixData[i].name; - singletonIndexSwap[i] = i; - for(long long j = 0; j < i; j++) { - auto distance = static_cast(matrixData[i].rowValues[j]); - const bool equalivance = util.isEqual(distance, -1); - if (equalivance) { - distance = 1000000; - } else if (sim) { - distance = 1.0f - distance; - } - if(distance <= cutoff) { - singletonList[i] = false; // Find out who is a singleton - singletonList[j] = false; - singletonIndexSwap[i] = i; - singletonIndexSwap[j] = j; - } - - } - } - int nonSingletonCount = 0; - for(size_t i = 0; i < singletonList.size(); i ++) { - if(!singletonList[i]) { - singletonIndexSwap[static_cast(i)] = nonSingletonCount; - nonSingletonCount++; - } //Remove all singletonss - else - singletons.emplace_back(matrixData[i].name); - } - closeness.resize(nonSingletonCount); - for(long long i = 0; i < size; i++) { - nameList[singletonIndexSwap[i]] = matrixData[i].name; - for(long long j = 0; j < i; j++) { - auto distance = static_cast(matrixData[i].rowValues[j]); - const bool equalivance = util.isEqual(distance, -1); - if (equalivance) { - distance = 1000000; - } else if (sim) { - distance = 1.0f - distance; - } - if(distance <= cutoff) { - long long newB = singletonIndexSwap[j]; - long long newA = singletonIndexSwap[i]; - closeness[newA].insert(newB); - closeness[newB].insert(newA); - } - - } - } - return new OptiMatrix{closeness, nameList, singletons, cutoff}; -} - OptiMatrix* OptimatrixAdapter::ConvertToOptimatrix(const SparseDistanceMatrix* matrixData, const ListVector* listVector, const bool sim) { const auto size = static_cast(matrixData->seqVec.size()); @@ -157,12 +23,14 @@ OptiMatrix* OptimatrixAdapter::ConvertToOptimatrix(const SparseDistanceMatrix* m } } int count = 0; + int nameOffset = 0; std::vector> closeness(nonSingletonCount); for(const auto& cell : matrixData->seqVec) { - const std::string name =listVector->get(count); - nameList[count] = name; + const std::string name =listVector->get(count + nameOffset); + nameList[count + nameOffset] = name; if(cell.empty()) { singletons.emplace_back(name); + nameOffset++; continue; } std::unordered_set cells; @@ -180,79 +48,6 @@ OptiMatrix* OptimatrixAdapter::ConvertToOptimatrix(const SparseDistanceMatrix* m closeness[count] = cells; count++; } - - - - - - - - - - - - - - - - - - - - // Utils util; - // std::vector singletonList(size, true); - // nameList.resize(size); - // std::unordered_map singletonIndexSwap; - // for(long long i = 0; i < size; i++) { - // nameList[i] = listVector->get(i); - // singletonIndexSwap[i] = i; - // for(long long j = 0; j < static_cast(matrixData->seqVec[i].size()); j++) { - // const auto cell = matrixData->seqVec[i][j]; - // auto distance = static_cast(cell.dist); - // const bool equalivance = util.isEqual(distance, -1); - // if (equalivance) { - // distance = 1000000; - // } else if (sim) { - // distance = 1.0f - distance; - // } - // if(distance <= cutoff) { - // singletonList[i] = false; // Find out who is a singleton - // singletonList[cell.index] = false; - // singletonIndexSwap[i] = i; - // singletonIndexSwap[static_cast(cell.index)] = static_cast(cell.index); - // } - // - // } - // } - // int nonSingletonCount = 0; - // for(size_t i = 0; i < singletonList.size(); i ++) { - // if(!singletonList[i]) { - // singletonIndexSwap[static_cast(i)] = nonSingletonCount++; - // } //Remove all singletonss - // else - // singletons.emplace_back(listVector->get(static_cast(i))); - // } - // closeness.resize(nonSingletonCount); - // for(long long i = 0; i < size; i++) { - // nameList[singletonIndexSwap[i]] = listVector->get(i); - // for(long long j = 0; j < static_cast(matrixData->seqVec[i].size()); j++) { - // const PDistCell cell = matrixData->seqVec[i][j]; - // auto distance = static_cast(cell.dist); - // const bool equalivance = util.isEqual(distance, -1); - // if (equalivance) { - // distance = 1000000; - // } else if (sim) { - // distance = 1.0f - distance; - // } - // if(distance <= cutoff) { - // long long newB = singletonIndexSwap[static_cast(cell.index)]; - // long long newA = singletonIndexSwap[i]; - // closeness[newA].insert(newB); - // closeness[newB].insert(newA); - // } - // - // } - // } return new OptiMatrix{closeness, nameList, singletons, cutoff}; } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 0ec1d68..06f930d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -10,36 +10,6 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif -// WritePhylipFile -void WritePhylipFile(const std::vector& xPosition, const std::vector& yPosition, const std::vector& data, const double cutoff, const Rcpp::DataFrame& countTable, const std::string& saveLocation); -RcppExport SEXP _clustur_WritePhylipFile(SEXP xPositionSEXP, SEXP yPositionSEXP, SEXP dataSEXP, SEXP cutoffSEXP, SEXP countTableSEXP, SEXP saveLocationSEXP) { -BEGIN_RCPP - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::vector& >::type xPosition(xPositionSEXP); - Rcpp::traits::input_parameter< const std::vector& >::type yPosition(yPositionSEXP); - Rcpp::traits::input_parameter< const std::vector& >::type data(dataSEXP); - Rcpp::traits::input_parameter< const double >::type cutoff(cutoffSEXP); - Rcpp::traits::input_parameter< const Rcpp::DataFrame& >::type countTable(countTableSEXP); - Rcpp::traits::input_parameter< const std::string& >::type saveLocation(saveLocationSEXP); - WritePhylipFile(xPosition, yPosition, data, cutoff, countTable, saveLocation); - return R_NilValue; -END_RCPP -} -// WriteColumnFile -void WriteColumnFile(const std::vector& xPosition, const std::vector& yPosition, const std::vector& data, const double cutoff, const Rcpp::DataFrame& countTable, const std::string& saveLocation); -RcppExport SEXP _clustur_WriteColumnFile(SEXP xPositionSEXP, SEXP yPositionSEXP, SEXP dataSEXP, SEXP cutoffSEXP, SEXP countTableSEXP, SEXP saveLocationSEXP) { -BEGIN_RCPP - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::vector& >::type xPosition(xPositionSEXP); - Rcpp::traits::input_parameter< const std::vector& >::type yPosition(yPositionSEXP); - Rcpp::traits::input_parameter< const std::vector& >::type data(dataSEXP); - Rcpp::traits::input_parameter< const double >::type cutoff(cutoffSEXP); - Rcpp::traits::input_parameter< const Rcpp::DataFrame& >::type countTable(countTableSEXP); - Rcpp::traits::input_parameter< const std::string& >::type saveLocation(saveLocationSEXP); - WriteColumnFile(xPosition, yPosition, data, cutoff, countTable, saveLocation); - return R_NilValue; -END_RCPP -} // DetermineIfPhylipOrColumnFile bool DetermineIfPhylipOrColumnFile(const std::string& filePath); RcppExport SEXP _clustur_DetermineIfPhylipOrColumnFile(SEXP filePathSEXP) { @@ -132,14 +102,14 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// CreateDataFrameFromSparse -Rcpp::DataFrame CreateDataFrameFromSparse(const Rcpp::DataFrame& countTable); -RcppExport SEXP _clustur_CreateDataFrameFromSparse(SEXP countTableSEXP) { +// CreateDataFrameFromSparseCountTable +Rcpp::DataFrame CreateDataFrameFromSparseCountTable(const Rcpp::DataFrame& countTable); +RcppExport SEXP _clustur_CreateDataFrameFromSparseCountTable(SEXP countTableSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const Rcpp::DataFrame& >::type countTable(countTableSEXP); - rcpp_result_gen = Rcpp::wrap(CreateDataFrameFromSparse(countTable)); + rcpp_result_gen = Rcpp::wrap(CreateDataFrameFromSparseCountTable(countTable)); return rcpp_result_gen; END_RCPP } @@ -147,8 +117,6 @@ END_RCPP RcppExport SEXP run_testthat_tests(SEXP); static const R_CallMethodDef CallEntries[] = { - {"_clustur_WritePhylipFile", (DL_FUNC) &_clustur_WritePhylipFile, 6}, - {"_clustur_WriteColumnFile", (DL_FUNC) &_clustur_WriteColumnFile, 6}, {"_clustur_DetermineIfPhylipOrColumnFile", (DL_FUNC) &_clustur_DetermineIfPhylipOrColumnFile, 1}, {"_clustur_ProcessDistanceFiles", (DL_FUNC) &_clustur_ProcessDistanceFiles, 4}, {"_clustur_ProcessSparseMatrix", (DL_FUNC) &_clustur_ProcessSparseMatrix, 6}, @@ -156,7 +124,7 @@ static const R_CallMethodDef CallEntries[] = { {"_clustur_GetCountTable", (DL_FUNC) &_clustur_GetCountTable, 1}, {"_clustur_Cluster", (DL_FUNC) &_clustur_Cluster, 5}, {"_clustur_OptiCluster", (DL_FUNC) &_clustur_OptiCluster, 4}, - {"_clustur_CreateDataFrameFromSparse", (DL_FUNC) &_clustur_CreateDataFrameFromSparse, 1}, + {"_clustur_CreateDataFrameFromSparseCountTable", (DL_FUNC) &_clustur_CreateDataFrameFromSparseCountTable, 1}, {"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 1}, {NULL, NULL, 0} }; diff --git a/src/RowData.h b/src/RowData.h deleted file mode 100644 index 69a1cb3..0000000 --- a/src/RowData.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// Created by Gregory Johnson on 6/21/24. -// - -#ifndef ROWDATA_H -#define ROWDATA_H -#include -#include - -struct RowValues { - RowValues(const std::string &name, const float value){ - this->name = name; - this->value = value; - } - std::string name; - float value; -}; -struct RowData { - std::string name; - std::vector rowValues; - static bool SortComparison(const RowData& a, const RowData& b) { - return std::stoi(a.name) < std::stoi(b.name); - } -}; - -#endif //ROWDATA_H diff --git a/src/SparseDistanceMatrix.cpp b/src/SparseDistanceMatrix.cpp index 2232862..b896a09 100644 --- a/src/SparseDistanceMatrix.cpp +++ b/src/SparseDistanceMatrix.cpp @@ -148,16 +148,22 @@ bool SparseDistanceMatrix::print() const{ if(seqVec.empty()) return false; - // std::cout << std::endl; - // //saves time in getSmallestCell, by making it so you dont search the repeats - // for (int i = 0; i < seqVec.size(); i++) { - // std::cout << i << '\t'; - // for (int j = 0; j < seqVec[i].size(); j++) { std::cout << seqVec[i][j].index << '\t' ; } - // std::cout << std::endl; - // } - // std::cout << std::endl; + //saves time in getSmallestCell, by making it so you dont search the repeats + for (size_t i = 0; i < seqVec.size(); i++) { + for (const auto j : seqVec[i]) { Rcpp::Rcout << i << '\t' << j.index << '\t' << j.dist << std::endl; } + } return true; } + +void SparseDistanceMatrix::FilterSparseMatrix(const float cutoff) { + for(int i = 0; i < static_cast(seqVec.size()); i++) { + for(int j = static_cast(seqVec[i].size()) - 1; j >= 0; j--) { + if(seqVec[i][j].dist > cutoff) + rmCell(i, j); + } + } +} + /***********************************************************************/ int SparseDistanceMatrix::sortSeqVec(){ diff --git a/src/Tests/ClusterCommandTestFixture.h b/src/Tests/ClusterCommandTestFixture.h deleted file mode 100644 index 72fb09f..0000000 --- a/src/Tests/ClusterCommandTestFixture.h +++ /dev/null @@ -1,27 +0,0 @@ -// -// Created by Gregory Johnson on 5/1/24. -// - -#ifndef CLUSTERCOMMANDTESTFIXTURE_H -#define CLUSTERCOMMANDTESTFIXTURE_H -#include -#include "TestFixture.h" -#include "../MothurDependencies/ClusterCommand.h" -#include "../MothurDependencies/OptiMatrix.h" - - - -class ClusterCommandTestFixture : TestFixture{ -public: - bool TestRunOptiClusterReturnsCorrectData(OptiMatrix* mockMatrix, - const std::string& expectedResult); - bool TestSetIterationsWorksCorrectly(int iterations, bool expectResult); -private: - ClusterCommand* clusterCommand = nullptr; - void Setup() override; - void TearDown() override; -}; - - - -#endif //CLUSTERCOMMANDTESTFIXTURE_H diff --git a/src/Tests/ColumnReaderTestFixture.h b/src/Tests/ColumnReaderTestFixture.h index e1073c8..420bfb3 100644 --- a/src/Tests/ColumnReaderTestFixture.h +++ b/src/Tests/ColumnReaderTestFixture.h @@ -7,7 +7,6 @@ #include "TestFixture.h" #include #include -#include "../RowData.h" #include "../MothurDependencies/ColumnDistanceMatrixReader.h" diff --git a/src/Tests/MatrixAdapterTestFixture.h b/src/Tests/MatrixAdapterTestFixture.h index b74eb3b..0a3c96c 100644 --- a/src/Tests/MatrixAdapterTestFixture.h +++ b/src/Tests/MatrixAdapterTestFixture.h @@ -13,7 +13,6 @@ class MatrixAdapterTestFixture final: public TestFixture { public: bool TestCreateSparseMatrix(size_t expectedResult); - bool TestCreatePhylipFile(bool expectedResult); bool TestGetListVector(bool createSparseFirst, bool expectedResult); ~MatrixAdapterTestFixture() override; private: diff --git a/src/Tests/PhylipReaderTestFixture.h b/src/Tests/PhylipReaderTestFixture.h index 04c0afb..27dd3f1 100644 --- a/src/Tests/PhylipReaderTestFixture.h +++ b/src/Tests/PhylipReaderTestFixture.h @@ -7,7 +7,6 @@ #include "TestFixture.h" #include #include -#include "../RowData.h" #include "../MothurDependencies/ReadPhylipMatrix.h" diff --git a/src/main.cpp b/src/main.cpp index d5ad8d3..a13fecb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -12,27 +12,6 @@ #include #include -//[[Rcpp::export]] -void WritePhylipFile(const std::vector &xPosition, - const std::vector &yPosition, const std::vector &data, - const double cutoff, const Rcpp::DataFrame& countTable, const std::string& saveLocation) { - - CountTableAdapter countTableAdapter; - countTableAdapter.CreateDataFrameMap(countTable); - MatrixAdapter adapter(xPosition, yPosition, data, cutoff, false, countTableAdapter); - adapter.CreatePhylipFile(saveLocation); -} - -//[[Rcpp::export]] -void WriteColumnFile(const std::vector &xPosition, - const std::vector &yPosition, const std::vector &data, - const double cutoff, const Rcpp::DataFrame& countTable, const std::string& saveLocation) { - - CountTableAdapter countTableAdapter; - countTableAdapter.CreateDataFrameMap(countTable); - MatrixAdapter adapter(xPosition, yPosition, data, cutoff, false, countTableAdapter); - adapter.CreateColumnDataFile(saveLocation); -} Rcpp::DataFrame CreateSharedDataFrame(const CountTableAdapter& countTable, const ClusterExport* result) { SharedFileBuilder builder; @@ -91,7 +70,7 @@ SEXP ProcessSparseMatrix(const std::vector &xPosition, countTableAdapter.CreateDataFrameMap(countTable); MatrixAdapter adapter(xPosition, yPosition, data, cutoff, isSim, countTableAdapter); auto* read = new DistanceFileReader(new SparseDistanceMatrix(adapter.CreateSparseMatrix()), - new ListVector(adapter.CreateListVector()), isSim); + new ListVector(adapter.CreateListVector()), cutoff, isSim); read->CreateCountTableAdapter(countTable); return Rcpp::XPtr(read); } @@ -115,8 +94,11 @@ Rcpp::List Cluster(const SEXP& DistanceData,const std::string& method, const std const Rcpp::XPtr distanceData(DistanceData); const CountTableAdapter countTableAdapter = distanceData.get()->GetCountTableAdapter(); ClusterCommand command; - const auto sparseMatix = distanceData.get()->GetSparseMatrix(); // Going to have to make a copy of sparse matrix + const auto lastCutoff = distanceData.get()->GetCutoff(); const auto listVector = distanceData.get()->GetListVector(); // Going to have to make a copy of list vector, this two values are definitely being changed + auto sparseMatix = distanceData.get()->GetSparseMatrix(); // Going to have to make a copy of sparse matrix + if(cutoff < lastCutoff) + sparseMatix->FilterSparseMatrix(cutoff); const auto result = command.runMothurCluster(method, sparseMatix, cutoff, listVector); const auto label = result->GetListVector().label; const Rcpp::DataFrame clusterDataFrame = result->GetListVector().listVector->CreateDataFrameFromList( @@ -157,10 +139,8 @@ Rcpp::List OptiCluster(const SEXP& DistanceData, const std::string& featureColum } //[[Rcpp::export]] -Rcpp::DataFrame CreateDataFrameFromSparse(const Rcpp::DataFrame& countTable) { +Rcpp::DataFrame CreateDataFrameFromSparseCountTable(const Rcpp::DataFrame& countTable) { CountTableAdapter adapter; adapter.CreateDataFrameMapFromSparseCountTable(countTable); return adapter.ReCreateDataFrame(); } - - diff --git a/src/test-cluster-command.cpp b/src/test-cluster-command.cpp deleted file mode 100644 index 475c720..0000000 --- a/src/test-cluster-command.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Created by Gregory Johnson on 5/2/24. -// -// -// Created by Gregory Johnson on 5/1/24. -// - - -#include "Tests/ClusterCommandTestFixture.h" -#include -// Normally this would be a function from your package's -// compiled library -- you might instead just include a header -// file providing the definition, and let R CMD INSTALL -// handle building and linking. - -// Initialize a unit test context. This is similar to how you -// might begin an R test file with 'context()', expect the -// associated context should be wrapped in braced. -// OptiMatrix* ConvertToOptimatrix(const std::vector -// &xPosition, const std::vector& yPosition, const std::vector& data, int rowSize, int colSize); - -context("ClusterCommand") { - // The format for specifying tests is similar to that of - // testthat's R functions. Use 'test_that()' to define a - // unit test, and use 'expect_true()' and 'expect_false()' - // to test the desired conditions. - test_that("Cluster Command Sets iterations Correctly") { - ClusterCommandTestFixture fixture; - bool result = fixture.TestSetIterationsWorksCorrectly(4, true); - expect_true(result); - result = fixture.TestSetIterationsWorksCorrectly(4, false); - expect_false(result); - } -} diff --git a/src/test-matrix_adapter.cpp b/src/test-matrix_adapter.cpp index 0a7f115..317168a 100644 --- a/src/test-matrix_adapter.cpp +++ b/src/test-matrix_adapter.cpp @@ -19,13 +19,6 @@ context("MatrixAdapter Test") { // testthat's R functions. Use 'test_that()' to define a // unit test, and use 'expect_true()' and 'expect_false()' // to test the desired conditions. - test_that("Matrix Adapter can create phylip files") { - MatrixAdapterTestFixture fixture; - bool result = fixture.TestCreatePhylipFile(true); - expect_true(result); - result = fixture.TestCreatePhylipFile(false); - expect_false(result); - } test_that("Matrix Adapter can get its list vector") { MatrixAdapterTestFixture fixture; diff --git a/src/test-opticluster.cpp b/src/test-opticluster.cpp index 80254c9..1132bca 100644 --- a/src/test-opticluster.cpp +++ b/src/test-opticluster.cpp @@ -40,15 +40,7 @@ context("clustur test") { expect_false(res); } - //TODO: why does this not work on unbuntu/windows. It is also not needed for anything besides stats so I may get rid - //TODO: of it for now. - // test_that("clustur returns proper stats") { - // OptiClusterTestFixture testFixture; - // bool result = testFixture.OpticlusterReturnsTheCorrectStats(1,1,1,1, {0,0,0,0,1,0,0,0}); - // expect_true(result); - // result = testFixture.OpticlusterReturnsTheCorrectStats(1,1,1,1, {0,0,0,0,1,1,0,0}); - // expect_false(result); - // } + test_that("clustur Gets CloseFarCounts properly") { OptiClusterTestFixture testFixture; bool res = testFixture.OptiClusterGetsTheCorrectCloseFarCounts(1, 2, {0,0}); @@ -58,7 +50,7 @@ context("clustur test") { } test_that("clustur Returns ListVector properly") { OptiClusterTestFixture testFixture; - bool res = testFixture.OptiClusterReturnsTheCorrectList(1); + bool res = testFixture.OptiClusterReturnsTheCorrectList(2); expect_true(res); res = testFixture.OptiClusterReturnsTheCorrectList(0); expect_false(res); @@ -72,7 +64,7 @@ context("clustur test") { } test_that("clustur gets the correct number of bins") { OptiClusterTestFixture testFixture; - bool res = testFixture.OptiClusterGetsTheCorrectNumberOfBins(1); + bool res = testFixture.OptiClusterGetsTheCorrectNumberOfBins(2); expect_true(res); res = testFixture.OptiClusterGetsTheCorrectNumberOfBins(0); expect_false(res); diff --git a/src/test-optidata.cpp b/src/test-optidata.cpp index 79c943d..3e1d1f8 100644 --- a/src/test-optidata.cpp +++ b/src/test-optidata.cpp @@ -29,7 +29,7 @@ context("Optidata test") { // to test the desired conditions. test_that("Optidata gets the correct close sequences") { OptiDataTestFixture testFixture; - bool result = testFixture.TestGetCloseSequencesReturnCorrectData(1, {0,2,4}); + bool result = testFixture.TestGetCloseSequencesReturnCorrectData(1, {0,2,3}); expect_true(result); result = testFixture.TestGetCloseSequencesReturnCorrectData(1, {0,1,4}); expect_false(result); @@ -68,7 +68,7 @@ context("Optidata test") { OptiDataTestFixture testFixture; bool result = testFixture.TestGetNumSingletonsReturnsCorrectData(1); expect_true(result); - result = testFixture.TestGetNumSingletonsReturnsCorrectData(2); + result = testFixture.TestGetNumSingletonsReturnsCorrectData(0); expect_false(result); } test_that("Optidata GetList returns a proper listVector and the correct number of numSeqs") {