From 429a9de315509ccfc6cf9fca7530490fc274718b Mon Sep 17 00:00:00 2001
From: Oleksandr Kulkov
Date: Thu, 23 May 2024 15:30:16 +0200
Subject: [PATCH] Add row_diff_traverse, row_diff_successor

---
 .github/workflows/main.yml                    |  2 -
 .../representation/base/sequence_graph.cpp    | 78 +++++++++++++++++++
 .../representation/base/sequence_graph.hpp    | 30 +++++++
 3 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c0e2d4ef28..3be8ef96e3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,8 +7,6 @@ on:
     tags:
       - 'v*'
   pull_request:
-    branches:
-      - master
 
 env:
   REGISTRY: ghcr.io
diff --git a/metagraph/src/graph/representation/base/sequence_graph.cpp b/metagraph/src/graph/representation/base/sequence_graph.cpp
index bc19dede84..23cc34e8fa 100644
--- a/metagraph/src/graph/representation/base/sequence_graph.cpp
+++ b/metagraph/src/graph/representation/base/sequence_graph.cpp
@@ -396,6 +396,84 @@ void DeBruijnGraph::call_unitigs(const CallPath &callback,
     ::mtg::graph::call_sequences(*this, callback, num_threads, true, min_tip_size, kmers_in_single_form);
 }
 
+/**
+ * Walk the row-diff successor chain of every node and mark terminal nodes.
+ *
+ * Starting from each node that has not been visited yet, the walk moves to
+ * row_diff_successor() until it hits a node without outgoing edges or a node
+ * visited earlier. If that final node is not finalised yet (a sink, or a node
+ * of the current path closing a cycle), it is marked terminal. The path is
+ * then unwound: each node gets the distance of its successor plus one, and a
+ * node whose distance reaches `max_length` is marked terminal too, which
+ * resets its distance to 0.
+ *
+ * @param num_threads  not used by this implementation
+ * @param max_length   distance to the next terminal at which a node is itself
+ *                     marked terminal
+ * @param rd_succ      bit vector with max_index() + 1 bits, passed on to
+ *                     row_diff_successor()
+ * @param terminal     non-null bit vector with max_index() + 1 bits; bits of
+ *                     terminal nodes are set, all other bits are left as is
+ */
+void DeBruijnGraph::row_diff_traverse(size_t num_threads,
+                                      size_t max_length,
+                                      const bit_vector &rd_succ,
+                                      sdsl::bit_vector *terminal) const {
+    assert(terminal && "the output bit vector must be provided");
+
+    // visited: the node was pushed on some path; finalised: its distance is settled
+    sdsl::bit_vector visited(max_index() + 1, false);
+    auto finalised = visited;
+    std::vector<size_t> distance(visited.size());
+    assert(terminal->size() == visited.size());
+    assert(rd_succ.size() == visited.size());
+
+    // takes node_index rather than int so that large node ids are not truncated
+    auto set_terminal = [&](node_index node) {
+        distance[node] = 0;
+        (*terminal)[node] = true;
+    };
+
+    // Owned by this call and reused by every callback invocation (it is empty
+    // between them); a function-local static would be shared process-wide.
+    std::stack<node_index> path;
+
+    call_nodes([&](node_index v) {
+        if (visited[v]) {
+            return;
+        }
+
+        while (!visited[v]) {
+            path.push(v);
+            visited[v] = true;
+            if (!has_no_outgoing(v)) {
+                v = row_diff_successor(v, rd_succ);
+            }
+        }
+
+        if (!finalised[v]) {
+            set_terminal(v);
+            finalised[v] = true;
+        }
+
+        // Unwind the path. `next` always holds the successor-side neighbour of
+        // `cur`; it is updated only after `cur` has been processed.
+        node_index next = v;
+        while (!path.empty()) {
+            const node_index cur = path.top();
+            path.pop();
+            if (!finalised[cur]) {
+                distance[cur] = distance[next] + 1;
+                if (distance[cur] == max_length) {
+                    set_terminal(cur);
+                }
+                finalised[cur] = true;
+            }
+            next = cur;
+        }
+    });
+}
+
 /**
  * Traverse graph and iterate over all nodes
  */
diff --git a/metagraph/src/graph/representation/base/sequence_graph.hpp b/metagraph/src/graph/representation/base/sequence_graph.hpp
index 96d13cad34..c85bfb2b0f 100644
--- a/metagraph/src/graph/representation/base/sequence_graph.hpp
+++ b/metagraph/src/graph/representation/base/sequence_graph.hpp
@@ -1,6 +1,8 @@
 #ifndef __SEQUENCE_GRAPH_HPP__
 #define __SEQUENCE_GRAPH_HPP__
 
+#include "common/vectors/bit_vector.hpp"
+
 #include
 #include
 #include
@@ -198,6 +200,7 @@ class DeBruijnGraph : public SequenceGraph {
     virtual void call_kmers(const std::function &callback) const;
 
     virtual size_t outdegree(node_index) const = 0;
+    virtual bool has_no_outgoing(node_index node) const { return outdegree(node) == 0; }
     virtual bool has_single_outgoing(node_index node) const { return outdegree(node) == 1; }
     virtual bool has_multiple_outgoing(node_index node) const { return outdegree(node) > 1; }
 
@@ -231,6 +234,33 @@ class DeBruijnGraph : public SequenceGraph {
 
     // Call all nodes that have no incoming edges
     virtual void call_source_nodes(const std::function &callback) const;
+
+    /**
+     * Follow row_diff_successor() from every node and set the bits of the
+     * terminal nodes in `terminal`; a node whose distance to the next terminal
+     * reaches `max_length` is itself marked terminal.
+     * `rd_succ` and `terminal` must both have max_index() + 1 bits.
+     */
+    virtual void row_diff_traverse(size_t num_threads,
+                                   size_t max_length,
+                                   const bit_vector &rd_succ,
+                                   sdsl::bit_vector *terminal) const;
+
+    /**
+     * Return the outgoing neighbour of `node` that is flagged in `rd_succ`.
+     * If several neighbours are flagged, the one reported last by
+     * adjacent_outgoing_nodes() is returned. At least one must be flagged:
+     * this is asserted, and npos is returned when asserts are compiled out.
+     */
+    virtual node_index row_diff_successor(node_index node, const bit_vector &rd_succ) const {
+        node_index succ = npos;
+        adjacent_outgoing_nodes(node, [&](node_index adjacent_node) {
+            if (rd_succ[adjacent_node]) {
+                succ = adjacent_node;
+            }
+        });
+        assert(succ != npos && "a row diff successor must exist");
+        return succ;
+    }
 };
 
 