Skip to content

Commit

Permalink
Update, propagate to DBGSuccinct
Browse files Browse the repository at this point in the history
  • Loading branch information
adamant-pwn committed Aug 9, 2024
1 parent f2e15e4 commit f3fb0b0
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 26 deletions.
17 changes: 13 additions & 4 deletions metagraph/src/annotation/row_diff_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,16 @@ void sum_and_call_counts(const fs::path &dir,
}
}

rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
// Translates a graph node index into the index space of the underlying
// representation: for a DBGSuccinct the result is kmer_to_boss_index(idx),
// for every other graph type the index is returned unchanged.
uint64_t from_graph_index(const graph::DeBruijnGraph &graph,
                          graph::DeBruijnGraph::node_index idx) {
    const auto *succinct = dynamic_cast<const graph::DBGSuccinct*>(&graph);
    if (succinct != nullptr) {
        return succinct->kmer_to_boss_index(idx);
    }
    // Not a succinct graph: the node index is used as is.
    return idx;
}

rd_succ_bv_type route_at_forks(const graph::DeBruijnGraph &graph,
const std::string &rd_succ_filename,
const std::string &count_vectors_dir,
const std::string &row_count_extension) {
Expand All @@ -282,7 +291,7 @@ rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
logger->trace("RowDiff successors will be set to the adjacent nodes with"
" the largest number of labels");

const bit_vector &last = graph.get_boss().get_last();
const bit_vector &last = *graph.get_last();
graph::DeBruijnGraph::node_index graph_idx = to_node(0);

std::vector<uint32_t> outgoing_counts;
Expand All @@ -293,12 +302,12 @@ rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
[&](int32_t count) {
// TODO: skip single outgoing
outgoing_counts.push_back(count);
if (last[graph.kmer_to_boss_index(graph_idx)]) {
if (last[from_graph_index(graph, graph_idx)]) {
// pick the node with the largest count
size_t max_pos = std::max_element(outgoing_counts.rbegin(),
outgoing_counts.rend())
- outgoing_counts.rbegin();
rd_succ_bv[graph.kmer_to_boss_index(graph_idx - max_pos)] = true;
rd_succ_bv[from_graph_index(graph, graph_idx - max_pos)] = true;
outgoing_counts.resize(0);
}
graph_idx++;
Expand Down
46 changes: 34 additions & 12 deletions metagraph/src/graph/representation/base/sequence_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <sdsl/int_vector.hpp>

#include "common/logger.hpp"
#include "common/vectors/bit_vector_dyn.hpp"
#include "common/seq_tools/reverse_complement.hpp"
#include "common/threads/threading.hpp"
#include "common/vectors/vector_algorithm.hpp"
Expand Down Expand Up @@ -420,11 +421,23 @@ void DeBruijnGraph::call_unitigs(const CallPath &callback,
::mtg::graph::call_sequences(*this, callback, num_threads, true, min_tip_size, kmers_in_single_form);
}

std::shared_ptr<const bit_vector> DeBruijnGraph::get_last() const {
bit_vector_dyn last_bv(max_index() + 1);
call_nodes([&](node_index v) {
std::pair<char, node_index> last;
call_outgoing_kmers(v, [&](node_index u, char c) {
last = std::max(last, std::pair{c, u});
});
last_bv.set(last.second, true);
});
return std::make_shared<bit_vector_dyn>(std::move(last_bv));
}

void DeBruijnGraph::row_diff_traverse(size_t num_threads,
size_t max_length,
const bit_vector &rd_succ,
sdsl::bit_vector *terminal) const {
sdsl::bit_vector visited(max_index() + 1, false);
sdsl::bit_vector visited(max_index() + 1);
auto finalised = visited;
std::vector<size_t> distance(max_index() + 1);
assert(terminal->size() == visited.size());
Expand All @@ -434,9 +447,6 @@ void DeBruijnGraph::row_diff_traverse(size_t num_threads,
(*terminal)[v] = true;
};
call_nodes([&](node_index v) {
if (visited[v]) {
return;
}
static std::stack<node_index> path;
while (!visited[v]) {
path.push(v);
Expand All @@ -445,25 +455,37 @@ void DeBruijnGraph::row_diff_traverse(size_t num_threads,
v = row_diff_successor(v, rd_succ);
}
}
// Either a sink, or a cyclic dependency
if (!finalised[v]) {
set_terminal(v);
finalised[v] = true;
}
node_index u = v;
while (!path.empty()) {
std::tie(u, v) = std::tie(path.top(), u);
if (!finalised[u]) {
distance[u] = distance[v] + 1;
if (distance[u] == max_length) {
set_terminal(u);
node_index succ;
while (!empty(path)) {
succ = std::exchange(v, path.top());
if (!finalised[v]) {
distance[v] = distance[succ] + 1;
if (distance[v] == max_length) {
set_terminal(v);
}
finalised[u] = true;
finalised[v] = true;
}
path.pop();
}
});
}

/**
 * Returns the outgoing neighbour of `node` whose bit is set in `rd_succ`.
 * If several neighbours are marked, the one reported last by
 * adjacent_outgoing_nodes() is returned. Asserts that at least one
 * marked neighbour exists.
 */
node_index DeBruijnGraph::row_diff_successor(node_index node, const bit_vector &rd_succ) const {
    node_index chosen = npos;
    const auto pick_if_marked = [&chosen, &rd_succ](node_index candidate) {
        if (!rd_succ[candidate]) {
            return;
        }
        // Later marked neighbours overwrite earlier ones.
        chosen = candidate;
    };
    adjacent_outgoing_nodes(node, pick_if_marked);
    assert(chosen != npos && "a row diff successor must exist");
    return chosen;
}

/**
* Traverse graph and iterate over all nodes
*/
Expand Down
13 changes: 3 additions & 10 deletions metagraph/src/graph/representation/base/sequence_graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,21 +247,14 @@ class DeBruijnGraph : public SequenceGraph {
// Call all nodes that have no incoming edges
virtual void call_source_nodes(const std::function<void(node_index)> &callback) const;

virtual std::shared_ptr<const bit_vector> get_last() const;

virtual void row_diff_traverse(size_t num_threads,
size_t max_length,
const bit_vector &rd_succ,
sdsl::bit_vector *terminal) const;

virtual node_index row_diff_successor(node_index node, const bit_vector &rd_succ) const {
node_index succ = npos;
adjacent_outgoing_nodes(node, [&](node_index adjacent_node) {
if(rd_succ[adjacent_node]) {
succ = adjacent_node;
}
});
assert(succ != npos && "a row diff successor must exist");
return succ;
}
virtual node_index row_diff_successor(node_index node, const bit_vector &rd_succ) const;
};


Expand Down
21 changes: 21 additions & 0 deletions metagraph/src/graph/representation/succinct/dbg_succinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,27 @@ ::call_source_nodes(const std::function<void(node_index)> &callback) const {
});
}

// Exposes the `last` bit vector of the underlying BOSS table through the
// generic DeBruijnGraph interface without taking ownership of it.
std::shared_ptr<const bit_vector> DBGSuccinct
::get_last() const {
    // The vector is obtained from get_boss(); the deleter is intentionally a
    // no-op so that releasing the shared_ptr never destroys it.
    const auto leave_untouched = [](const bit_vector*) {};
    return std::shared_ptr<const bit_vector>(&get_boss().get_last(), leave_untouched);
}

// Forwards the row-diff traversal to the underlying BOSS table; all
// arguments are passed through unchanged.
void DBGSuccinct
::row_diff_traverse(size_t num_threads,
                    size_t max_length,
                    const bit_vector &rd_succ,
                    sdsl::bit_vector *terminal) const {
    const auto &boss = get_boss();
    boss.row_diff_traverse(num_threads, max_length, rd_succ, terminal);
}

// Forwards the row-diff successor lookup to the underlying BOSS table.
// NOTE(review): `node` and the result are passed through without
// kmer_to_boss_index()/boss_to_kmer_index() conversion -- confirm that the
// BOSS method expects and returns indices in the same space as callers use.
node_index DBGSuccinct
::row_diff_successor(node_index node, const bit_vector &rd_succ) const {
    const auto &boss = get_boss();
    return boss.row_diff_successor(node, rd_succ);
}


size_t DBGSuccinct::outdegree(node_index node) const {
assert(node > 0 && node <= num_nodes());

Expand Down
9 changes: 9 additions & 0 deletions metagraph/src/graph/representation/succinct/dbg_succinct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ class DBGSuccinct : public DeBruijnGraph {

virtual void call_source_nodes(const std::function<void(node_index)> &callback) const override final;

virtual std::shared_ptr<const bit_vector> get_last() const override final;

virtual void row_diff_traverse(size_t num_threads,
size_t max_length,
const bit_vector &rd_succ,
sdsl::bit_vector *terminal) const override final;

virtual node_index row_diff_successor(node_index node, const bit_vector &rd_succ) const override final;

uint64_t kmer_to_boss_index(node_index kmer_index) const;
node_index boss_to_kmer_index(uint64_t boss_index) const;

Expand Down

0 comments on commit f3fb0b0

Please sign in to comment.