Skip to content

Commit

Permalink
Use hash map instead of binary search. Extend benchmarks.
Browse files Browse the repository at this point in the history
  • Loading branch information
ielis committed Nov 1, 2023
1 parent 6a470a0 commit 474ffa7
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.monarchinitiative.phenol.graph.csr.mono;

import org.monarchinitiative.phenol.graph.NodeNotPresentInGraphException;
import org.monarchinitiative.phenol.graph.OntologyGraph;
import org.monarchinitiative.phenol.graph.csr.util.Util;
import org.monarchinitiative.phenol.utils.IterableIteratorWrapper;

import java.util.*;
Expand All @@ -15,19 +15,16 @@
public class CsrMonoOntologyGraph<T> implements OntologyGraph<T> {

private final T root;
private final T[] nodes;
private final Comparator<T> comparator;
private final Map<T, Integer> nodesToIdx;
private final StaticCsrArray<T> parents;
private final StaticCsrArray<T> children;

CsrMonoOntologyGraph(T root,
T[] nodes,
Comparator<T> comparator,
Map<T, Integer> nodesToIdx,
StaticCsrArray<T> parents,
StaticCsrArray<T> children) {
this.root = Objects.requireNonNull(root);
this.nodes = Objects.requireNonNull(nodes);
this.comparator = Objects.requireNonNull(comparator);
this.nodesToIdx = Objects.requireNonNull(nodesToIdx);
this.parents = Objects.requireNonNull(parents);
this.children = Objects.requireNonNull(children);
}
Expand All @@ -40,8 +37,11 @@ StaticCsrArray<T> getChildArray() {
return children;
}

T[] getNodes() {
return nodes;
private int getNodeIdx(T node) {
Integer idx = nodesToIdx.get(node);
if (idx == null)
throw new NodeNotPresentInGraphException(String.format("Item not found in the graph: %s", node));
return idx;
}

@Override
Expand All @@ -57,7 +57,7 @@ public Iterable<T> getChildren(T source, boolean includeSource) {
@Override
public Iterable<T> getDescendants(T source, boolean includeSource) {
// Check if `source` is in the graph.
int intentionallyUnused = Util.getIndexOfUsingBinarySearch(source, nodes, comparator);
int intentionallyUnused = getNodeIdx(source);

return new IterableIteratorWrapper<>(() -> new TraversingIterator<>(source, src -> getChildren(src, includeSource)));
}
Expand All @@ -70,15 +70,16 @@ public Iterable<T> getParents(T source, boolean includeSource) {
@Override
public Iterable<T> getAncestors(T source, boolean includeSource) {
// Check if `source` is in the graph.
int intentionallyUnused = Util.getIndexOfUsingBinarySearch(source, nodes, comparator);
int intentionallyUnused = getNodeIdx(source);

return new IterableIteratorWrapper<>(() -> new TraversingIterator<>(source, src -> getParents(src, includeSource)));
}

private Iterable<T> getImmediateNeighbors(StaticCsrArray<T> array,
T source,
boolean includeSource) {
int index = Util.getIndexOfUsingBinarySearch(source, nodes, comparator);
int index = getNodeIdx(source);

Set<T> nodes = array.getOutgoingNodes(index);

return includeSource
Expand All @@ -88,13 +89,12 @@ private Iterable<T> getImmediateNeighbors(StaticCsrArray<T> array,

@Override
public int size() {
return nodes.length;
return nodesToIdx.size();
}

@Override
public Iterator<T> iterator() {
// TODO - can we do better here?
return Arrays.stream(nodes).iterator();
return nodesToIdx.keySet().iterator();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -55,8 +52,13 @@ public CsrMonoOntologyGraph<TermId> build(TermId root, Collection<? extends Onto
.toArray(TermId[]::new);

CsrData<TermId> csrData = makeCsrData(nodes, hierarchyEdges);
Map<TermId, Integer> nodeToIdx = new HashMap<>();
for (int i = 0; i < nodes.length; i++) {
TermId node = nodes[i];
nodeToIdx.put(node, i);
}

return new CsrMonoOntologyGraph<>(root, nodes, TermId::compareTo, csrData.getParents(), csrData.getChildren());
return new CsrMonoOntologyGraph<>(root, nodeToIdx, csrData.getParents(), csrData.getChildren());
}

private CsrData<TermId> makeCsrData(TermId[] nodes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ public void checkData() {
/* HP:03 */ {"HP:1"},
/* HP:1 */ {},
};
assertThatGroupsAreConsistent(graph.getNodes(), graph.getParentArray(), expectedParents);


assertThatGroupsAreConsistent(graph.size(), graph.getParentArray(), expectedParents);

String[][] expectedChildren = {
/* HP:01 */ {"HP:010", "HP:011"},
Expand All @@ -70,15 +72,15 @@ public void checkData() {
/* HP:03 */ {},
/* HP:1 */ {"HP:01", "HP:02", "HP:03"},
};
assertThatGroupsAreConsistent(graph.getNodes(), graph.getChildArray(), expectedChildren);
assertThatGroupsAreConsistent(graph.size(), graph.getChildArray(), expectedChildren);
}

private void assertThatGroupsAreConsistent(TermId[] nodes,
private void assertThatGroupsAreConsistent(int size,
StaticCsrArray<TermId> array,
String[][] expectedCuries) {
int[] indptr = array.getIndptr();
TermId[] data = array.getData();
for (int i = 0; i < nodes.length; i++) {
for (int i = 0; i < size; i++) {
int start = indptr[i];
int end = indptr[i + 1];
String[] curies = expectedCuries[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,87 @@ public class OntologyGraphBench {
@Benchmark
public void poly_getParents(CsrPolyOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
blackhole.consume(ontology.ontology.graph().getParents(termId, false));
ontology.ontology.graph().getParents(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void poly_getChildren(CsrPolyOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
blackhole.consume(ontology.ontology.graph().getChildren(termId, false));
ontology.ontology.graph().getChildren(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void poly_getAncestors(CsrPolyOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
ontology.ontology.graph().getAncestors(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void poly_getDescendants(CsrPolyOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
ontology.ontology.graph().getDescendants(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void mono_getParents(CsrMonoOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
blackhole.consume(ontology.ontology.graph().getParents(termId, false));
ontology.ontology.graph().getParents(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void mono_getChildren(CsrMonoOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
blackhole.consume(ontology.ontology.graph().getChildren(termId, false));
ontology.ontology.graph().getChildren(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void mono_getAncestors(CsrMonoOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
ontology.ontology.graph().getAncestors(termId, false)
.forEach(blackhole::consume);
}
}

@Benchmark
public void mono_getDescendants(CsrMonoOntologyGraphSetup ontology, Blackhole blackhole) {
for (TermId termId : ontology.primaryTermIds) {
ontology.ontology.graph().getDescendants(termId, false)
.forEach(blackhole::consume);
}
}

/*
# Run complete. Total time: 00:33:59
Commit 6a470a0f
REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on
why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial
experiments, perform baseline and negative tests that provide experimental control, make sure
the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts.
Do not assume the numbers tell you what you want them to tell.
NOTE: Current JVM experimentally supports Compiler Blackholes, and they are in use. Please exercise
extra caution when trusting the results, look into the generated code to check the benchmark still
works, and factor in a small probability of new VM bugs. Additionally, while comparisons between
different JVMs are already problematic, the performance difference caused by different Blackhole
modes can be very significant. Please make sure you use the consistent Blackhole mode for comparisons.
Benchmark Mode Cnt Score Error Units
OntologyGraphBench.mono_getChildren thrpt 25 163.773 ± 11.709 ops/s
OntologyGraphBench.mono_getParents thrpt 25 149.663 ± 8.890 ops/s
OntologyGraphBench.poly_getChildren thrpt 25 134.231 ± 7.379 ops/s
OntologyGraphBench.poly_getParents thrpt 25 131.846 ± 5.647 ops/s
*/
}

0 comments on commit 474ffa7

Please sign in to comment.