Skip to content

Commit

Permalink
Plugin: stop considering docs with zero LSH hash matches (#720)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexklibisz authored Aug 28, 2024
1 parent c5a8e21 commit b28ae02
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 33 deletions.
2 changes: 1 addition & 1 deletion docs/pages/performance/fashion-mnist/plot.b64

Large diffs are not rendered by default.

Binary file modified docs/pages/performance/fashion-mnist/plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 8 additions & 8 deletions docs/pages/performance/fashion-mnist/results.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
|Model|Parameters|Recall|Queries per Second|
|---|---|---|---|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.378|375.370|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.447|320.039|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.635|294.600|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.716|257.913|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|332.779|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.846|289.472|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.921|220.716|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|204.668|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=0|0.379|380.371|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=0|0.447|305.831|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=500 probes=3|0.635|292.164|
|eknn-l2lsh|L=100 k=4 w=1024 candidates=1000 probes=3|0.717|253.758|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=0|0.767|330.408|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=0|0.847|278.242|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=500 probes=3|0.921|221.691|
|eknn-l2lsh|L=100 k=4 w=2048 candidates=1000 probes=3|0.960|197.147|
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,17 @@ private KthGreatestResult kthGreatest(int k) {
// accumulating counts of counts until we've exceeded k.
int numGreaterEqual = 0;
short kthGreatest = maxValue;
while (kthGreatest > 0) {

while (true) {
numGreaterEqual += hist[kthGreatest];
if (numGreaterEqual > k) break;
else kthGreatest--;
if (kthGreatest > 1 && numGreaterEqual < k) kthGreatest--;
else break;
}

// Finally, we find the number that were greater than the kth greatest count.
// There's a special case: if kthGreatest is zero, then the number that were greater is the number of hits.
int numGreater = numGreaterEqual - hist[kthGreatest];
if (kthGreatest == 0) numGreater = numHits;
return new KthGreatestResult(kthGreatest, numGreater, numHits);
return new KthGreatestResult(kthGreatest, numGreater);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
public class KthGreatestResult {
public final short kthGreatest;
public final int numGreaterThan;
public final int numNonZero;
public KthGreatestResult(short kthGreatest, int numGreaterThan, int numNonZero) {
public KthGreatestResult(short kthGreatest, int numGreaterThan) {
this.kthGreatest = kthGreatest;
this.numGreaterThan = numGreaterThan;
this.numNonZero = numNonZero;
}

@Override
Expand All @@ -17,12 +15,12 @@ public boolean equals(Object o) {
} else if (!(o instanceof KthGreatestResult other)) {
return false;
} else {
return kthGreatest == other.kthGreatest && numGreaterThan == other.numGreaterThan && numNonZero == other.numNonZero;
return kthGreatest == other.kthGreatest && numGreaterThan == other.numGreaterThan;
}
}

@Override
public String toString() {
return String.format("KthGreatestResult(kthGreatest=%d, numGreaterThan=%d, numNonZero=%d)", kthGreatest, numGreaterThan, numNonZero);
return String.format("KthGreatestResult(kthGreatest=%d, numGreaterThan=%d)", kthGreatest, numGreaterThan);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ final class ArrayHitCounterSpec extends AnyFreeSpec with Matchers {
// A very naive/inefficient way to implement the DocIdSetIterator.
if (k == 0 || counts.isEmpty) DocIdSetIterator.empty()
else {
// This is a hack to replicate a bug in how we emit doc IDs.
// Basically if the kth greatest value is zero, we end up emitting docs that were never matched,
// so we need to fill the map with zeros to replicate the behavior here.
val minKey = counts.keys.min
val maxKey = counts.keys.max
(minKey to maxKey).foreach(k => counts.update(k, counts(k)))

val valuesSorted = counts.values.toArray.sorted.reverse
val kthGreatest = valuesSorted.take(k).last
val greaterDocIds = counts.filter(_._2 > kthGreatest).keys.toArray
Expand Down Expand Up @@ -116,23 +109,19 @@ final class ArrayHitCounterSpec extends AnyFreeSpec with Matchers {
ahc.get(doc) shouldBe ref.get(doc)
}
val k = rng.nextInt(numDocs)
val actualDocIds = consumeDocIdSetIterator(ahc.docIdSetIterator(k))
val referenceDocIds = consumeDocIdSetIterator(ref.docIdSetIterator(k))
val actualDocIds = consumeDocIdSetIterator(ahc.docIdSetIterator(k))

referenceDocIds shouldBe actualDocIds
}
}

"the counter emits docs that had zero matches (bug, https://github.com/alexklibisz/elastiknn/issues/715)" in {
"the DocIdSetIterator omits docs that had zero matches" in {
// Only documents 0 and 9 had a hit, so we should expect to only emit those two.
// But the k=10th greatest value is 0, so we end up emitting all of the doc IDs,
// including 8 of which had zero hits.
val ahc = new ArrayHitCounter(10)
ahc.increment(0)
ahc.increment(9)
val docIds = consumeDocIdSetIterator(ahc.docIdSetIterator(10))
docIds shouldBe List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
// Once the bug is fixed, this should be the correct result:
// docIds shouldBe List(0, 9)
docIds shouldBe List(0, 9)
}
}

0 comments on commit b28ae02

Please sign in to comment.