From 6e64e4b6f0075733bfc3b3b9eae5463924ae4158 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 13 Aug 2024 17:16:47 -0400 Subject: [PATCH] Increase memory efficiency of local autocomplete --- assets/eslint.config.js | 2 +- assets/js/autocomplete.js | 3 +- .../__tests__/local-autocompleter.spec.ts | 30 +++--- assets/js/utils/__tests__/unique-heap.spec.ts | 70 ++++++++++++++ assets/js/utils/local-autocompleter.ts | 90 ++++++++++------- assets/js/utils/unique-heap.ts | 96 +++++++++++++++++++ 6 files changed, 239 insertions(+), 52 deletions(-) create mode 100644 assets/js/utils/__tests__/unique-heap.spec.ts create mode 100644 assets/js/utils/unique-heap.ts diff --git a/assets/eslint.config.js b/assets/eslint.config.js index c927efb6c..2c7a6e636 100644 --- a/assets/eslint.config.js +++ b/assets/eslint.config.js @@ -125,7 +125,7 @@ export default tsEslint.config( 'no-irregular-whitespace': 2, 'no-iterator': 2, 'no-label-var': 2, - 'no-labels': 2, + 'no-labels': [2, { allowSwitch: true, allowLoop: true }], 'no-lone-blocks': 2, 'no-lonely-if': 0, 'no-loop-func': 2, diff --git a/assets/js/autocomplete.js b/assets/js/autocomplete.js index 1a95fb046..8ec0f278d 100644 --- a/assets/js/autocomplete.js +++ b/assets/js/autocomplete.js @@ -237,7 +237,8 @@ function listenAutocomplete() { } const suggestions = localAc - .topK(originalTerm, suggestionsCount) + .matchPrefix(originalTerm) + .topK(suggestionsCount) .map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name })); if (suggestions.length) { diff --git a/assets/js/utils/__tests__/local-autocompleter.spec.ts b/assets/js/utils/__tests__/local-autocompleter.spec.ts index 2310c92d2..5bef0fe1c 100644 --- a/assets/js/utils/__tests__/local-autocompleter.spec.ts +++ b/assets/js/utils/__tests__/local-autocompleter.spec.ts @@ -58,42 +58,44 @@ describe('Local Autocompleter', () => { }); it('should return suggestions for exact tag name match', () => { - const result = localAc.topK('safe', defaultK); - expect(result).toEqual([expect.objectContaining({ name: 'safe', imageCount: 6 })]); + const result = localAc.matchPrefix('safe').topK(defaultK); + expect(result).toEqual([expect.objectContaining({ aliasName: 'safe', name: 'safe', imageCount: 6 })]); }); it('should return suggestion for original tag when passed an alias', () => { - const result = localAc.topK('flowers', defaultK); - expect(result).toEqual([expect.objectContaining({ name: 'flower', imageCount: 1 })]); + const result = localAc.matchPrefix('flowers').topK(defaultK); + expect(result).toEqual([expect.objectContaining({ aliasName: 'flowers', name: 'flower', imageCount: 1 })]); }); it('should return suggestions sorted by image count', () => { - const result = localAc.topK(termStem, defaultK); + const result = localAc.matchPrefix(termStem).topK(defaultK); expect(result).toEqual([ - expect.objectContaining({ name: 'forest', imageCount: 3 }), - expect.objectContaining({ name: 'fog', imageCount: 1 }), - expect.objectContaining({ name: 'force field', imageCount: 1 }), + expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 }), + expect.objectContaining({ aliasName: 'fog', name: 'fog', imageCount: 1 }), + expect.objectContaining({ aliasName: 'force field', name: 'force field', imageCount: 1 }), ]); }); it('should return namespaced suggestions without including namespace', () => { - const result = localAc.topK('test', defaultK); - expect(result).toEqual([expect.objectContaining({ name: 'artist:test', imageCount: 1 })]); + const result = localAc.matchPrefix('test').topK(defaultK); + expect(result).toEqual([ + expect.objectContaining({ aliasName: 'artist:test', name: 'artist:test', imageCount: 1 }), + ]); }); it('should return only the required number of suggestions', () => { - const result = localAc.topK(termStem, 1); - expect(result).toEqual([expect.objectContaining({ name: 'forest', imageCount: 3 })]); + const result = localAc.matchPrefix(termStem).topK(1); + expect(result).toEqual([expect.objectContaining({ aliasName: 'forest', name: 'forest', imageCount: 3 })]); }); it('should NOT return suggestions associated with hidden tags', () => { window.booru.hiddenTagList = [1]; - const result = localAc.topK(termStem, defaultK); + const result = localAc.matchPrefix(termStem).topK(defaultK); expect(result).toEqual([]); }); it('should return empty array for empty prefix', () => { - const result = localAc.topK('', defaultK); + const result = localAc.matchPrefix('').topK(defaultK); expect(result).toEqual([]); }); }); diff --git a/assets/js/utils/__tests__/unique-heap.spec.ts b/assets/js/utils/__tests__/unique-heap.spec.ts new file mode 100644 index 000000000..e7127ef6c --- /dev/null +++ b/assets/js/utils/__tests__/unique-heap.spec.ts @@ -0,0 +1,70 @@ +import { UniqueHeap } from '../unique-heap'; + +describe('Unique Heap', () => { + interface Result { + name: string; + } + + function compare(a: Result, b: Result): boolean { + return a.name < b.name; + } + + test('it should return no results when empty', () => { + const heap = new UniqueHeap(compare, 'name'); + expect(heap.topK(5)).toEqual([]); + }); + + test("doesn't insert duplicate results", () => { + const heap = new UniqueHeap(compare, 'name'); + + heap.append({ name: 'name' }); + heap.append({ name: 'name' }); + + expect(heap.topK(2)).toEqual([expect.objectContaining({ name: 'name' })]); + }); + + test('it should return results in reverse sorted order', () => { + const heap = new UniqueHeap(compare, 'name'); + + const names = [ + 'alpha', + 'beta', + 'gamma', + 'delta', + 'epsilon', + 'zeta', + 'eta', + 'theta', + 'iota', + 'kappa', + 'lambda', + 'mu', + 'nu', + 'xi', + 'omicron', + 'pi', + 'rho', + 'sigma', + 'tau', + 'upsilon', + 'phi', + 'chi', + 'psi', + 'omega', + ]; + + for (const name of names) { + heap.append({ name }); + } + + const results = heap.topK(5); + + expect(results).toEqual([ + expect.objectContaining({ name: 'zeta' }), + expect.objectContaining({ name: 'xi' }), + expect.objectContaining({ name: 'upsilon' }), + expect.objectContaining({ name: 'theta' }), + expect.objectContaining({ name: 'tau' }), + ]); + }); +}); diff --git a/assets/js/utils/local-autocompleter.ts b/assets/js/utils/local-autocompleter.ts index ec3ba1626..8b7521364 100644 --- a/assets/js/utils/local-autocompleter.ts +++ b/assets/js/utils/local-autocompleter.ts @@ -1,12 +1,21 @@ // Client-side tag completion. +import { UniqueHeap } from './unique-heap'; import store from './store'; -interface Result { +export interface Result { + aliasName: string; name: string; imageCount: number; associations: number[]; } +/** + * Returns whether Result a is considered less than Result b. + */ +function compareResult(a: Result, b: Result): boolean { + return a.imageCount === b.imageCount ? a.name > b.name : a.imageCount < b.imageCount; +} + /** * Compare two strings, C-style. */ @@ -18,10 +27,13 @@ function strcmp(a: string, b: string): number { * Returns the name of a tag without any namespace component. */ function nameInNamespace(s: string): string { - const v = s.split(':', 2); + const first = s.indexOf(':'); + + if (first !== -1) { + return s.slice(first + 1); + } - if (v.length === 2) return v[1]; - return v[0]; + return s; } /** @@ -59,7 +71,7 @@ export class LocalAutocompleter { /** * Get a tag's name and its associations given a byte location inside the file. */ - getTagFromLocation(location: number): [string, number[]] { + private getTagFromLocation(location: number, imageCount: number, aliasName?: string): Result { const nameLength = this.view.getUint8(location); const assnLength = this.view.getUint8(location + 1 + nameLength); @@ -70,29 +82,29 @@ export class LocalAutocompleter { associations.push(this.view.getUint32(location + 1 + nameLength + 1 + i * 4, true)); } - return [name, associations]; + return { aliasName: aliasName || name, name, imageCount, associations }; } /** * Get a Result object as the ith tag inside the file. */ - getResultAt(i: number): [string, Result] { - const nameLocation = this.view.getUint32(this.referenceStart + i * 8, true); + private getResultAt(i: number, aliasName?: string): Result { + const tagLocation = this.view.getUint32(this.referenceStart + i * 8, true); const imageCount = this.view.getInt32(this.referenceStart + i * 8 + 4, true); - const [name, associations] = this.getTagFromLocation(nameLocation); + const result = this.getTagFromLocation(tagLocation, imageCount, aliasName); if (imageCount < 0) { // This is actually an alias, so follow it - return [name, this.getResultAt(-imageCount - 1)[1]]; + return this.getResultAt(-imageCount - 1, aliasName || result.name); } - return [name, { name, imageCount, associations }]; + return result; } /** * Get a Result object as the ith tag inside the file, secondary ordering. */ - getSecondaryResultAt(i: number): [string, Result] { + private getSecondaryResultAt(i: number): Result { const referenceIndex = this.view.getUint32(this.secondaryStart + i * 4, true); return this.getResultAt(referenceIndex); } @@ -100,23 +112,22 @@ export class LocalAutocompleter { /** * Perform a binary search to fetch all results matching a condition. */ - scanResults( - getResult: (i: number) => [string, Result], + private scanResults( + getResult: (i: number) => Result, compare: (name: string) => number, - results: Record, + results: UniqueHeap, + hiddenTags: Set, ) { - const unfilter = store.get('unfilter_tag_suggestions'); + const filter = !store.get('unfilter_tag_suggestions'); let min = 0; let max = this.numTags; - const hiddenTags = window.booru.hiddenTagList; - while (min < max - 1) { - const med = (min + (max - min) / 2) | 0; - const sortKey = getResult(med)[0]; + const med = min + (((max - min) / 2) | 0); + const result = getResult(med); - if (compare(sortKey) >= 0) { + if (compare(result.aliasName) >= 0) { // too large, go left max = med; } else { @@ -126,40 +137,47 @@ export class LocalAutocompleter { } // Scan forward until no more matches occur - while (min < this.numTags - 1) { - const [sortKey, result] = getResult(++min); - if (compare(sortKey) !== 0) { + outer: while (min < this.numTags - 1) { + const result = getResult(++min); + + if (compare(result.aliasName) !== 0) { break; } - // Add if not filtering or no associations are filtered - if (unfilter || hiddenTags.findIndex(ht => result.associations.includes(ht)) === -1) { - results[result.name] = result; + // Check if any associations are filtered + if (filter) { + for (const association of result.associations) { + if (hiddenTags.has(association)) { + continue outer; + } + } } + + // Nothing was filtered, so add + results.append(result); } } /** * Find the top k results by image count which match the given string prefix. */ - topK(prefix: string, k: number): Result[] { - const results: Record = {}; + matchPrefix(prefix: string): UniqueHeap { + const results = new UniqueHeap(compareResult, 'name'); if (prefix === '') { - return []; + return results; } + const hiddenTags = new Set(window.booru.hiddenTagList); + // Find normally, in full name-sorted order const prefixMatch = (name: string) => strcmp(name.slice(0, prefix.length), prefix); - this.scanResults(this.getResultAt.bind(this), prefixMatch, results); + this.scanResults(this.getResultAt.bind(this), prefixMatch, results, hiddenTags); // Find in secondary order const namespaceMatch = (name: string) => strcmp(nameInNamespace(name).slice(0, prefix.length), prefix); - this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results); - - // Sort results by image count - const sorted = Object.values(results).sort((a, b) => b.imageCount - a.imageCount); + this.scanResults(this.getSecondaryResultAt.bind(this), namespaceMatch, results, hiddenTags); - return sorted.slice(0, k); + return results; } } diff --git a/assets/js/utils/unique-heap.ts b/assets/js/utils/unique-heap.ts new file mode 100644 index 000000000..3b4e840c0 --- /dev/null +++ b/assets/js/utils/unique-heap.ts @@ -0,0 +1,96 @@ +export type Compare = (a: T, b: T) => boolean; + +export class UniqueHeap { + private keys: Set; + private values: T[]; + private keyName: keyof T; + private compare: Compare; + + constructor(compare: Compare, keyName: keyof T) { + this.keys = new Set(); + this.values = []; + this.keyName = keyName; + this.compare = compare; + } + + append(value: T) { + const key = value[this.keyName]; + + if (!this.keys.has(key)) { + this.keys.add(key); + this.values.push(value); + } + } + + topK(k: number): T[] { + // Create the output array. + const output: T[] = []; + + for (const result of this.results()) { + if (output.length >= k) { + break; + } + + output.push(result); + } + + return output; + } + + *results(): Generator { + const { values } = this; + const length = values.length; + + // Build the heap. + for (let i = (length >> 1) - 1; i >= 0; i--) { + this.heapify(length, i); + } + + // Begin extracting values. + for (let i = 0; i < length; i++) { + // Top value is the largest. + yield values[0]; + + // Swap with the element at the end. + const lastIndex = length - i - 1; + values[0] = values[lastIndex]; + + // Restore top value being the largest. + this.heapify(lastIndex, 0); + } + } + + private heapify(length: number, initialIndex: number) { + const { compare, values } = this; + let i = initialIndex; + + while (true) { + const left = 2 * i + 1; + const right = 2 * i + 2; + let largest = i; + + if (left < length && compare(values[largest], values[left])) { + // Left child is in-bounds and larger than parent. Swap with left. + largest = left; + } + + if (right < length && compare(values[largest], values[right])) { + // Right child is in-bounds and larger than parent or left. Swap with right. + largest = right; + } + + if (largest === i) { + // Largest value was already the parent. Done. + return; + } + + // Swap. + const temp = values[i]; + values[i] = values[largest]; + values[largest] = temp; + + // Repair the subtree previously containing the largest element. + i = largest; + } + } +}