Skip to content

Commit

Permalink
derive a numeric seed from non-numeric-looking seed strings via digest
Browse files Browse the repository at this point in the history
  • Loading branch information
brontolosone committed Dec 16, 2024
1 parent 2bda8af commit 9bb403c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 10 deletions.
39 changes: 36 additions & 3 deletions packages/xpath/src/functions/xforms/node-set.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import sha256 from 'crypto-js/sha256';

import type { XPathNode } from '../../adapter/interface/XPathNode.ts';
import type { XPathDOMProvider } from '../../adapter/xpathDOMProvider.ts';
import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts';
Expand Down Expand Up @@ -384,8 +386,39 @@ export const randomize = new NodeSetFunction(

const nodeResults = Array.from(results.values());
const nodes = nodeResults.map(({ value }) => value);
const seed = seedExpression?.evaluate(context).toNumber();

return seededRandomize(nodes, seed);
if (seedExpression === undefined) return seededRandomize(nodes);
const seed = seedExpression.evaluate(context);
const asNumber = seed.toNumber(); // TODO: There are some peculiarities to address: https://github.com/getodk/web-forms/issues/240
let finalSeed: number | bigint | undefined;
if (Number.isNaN(asNumber)) {
// Specific behaviors for when a seed value is not interpretable as numeric.
// We still want to derive a seed in those cases, see https://github.com/getodk/javarosa/issues/800
const seedString = seed.toString();
if (seedString === '') {
finalSeed = 0; // special case: JR behaviour
} else {
// any other string, we'll convert to a number via a digest function
finalSeed = toBigIntHash(seedString);
}
} else {
finalSeed = asNumber;
}
return seededRandomize(nodes, finalSeed);
}
);

/**
 * Derives a 64-bit integer from arbitrary text by hashing it with SHA-256 and
 * keeping only the leading 64 bits of the digest.
 *
 * CryptoJS exposes the digest as a list of int32 "words"; the first two of
 * them are written back-to-back into an 8-byte buffer and then read out as a
 * single signed 64-bit value. Truncating to 64 bits reduces the entropy of the
 * hash accordingly, which is sufficient for some applications.
 *
 * Both the writes and the read use big-endian byte order (the `DataView`
 * default), independent of the endianness of the host machine. This mirrors
 * the equivalent JavaRosa implementation at
 * https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726
 *
 * @param text - The text to hash.
 * @returns The first 64 bits of the SHA-256 digest of `text`, as a `bigint`.
 */
function toBigIntHash(text: string): bigint {
	const WORD_BYTES = 4;
	const view = new DataView(new ArrayBuffer(2 * WORD_BYTES));
	const leadingWords = sha256(text).words.slice(0, 2);

	// Lay the two words out consecutively: word 0 at byte 0, word 1 at byte 4.
	for (const [index, word] of leadingWords.entries()) {
		view.setInt32(index * WORD_BYTES, word);
	}

	return view.getBigInt64(0);
}
11 changes: 5 additions & 6 deletions packages/xpath/src/lib/collections/sort.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator {

constructor(seed: Int | bigint) {
let initialSeed: number;
if (typeof(seed) === ("bigint")) {
if (typeof seed === 'bigint') {
// the result of the modulo operation is always smaller than Number.MAX_SAFE_INTEGER,
// thus it's safe to convert to a Number.
initialSeed = Number(BigInt(seed) % BigInt(SEED_MODULO_OPERAND));
Expand All @@ -45,7 +45,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator {
}
}

export const seededRandomize = <T>(values: readonly T[], seed?: number): T[] => {
export const seededRandomize = <T>(values: readonly T[], seed?: number | bigint): T[] => {
let generator: PseudoRandomNumberGenerator;

if (seed == null) {
Expand All @@ -64,12 +64,11 @@ export const seededRandomize = <T>(values: readonly T[], seed?: number): T[] =>
// In Java, a NaN double's .longValue is 0
if (Number.isNaN(seed)) finalSeed = 0;
// In Java, an Infinity double's .longValue() is 2**63 -1, which is larger than Number.MAX_SAFE_INTEGER, thus we'll need a BigInt.
else if (seed === Infinity) finalSeed = 2n ** 63n -1n;
else if (seed === Infinity) finalSeed = 2n ** 63n - 1n;
// Analogous with the above conversion, but for -Infinity
else if (seed === -Infinity) finalSeed = -(2n ** 63n);
// A Java double's .longValue drops the fractional.
else if (typeof(seed) === "number" && !Number.isInteger(seed)) finalSeed = Math.trunc(seed);
// TODO: There's still more peculiarities to address: https://github.com/getodk/web-forms/issues/240
// A Java double's .longValue drops the fractional part.
else if (typeof seed === 'number' && !Number.isInteger(seed)) finalSeed = Math.trunc(seed);
else finalSeed = seed;
generator = new SeededPseudoRandomNumberGenerator(finalSeed);
}
Expand Down
10 changes: 9 additions & 1 deletion packages/xpath/test/xforms/randomize.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ describe('randomize()', () => {
});

const SELECTOR = '//xhtml:div[@id="FunctionRandomize"]/xhtml:div';
const MIRROR = 'mirror';
const MIRROR_HASH_VALUE = 5989458117437254; // in Python: "from struct import unpack; from hashlib import sha256; unpack('>Q', sha256(b'mirror').digest()[:8])[0]"
const MIRROR_HASH_SORT_ORDER = 'ACBEDF';

describe('shuffles nodesets', () => {
beforeEach(() => {
Expand All @@ -44,7 +47,10 @@ describe('randomize()', () => {
<p>3</p>
<p>4</p>
</div>
</body>
<div id="testFunctionNodeset3">
<p>${MIRROR}</p>
</div>
</body>
</html>`,
{ namespaceResolver }
);
Expand Down Expand Up @@ -81,6 +87,8 @@ describe('randomize()', () => {
{ seed: -Infinity, expected: 'CFBEAD' },
{ seed: 'floor(1.1)', expected: 'BFEACD' },
{ seed: '//xhtml:div[@id="testFunctionNodeset2"]/xhtml:p', expected: 'BFEACD' },
{ seed: MIRROR_HASH_VALUE, expected: MIRROR_HASH_SORT_ORDER },
{ seed: '//xhtml:div[@id="testFunctionNodeset3"]/xhtml:p', expected: MIRROR_HASH_SORT_ORDER },
].forEach(({ seed, expected }) => {
it(`with a seed: ${seed}`, () => {
const expression = `randomize(${SELECTOR}, ${seed})`;
Expand Down

0 comments on commit 9bb403c

Please sign in to comment.