diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts index c51a16366..a0bd3e1c8 100644 --- a/packages/xpath/src/functions/xforms/node-set.ts +++ b/packages/xpath/src/functions/xforms/node-set.ts @@ -1,3 +1,5 @@ +import sha256 from 'crypto-js/sha256'; + import type { XPathNode } from '../../adapter/interface/XPathNode.ts'; import type { XPathDOMProvider } from '../../adapter/xpathDOMProvider.ts'; import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts'; @@ -384,8 +386,38 @@ export const randomize = new NodeSetFunction( const nodeResults = Array.from(results.values()); const nodes = nodeResults.map(({ value }) => value); - const seed = seedExpression?.evaluate(context).toNumber(); - - return seededRandomize(nodes, seed); + if (seedExpression === undefined) + return seededRandomize(nodes); + const seed = seedExpression.evaluate(context); + const asNumber = seed.toNumber(); // TODO: There are some peculiarities to address: https://github.com/getodk/web-forms/issues/240 + let finalSeed: number | bigint | undefined; + if (Number.isNaN(asNumber)) { + // Specific behaviors for when a seed value is not interpretable as numeric. + // We still want to derive a seed in those cases, see https://github.com/getodk/javarosa/issues/800 + const seedString = seed.toString(); + if (seedString === '') { + finalSeed = 0; // special case: JR behaviour + } else { + // any other string, we'll convert to a number via a digest function + finalSeed = toBigIntHash(seedString); + } + } else { + finalSeed = asNumber; + } + return seededRandomize(nodes, finalSeed); } ); + +function toBigIntHash(text: string): bigint { + // hash text with sha256, and interpret the first 64 bits of output + // (the first and second int32s ("words") of CryptoJS digest output) + // as a BigInt. Thus the entropy of the hash is reduced to 64 bits, which + // for some applications is sufficient. + // The underlying representations are big-endian regardless of the endianness + // of the machine this runs on, as is the equivalent JavaRosa implementation + // at https://github.com/getodk/javarosa/blob/ab0e8f4da6ad8180ac7ede5bc939f3f261c16edf/src/main/java/org/javarosa/xpath/expr/XPathFuncExpr.java#L718-L726 + const buffer = new ArrayBuffer(8); + const dataview = new DataView(buffer); + sha256(text).words.slice(0, 2).forEach((val, ix) => dataview.setInt32(ix * 4, val)); + return dataview.getBigInt64(0); +} diff --git a/packages/xpath/src/lib/collections/sort.ts b/packages/xpath/src/lib/collections/sort.ts index 6e324b3c3..a6f6971f8 100644 --- a/packages/xpath/src/lib/collections/sort.ts +++ b/packages/xpath/src/lib/collections/sort.ts @@ -20,7 +20,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { constructor(seed: Int | bigint) { let initialSeed: number; - if (typeof(seed) === ("bigint")) { + if (typeof seed === 'bigint') { // the result of the modulo operation is always smaller than Number.MAX_SAFE_INTEGER, // thus it's safe to convert to a Number. initialSeed = Number(BigInt(seed) % BigInt(SEED_MODULO_OPERAND)); @@ -45,7 +45,7 @@ class SeededPseudoRandomNumberGenerator implements PseudoRandomNumberGenerator { } } -export const seededRandomize = (values: readonly T[], seed?: number): T[] => { +export const seededRandomize = (values: readonly T[], seed?: number | bigint): T[] => { let generator: PseudoRandomNumberGenerator; if (seed == null) { @@ -64,12 +64,11 @@ export const seededRandomize = (values: readonly T[], seed?: number): T[] => // In Java, a NaN double's .longValue is 0 if (Number.isNaN(seed)) finalSeed = 0; // In Java, an Infinity double's .longValue() is 2**63 -1, which is larger than Number.MAX_SAFE_INTEGER, thus we'll need a BigInt. - else if (seed === Infinity) finalSeed = 2n ** 63n -1n; + else if (seed === Infinity) finalSeed = 2n ** 63n - 1n; // Analogous with the above conversion, but for -Infinity else if (seed === -Infinity) finalSeed = -(2n ** 63n); - // A Java double's .longValue drops the fractional. - else if (typeof(seed) === "number" && !Number.isInteger(seed)) finalSeed = Math.trunc(seed); - // TODO: There's still more peculiarities to address: https://github.com/getodk/web-forms/issues/240 + // A Java double's .longValue drops the fractional part. + else if (typeof seed === 'number' && !Number.isInteger(seed)) finalSeed = Math.trunc(seed); else finalSeed = seed; generator = new SeededPseudoRandomNumberGenerator(finalSeed); } diff --git a/packages/xpath/test/xforms/randomize.test.ts b/packages/xpath/test/xforms/randomize.test.ts index 0bbc4a4ed..036816194 100644 --- a/packages/xpath/test/xforms/randomize.test.ts +++ b/packages/xpath/test/xforms/randomize.test.ts @@ -18,6 +18,9 @@ describe('randomize()', () => { }); const SELECTOR = '//xhtml:div[@id="FunctionRandomize"]/xhtml:div'; + const MIRROR = 'mirror'; + const MIRROR_HASH_VALUE = 5989458117437254; // in Python: "from struct import unpack; from hashlib import sha256; unpack('>Q', sha256(b'mirror').digest()[:8])[0]" + const MIRROR_HASH_SORT_ORDER = 'ACBEDF'; describe('shuffles nodesets', () => { beforeEach(() => { @@ -44,7 +47,10 @@ describe('randomize()', () => {

3

4

- +
+

${MIRROR}

+
+ `, { namespaceResolver } ); @@ -81,6 +87,8 @@ describe('randomize()', () => { { seed: -Infinity, expected: 'CFBEAD' }, { seed: 'floor(1.1)', expected: 'BFEACD' }, { seed: '//xhtml:div[@id="testFunctionNodeset2"]/xhtml:p', expected: 'BFEACD' }, + { seed: MIRROR_HASH_VALUE, expected: MIRROR_HASH_SORT_ORDER }, + { seed: '//xhtml:div[@id="testFunctionNodeset3"]/xhtml:p', expected: MIRROR_HASH_SORT_ORDER }, ].forEach(({ seed, expected }) => { it(`with a seed: ${seed}`, () => { const expression = `randomize(${SELECTOR}, ${seed})`;