Skip to content

Commit ae5ea98

Browse files
committed
polyfill: switch to murmur3 style hash
1 parent e7866cf commit ae5ea98

File tree

4 files changed

+143
-49
lines changed

4 files changed

+143
-49
lines changed

polyfill/internal/hash.ts

Lines changed: 41 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import type { Composite } from "../composite.ts";
22
import { isNaN, NaN, apply, ownKeys, keyFor, weakMapGet, weakMapSet, sort, localeCompare } from "./originals.ts";
33
import { assert } from "./utils.ts";
4+
import { randomHash, MurmurHashStream, type Hasher } from "./murmur.ts";
45

5-
const seed = randomHash();
66
const TRUE = randomHash();
77
const FALSE = randomHash();
88
const NULL = randomHash();
99
const UNDEFINED = randomHash();
1010
const SYMBOLS = randomHash();
1111
const KEY = randomHash();
12+
const OBJECTS = randomHash();
1213

1314
const hashCache = new WeakMap<symbol | object, number | typeof lazyCompositeHash>();
1415
const symbolsInWeakMap = (() => {
@@ -35,93 +36,90 @@ export function maybeHashComposite(input: Composite): number | undefined {
3536
return undefined;
3637
}
3738

38-
// TODO - use a better hashing function
39-
/** A very basic, demonstrative, non-cryptographic, hashing function for Composites */
4039
export function hashComposite(input: Composite): number {
41-
let hash = maybeHashComposite(input);
42-
if (hash !== undefined) {
43-
return hash;
40+
const cachedHash = maybeHashComposite(input);
41+
if (cachedHash !== undefined) {
42+
return cachedHash;
4443
}
45-
hash = 0;
44+
const hasher = new MurmurHashStream();
4645
const keys = apply(ownKeys, null, [input]);
4746
apply(sort, keys, [keySort]);
4847
for (let i = 0; i < keys.length; i++) {
4948
const key = keys[i];
5049
if (typeof key === "string") {
51-
hash ^= stringHash(key) ^ KEY;
52-
hash ^= hashValue(input[key as keyof typeof input]);
50+
hasher.update(KEY);
51+
hasher.update(key);
52+
updateHasher(hasher, input[key as keyof typeof input]);
5353
continue;
5454
}
5555
assert(typeof key === "symbol");
5656
if (!symbolsInWeakMap && keyFor(key) === undefined) {
57-
// Remaining keys can't be hashed
57+
// Remaining keys can't be hashed in this JS engine
5858
break;
5959
}
60-
hash ^= symbolHash(key) ^ KEY;
61-
hash ^= hashValue(input[key as keyof typeof input]);
60+
hasher.update(KEY);
61+
symbolUpdateHasher(hasher, key);
62+
updateHasher(hasher, input[key as keyof typeof input]);
6263
}
6364
assert(apply(weakMapGet, hashCache, [input]) === lazyCompositeHash);
65+
const hash = hasher.digest();
6466
apply(weakMapSet, hashCache, [input, hash]);
6567
return hash;
6668
}
6769

68-
function hashValue(input: unknown): number {
70+
function updateHasher(hasher: Hasher, input: unknown): void {
6971
if (input === null) {
70-
return NULL;
72+
hasher.update(NULL);
73+
return;
7174
}
7275
switch (typeof input) {
7376
case "undefined":
74-
return UNDEFINED;
77+
hasher.update(UNDEFINED);
78+
return;
7579
case "boolean":
76-
return input ? TRUE : FALSE;
80+
hasher.update(input ? TRUE : FALSE);
81+
return;
7782
case "number":
78-
return numberHash(input);
83+
// Normalize NaNs and -0
84+
hasher.update(isNaN(input) ? NaN : input === 0 ? 0 : input);
85+
return;
7986
case "bigint":
80-
return numberHash(Number(input));
8187
case "string":
82-
return stringHash(input);
88+
hasher.update(input);
89+
return;
8390
case "symbol":
84-
return symbolHash(input);
91+
symbolUpdateHasher(hasher, input);
92+
return;
8593
case "object":
86-
return cachedHash(input);
8794
case "function":
88-
return cachedHash(input);
95+
hasher.update(cachedHash(input));
96+
return;
8997
default:
9098
throw new TypeError(`Unsupported input type: ${typeof input}`);
9199
}
92100
}
93101

94-
const floatArray = new Float64Array(1);
95-
const intArray = new Uint32Array(floatArray.buffer);
96-
function numberHash(input: number): number {
97-
floatArray[0] = input === 0 ? 0 : isNaN(input) ? NaN : input;
98-
const hash = intArray[0] ^ intArray[1];
99-
return hash >>> 0;
100-
}
101-
102-
function stringHash(input: string): number {
103-
let hash = seed;
104-
for (let i = 0; i < input.length; i++) {
105-
hash = (hash * 33) ^ input.charCodeAt(i);
106-
}
107-
return hash >>> 0;
108-
}
109-
110-
function symbolHash(input: symbol): number {
102+
function symbolUpdateHasher(hasher: Hasher, input: symbol): void {
111103
const regA = Symbol.keyFor(input);
112104
if (regA !== undefined) {
113-
return stringHash(regA) ^ SYMBOLS;
105+
hasher.update(SYMBOLS);
106+
hasher.update(regA);
107+
return;
114108
}
115109
if (!symbolsInWeakMap) {
116-
return SYMBOLS;
110+
hasher.update(SYMBOLS);
111+
return;
112+
} else {
113+
hasher.update(cachedHash(input));
117114
}
118-
return cachedHash(input);
119115
}
120116

117+
let nextObjectId = 1;
121118
function cachedHash(input: object | symbol): number {
122119
let hash = apply(weakMapGet, hashCache, [input]);
123120
if (hash === undefined) {
124-
hash = randomHash();
121+
hash = nextObjectId ^ OBJECTS;
122+
nextObjectId++;
125123
apply(weakMapSet, hashCache, [input, hash]);
126124
return hash;
127125
}
@@ -131,10 +129,6 @@ function cachedHash(input: object | symbol): number {
131129
return hash;
132130
}
133131

134-
function randomHash() {
135-
return (Math.random() * (2 ** 31 - 1)) >>> 0;
136-
}
137-
138132
/**
139133
* Strings before symbols.
140134
* Strings sorted lexicographically.

polyfill/internal/murmur.ts

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import { apply, imul, charCodeAt, Number } from "./originals.ts";
2+
3+
// Values drawn once at module load via randomHash().

// Initial hash state of every MurmurHashStream.
const RANDOM_SEED = randomHash();

// Marker mixed in ahead of the UTF-16 code units of a string chunk.
const STRING_MARKER = randomHash();

// Markers mixed in ahead of the magnitude bytes of a bigint chunk:
// one for non-negative values, one for negative values.
const BIG_INT_MARKER = randomHash();
const NEG_BIG_INT_MARKER = randomHash();
7+
8+
/**
 * Produces a pseudo-random, non-negative integer by scaling `Math.random()`
 * to the range `[0, 2 ** 31 - 1)` and truncating it to an unsigned 32-bit
 * integer. Not cryptographically secure.
 */
export function randomHash() {
  const upperBound = 2 ** 31 - 1;
  const scaled = Math.random() * upperBound;
  // `>>> 0` drops the fractional part and yields an unsigned 32-bit integer.
  return scaled >>> 0;
}
11+
12+
/**
 * Minimal streaming-hash contract: feed values in with `update`, then read
 * the resulting hash with `digest`.
 */
export interface Hasher {
  /** Mixes one value (string, number or bigint) into the running hash state. */
  update(val: string | number | bigint): void;

  /** Returns the hash of everything passed to `update` so far, as a number. */
  digest(): number;
}
16+
17+
/**
 * Incremental, non-cryptographic 32-bit hasher in the style of MurmurHash3.
 *
 * Input is consumed as a byte stream: bytes are packed little-endian into
 * 32-bit words, each complete word is mixed into the running state, and
 * `digest()` folds in any trailing bytes plus the total byte count before the
 * final avalanche.
 *
 * Encoding of `update()` arguments:
 * - string: `STRING_MARKER`, then each UTF-16 code unit as two bytes (low, high).
 * - number: values exactly representable as a signed or unsigned 32-bit
 *   integer (this includes -0, which compares equal to 0) are written as four
 *   bytes. Every other number is written as the eight bytes of its IEEE-754
 *   double representation, with NaN mapped to one fixed bit pattern, so that
 *   fractional, large and non-finite values do not all collapse onto their
 *   truncated 32-bit integer value.
 * - bigint: a sign-dependent marker, then the magnitude's bytes, least
 *   significant first (a single zero byte for `0n`).
 */
export class MurmurHashStream implements Hasher {
  // Scratch views sharing one buffer, used to read the raw bits of a double.
  // Created once when the class is defined; element access on typed arrays
  // does not go through any user-patchable prototype method.
  private static readonly floatScratch = new Float64Array(1);
  private static readonly wordScratch = new Uint32Array(MurmurHashStream.floatScratch.buffer);

  /** Running hash state; starts from the per-process random seed. */
  private hash: number = RANDOM_SEED;
  /** Total number of bytes written so far. */
  private length: number = 0;
  /** Bytes of the current, not yet complete, 32-bit word. */
  private carry: number = 0;
  /** How many bytes (0-3) are currently held in `carry`. */
  private carryBytes: number = 0;

  /** Per-word scramble applied to every 32-bit block, including the tail. */
  private static _scramble(k1: number): number {
    k1 = imul(k1, 0xcc9e2d51);
    k1 = (k1 << 15) | (k1 >>> 17);
    return imul(k1, 0x1b873593);
  }

  /** Mixes one complete 32-bit word into the running state. */
  private _mix(k1: number): void {
    let h = this.hash ^ MurmurHashStream._scramble(k1);
    h = (h << 13) | (h >>> 19);
    // `| 0` keeps the state a 32-bit integer instead of a wider double; the
    // following bitwise operations would coerce it the same way anyway.
    this.hash = (imul(h, 5) + 0xe6546b64) | 0;
  }

  /** Appends one byte to the stream, mixing whenever four bytes are buffered. */
  private _writeByte(byte: number): void {
    this.carry |= (byte & 0xff) << (8 * this.carryBytes);
    this.carryBytes++;
    this.length++;

    if (this.carryBytes === 4) {
      this._mix(this.carry >>> 0);
      this.carry = 0;
      this.carryBytes = 0;
    }
  }

  /** Appends the low 32 bits of `word` as four bytes, least significant first. */
  private _writeUint32(word: number): void {
    this._writeByte(word & 0xff);
    this._writeByte((word >>> 8) & 0xff);
    this._writeByte((word >>> 16) & 0xff);
    this._writeByte((word >>> 24) & 0xff);
  }

  /**
   * Mixes a value into the hash.
   *
   * @param chunk The string, number or bigint to hash.
   * @throws TypeError if `chunk` is of any other type.
   */
  update(chunk: string | number | bigint): void {
    switch (typeof chunk) {
      case "string":
        this._writeUint32(STRING_MARKER);
        for (let i = 0; i < chunk.length; i++) {
          const code = apply(charCodeAt, chunk, [i]);
          this._writeByte(code & 0xff);
          this._writeByte((code >>> 8) & 0xff);
        }
        return;
      case "number":
        if (chunk === (chunk | 0) || chunk === (chunk >>> 0)) {
          // Exactly a 32-bit integer: four bytes carry the whole value.
          this._writeUint32(chunk);
        } else if (chunk !== chunk) {
          // NaN: write one fixed bit pattern so every NaN hashes alike.
          this._writeUint32(0x7ff80000);
          this._writeUint32(0);
        } else {
          // Fractional, out-of-range or infinite: hash all 64 bits.
          MurmurHashStream.floatScratch[0] = chunk;
          this._writeUint32(MurmurHashStream.wordScratch[0]);
          this._writeUint32(MurmurHashStream.wordScratch[1]);
        }
        return;
      case "bigint": {
        let value = chunk;
        if (value < 0n) {
          value = -value;
          this._writeUint32(NEG_BIG_INT_MARKER);
        } else {
          this._writeUint32(BIG_INT_MARKER);
        }
        while (value > 0n) {
          this._writeByte(Number(value & 0xffn));
          value >>= 8n;
        }
        // The loop writes nothing for zero; emit one byte so 0n has a payload.
        if (chunk === 0n) this._writeByte(0);
        return;
      }
      default:
        throw new TypeError(`Unsupported input type: ${typeof chunk}`);
    }
  }

  /**
   * Returns the hash of everything written so far as an unsigned 32-bit
   * integer. Works on a copy of the state, so it can be called repeatedly
   * (with the same result) and more input can still be added afterwards.
   */
  digest(): number {
    let h = this.hash;
    if (this.carryBytes > 0) {
      // Fold in the 1-3 trailing bytes that never filled a whole word.
      h ^= MurmurHashStream._scramble(this.carry >>> 0);
    }

    // Final avalanche.
    h ^= this.length;
    h ^= h >>> 16;
    h = imul(h, 0x85ebca6b);
    h ^= h >>> 13;
    h = imul(h, 0xc2b2ae35);
    h ^= h >>> 16;

    return h >>> 0;
  }
}

polyfill/internal/originals.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1+
export const Number = globalThis.Number;
12
export const { isNaN, NaN, POSITIVE_INFINITY, NEGATIVE_INFINITY } = Number;
2-
export const { abs, floor, min } = Math;
3+
export const { abs, floor, min, imul } = Math;
34
export const { apply, ownKeys, defineProperty, preventExtensions, getOwnPropertyDescriptor } = Reflect;
45
export const { is, freeze } = Object;
56
export const { sort, splice, includes, indexOf, lastIndexOf } = Array.prototype;
67
export const { keyFor, iterator } = Symbol;
7-
export const { localeCompare } = String.prototype;
8+
export const { localeCompare, charCodeAt } = String.prototype;
89
export const Map = globalThis.Map;
910
export const { has: mapHas, set: mapSet, get: mapGet, delete: mapDelete, clear: mapClear } = Map.prototype;
1011
export const mapSize = getOwnPropertyDescriptor(Map.prototype, "size")!.get!;

tsconfig.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"module": "Preserve",
66
"moduleDetection": "force",
77
"allowImportingTsExtensions": true,
8+
"verbatimModuleSyntax": true,
89
"noEmit": true
910
},
1011
"include": ["polyfill/**/*.ts"]

0 commit comments

Comments
 (0)