diff --git a/.jules/bolt.md b/.jules/bolt.md index 2b98dfb..eceeccb 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,3 +13,7 @@ Action: Apply loop unrolling for max reductions in high-frequency typed array op ## 2024-11-20 - Softmax math.exp 8x unrolling with local var cache Learning: Unrolling the `Math.exp` accumulation loop to 8x and caching the multiplication `(tokenLogits[i] - maxLogit) * invTemp` into local variables before passing to `Math.exp` yields a measurable performance improvement (~4%) over the previous 4x unrolled implementation in the V8 engine, by reducing property access and allowing better instruction-level parallelism. Action: Utilize 8x loop unrolling paired with local variable caching for tight floating-point accumulation loops over TypedArrays. + +## 2024-11-20 - BigInt64Array initialization optimization +Learning: Using `BigInt64Array.from([BigInt(val)])` or `new BigInt64Array([BigInt(val)])` is noticeably slower in V8 than manually allocating an array with `new BigInt64Array(1)` and then setting the value `arr[0] = BigInt(val)`. +Action: Prefer manual array allocation and assignment over `.from()` or array literal initialization for typed arrays in performance-critical paths. diff --git a/src/parakeet.js b/src/parakeet.js index c982d91..6384940 100644 --- a/src/parakeet.js +++ b/src/parakeet.js @@ -676,7 +676,9 @@ export class ParakeetModel { // count of *valid* frames. For the JS preprocessor T === validLength; // for the ONNX preprocessor T may be validLength+1. const encoderLength = validLength ?? 
T; - const lenTensor = new this.ort.Tensor('int64', BigInt64Array.from([BigInt(encoderLength)]), [1]); + const lenArr = new BigInt64Array(1); + lenArr[0] = BigInt(encoderLength); + const lenTensor = new this.ort.Tensor('int64', lenArr, [1]); let enc; try { if (perfEnabled) { diff --git a/src/preprocessor.js b/src/preprocessor.js index 0fda30d..89d3890 100644 --- a/src/preprocessor.js +++ b/src/preprocessor.js @@ -97,7 +97,8 @@ export class OnnxPreprocessor { const waveforms = new this.ort.Tensor('float32', buffer, [1, buffer.length]); - const lenArr = new BigInt64Array([BigInt(buffer.length)]); + const lenArr = new BigInt64Array(1); + lenArr[0] = BigInt(buffer.length); const waveforms_lens = new this.ort.Tensor('int64', lenArr, [1]); const feeds = { waveforms, waveforms_lens };