From 20b66c255ff617c438775e54081eaa02d5b983e1 Mon Sep 17 00:00:00 2001 From: "Paul Taylor" Date: Sun, 16 Jan 2022 12:23:25 -0500 Subject: [PATCH] ARROW-12549: [JS] Table and RecordBatch should not extend Vector, make JS lib smaller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pull request addresses a number of issues that require a more substantial refactor. The main goals are: 1. Eliminate cruft by dropping support for outdated browsers/environments. 2. Reduce total surface area by eliminating unnecessary `Vector`, `Chunked`, and `Column` classes. 3. Reduce the amount of the library pulled in when Table, RecordBatch, or Vector classes are imported. In this pull request, we have eliminated type specific Vector classes. There is now only one vector that has a data instance and we use type-specific visitors. Record batches don't inherit from vectors anymore. Neither do Tables. Columns are gone. To create vectors and tables, we now have separate methods that can be easily tree shaken. We also added tests for the bundles, fixed some issues with bundling in webpack, updated dependencies (including typescript and flatbuffers). We also added memoization to dictionary vectors to reduce the overhead of decoding UTF-8 to strings. A quick overview of Arrow with the new API: https://observablehq.com/d/9480eccb30a21010. 
Also addresses: * [ARROW-10255](https://issues.apache.org/jira/browse/ARROW-10255) * [ARROW-11347](https://issues.apache.org/jira/browse/ARROW-11347) * [ARROW-12548](https://issues.apache.org/jira/browse/ARROW-12548) * [ARROW-13514](https://issues.apache.org/jira/browse/ARROW-13514) * [ARROW-10220](https://issues.apache.org/jira/browse/ARROW-10220) * [ARROW-14933](https://issues.apache.org/jira/browse/ARROW-14933) * [ARROW-12538](https://issues.apache.org/jira/browse/ARROW-12538) * [ARROW-12536](https://issues.apache.org/jira/browse/ARROW-12536) ## Performance comparison: ### Master: ``` Prepare Data: 502.401ms Running "Parse" suite... dataset: tracks, function: Table.from 15,578 ops/s ±0.67%, 0.064 ms, 94 samples dataset: tracks, function: readBatches 15,853 ops/s ±0.59%, 0.063 ms, 97 samples dataset: tracks, function: serialize 969 ops/s ±1.8%, 1 ms, 93 samples Running "Get values by index" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 78 ops/s ±0.090%, 13 ms, 82 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 79 ops/s ±0.090%, 13 ms, 70 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 1.59 ops/s ±25%, 563 ms, 9 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 1.74 ops/s ±3.2%, 576 ms, 9 samples Running "Iterate vectors" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 85 ops/s ±0.14%, 12 ms, 74 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 85 ops/s ±0.11%, 12 ms, 75 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 1.51 ops/s ±3.1%, 657 ms, 8 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 1.49 ops/s ±4.0%, 666 ms, 8 samples Running "Slice toArray vectors" suite... 
dataset: tracks, column: lat, length: 1,000,000, type: Float32 2,588 ops/s ±3.0%, 0.4 ms, 74 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 2,345 ops/s ±1.7%, 0.43 ms, 73 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 1.29 ops/s ±5.3%, 760 ms, 8 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 1.28 ops/s ±4.1%, 784 ms, 8 samples Running "Slice vectors" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 4,212,193 ops/s ±0.23%, 0 ms, 100 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 4,400,234 ops/s ±0.80%, 0 ms, 92 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 4,764,651 ops/s ±0.13%, 0 ms, 101 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 4,763,581 ops/s ±0.050%, 0 ms, 98 samples Running "DataFrame Iterate" suite... dataset: tracks, length: 1,000,000 23.1 ops/s ±2.1%, 43 ms, 43 samples Running "DataFrame Count By" suite... dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 535 ops/s ±0.050%, 1.9 ms, 99 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 535 ops/s ±0.040%, 1.9 ms, 96 samples Running "DataFrame Filter-Scan Count" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32, test: gt, value: 0 57 ops/s ±0.090%, 18 ms, 75 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32, test: gt, value: 0 57 ops/s ±0.050%, 18 ms, 74 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary, test: eq, value: Seattle 99 ops/s ±0.060%, 10 ms, 86 samples Running "DataFrame Filter-Iterate" suite... 
dataset: tracks, column: lat, length: 1,000,000, type: Float32, test: gt, value: 0 37 ops/s ±0.12%, 27 ms, 66 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32, test: gt, value: 0 37 ops/s ±0.14%, 27 ms, 66 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary, test: eq, value: Seattle 70 ops/s ±0.45%, 14 ms, 73 samples Running "DataFrame Direct Count" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32, test: gt, value: 0 160 ops/s ±0.040%, 6.3 ms, 83 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32, test: gt, value: 0 162 ops/s ±0.12%, 6.1 ms, 85 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary, test: eq, value: Seattle 1.51 ops/s ±5.6%, 664 ms, 8 samples ``` ### This branch: ``` Running "vectorFromArray" suite... from: numbers 106 ops/s ±1.1%, 9.3 ms, 79 samples from: booleans 101 ops/s ±1.4%, 9.8 ms, 76 samples from: dictionary 105 ops/s ±4.1%, 9 ms, 78 samples Running "Iterate Vector" suite... from: uint8Array 896 ops/s ±0.21%, 1.1 ms, 94 samples from: uint16Array 896 ops/s ±0.82%, 1.1 ms, 94 samples from: uint32Array 884 ops/s ±0.39%, 1.1 ms, 95 samples from: uint64Array 285 ops/s ±0.19%, 3.5 ms, 92 samples from: int8Array 882 ops/s ±0.65%, 1.1 ms, 95 samples from: int16Array 899 ops/s ±0.37%, 1.1 ms, 95 samples from: int32Array 887 ops/s ±0.46%, 1.1 ms, 92 samples from: int64Array 280 ops/s ±0.60%, 3.5 ms, 91 samples from: float32Array 805 ops/s ±0.86%, 1.2 ms, 90 samples from: float64Array 814 ops/s ±0.44%, 1.2 ms, 92 samples from: numbers 812 ops/s ±0.39%, 1.2 ms, 91 samples from: booleans 284 ops/s ±0.14%, 3.5 ms, 92 samples from: dictionary 298 ops/s ±0.44%, 3.3 ms, 91 samples from: string 16.2 ops/s ±3.9%, 59 ms, 45 samples Running "Spread Vector" suite... 
from: uint8Array 360 ops/s ±1.2%, 2.7 ms, 93 samples from: uint16Array 374 ops/s ±0.55%, 2.6 ms, 92 samples from: uint32Array 372 ops/s ±1.1%, 2.6 ms, 91 samples from: uint64Array 164 ops/s ±0.66%, 6 ms, 78 samples from: int8Array 372 ops/s ±0.64%, 2.7 ms, 96 samples from: int16Array 380 ops/s ±0.42%, 2.6 ms, 94 samples from: int32Array 375 ops/s ±0.87%, 2.6 ms, 92 samples from: int64Array 164 ops/s ±0.64%, 6.1 ms, 86 samples from: float32Array 327 ops/s ±0.62%, 3 ms, 85 samples from: float64Array 318 ops/s ±1.1%, 3.1 ms, 91 samples from: numbers 326 ops/s ±0.74%, 3 ms, 89 samples from: booleans 178 ops/s ±0.92%, 5.6 ms, 84 samples from: dictionary 189 ops/s ±0.51%, 5.2 ms, 89 samples from: string 14.8 ops/s ±3.7%, 65 ms, 41 samples Running "toArray Vector" suite... from: uint8Array 28,488,216 ops/s ±0.22%, 0 ms, 101 samples from: uint16Array 28,777,482 ops/s ±0.41%, 0 ms, 98 samples from: uint32Array 28,387,333 ops/s ±0.25%, 0 ms, 97 samples from: uint64Array 23,412,763 ops/s ±0.68%, 0 ms, 97 samples from: int8Array 21,497,600 ops/s ±0.22%, 0 ms, 94 samples from: int16Array 21,990,137 ops/s ±0.16%, 0 ms, 101 samples from: int32Array 21,809,196 ops/s ±0.68%, 0 ms, 96 samples from: int64Array 20,084,822 ops/s ±0.68%, 0 ms, 93 samples from: float32Array 18,452,580 ops/s ±0.83%, 0 ms, 96 samples from: float64Array 18,527,057 ops/s ±0.54%, 0 ms, 92 samples from: numbers 18,555,045 ops/s ±0.52%, 0 ms, 99 samples from: booleans 178 ops/s ±0.43%, 5.6 ms, 84 samples from: dictionary 189 ops/s ±0.61%, 5.3 ms, 89 samples from: string 15.8 ops/s ±0.76%, 63 ms, 43 samples Running "get Vector" suite... 
from: uint8Array 441 ops/s ±1.1%, 2.2 ms, 95 samples from: uint16Array 441 ops/s ±0.48%, 2.2 ms, 95 samples from: uint32Array 443 ops/s ±0.23%, 2.2 ms, 96 samples from: uint64Array 414 ops/s ±0.68%, 2.4 ms, 93 samples from: int8Array 439 ops/s ±0.30%, 2.3 ms, 95 samples from: int16Array 447 ops/s ±0.35%, 2.2 ms, 96 samples from: int32Array 439 ops/s ±0.48%, 2.3 ms, 94 samples from: int64Array 415 ops/s ±0.17%, 2.4 ms, 97 samples from: float32Array 472 ops/s ±0.49%, 2.1 ms, 94 samples from: float64Array 471 ops/s ±0.26%, 2.1 ms, 97 samples from: numbers 473 ops/s ±0.22%, 2.1 ms, 98 samples from: booleans 429 ops/s ±0.25%, 2.3 ms, 97 samples from: dictionary 464 ops/s ±0.23%, 2.1 ms, 96 samples from: string 17.8 ops/s ±1.3%, 56 ms, 48 samples Running "Parse" suite... dataset: tracks, function: read recordBatches 12,047 ops/s ±0.77%, 0.082 ms, 100 samples dataset: tracks, function: write recordBatches 1,028 ops/s ±0.72%, 0.96 ms, 96 samples Running "Get values by index" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 46 ops/s ±0.12%, 22 ms, 61 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 46 ops/s ±0.15%, 22 ms, 61 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 25.3 ops/s ±0.37%, 39 ms, 46 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 25.1 ops/s ±0.76%, 39 ms, 46 samples Running "Iterate vectors" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 84 ops/s ±0.20%, 12 ms, 73 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 82 ops/s ±0.65%, 12 ms, 72 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 30 ops/s ±0.94%, 33 ms, 54 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 30 ops/s ±0.41%, 33 ms, 54 samples Running "Slice toArray vectors" suite... 
dataset: tracks, column: lat, length: 1,000,000, type: Float32 2,911 ops/s ±3.3%, 0.33 ms, 86 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 2,765 ops/s ±3.2%, 0.35 ms, 77 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 18 ops/s ±1.2%, 55 ms, 49 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 18.2 ops/s ±0.73%, 54 ms, 50 samples Running "Slice vectors" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 4,338,570 ops/s ±0.52%, 0 ms, 94 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 4,341,418 ops/s ±0.41%, 0 ms, 97 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 3,656,243 ops/s ±0.45%, 0 ms, 101 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 3,598,448 ops/s ±1.0%, 0 ms, 97 samples Running "Spread vectors" suite... dataset: tracks, column: lat, length: 1,000,000, type: Float32 16 ops/s ±4.3%, 59 ms, 44 samples dataset: tracks, column: lng, length: 1,000,000, type: Float32 16.1 ops/s ±4.2%, 60 ms, 45 samples dataset: tracks, column: origin, length: 1,000,000, type: Dictionary 17.8 ops/s ±1.5%, 55 ms, 49 samples dataset: tracks, column: destination, length: 1,000,000, type: Dictionary 17.6 ops/s ±1.7%, 55 ms, 48 samples Running "Table" suite... Iterate, dataset: tracks, numRows: 1,000,000 27 ops/s ±0.28%, 37 ms, 49 samples Spread, dataset: tracks, numRows: 1,000,000 8.73 ops/s ±3.7%, 111 ms, 25 samples toArray, dataset: tracks, numRows: 1,000,000 8.15 ops/s ±4.9%, 115 ms, 26 samples get, dataset: tracks, numRows: 1,000,000 17.2 ops/s ±0.31%, 58 ms, 47 samples Running "Table Direct Count" suite... 
dataset: tracks, column: lat, numRows: 1,000,000, type: Float32, test: gt, value: 0 74 ops/s ±0.16%, 14 ms, 77 samples dataset: tracks, column: lng, numRows: 1,000,000, type: Float32, test: gt, value: 0 74 ops/s ±0.20%, 14 ms, 77 samples dataset: tracks, column: origin, numRows: 1,000,000, type: Dictionary, test: eq, value: Seattle 80 ops/s ±0.060%, 12 ms, 71 samples ``` Closes #10371 from trxcllnt/fea/simplify Lead-authored-by: [Paul Taylor ] Co-authored-by: Dominik Moritz Co-authored-by: ptaylor Signed-off-by: Dominik Moritz --- .env | 2 +- .github/workflows/js.yml | 6 +- ci/docker/conda-integration.dockerfile | 2 +- ci/docker/debian-10-js.dockerfile | 2 +- ci/docker/debian-11-js.dockerfile | 2 +- ci/docker/linux-apt-docs.dockerfile | 2 +- ci/scripts/js_test.sh | 1 + dev/release/verify-release-candidate.sh | 1 + js/.eslintignore | 3 +- js/{.eslintrc.js => .eslintrc.cjs} | 27 +- js/.gitignore | 3 + js/.vscode/extensions.json | 1 + js/.vscode/launch.json | 36 +- js/.vscode/settings.json | 19 +- js/DEVELOP.md | 26 +- js/README.md | 98 +- js/bin/arrow2csv.js | 3 +- js/bin/file-to-stream.js | 2 +- js/bin/integration.js | 45 +- js/bin/json-to-arrow.js | 2 +- js/bin/package.json | 3 + js/bin/print-buffer-alignment.js | 4 +- js/bin/stream-to-file.js | 2 +- js/gulp/argv.js | 7 +- js/gulp/arrow-task.js | 46 +- js/gulp/bundle-task.js | 137 + js/gulp/clean-task.js | 18 +- js/gulp/closure-task.js | 56 +- js/gulp/compile-task.js | 15 +- .../vector/struct.ts => gulp/esm-require.cjs} | 32 +- js/gulp/memoize-task.js | 20 +- js/gulp/package-task.js | 56 +- js/gulp/test-task.js | 86 +- js/gulp/typescript-task.js | 48 +- js/gulp/util.js | 121 +- js/gulpfile.js | 47 +- js/{index.js => index.cjs} | 0 js/index.mjs | 2 +- js/jest.config.js | 6 +- js/jestconfigs/jest.apache-arrow.config.js | 29 +- js/jestconfigs/jest.coverage.config.js | 24 +- js/jestconfigs/jest.es2015.cjs.config.js | 29 +- js/jestconfigs/jest.es2015.esm.config.js | 29 +- js/jestconfigs/jest.es2015.umd.config.js | 29 +- 
js/jestconfigs/jest.es5.cjs.config.js | 29 +- js/jestconfigs/jest.es5.esm.config.js | 29 +- js/jestconfigs/jest.es5.umd.config.js | 29 +- js/jestconfigs/jest.esnext.cjs.config.js | 29 +- js/jestconfigs/jest.esnext.esm.config.js | 29 +- js/jestconfigs/jest.esnext.umd.config.js | 29 +- js/jestconfigs/jest.src.config.js | 20 +- js/jestconfigs/jest.ts.config.js | 28 +- js/package.json | 77 +- js/perf/config.ts | 60 +- js/perf/index.ts | 253 +- js/src/Arrow.dom.ts | 67 +- js/src/Arrow.node.ts | 18 +- js/src/Arrow.ts | 128 +- js/src/bin/arrow2csv.ts | 80 +- js/src/builder.ts | 227 +- js/src/builder/binary.ts | 14 +- js/src/builder/bool.ts | 6 +- js/src/builder/buffer.ts | 65 +- js/src/builder/date.ts | 19 +- js/src/builder/decimal.ts | 9 +- js/src/builder/dictionary.ts | 15 +- js/src/builder/fixedsizebinary.ts | 9 +- js/src/builder/fixedsizelist.ts | 18 +- js/src/builder/float.ts | 26 +- js/src/builder/index.ts | 82 - js/src/builder/int.ts | 56 +- js/src/builder/interval.ts | 19 +- js/src/builder/list.ts | 27 +- js/src/builder/map.ts | 34 +- js/src/builder/null.ts | 6 +- js/src/builder/run.ts | 34 - js/src/builder/struct.ts | 18 +- js/src/builder/time.ts | 29 +- js/src/builder/timestamp.ts | 29 +- js/src/builder/union.ts | 23 +- js/src/builder/utf8.ts | 12 +- js/src/builder/valid.ts | 6 +- js/src/column.ts | 136 - js/src/compute/dataframe.ts | 288 -- js/src/compute/predicate.ts | 292 -- js/src/data.ts | 418 +- js/src/enum.ts | 232 +- js/src/factories.ts | 243 + js/src/fb/{.eslintrc.js => .eslintrc.cjs} | 5 +- js/src/fb/File.ts | 300 -- js/src/fb/Message.ts | 709 --- js/src/fb/Schema.ts | 2658 ----------- js/src/fb/binary.ts | 39 + js/src/fb/block.ts | 49 + js/src/fb/body-compression-method.ts | 20 + js/src/fb/body-compression.ts | 72 + js/src/fb/bool.ts | 36 + js/src/fb/buffer.ts | 48 + js/src/fb/compression-type.ts | 7 + js/src/fb/date-unit.ts | 7 + js/src/fb/date.ts | 57 + js/src/fb/decimal.ts | 82 + js/src/fb/dictionary-batch.ts | 75 + js/src/fb/dictionary-encoding.ts 
| 90 + js/src/fb/dictionary-kind.ts | 13 + js/src/fb/duration.ts | 49 + js/src/fb/endianness.ts | 11 + js/src/fb/feature.ts | 41 + js/src/fb/field-node.ts | 53 + js/src/fb/field.ts | 161 + js/src/fb/fixed-size-binary.ts | 49 + js/src/fb/fixed-size-list.ts | 49 + js/src/fb/floating-point.ts | 49 + js/src/fb/footer.ts | 134 + js/src/fb/int.ts | 56 + js/src/fb/interval-unit.ts | 8 + js/src/fb/interval.ts | 49 + js/src/fb/key-value.ts | 65 + js/src/fb/large-binary.ts | 40 + js/src/fb/large-list.ts | 40 + js/src/fb/large-utf8.ts | 40 + js/src/fb/list.ts | 36 + js/src/fb/map.ts | 76 + js/src/fb/message-header.ts | 59 + js/src/fb/message.ts | 117 + js/src/fb/metadata-version.ts | 42 + js/src/fb/null.ts | 39 + js/src/fb/precision.ts | 8 + js/src/fb/record-batch.ts | 114 + js/src/fb/schema.ts | 152 + js/src/fb/sparse-matrix-compressed-axis.ts | 7 + js/src/fb/sparse-matrix-index-c-s-x.ts | 134 + js/src/fb/sparse-tensor-index-c-o-o.ts | 142 + js/src/fb/sparse-tensor-index-c-s-f.ts | 207 + js/src/fb/sparse-tensor-index.ts | 41 + js/src/fb/sparse-tensor.ts | 149 + js/src/fb/struct_.ts | 41 + js/src/fb/tensor-dim.ts | 69 + js/src/fb/tensor.ts | 142 + js/src/fb/time-unit.ts | 9 + js/src/fb/time.ts | 75 + js/src/fb/timestamp.ts | 180 + js/src/fb/type.ts | 118 + js/src/fb/union-mode.ts | 7 + js/src/fb/union.ts | 92 + js/src/fb/utf8.ts | 39 + js/src/interfaces.ts | 466 +- js/src/io/adapters.ts | 128 +- js/src/io/file.ts | 16 +- js/src/io/interfaces.ts | 10 +- js/src/io/node/builder.ts | 7 +- js/src/io/node/iterable.ts | 2 +- js/src/io/node/reader.ts | 12 +- js/src/io/node/writer.ts | 10 +- js/src/io/stream.ts | 18 +- js/src/io/whatwg/builder.ts | 30 +- js/src/io/whatwg/iterable.ts | 10 +- js/src/io/whatwg/reader.ts | 10 +- js/src/io/whatwg/writer.ts | 12 +- js/src/ipc/message.ts | 35 +- js/src/ipc/metadata/file.ts | 32 +- js/src/ipc/metadata/json.ts | 20 +- js/src/ipc/metadata/message.ts | 107 +- js/src/ipc/reader.ts | 210 +- js/src/ipc/serialization.ts | 51 + js/src/ipc/writer.ts | 
123 +- js/src/recordbatch.ts | 406 +- js/src/row/map.ts | 152 + js/src/row/struct.ts | 157 + js/src/schema.ts | 75 +- js/src/table.ts | 578 ++- js/src/type.ts | 381 +- js/src/util/args.ts | 196 - js/src/util/bit.ts | 10 +- js/src/util/bn.ts | 60 +- js/src/util/buffer.ts | 65 +- js/src/util/chunk.ts | 160 + js/src/util/compat.ts | 36 +- js/src/util/fn.ts | 31 - js/src/util/int.ts | 70 +- js/src/util/math.ts | 16 +- js/src/util/pretty.ts | 10 +- js/src/util/recordbatch.ts | 121 +- js/src/util/vector.ts | 32 +- js/src/vector.ts | 483 +- js/src/vector/base.ts | 111 - js/src/vector/binary.ts | 27 - js/src/vector/bool.ts | 35 - js/src/vector/chunked.ts | 320 -- js/src/vector/date.ts | 51 - js/src/vector/dictionary.ts | 60 - js/src/vector/fixedsizebinary.ts | 22 - js/src/vector/fixedsizelist.ts | 22 - js/src/vector/float.ts | 144 - js/src/vector/index.ts | 207 - js/src/vector/int.ts | 195 - js/src/vector/interval.ts | 26 - js/src/vector/map.ts | 35 - js/src/vector/row.ts | 296 -- js/src/vector/time.ts | 30 - js/src/vector/timestamp.ts | 30 - js/src/vector/union.ts | 32 - js/src/vector/utf8.ts | 39 - js/src/visitor.ts | 240 +- js/src/visitor/builderctor.ts | 135 +- js/src/visitor/bytelength.ts | 136 + js/src/visitor/bytewidth.ts | 68 - js/src/visitor/get.ts | 364 +- js/src/visitor/indexof.ts | 221 +- js/src/visitor/iterator.ts | 257 +- js/src/visitor/jsontypeassembler.ts | 12 +- js/src/visitor/jsonvectorassembler.ts | 173 +- js/src/visitor/set.ts | 462 +- js/src/visitor/toarray.ts | 151 - js/src/visitor/typeassembler.ts | 45 +- js/src/visitor/typecomparator.ts | 194 +- js/src/visitor/typector.ts | 102 +- js/src/visitor/vectorassembler.ts | 169 +- js/src/visitor/vectorctor.ts | 99 - js/src/visitor/vectorloader.ts | 106 +- js/test/{.eslintrc.js => .eslintrc.cjs} | 2 + js/test/{Arrow.ts => bundle/field.js} | 4 +- .../null.ts => test/bundle/makeTable.js} | 10 +- .../decimal.ts => test/bundle/makeVector.js} | 8 +- js/test/bundle/schema.js | 20 + js/test/bundle/table.js | 20 + 
js/test/bundle/tableFromArrays.js | 24 + js/test/bundle/tableFromIPC.js | 25 + js/test/bundle/vector.js | 20 + js/test/bundle/vectorFromArray.js | 22 + js/test/data/tables.ts | 23 +- js/test/generate-test-data.ts | 428 +- js/test/inference/column.ts | 36 +- js/test/inference/nested.ts | 63 +- js/test/inference/visitor/get.ts | 62 +- js/test/jest-extensions.ts | 107 +- js/test/tsconfig.json | 1 + js/test/tsconfig/tsconfig.base.json | 1 + js/test/unit/bit-tests.ts | 4 +- js/test/unit/builders/builder-tests.ts | 122 +- js/test/unit/builders/date-tests.ts | 16 +- js/test/unit/builders/dictionary-tests.ts | 14 +- js/test/unit/builders/int64-tests.ts | 45 +- js/test/unit/builders/primitive-tests.ts | 26 +- js/test/unit/builders/uint64-tests.ts | 45 +- js/test/unit/builders/utf8-tests.ts | 9 +- js/test/unit/builders/utils.ts | 98 +- js/test/unit/dataframe-tests.ts | 282 -- js/test/unit/generated-data-tests.ts | 84 +- js/test/unit/generated-data-validators.ts | 274 +- js/test/unit/int-tests.ts | 216 +- js/test/unit/ipc/helpers.ts | 28 +- js/test/unit/ipc/message-reader-tests.ts | 16 +- js/test/unit/ipc/reader/file-reader-tests.ts | 19 +- .../unit/ipc/reader/from-inference-tests.ts | 45 +- js/test/unit/ipc/reader/json-reader-tests.ts | 9 +- .../unit/ipc/reader/stream-reader-tests.ts | 14 +- js/test/unit/ipc/reader/streams-dom-tests.ts | 28 +- js/test/unit/ipc/reader/streams-node-tests.ts | 27 +- .../serialization-tests.ts} | 137 +- js/test/unit/ipc/validate.ts | 6 +- js/test/unit/ipc/writer/file-writer-tests.ts | 13 +- js/test/unit/ipc/writer/json-writer-tests.ts | 20 +- .../unit/ipc/writer/stream-writer-tests.ts | 72 +- js/test/unit/ipc/writer/streams-dom-tests.ts | 42 +- js/test/unit/ipc/writer/streams-node-tests.ts | 46 +- .../unit/recordbatch/record-batch-tests.ts | 74 +- js/test/unit/table-tests.ts | 338 +- js/test/unit/table/assign-tests.ts | 22 +- js/test/unit/table/table-test.ts | 56 + .../list.ts => test/unit/type-tests.ts} | 11 +- js/test/unit/utils-tests.ts | 32 
- js/test/unit/vector/bool-vector-tests.ts | 30 +- js/test/unit/vector/date-vector-tests.ts | 10 +- js/test/unit/vector/numeric-vector-tests.ts | 443 +- js/test/unit/vector/vector-tests.ts | 109 +- js/test/unit/visitor-tests.ts | 115 +- js/tsconfig.json | 4 +- js/tsconfig/tsconfig.base.json | 2 + js/tsconfig/tsconfig.es2015.cls.json | 2 +- js/tsconfig/tsconfig.es5.cls.json | 2 +- js/tsconfig/tsconfig.esnext.cls.json | 2 +- js/typedoc.js | 31 - js/typedoc.json | 14 + js/yarn.lock | 3911 ++++++++++------- 285 files changed, 13932 insertions(+), 14555 deletions(-) rename js/{.eslintrc.js => .eslintrc.cjs} (74%) create mode 100644 js/bin/package.json create mode 100644 js/gulp/bundle-task.js rename js/{src/vector/struct.ts => gulp/esm-require.cjs} (55%) rename js/{index.js => index.cjs} (100%) delete mode 100644 js/src/builder/index.ts delete mode 100644 js/src/builder/run.ts delete mode 100644 js/src/column.ts delete mode 100644 js/src/compute/dataframe.ts delete mode 100644 js/src/compute/predicate.ts create mode 100644 js/src/factories.ts rename js/src/fb/{.eslintrc.js => .eslintrc.cjs} (85%) delete mode 100644 js/src/fb/File.ts delete mode 100644 js/src/fb/Message.ts delete mode 100644 js/src/fb/Schema.ts create mode 100644 js/src/fb/binary.ts create mode 100644 js/src/fb/block.ts create mode 100644 js/src/fb/body-compression-method.ts create mode 100644 js/src/fb/body-compression.ts create mode 100644 js/src/fb/bool.ts create mode 100644 js/src/fb/buffer.ts create mode 100644 js/src/fb/compression-type.ts create mode 100644 js/src/fb/date-unit.ts create mode 100644 js/src/fb/date.ts create mode 100644 js/src/fb/decimal.ts create mode 100644 js/src/fb/dictionary-batch.ts create mode 100644 js/src/fb/dictionary-encoding.ts create mode 100644 js/src/fb/dictionary-kind.ts create mode 100644 js/src/fb/duration.ts create mode 100644 js/src/fb/endianness.ts create mode 100644 js/src/fb/feature.ts create mode 100644 js/src/fb/field-node.ts create mode 100644 
js/src/fb/field.ts create mode 100644 js/src/fb/fixed-size-binary.ts create mode 100644 js/src/fb/fixed-size-list.ts create mode 100644 js/src/fb/floating-point.ts create mode 100644 js/src/fb/footer.ts create mode 100644 js/src/fb/int.ts create mode 100644 js/src/fb/interval-unit.ts create mode 100644 js/src/fb/interval.ts create mode 100644 js/src/fb/key-value.ts create mode 100644 js/src/fb/large-binary.ts create mode 100644 js/src/fb/large-list.ts create mode 100644 js/src/fb/large-utf8.ts create mode 100644 js/src/fb/list.ts create mode 100644 js/src/fb/map.ts create mode 100644 js/src/fb/message-header.ts create mode 100644 js/src/fb/message.ts create mode 100644 js/src/fb/metadata-version.ts create mode 100644 js/src/fb/null.ts create mode 100644 js/src/fb/precision.ts create mode 100644 js/src/fb/record-batch.ts create mode 100644 js/src/fb/schema.ts create mode 100644 js/src/fb/sparse-matrix-compressed-axis.ts create mode 100644 js/src/fb/sparse-matrix-index-c-s-x.ts create mode 100644 js/src/fb/sparse-tensor-index-c-o-o.ts create mode 100644 js/src/fb/sparse-tensor-index-c-s-f.ts create mode 100644 js/src/fb/sparse-tensor-index.ts create mode 100644 js/src/fb/sparse-tensor.ts create mode 100644 js/src/fb/struct_.ts create mode 100644 js/src/fb/tensor-dim.ts create mode 100644 js/src/fb/tensor.ts create mode 100644 js/src/fb/time-unit.ts create mode 100644 js/src/fb/time.ts create mode 100644 js/src/fb/timestamp.ts create mode 100644 js/src/fb/type.ts create mode 100644 js/src/fb/union-mode.ts create mode 100644 js/src/fb/union.ts create mode 100644 js/src/fb/utf8.ts create mode 100644 js/src/ipc/serialization.ts create mode 100644 js/src/row/map.ts create mode 100644 js/src/row/struct.ts delete mode 100644 js/src/util/args.ts create mode 100644 js/src/util/chunk.ts delete mode 100644 js/src/util/fn.ts delete mode 100644 js/src/vector/base.ts delete mode 100644 js/src/vector/binary.ts delete mode 100644 js/src/vector/bool.ts delete mode 100644 
js/src/vector/chunked.ts delete mode 100644 js/src/vector/date.ts delete mode 100644 js/src/vector/dictionary.ts delete mode 100644 js/src/vector/fixedsizebinary.ts delete mode 100644 js/src/vector/fixedsizelist.ts delete mode 100644 js/src/vector/float.ts delete mode 100644 js/src/vector/index.ts delete mode 100644 js/src/vector/int.ts delete mode 100644 js/src/vector/interval.ts delete mode 100644 js/src/vector/map.ts delete mode 100644 js/src/vector/row.ts delete mode 100644 js/src/vector/time.ts delete mode 100644 js/src/vector/timestamp.ts delete mode 100644 js/src/vector/union.ts delete mode 100644 js/src/vector/utf8.ts create mode 100644 js/src/visitor/bytelength.ts delete mode 100644 js/src/visitor/bytewidth.ts delete mode 100644 js/src/visitor/toarray.ts delete mode 100644 js/src/visitor/vectorctor.ts rename js/test/{.eslintrc.js => .eslintrc.cjs} (95%) rename js/test/{Arrow.ts => bundle/field.js} (92%) rename js/{src/vector/null.ts => test/bundle/makeTable.js} (85%) rename js/{src/vector/decimal.ts => test/bundle/makeVector.js} (84%) create mode 100644 js/test/bundle/schema.js create mode 100644 js/test/bundle/table.js create mode 100644 js/test/bundle/tableFromArrays.js create mode 100644 js/test/bundle/tableFromIPC.js create mode 100644 js/test/bundle/vector.js create mode 100644 js/test/bundle/vectorFromArray.js delete mode 100644 js/test/unit/dataframe-tests.ts rename js/test/unit/{table/serialize-tests.ts => ipc/serialization-tests.ts} (57%) create mode 100644 js/test/unit/table/table-test.ts rename js/{src/vector/list.ts => test/unit/type-tests.ts} (78%) delete mode 100644 js/test/unit/utils-tests.ts delete mode 100644 js/typedoc.js create mode 100644 js/typedoc.json diff --git a/.env b/.env index cde3f145f0bc3..ce7f5826750d2 100644 --- a/.env +++ b/.env @@ -63,7 +63,7 @@ JDK=8 KARTOTHEK=latest LLVM=12 MAVEN=3.5.4 -NODE=14 +NODE=16 NUMPY=latest PANDAS=latest PYTHON=3.8 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 
7c964920b8967..0f4ecd4db42c0 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -42,7 +42,7 @@ env: jobs: docker: - name: AMD64 Debian 11 NodeJS 14 + name: AMD64 Debian 11 NodeJS 16 runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 @@ -75,7 +75,7 @@ jobs: strategy: fail-fast: false matrix: - node: [14] + node: [16] steps: - name: Checkout Arrow uses: actions/checkout@v2 @@ -99,7 +99,7 @@ jobs: strategy: fail-fast: false matrix: - node: [14] + node: [16] steps: - name: Checkout Arrow uses: actions/checkout@v2 diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 378fe71df7009..652d4f4901d85 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -21,7 +21,7 @@ FROM ${repo}:${arch}-conda-cpp ARG arch=amd64 ARG maven=3.5 -ARG node=14 +ARG node=16 ARG jdk=8 ARG go=1.15 diff --git a/ci/docker/debian-10-js.dockerfile b/ci/docker/debian-10-js.dockerfile index f994de4141d65..e0938d96cd5b7 100644 --- a/ci/docker/debian-10-js.dockerfile +++ b/ci/docker/debian-10-js.dockerfile @@ -16,7 +16,7 @@ # under the License. ARG arch=amd64 -ARG node=14 +ARG node=16 FROM ${arch}/node:${node} ENV NODE_NO_WARNINGS=1 diff --git a/ci/docker/debian-11-js.dockerfile b/ci/docker/debian-11-js.dockerfile index f994de4141d65..e0938d96cd5b7 100644 --- a/ci/docker/debian-11-js.dockerfile +++ b/ci/docker/debian-11-js.dockerfile @@ -16,7 +16,7 @@ # under the License. 
ARG arch=amd64 -ARG node=14 +ARG node=16 FROM ${arch}/node:${node} ENV NODE_NO_WARNINGS=1 diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 4703f54b39342..52c47c35d54ef 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -67,7 +67,7 @@ RUN /arrow/ci/scripts/util_download_apache.sh \ ENV PATH=/opt/apache-maven-${maven}/bin:$PATH RUN mvn -version -ARG node=14 +ARG node=16 RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \ apt-get install -y nodejs && \ apt-get clean && \ diff --git a/ci/scripts/js_test.sh b/ci/scripts/js_test.sh index 345d6cb811e76..40de974ede161 100755 --- a/ci/scripts/js_test.sh +++ b/ci/scripts/js_test.sh @@ -25,5 +25,6 @@ pushd ${source_dir} yarn lint yarn test +yarn test:bundle popd diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 7a163d53ba697..44b35f2bb5a92 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -440,6 +440,7 @@ test_js() { yarn lint yarn build yarn test + yarn test:bundle popd } diff --git a/js/.eslintignore b/js/.eslintignore index 94ef668a61c36..8f1992e62511b 100644 --- a/js/.eslintignore +++ b/js/.eslintignore @@ -1,5 +1,6 @@ -.eslintrc.js +.eslintrc.cjs gulp jest.config.js jestconfigs targets +test/bundle/ diff --git a/js/.eslintrc.js b/js/.eslintrc.cjs similarity index 74% rename from js/.eslintrc.js rename to js/.eslintrc.cjs index 6d5020db10b10..bbdeff2b0ff2b 100644 --- a/js/.eslintrc.js +++ b/js/.eslintrc.cjs @@ -27,9 +27,10 @@ module.exports = { sourceType: "module", ecmaVersion: 2020, }, - plugins: ["@typescript-eslint", "jest"], + plugins: ["@typescript-eslint", "jest", "unicorn"], extends: [ "eslint:recommended", + "plugin:unicorn/recommended", "plugin:jest/recommended", "plugin:jest/style", "plugin:@typescript-eslint/recommended", @@ -82,6 +83,28 @@ module.exports = { "no-trailing-spaces": "error", "no-var": 
"error", "no-empty": "off", - "no-cond-assign": "off" + "no-cond-assign": "off", + + "unicorn/catch-error-name": "off", + "unicorn/no-nested-ternary": "off", + "unicorn/no-new-array": "off", + "unicorn/no-null": "off", + "unicorn/empty-brace-spaces": "off", + "unicorn/no-zero-fractions": "off", + "unicorn/prevent-abbreviations": "off", + "unicorn/prefer-module": "off", + "unicorn/numeric-separators-style": "off", + "unicorn/prefer-spread": "off", + "unicorn/filename-case": "off", + "unicorn/prefer-export-from": "off", + "unicorn/prefer-switch": "off", + "unicorn/prefer-node-protocol": "off", + + "unicorn/consistent-destructuring": "warn", + "unicorn/no-array-reduce": ["warn", { "allowSimpleOperations": true }], + "unicorn/no-await-expression-member": "warn", + "unicorn/no-useless-undefined": "warn", + "unicorn/consistent-function-scoping": "warn", + "unicorn/prefer-math-trunc": "warn" }, }; diff --git a/js/.gitignore b/js/.gitignore index 799f789d64da8..5b8e0dcc7eba2 100644 --- a/js/.gitignore +++ b/js/.gitignore @@ -78,6 +78,9 @@ targets test/data/**/*.json test/data/**/*.arrow +# test bundles +test/bundle/**/*-bundle.js* + # jest snapshots (too big) test/__snapshots__/ diff --git a/js/.vscode/extensions.json b/js/.vscode/extensions.json index 1cb01b6b9fe6f..fb771bcdc0709 100644 --- a/js/.vscode/extensions.json +++ b/js/.vscode/extensions.json @@ -2,5 +2,6 @@ "recommendations": [ "dbaeumer.vscode-eslint", "augustocdias.tasks-shell-input", + "orta.vscode-jest" ] } diff --git a/js/.vscode/launch.json b/js/.vscode/launch.json index ae72e1f48503e..e09d0a39e1ff1 100644 --- a/js/.vscode/launch.json +++ b/js/.vscode/launch.json @@ -34,6 +34,16 @@ "command": "./node_modules/.bin/jest --listTests | sed -r \"s@$PWD/test/@@g\"", } }, + { + "type": "command", + "id": "BUNDLE_FILE", + "command": "shellCommand.execute", + "args": { + "cwd": "${workspaceFolder}", + "description": "Select a file to debug", + "command": "ls test/bundle/**/*-bundle.js", + } + }, { "type": 
"command", "id": "TEST_RUNTIME_ARGS", @@ -100,6 +110,29 @@ "VALIDATE" ] }, + { + "name": "Debug Bundle", + "program": "${input:BUNDLE_FILE}", + "request": "launch", + "skipFiles": [ + "/**" + ], + "type": "node" + }, + { + "name": "Debug Benchmarks", + "program": "${workspaceFolder}/perf/index.ts", + "request": "launch", + "skipFiles": [ + "/**", + "${workspaceFolder}/node_modules/**/*.js" + ], + "runtimeArgs": [ + "--loader", + "ts-node/esm/transpile-only" + ], + "type": "node" + }, { "type": "node", "request": "launch", @@ -213,7 +246,8 @@ "${workspaceFolder}/bin/print-buffer-alignment.js", "./test/data/cpp/stream/struct_example.arrow" ] - },{ + }, + { "type": "node", "name": "vscode-jest-tests", "request": "launch", diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json index 379ddf14d0fd6..113a662180c3c 100644 --- a/js/.vscode/settings.json +++ b/js/.vscode/settings.json @@ -2,6 +2,21 @@ "typescript.tsdk": "node_modules/typescript/lib", "editor.trimAutoWhitespace": true, "editor.codeActionsOnSave": { - "source.fixAll.eslint": true - } + "source.fixAll.eslint": false + }, + "[javascript]": { + "editor.tabSize": 4, + "editor.formatOnSave": true, + "editor.formatOnSaveMode": "file", + "editor.defaultFormatter": "vscode.typescript-language-features" + }, + "[typescript]": { + "editor.tabSize": 4, + "editor.formatOnSave": true, + "editor.formatOnSaveMode": "file", + "editor.defaultFormatter": "vscode.typescript-language-features" + }, + "jest.jestCommandLine": "node --experimental-vm-modules node_modules/jest/bin/jest.js --config jest.config.js", + "jest.autoRun": {"watch": false, "onSave": "test-src-file"}, + "typescript.preferences.importModuleSpecifierEnding": "js" } diff --git a/js/DEVELOP.md b/js/DEVELOP.md index fd62486c403ea..06699830b9316 100644 --- a/js/DEVELOP.md +++ b/js/DEVELOP.md @@ -80,6 +80,12 @@ You can run the benchmarks with `yarn perf`. 
To print the results to stderr as J You can change the target you want to test by changing the imports in `perf/index.ts`. Note that you need to compile the bundles with `yarn build` before you can import them. +# Testing Bundling + +The bunldes use `apache-arrow` so make sure to build it with `yarn build -t apache-arrow`. To bundle with a variety of bundlers, run `yarn test:bundle` or `yarn gulp bundle`. + +Run `yarn gulp bundle:webpack:analyze` to open [Webpack Bundle Analyzer](https://github.com/webpack-contrib/webpack-bundle-analyzer). + # Updating the Arrow format flatbuffers generated code 1. Once generated, the flatbuffers format code needs to be adjusted for our build scripts (assumes `gnu-sed`): @@ -96,27 +102,17 @@ You can change the target you want to test by changing the imports in `perf/inde sed -i '+s+org.apache.arrow.flatbuf.++ig' $tmp_format_dir/*.fbs # Generate TS source from the modified Arrow flatbuffers schemas - flatc --ts --no-ts-reexport -o ./js/src/fb $tmp_format_dir/{File,Schema,Message}.fbs + flatc --ts -o ./js/src/fb $tmp_format_dir/{File,Schema,Message,Tensor,SparseTensor}.fbs # Remove the tmpdir rm -rf $tmp_format_dir + ``` - cd ./js/src/fb - - # Rename the existing files to .bak.ts - mv File{,.bak}.ts && mv Schema{,.bak}.ts && mv Message{,.bak}.ts +2. Manually fix the unused imports and add // @ts-ignore for other errors - # Remove `_generated` from the ES6 imports of the generated files - sed -i '+s+_generated\";+\";+ig' *_generated.ts - # Fix all the `flatbuffers` imports - sed -i '+s+./flatbuffers+flatbuffers+ig' *_generated.ts - # Fix the Union createTypeIdsVector typings - sed -i -r '+s+static createTypeIdsVector\(builder: flatbuffers.Builder, data: number\[\] \| Uint8Array+static createTypeIdsVector\(builder: flatbuffers.Builder, data: number\[\] \| Int32Array+ig' Schema_generated.ts - # Remove "_generated" suffix from TS files - mv File{_generated,}.ts && mv Schema{_generated,}.ts && mv Message{_generated,}.ts - ``` +3. 
Add `.js` to the imports. In VSCode, you can search for `^(import [^';]* from '(\./|(\.\./)+)[^';.]*)';` and replace with `$1.js';`. -2. Execute `yarn lint` from the `js` directory to fix the linting errors +4. Execute `yarn lint` from the `js` directory to fix the linting errors [1]: mailto:dev-subscribe@arrow.apache.org [2]: https://github.com/apache/arrow/tree/master/format diff --git a/js/README.md b/js/README.md index 945d4dc68f183..cbf40867e39b0 100644 --- a/js/README.md +++ b/js/README.md @@ -37,11 +37,8 @@ Apache Arrow is the emerging standard for large in-memory columnar data ([Spark] # Get Started -Check out our [API documentation][7] to learn more about how to use Apache Arrow's JS implementation. You can also learn by example by checking out some of the following resources: +Check out our [API documentation][5] to learn more about how to use Apache Arrow's JS implementation. You can also learn by example by checking out some of the following resources: -* [Observable: Introduction to Apache Arrow][5] -* [Observable: Manipulating flat arrays arrow-style][6] -* [Observable: Rich columnar data tables - Dictionary-encoded strings, 64bit ints, and nested structs][8] * [/js/test/unit](https://github.com/apache/arrow/tree/master/js/test/unit) - Unit tests for Table and Vector ## Cookbook @@ -50,12 +47,12 @@ Check out our [API documentation][7] to learn more about how to use Apache Arrow ```js import { readFileSync } from 'fs'; -import { Table } from 'apache-arrow'; +import { tableFromIPC } from 'apache-arrow'; const arrow = readFileSync('simple.arrow'); -const table = Table.from([arrow]); +const table = tableFromIPC(arrow); -console.log(table.toString()); +console.table(table.toArray()); /* foo, bar, baz @@ -71,14 +68,14 @@ null, null, null ```js import { readFileSync } from 'fs'; -import { Table } from 'apache-arrow'; +import { tableFromIPC } from 'apache-arrow'; -const table = Table.from([ +const table = tableFromIPC([ 'latlong/schema.arrow', 
'latlong/records.arrow' ].map((file) => readFileSync(file))); -console.log(table.toString()); +console.table([...table]); /* origin_lat, origin_lon @@ -93,60 +90,80 @@ console.log(table.toString()); ### Create a Table from JavaScript arrays ```js -import { - Table, - FloatVector, - DateVector -} from 'apache-arrow'; +import { tableFromArrays } from 'apache-arrow'; const LENGTH = 2000; const rainAmounts = Float32Array.from( - { length: LENGTH }, - () => Number((Math.random() * 20).toFixed(1))); + { length: LENGTH }, + () => Number((Math.random() * 20).toFixed(1))); const rainDates = Array.from( - { length: LENGTH }, - (_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i)); + { length: LENGTH }, + (_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i)); -const rainfall = Table.new( - [FloatVector.from(rainAmounts), DateVector.from(rainDates)], - ['precipitation', 'date'] -); +const rainfall = tableFromArrays({ + precipitation: rainAmounts, + date: rainDates +}); + +console.table([...rainfall]); ``` ### Load data with `fetch` ```js -import { Table } from "apache-arrow"; +import { tableFromIPC } from "apache-arrow"; -const table = await Table.from(fetch("/simple.arrow")); -console.log(table.toString()); +const table = await tableFromIPC(fetch("/simple.arrow")); +console.table([...table]); ``` -### Columns look like JS Arrays +### Vectors look like JS Arrays + +You can create vector from JavaScript typed arrays with `makeVector` and from JavaScript arrays with `vectorFromArray`. `makeVector` is a lot faster and does not require a copy. 
```js -import { readFileSync } from 'fs'; -import { Table } from 'apache-arrow'; +import { makeVector } from "apache-arrow"; -const table = Table.from([ - 'latlong/schema.arrow', - 'latlong/records.arrow' -].map(readFileSync)); +const LENGTH = 2000; + +const rainAmounts = Float32Array.from( + { length: LENGTH }, + () => Number((Math.random() * 20).toFixed(1))); + +const vector = makeVector(rainAmounts); -const column = table.getColumn('origin_lat'); +const typed = vector.toArray() -// Copy the data into a TypedArray -const typed = column.toArray(); assert(typed instanceof Float32Array); -for (let i = -1, n = column.length; ++i < n;) { - assert(column.get(i) === typed[i]); +for (let i = -1, n = vector.length; ++i < n;) { + assert(vector.get(i) === typed[i]); } ``` +### String vectors + +Strings can be encoded as UTF-8 or dictionary encoded UTF-8. Dictionary encoding encodes repeated values more efficiently. You can create a dictionary encoded string conveniently with `vectorFromArray` or efficiently with `makeVector`. 
+ +```js +import { makeVector, vectorFromArray, Dictionary, Uint8, Utf8 } from "apache-arrow"; + +const uft8Vector = vectorFromArray(['foo', 'bar', 'baz'], new Utf8); + +const dictionaryVector1 = vectorFromArray( + ['foo', 'bar', 'baz', 'foo', 'bar'] +); + +const dictionaryVector2 = makeVector({ + data: [0, 1, 2, 0, 1], // indexes into the dictionary + dictionary: uft8Vector, + type: new Dictionary(new Utf8, new Uint8) +}); +``` + # Getting involved See [DEVELOP.md](DEVELOP.md) @@ -235,7 +252,4 @@ Full list of broader Apache Arrow [projects & organizations](https://arrow.apach [2]: https://github.com/apache/arrow/tree/master/format [3]: https://issues.apache.org/jira/browse/ARROW [4]: https://github.com/apache/arrow -[5]: https://beta.observablehq.com/@theneuralbit/introduction-to-apache-arrow -[6]: https://beta.observablehq.com/@lmeyerov/manipulating-flat-arrays-arrow-style -[7]: https://arrow.apache.org/docs/js/ -[8]: https://observablehq.com/@lmeyerov/rich-data-types-in-apache-arrow-js-efficient-data-tables-wit +[5]: https://arrow.apache.org/docs/js/ diff --git a/js/bin/arrow2csv.js b/js/bin/arrow2csv.js index 0e446fabe7958..51984a7971df1 100755 --- a/js/bin/arrow2csv.js +++ b/js/bin/arrow2csv.js @@ -19,10 +19,9 @@ const Path = require(`path`); const here = Path.resolve(__dirname, '../'); -const tsnode = require.resolve(`ts-node/register`); const arrow2csv = Path.join(here, `src/bin/arrow2csv.ts`); const env = { ...process.env, TS_NODE_TRANSPILE_ONLY: `true` }; require('child_process').spawn(`node`, [ - `-r`, tsnode, arrow2csv, ...process.argv.slice(2) + `--loader`, 'ts-node/esm/transpile-only', arrow2csv, ...process.argv.slice(2) ], { cwd: here, env, stdio: `inherit` }); diff --git a/js/bin/file-to-stream.js b/js/bin/file-to-stream.js index 090cd0b0eda77..7fab54e2cb09f 100755 --- a/js/bin/file-to-stream.js +++ b/js/bin/file-to-stream.js @@ -22,7 +22,7 @@ const fs = require('fs'); const path = require('path'); const eos = 
require('util').promisify(require('stream').finished); -const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : '.cjs'; const { RecordBatchReader, RecordBatchStreamWriter } = require(`../index${extension}`); (async () => { diff --git a/js/bin/integration.js b/js/bin/integration.js index 507514ebade79..f5c1f71d591e4 100755 --- a/js/bin/integration.js +++ b/js/bin/integration.js @@ -29,6 +29,7 @@ const argv = require(`command-line-args`)(cliOpts(), { partial: true }); const { Table, RecordBatchReader, + RecordBatchStreamWriter, util: { createElementComparator } } = require('../targets/apache-arrow/'); @@ -42,7 +43,7 @@ const exists = async (p) => { if (!argv.mode) { return print_usage(); } - let mode = argv.mode.toUpperCase(); + const mode = argv.mode.toUpperCase(); let jsonPaths = [...(argv.json || [])]; let arrowPaths = [...(argv.arrow || [])]; @@ -52,20 +53,29 @@ const exists = async (p) => { if (!jsonPaths.length) { return print_usage(); } + let threw = false; + switch (mode) { case 'VALIDATE': for (let [jsonPath, arrowPath] of zip(jsonPaths, arrowPaths)) { - await validate(jsonPath, arrowPath); + try { + await validate(jsonPath, arrowPath); + } catch (e) { + threw = true; + e && process.stderr.write(`${e?.stack || e}\n`); + } } break; default: return print_usage(); } + + return threw ? 
1 : 0; })() -.then((x) => +x || 0, (e) => { - e && process.stderr.write(`${e?.stack || e}\n`); - return process.exitCode || 1; -}).then((code) => process.exit(code)); + .then((x) => +x || 0, (e) => { + e && process.stderr.write(`${e?.stack || e}\n`); + return process.exitCode || 1; + }).then((code) => process.exit(code)); function cliOpts() { return [ @@ -125,6 +135,10 @@ async function validate(jsonPath, arrowPath) { const arrowData = files[0]; const jsonData = bignumJSONParse(files[1]); + process.stdout.write(`\n`); + process.stdout.write(` json: ${jsonPath}\n`); + process.stdout.write(`arrow: ${arrowPath}\n`); + validateReaderIntegration(jsonData, arrowData); validateTableFromBuffersIntegration(jsonData, arrowData); validateTableToBuffersIntegration('json', 'file')(jsonData, arrowData); @@ -148,8 +162,8 @@ function validateReaderIntegration(jsonData, arrowBuffer) { function validateTableFromBuffersIntegration(jsonData, arrowBuffer) { const msg = `json and arrow tables report the same values`; try { - const jsonTable = Table.from(jsonData); - const binaryTable = Table.from(arrowBuffer); + const jsonTable = new Table(RecordBatchReader.from(jsonData)); + const binaryTable = new Table(RecordBatchReader.from(arrowBuffer)); compareTableIsh(jsonTable, binaryTable); } catch (e) { throw new Error(`${msg}: fail \n ${e?.stack || e}`); } process.stdout.write(`${msg}: pass\n`); @@ -160,9 +174,9 @@ function validateTableToBuffersIntegration(srcFormat, arrowFormat) { return function testTableToBuffersIntegration(jsonData, arrowBuffer) { const msg = `serialized ${srcFormat} ${arrowFormat} reports the same values as the ${refFormat} ${arrowFormat}`; try { - const refTable = Table.from(refFormat === `json` ? jsonData : arrowBuffer); - const srcTable = Table.from(srcFormat === `json` ? jsonData : arrowBuffer); - const dstTable = Table.from(srcTable.serialize(`binary`, arrowFormat === `stream`)); + const refTable = new Table(RecordBatchReader.from(refFormat === `json` ? 
jsonData : arrowBuffer)); + const srcTable = new Table(RecordBatchReader.from(srcFormat === `json` ? jsonData : arrowBuffer)); + const dstTable = new Table(RecordBatchReader.from(RecordBatchStreamWriter.writeAll(srcTable).toUint8Array(true))); compareTableIsh(dstTable, refTable); } catch (e) { throw new Error(`${msg}: fail \n ${e?.stack || e}`); } process.stdout.write(`${msg}: pass\n`); @@ -177,10 +191,9 @@ function compareTableIsh(actual, expected) { throw new Error(`numCols: ${actual.numCols} !== ${expected.numCols}`); } (() => { - const getChildAtFn = expected instanceof Table ? 'getColumnAt' : 'getChildAt'; for (let i = -1, n = actual.numCols; ++i < n;) { - const v1 = actual[getChildAtFn](i); - const v2 = expected[getChildAtFn](i); + const v1 = actual.getChildAt(i); + const v2 = expected.getChildAt(i); compareVectors(v1, v2); } })(); @@ -192,7 +205,7 @@ function compareVectors(actual, expected) { throw new Error(`${actual == null ? `actual` : `expected`} is null, was expecting ${actual ?? expected} to be that also`); } - let props = ['type', 'length', 'nullCount']; + const props = ['type', 'length', 'nullCount']; (() => { for (let i = -1, n = props.length; ++i < n;) { @@ -205,7 +218,7 @@ function compareVectors(actual, expected) { (() => { for (let i = -1, n = actual.length; ++i < n;) { - let x1 = actual.get(i), x2 = expected.get(i); + const x1 = actual.get(i), x2 = expected.get(i); if (!createElementComparator(x2)(x1)) { throw new Error(`${i}: ${x1} !== ${x2}`); } diff --git a/js/bin/json-to-arrow.js b/js/bin/json-to-arrow.js index 8f3fbd3fce988..ee7a952befddc 100755 --- a/js/bin/json-to-arrow.js +++ b/js/bin/json-to-arrow.js @@ -23,7 +23,7 @@ const fs = require('fs'); const Path = require('path'); const { parse } = require('json-bignum'); const eos = require('util').promisify(require('stream').finished); -const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const extension = process.env.ARROW_JS_DEBUG === 'src' ? 
'.ts' : '.cjs'; const argv = require(`command-line-args`)(cliOpts(), { partial: true }); const { RecordBatchReader, RecordBatchFileWriter, RecordBatchStreamWriter } = require(`../index${extension}`); diff --git a/js/bin/package.json b/js/bin/package.json new file mode 100644 index 0000000000000..6a0d2ef2aa8b2 --- /dev/null +++ b/js/bin/package.json @@ -0,0 +1,3 @@ +{ + "type": "commonjs" +} \ No newline at end of file diff --git a/js/bin/print-buffer-alignment.js b/js/bin/print-buffer-alignment.js index 4c3260397a72b..639c0af5d80ea 100755 --- a/js/bin/print-buffer-alignment.js +++ b/js/bin/print-buffer-alignment.js @@ -21,7 +21,7 @@ const fs = require('fs'); const path = require('path'); -const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : '.cjs'; const { RecordBatch, AsyncMessageReader } = require(`../index${extension}`); const { VectorLoader } = require(`../targets/apache-arrow/visitor/vectorloader`); @@ -32,7 +32,7 @@ const { VectorLoader } = require(`../targets/apache-arrow/visitor/vectorloader`) let schema, recordBatchIndex = 0, dictionaryBatchIndex = 0; - for await (let message of reader) { + for await (const message of reader) { let bufferRegions = []; diff --git a/js/bin/stream-to-file.js b/js/bin/stream-to-file.js index 015a5eace74d8..5cd9e76f8085f 100755 --- a/js/bin/stream-to-file.js +++ b/js/bin/stream-to-file.js @@ -22,7 +22,7 @@ const fs = require('fs'); const path = require('path'); const eos = require('util').promisify(require('stream').finished); -const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const extension = process.env.ARROW_JS_DEBUG === 'src' ? 
'.ts' : '.cjs'; const { RecordBatchReader, RecordBatchFileWriter } = require(`../index${extension}`); (async () => { diff --git a/js/gulp/argv.js b/js/gulp/argv.js index 0acdad7d5e198..5c49fc5155a80 100644 --- a/js/gulp/argv.js +++ b/js/gulp/argv.js @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -const argv = require(`command-line-args`)([ +import args from 'command-line-args'; +export const argv = args([ { name: `all`, type: Boolean }, { name: 'verbose', alias: `v`, type: Boolean }, { name: `target`, type: String, defaultValue: `` }, @@ -25,7 +26,7 @@ const argv = require(`command-line-args`)([ { name: `modules`, alias: `m`, type: String, multiple: true, defaultValue: [] }, ], { partial: true }); -const { targets, modules } = argv; +export const { targets, modules } = argv; if (argv.target === `src`) { argv.target && !targets.length && targets.push(argv.target); @@ -35,5 +36,3 @@ if (argv.target === `src`) { (argv.all || !targets.length) && targets.push(`all`); (argv.all || !modules.length) && modules.push(`all`); } - -module.exports = { argv, targets, modules }; diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js index fc85dd72ebd58..e211d7d1abe1f 100644 --- a/js/gulp/arrow-task.js +++ b/js/gulp/arrow-task.js @@ -15,25 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-const { - targetDir, observableFromStreams -} = require('./util'); +import { targetDir, observableFromStreams } from "./util.js"; -const del = require('del'); -const gulp = require('gulp'); -const mkdirp = require('mkdirp'); -const gulpRename = require(`gulp-rename`); -const { memoizeTask } = require('./memoize-task'); -const { - ReplaySubject, - forkJoin: ObservableForkJoin, -} = require('rxjs'); -const { - share -} = require('rxjs/operators'); -const pipeline = require('util').promisify(require('stream').pipeline); +import del from "del"; +import gulp from "gulp"; +import mkdirp from "mkdirp"; +import gulpRename from "gulp-rename"; +import gulpReplace from "gulp-replace"; +import { memoizeTask } from "./memoize-task.js"; +import { ReplaySubject, forkJoin as ObservableForkJoin } from "rxjs"; +import { share } from "rxjs/operators"; +import util from "util"; +import stream from "stream"; +const pipeline = util.promisify(stream.pipeline); -const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) { +export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) { const out = targetDir(target); const dtsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.ts`; const cjsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.js`; @@ -45,26 +41,22 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) { const es2015UmdSourceMapsGlob = `${targetDir(`es2015`, `umd`)}/*.map`; const esnextUmdSourceMapsGlob = `${targetDir(`esnext`, `umd`)}/*.map`; return ObservableForkJoin([ - observableFromStreams(gulp.src(dtsGlob), gulp.dest(out)), // copy d.ts files - observableFromStreams(gulp.src(cjsGlob), gulp.dest(out)), // copy es2015 cjs files - observableFromStreams(gulp.src(cjsSourceMapsGlob), gulp.dest(out)), // copy es2015 cjs sourcemaps - observableFromStreams(gulp.src(esmSourceMapsGlob), gulp.dest(out)), // copy es2015 esm sourcemaps + observableFromStreams(gulp.src(dtsGlob), gulp.dest(out)), // copy d.ts files + 
observableFromStreams(gulp.src(cjsGlob), gulp.dest(out)), // copy es2015 cjs files + observableFromStreams(gulp.src(cjsSourceMapsGlob), gulp.dest(out)), // copy es2015 cjs sourcemaps + observableFromStreams(gulp.src(esmSourceMapsGlob), gulp.dest(out)), // copy es2015 esm sourcemaps observableFromStreams(gulp.src(es2015UmdSourceMapsGlob), gulp.dest(out)), // copy es2015 umd sourcemap files, but don't rename observableFromStreams(gulp.src(esnextUmdSourceMapsGlob), gulp.dest(out)), // copy esnext umd sourcemap files, but don't rename - observableFromStreams(gulp.src(esmGlob), gulpRename((p) => { p.extname = '.mjs'; }), gulp.dest(out)), // copy es2015 esm files and rename to `.mjs` + observableFromStreams(gulp.src(esmGlob), gulpRename((p) => { p.extname = '.mjs'; }), gulpReplace(`.js'`, `.mjs'`), gulp.dest(out)), // copy es2015 esm files and rename to `.mjs` observableFromStreams(gulp.src(es2015UmdGlob), gulpRename((p) => { p.basename += `.es2015.min`; }), gulp.dest(out)), // copy es2015 umd files and add `.es2015.min` observableFromStreams(gulp.src(esnextUmdGlob), gulpRename((p) => { p.basename += `.esnext.min`; }), gulp.dest(out)), // copy esnext umd files and add `.esnext.min` ]).pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false })); }))({}); -const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) { +export const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) { const out = targetDir(target, format); await mkdirp(out); await pipeline(gulp.src(`src/**/*`), gulp.dest(out)); await del(`${out}/**/*.js`); }))({}); - -module.exports = arrowTask; -module.exports.arrowTask = arrowTask; -module.exports.arrowTSTask = arrowTSTask; diff --git a/js/gulp/bundle-task.js b/js/gulp/bundle-task.js new file mode 100644 index 0000000000000..dc9821fee2a3c --- /dev/null +++ b/js/gulp/bundle-task.js @@ -0,0 +1,137 @@ +// Licensed to the Apache 
Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import gulp from "gulp"; +import size from "gulp-vinyl-size"; +import gulpRename from "gulp-rename"; +import terser from "gulp-terser"; +import source from "vinyl-source-stream"; +import buffer from "vinyl-buffer"; +import { observableFromStreams } from "./util.js"; +import { forkJoin as ObservableForkJoin } from "rxjs"; +import { resolve, join } from "path"; +import { readdirSync } from "fs"; +import { execSync } from 'child_process'; + +import gulpEsbuild from "gulp-esbuild"; +import esbuildAlias from "esbuild-plugin-alias"; + +import rollupStream from "@rollup/stream"; +import { default as nodeResolve } from "@rollup/plugin-node-resolve"; +import rollupAlias from "@rollup/plugin-alias"; + +import { BundleAnalyzerPlugin } from "webpack-bundle-analyzer"; +import webpack from "webpack-stream"; +import named from "vinyl-named"; + +import { fileURLToPath } from 'url'; +import { dirname } from 'path'; +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const bundleDir = resolve(__dirname, '../test/bundle'); + +const fileNames = readdirSync(bundleDir) + .filter(fileName => fileName.endsWith('.js')) + .map(fileName => fileName.replace(/\.js$/, '')); + 
+const target = `apache-arrow`; + +const bundlesGlob = join(bundleDir, '**.js'); +const esbuildDir = join(bundleDir, 'esbuild'); +export const esbuildTask = (minify = true) => () => observableFromStreams( + gulp.src(bundlesGlob), + gulpEsbuild({ + bundle: true, + minify, + treeShaking: true, + plugins: [ + esbuildAlias({ + 'apache-arrow': resolve(__dirname, `../targets/${target}/Arrow.dom.mjs`), + }), + ], + }), + gulpRename((p) => { p.basename += '-bundle'; }), + gulp.dest(esbuildDir), + size({ gzip: true }) +); + +const rollupDir = join(bundleDir, 'rollup'); +export const rollupTask = (minify = true) => () => ObservableForkJoin( + fileNames.map(fileName => observableFromStreams( + rollupStream({ + input: join(bundleDir, `${fileName}.js`), + output: { format: 'cjs' }, + plugins: [ + rollupAlias({ + entries: { 'apache-arrow': resolve(__dirname, `../targets/${target}/`) } + }), + nodeResolve() + ], + onwarn: (message) => { + if (message.code === 'CIRCULAR_DEPENDENCY') return + console.error(message); + } + }), + source(`${fileName}-bundle.js`), + buffer(), + ...(minify ? [terser()] : []), + gulp.dest(rollupDir), + size({ gzip: true }) + )) +) + +const webpackDir = join(bundleDir, 'webpack'); +export const webpackTask = (opts = { minify: true, analyze: false }) => () => observableFromStreams( + gulp.src(bundlesGlob), + named(), + webpack({ + mode: opts?.minify == false ? 'development' : 'production', + optimization: { + usedExports: true + }, + output: { + filename: '[name]-bundle.js' + }, + module: { + rules: [ + { + resolve: { + fullySpecified: false, + } + } + ] + }, + resolve: { + alias: { 'apache-arrow': resolve(__dirname, `../targets/${target}/`) } + }, + stats: 'errors-only', + plugins: opts?.analyze ? 
[new BundleAnalyzerPlugin()] : [] + }), + gulp.dest(webpackDir), + size({ gzip: true }) +); + +export const execBundleTask = () => () => observableFromStreams( + gulp.src(join(bundleDir, '**/**-bundle.js')), + async (generator) => { + for await (const file of generator) { + console.log(`executing ${file.path}`); + execSync(`node ${file.path}`); + } + } +); diff --git a/js/gulp/clean-task.js b/js/gulp/clean-task.js index 0034f9a095dc5..8dc25a9cae36d 100644 --- a/js/gulp/clean-task.js +++ b/js/gulp/clean-task.js @@ -15,20 +15,16 @@ // specific language governing permissions and limitations // under the License. -const del = require('del'); -const { targetDir } = require('./util'); -const memoizeTask = require('./memoize-task'); -const { catchError } = require('rxjs/operators'); -const { - from: ObservableFrom, - EMPTY: ObservableEmpty, -} = require('rxjs'); +import del from "del"; +import { targetDir } from "./util.js"; +import memoizeTask from "./memoize-task.js"; +import { catchError } from "rxjs/operators"; +import { from as ObservableFrom, EMPTY as ObservableEmpty } from "rxjs"; -const cleanTask = ((cache) => memoizeTask(cache, function clean(target, format) { +export const cleanTask = ((cache) => memoizeTask(cache, function clean(target, format) { const dir = targetDir(target, format); return ObservableFrom(del(dir)) .pipe(catchError((e) => ObservableEmpty())); }))({}); -module.exports = cleanTask; -module.exports.cleanTask = cleanTask; \ No newline at end of file +export default cleanTask; diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js index 6e5a61d82b53d..c195011f8c1f4 100644 --- a/js/gulp/closure-task.js +++ b/js/gulp/closure-task.js @@ -15,27 +15,20 @@ // specific language governing permissions and limitations // under the License. 
-const { - targetDir, - mainExport, - esmRequire, - gCCLanguageNames, - publicModulePaths, - observableFromStreams, - shouldRunInChildProcess, - spawnGulpCommandInChildProcess, -} = require('./util'); - -const fs = require('fs'); -const gulp = require('gulp'); -const path = require('path'); -const mkdirp = require('mkdirp'); -const sourcemaps = require('gulp-sourcemaps'); -const { memoizeTask } = require('./memoize-task'); -const { compileBinFiles } = require('./typescript-task'); -const closureCompiler = require('google-closure-compiler').gulp(); - -const closureTask = ((cache) => memoizeTask(cache, async function closure(target, format) { +import { targetDir, mainExport, esmRequire, gCCLanguageNames, publicModulePaths, observableFromStreams, shouldRunInChildProcess, spawnGulpCommandInChildProcess } from "./util.js"; + +import fs from "fs"; +import gulp from "gulp"; +import path from "path"; +import mkdirp from "mkdirp"; +import sourcemaps from "gulp-sourcemaps"; +import { memoizeTask } from "./memoize-task.js"; +import { compileBinFiles } from "./typescript-task.js"; + +import closureCompiler from 'google-closure-compiler'; +const compiler = closureCompiler.gulp(); + +export const closureTask = ((cache) => memoizeTask(cache, async function closure(target, format) { if (shouldRunInChildProcess(target, format)) { return spawnGulpCommandInChildProcess('compile', target, format); @@ -71,11 +64,11 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target gulp.src([ /* external libs first */ `node_modules/flatbuffers/package.json`, - `node_modules/flatbuffers/js/flatbuffers.mjs`, + `node_modules/flatbuffers/**/*.js`, `${src}/**/*.js` /* <-- then source globs */ ], { base: `./` }), sourcemaps.init(), - closureCompiler(createClosureArgs(entry_point, externs, target), { + compiler(createClosureArgs(entry_point, externs, target), { platform: ['native', 'java', 'javascript'] }), // rename the sourcemaps from *.js.map files to *.min.js.map @@ -85,8 
+78,7 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target } }))({}); -module.exports = closureTask; -module.exports.closureTask = closureTask; +export default closureTask; const createClosureArgs = (entry_point, externs, target) => ({ externs, @@ -104,7 +96,7 @@ const createClosureArgs = (entry_point, externs, target) => ({ js_output_file: `${mainExport}.js`, language_in: gCCLanguageNames[`esnext`], language_out: gCCLanguageNames[target], - output_wrapper:`${apacheHeader()} + output_wrapper: `${apacheHeader()} (function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : @@ -160,11 +152,21 @@ Symbol.iterator; Symbol.toPrimitive; /** @type {symbol} */ Symbol.asyncIterator; + +var Encoding = function() {}; +/** @type {?} */ +Encoding[1] = function() {}; +/** @type {?} */ +Encoding[2] = function() {}; +/** @type {?} */ +Encoding.UTF8_BYTES = function() {}; +/** @type {?} */ +Encoding.UTF16_STRING = function() {}; `); } function getPublicExportedNames(entryModule) { - const fn = function() {}; + const fn = function () { }; const isStaticOrProtoName = (x) => ( !(x in fn) && (x !== `default`) && diff --git a/js/gulp/compile-task.js b/js/gulp/compile-task.js index 07109ef73e040..88b0875460c81 100644 --- a/js/gulp/compile-task.js +++ b/js/gulp/compile-task.js @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-const { Observable } = require('rxjs'); -const { npmPkgName } = require('./util'); -const { memoizeTask } = require('./memoize-task'); +import { Observable } from "rxjs"; +import { npmPkgName } from "./util.js"; +import { memoizeTask } from "./memoize-task.js"; -const closureTask = require('./closure-task'); -const typescriptTask = require('./typescript-task'); -const { arrowTask, arrowTSTask } = require('./arrow-task'); +import closureTask from "./closure-task.js"; +import typescriptTask from "./typescript-task.js"; +import { arrowTask, arrowTSTask } from "./arrow-task.js"; const compileTask = ((cache) => memoizeTask(cache, function compile(target, format, ...args) { return target === `src` ? Observable.empty() @@ -31,5 +31,4 @@ const compileTask = ((cache) => memoizeTask(cache, function compile(target, form : typescriptTask(target, format, ...args)(); }))({}); -module.exports = compileTask; -module.exports.compileTask = compileTask; +export default compileTask; diff --git a/js/src/vector/struct.ts b/js/gulp/esm-require.cjs similarity index 55% rename from js/src/vector/struct.ts rename to js/gulp/esm-require.cjs index b825f092e4fe9..ab09e97d707a7 100644 --- a/js/src/vector/struct.ts +++ b/js/gulp/esm-require.cjs @@ -15,18 +15,24 @@ // specific language governing permissions and limitations // under the License. -import { StructRow } from './row'; -import { BaseVector } from './base'; -import { DataType, Struct } from '../type'; +const esm = require("esm"); -/** @ignore */ const kRowIndex = Symbol.for('rowIndex'); -/** @ignore */ -export class StructVector extends BaseVector> { - private _row!: StructRow; - public bind(index: number): Struct['TValue'] { - const proto = this._row || (this._row = new StructRow(this)); - const bound = Object.create(proto); - bound[kRowIndex] = index; - return bound; +const esmRequire = esm(module, { + mode: `auto`, + cjs: { + /* A boolean for storing ES modules in require.cache. 
*/ + cache: true, + /* A boolean for respecting require.extensions in ESM. */ + extensions: true, + /* A boolean for __esModule interoperability. */ + interop: true, + /* A boolean for importing named exports of CJS modules. */ + namedExports: true, + /* A boolean for following CJS path rules in ESM. */ + paths: true, + /* A boolean for __dirname, __filename, and require in ESM. */ + vars: true, } -} +}); + +module.exports = esmRequire; diff --git a/js/gulp/memoize-task.js b/js/gulp/memoize-task.js index 408ee3b8839db..303e3e5991ead 100644 --- a/js/gulp/memoize-task.js +++ b/js/gulp/memoize-task.js @@ -15,24 +15,22 @@ // specific language governing permissions and limitations // under the License. -const { taskName } = require('./util'); +import { taskName } from "./util.js"; -const createTask = ((taskFn) => ((target, format, ...args) => { +export const createTask = ((taskFn) => ((target, format, ...args) => { // Give the memoized fn a displayName so gulp's output is easier to follow. const fn = () => taskFn(target, format, ...args); fn.displayName = `${taskFn.name || ``}:${taskName(target, format, ...args)}:task`; return fn; })); -const memoizeTask = ((cache, taskFn) => ((target, format, ...args) => { - // Give the memoized fn a displayName so gulp's output is easier to follow. - const fn = () => ( - cache[taskName(target, format)] || ( +export const memoizeTask = ((cache, taskFn) => ((target, format, ...args) => { + // Give the memoized fn a displayName so gulp's output is easier to follow. 
+ const fn = () => ( + cache[taskName(target, format)] || ( cache[taskName(target, format)] = taskFn(target, format, ...args))); - fn.displayName = `${taskFn.name || ``}:${taskName(target, format, ...args)}:task`; - return fn; + fn.displayName = `${taskFn.name || ``}:${taskName(target, format, ...args)}:task`; + return fn; })); -module.exports = memoizeTask; -module.exports.createTask = createTask; -module.exports.memoizeTask = memoizeTask; +export default memoizeTask; diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js index 994ebe0ef8ad5..087a51479e9d6 100644 --- a/js/gulp/package-task.js +++ b/js/gulp/package-task.js @@ -15,25 +15,15 @@ // specific language governing permissions and limitations // under the License. -const { - metadataFiles, packageJSONFields, - mainExport, npmPkgName, npmOrgName, - targetDir, packageName, observableFromStreams -} = require('./util'); +import { metadataFiles, packageJSONFields, mainExport, npmPkgName, npmOrgName, targetDir, packageName, observableFromStreams } from "./util.js"; -const gulp = require('gulp'); -const { memoizeTask } = require('./memoize-task'); -const { - ReplaySubject, - EMPTY: ObservableEmpty, - forkJoin: ObservableForkJoin, -} = require('rxjs'); -const { - share -} = require('rxjs/operators'); -const gulpJsonTransform = require('gulp-json-transform'); +import gulp from "gulp"; +import { memoizeTask } from "./memoize-task.js"; +import { ReplaySubject, EMPTY as ObservableEmpty, forkJoin as ObservableForkJoin } from "rxjs"; +import { share } from "rxjs/operators"; +import gulpJsonTransform from "gulp-json-transform"; -const packageTask = ((cache) => memoizeTask(cache, function bundle(target, format) { +export const packageTask = ((cache) => memoizeTask(cache, function bundle(target, format) { if (target === `src`) return ObservableEmpty(); const out = targetDir(target, format); const jsonTransform = gulpJsonTransform(target === npmPkgName ? 
createMainPackageJson(target, format) : @@ -41,16 +31,12 @@ const packageTask = ((cache) => memoizeTask(cache, function bundle(target, forma : createScopedPackageJSON(target, format), 2); return ObservableForkJoin([ - observableFromStreams(gulp.src(metadataFiles), gulp.dest(out)), // copy metadata files - observableFromStreams(gulp.src(`package.json`), jsonTransform, gulp.dest(out)) // write packageJSONs + observableFromStreams(gulp.src(metadataFiles), gulp.dest(out)), // copy metadata files + observableFromStreams(gulp.src(`package.json`), jsonTransform, gulp.dest(out)) // write packageJSONs ]).pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false })); }))({}); -module.exports = packageTask; -module.exports.packageTask = packageTask; - -// FIXME: set this to false when we have no side effects -const sideEffects = true; +export default packageTask; const createMainPackageJson = (target, format) => (orig) => ({ ...createTypeScriptPackageJson(target, format)(orig), @@ -64,17 +50,23 @@ const createMainPackageJson = (target, format) => (orig) => ({ [`./${mainExport}.node.mjs`]: `./${mainExport}.dom.mjs` }, exports: { - node: { - import: `./${mainExport}.node.mjs`, - require: `./${mainExport}.node.js`, + ".": { + node: { + import: `./${mainExport}.node.mjs`, + require: `./${mainExport}.node.js`, + }, + import: `./${mainExport}.dom.mjs`, + require: `./${mainExport}.dom.js`, }, - import: `./${mainExport}.dom.mjs`, - require: `./${mainExport}.dom.js`, + "./*": { + import: `./*.mjs`, + require: `./*.js` + } }, types: `${mainExport}.node.d.ts`, unpkg: `${mainExport}.es2015.min.js`, jsdelivr: `${mainExport}.es2015.min.js`, - sideEffects: sideEffects, + sideEffects: false, esm: { mode: `all`, sourceMap: true } }); @@ -86,7 +78,7 @@ const createTypeScriptPackageJson = (target, format) => (orig) => ({ types: `${mainExport}.node.ts`, browser: `${mainExport}.dom.ts`, type: "module", - sideEffects: sideEffects, + 
sideEffects: false, esm: { mode: `auto`, sourceMap: true }, dependencies: { '@types/flatbuffers': '*', @@ -115,7 +107,7 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) => // set "module" if building scoped ESM target module: format === 'esm' ? `${mainExport}.node.js` : undefined, // set "sideEffects" to false as a hint to Webpack that it's safe to tree-shake the ESM target - sideEffects: format === 'esm' ? sideEffects : undefined, + sideEffects: format === 'esm' ? false : undefined, // include "esm" settings for https://www.npmjs.com/package/esm if building scoped ESM target esm: format === `esm` ? { mode: `auto`, sourceMap: true } : undefined, // set "types" (for TypeScript/VSCode) diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js index 2012f74290743..5e7f603c870af 100644 --- a/js/gulp/test-task.js +++ b/js/gulp/test-task.js @@ -15,22 +15,40 @@ // specific language governing permissions and limitations // under the License. -const del = require('del'); -const path = require('path'); -const mkdirp = require('mkdirp'); -const cpy = require('cpy'); -const { argv } = require('./argv'); -const { promisify } = require('util'); -const glob = promisify(require('glob')); -const child_process = require(`child_process`); -const { memoizeTask } = require('./memoize-task'); -const readFile = promisify(require('fs').readFile); -const asyncDone = promisify(require('async-done')); -const exec = promisify(require('child_process').exec); -const parseXML = promisify(require('xml2js').parseString); -const { targetAndModuleCombinations, npmPkgName } = require('./util'); +import del from "del"; +import path from "path"; +import mkdirp from "mkdirp"; +import { argv } from "./argv.js"; +import { promisify } from "util"; +import globSync from "glob"; +const glob = promisify(globSync); +import child_process from "child_process"; +import { memoizeTask } from "./memoize-task.js"; +import fs from "fs"; +const readFile = promisify(fs.readFile); +import 
asyncDoneSync from "async-done"; +const asyncDone = promisify(asyncDoneSync); +const exec = promisify(child_process.exec); +import xml2js from "xml2js"; +const parseXML = promisify(xml2js.parseString); +import { targetAndModuleCombinations, npmPkgName } from "./util.js"; +import { createRequire } from 'module'; + +const require = createRequire(import.meta.url); const jestArgv = [`--reporters=jest-silent-reporter`]; +const testFiles = [ + `test/unit/`, + // `test/unit/bit-tests.ts`, + // `test/unit/int-tests.ts`, + // `test/unit/math-tests.ts`, + // `test/unit/table-tests.ts`, + // `test/unit/generated-data-tests.ts`, + // `test/unit/builders/`, + // `test/unit/recordbatch/`, + // `test/unit/table/`, + // `test/unit/ipc/`, +]; if (argv.verbose) { jestArgv.push(`--verbose`); @@ -50,7 +68,7 @@ const testOptions = { }, }; -const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format) { +export const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format) { const opts = { ...testOptions }; const args = [...execArgv]; if (format === 'esm' || target === 'ts' || target === 'src' || target === npmPkgName) { @@ -60,29 +78,24 @@ const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function args.push(`-c`, `jestconfigs/jest.coverage.config.js`); } else { const cfgname = [target, format].filter(Boolean).join('.'); - args.push(`-c`, `jestconfigs/jest.${cfgname}.config.js`, `test/unit/`); + args.push(`-c`, `jestconfigs/jest.${cfgname}.config.js`, ...testFiles); } opts.env = { ...opts.env, TEST_TARGET: target, TEST_MODULE: format, - TEST_DOM_STREAMS: (target ==='src' || format === 'umd').toString(), - TEST_NODE_STREAMS: (target ==='src' || format !== 'umd').toString(), + TEST_DOM_STREAMS: (target === 'src' || format === 'umd').toString(), + TEST_NODE_STREAMS: (target === 'src' || format !== 'umd').toString(), TEST_TS_SOURCE: !!argv.coverage || (target === 'src') || 
(opts.env.TEST_TS_SOURCE === 'true') }; return asyncDone(() => child_process.spawn(`node`, args, opts)); }))({}, [jest, ...jestArgv], testOptions); -module.exports = testTask; -module.exports.testTask = testTask; -module.exports.cleanTestData = cleanTestData; -module.exports.createTestData = createTestData; - // Pull C++ and Java paths from environment vars first, otherwise sane defaults const ARROW_HOME = process.env.ARROW_HOME || path.resolve('../'); const ARROW_JAVA_DIR = process.env.ARROW_JAVA_DIR || path.join(ARROW_HOME, 'java'); const CPP_EXE_PATH = process.env.ARROW_CPP_EXE_PATH || path.join(ARROW_HOME, 'cpp/build/debug'); -const ARROW_INTEGRATION_DIR = process.env.ARROW_INTEGRATION_DIR || path.join(ARROW_HOME, 'integration'); +const ARROW_ARCHERY_DIR = process.env.ARROW_ARCHERY_DIR || path.join(ARROW_HOME, 'dev/archery'); const CPP_JSON_TO_ARROW = path.join(CPP_EXE_PATH, 'arrow-json-integration-test'); const CPP_FILE_TO_STREAM = path.join(CPP_EXE_PATH, 'arrow-file-to-stream'); @@ -92,7 +105,7 @@ const cppFilesDir = path.join(testFilesDir, 'cpp'); const javaFilesDir = path.join(testFilesDir, 'java'); const jsonFilesDir = path.join(testFilesDir, 'json'); -async function cleanTestData() { +export async function cleanTestData() { return await del([ `${cppFilesDir}/**`, `${javaFilesDir}/**`, @@ -103,11 +116,15 @@ async function cleanTestData() { async function createTestJSON() { await mkdirp(jsonFilesDir); - await cpy(`cp ${ARROW_INTEGRATION_DIR}/data/*.json`, jsonFilesDir); - await exec(`python3 ${ARROW_INTEGRATION_DIR}/integration_test.py --write_generated_json ${jsonFilesDir}`); + await exec(`python3 -B -c '\ +import sys\n\ +sys.path.append("${ARROW_ARCHERY_DIR}")\n\ +sys.argv.append("--write_generated_json=${jsonFilesDir}")\n\ +from archery.cli import integration\n\ +integration()'`); } -async function createTestData() { +export async function createTestData() { let JAVA_TOOLS_JAR = process.env.ARROW_JAVA_INTEGRATION_JAR; if (!JAVA_TOOLS_JAR) { @@ -127,7 
+144,7 @@ async function createTestData() { const errors = []; const names = await glob(path.join(jsonFilesDir, '*.json')); - for (let jsonPath of names) { + for (const jsonPath of names) { const name = path.parse(path.basename(jsonPath)).name; const arrowCppFilePath = path.join(cppFilesDir, 'file', `${name}.arrow`); const arrowJavaFilePath = path.join(javaFilesDir, 'file', `${name}.arrow`); @@ -150,9 +167,7 @@ async function createTestData() { async function generateCPPFile(jsonPath, filePath) { await del(filePath); return await exec( - `${CPP_JSON_TO_ARROW} ${ - `--integration --mode=JSON_TO_ARROW`} ${ - `--json=${jsonPath} --arrow=${filePath}`}`, + `${CPP_JSON_TO_ARROW} ${`--integration --mode=JSON_TO_ARROW`} ${`--json=${jsonPath} --arrow=${filePath}`}`, { maxBuffer: Math.pow(2, 53) - 1 } ); } @@ -168,9 +183,7 @@ async function createTestData() { async function generateJavaFile(jsonPath, filePath) { await del(filePath); return await exec( - `java -cp ${JAVA_TOOLS_JAR} ${ - `org.apache.arrow.tools.Integration -c JSON_TO_ARROW`} ${ - `-j ${path.resolve(jsonPath)} -a ${filePath}`}`, + `java -cp ${JAVA_TOOLS_JAR} ${`org.apache.arrow.tools.Integration -c JSON_TO_ARROW`} ${`-j ${path.resolve(jsonPath)} -a ${filePath}`}`, { maxBuffer: Math.pow(2, 53) - 1 } ); } @@ -178,8 +191,7 @@ async function createTestData() { async function generateJavaStream(filePath, streamPath) { await del(streamPath); return await exec( - `java -cp ${JAVA_TOOLS_JAR} ${ - `org.apache.arrow.tools.FileToStream`} ${filePath} ${streamPath}`, + `java -cp ${JAVA_TOOLS_JAR} ${`org.apache.arrow.tools.FileToStream`} ${filePath} ${streamPath}`, { maxBuffer: Math.pow(2, 53) - 1 } ); } diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js index cc1d277453a82..ceb0e9930f8b3 100644 --- a/js/gulp/typescript-task.js +++ b/js/gulp/typescript-task.js @@ -15,30 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-const { - targetDir, - tsconfigName, - observableFromStreams, - shouldRunInChildProcess, - spawnGulpCommandInChildProcess, -} = require('./util'); +import { targetDir, tsconfigName, observableFromStreams, shouldRunInChildProcess, spawnGulpCommandInChildProcess } from "./util.js"; -const gulp = require('gulp'); -const path = require('path'); -const ts = require(`gulp-typescript`); -const sourcemaps = require('gulp-sourcemaps'); -const { memoizeTask } = require('./memoize-task'); -const { - ReplaySubject, - forkJoin: ObservableForkJoin, -} = require('rxjs'); -const { - mergeWith, - takeLast, - share -} = require('rxjs/operators'); +import gulp from "gulp"; +import path from "path"; +import ts from "gulp-typescript"; +import tsc from "typescript"; +import sourcemaps from "gulp-sourcemaps"; +import { memoizeTask } from "./memoize-task.js"; +import { ReplaySubject, forkJoin as ObservableForkJoin } from "rxjs"; +import { mergeWith, takeLast, share } from "rxjs/operators"; -const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) { +export const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) { if (shouldRunInChildProcess(target, format)) { return spawnGulpCommandInChildProcess('compile', target, format); } @@ -51,28 +39,26 @@ const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target .pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false })) }))({}); -function compileBinFiles(target, format) { +export default typescriptTask; + +export function compileBinFiles(target, format) { const out = targetDir(target, format); const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName('bin', 'cjs')}.json`); return compileTypescript(path.join(out, 'bin'), tsconfigPath, { target }); } function compileTypescript(out, tsconfigPath, tsconfigOverrides) { - const tsProject = ts.createProject(tsconfigPath, { typescript: 
require(`typescript`), ...tsconfigOverrides}); + const tsProject = ts.createProject(tsconfigPath, { typescript: tsc, ...tsconfigOverrides }); const { stream: { js, dts } } = observableFromStreams( - tsProject.src(), sourcemaps.init(), - tsProject(ts.reporter.defaultReporter()) + tsProject.src(), sourcemaps.init(), + tsProject(ts.reporter.defaultReporter()) ); const writeSources = observableFromStreams(tsProject.src(), gulp.dest(path.join(out, 'src'))); const writeDTypes = observableFromStreams(dts, sourcemaps.write('./', { includeContent: false, sourceRoot: 'src' }), gulp.dest(out)); - const mapFile = tsProject.options.module === 5 ? esmMapFile : cjsMapFile; + const mapFile = tsProject.options.module === tsc.ModuleKind.ES2015 ? esmMapFile : cjsMapFile; const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: 'src' }), gulp.dest(out)); return ObservableForkJoin([writeSources, writeDTypes, writeJS]); } const cjsMapFile = (mapFilePath) => mapFilePath; const esmMapFile = (mapFilePath) => mapFilePath.replace('.js.map', '.mjs.map'); - -module.exports = typescriptTask; -module.exports.typescriptTask = typescriptTask; -module.exports.compileBinFiles = compileBinFiles; diff --git a/js/gulp/util.js b/js/gulp/util.js index d8cde29e8fa78..af70cd949179b 100644 --- a/js/gulp/util.js +++ b/js/gulp/util.js @@ -15,25 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-const fs = require('fs'); -const path = require(`path`); -const pump = require(`stream`).pipeline; -const child_process = require(`child_process`); -const { targets, modules } = require('./argv'); -const { - ReplaySubject, - empty: ObservableEmpty, - throwError: ObservableThrow, - fromEvent: ObservableFromEvent -} = require('rxjs'); -const { - share, - flatMap, - takeUntil, - defaultIfEmpty, - mergeWith, -} = require('rxjs/operators'); -const asyncDone = require('util').promisify(require('async-done')); +import fs from "fs"; +import path from "path"; +import child_process from "child_process"; +import stream from "stream"; +import util from "util"; +import asyncDoneSync from "async-done"; +const pump = stream.pipeline; +import { targets, modules } from "./argv.js"; +import { ReplaySubject, empty as ObservableEmpty, throwError as ObservableThrow, fromEvent as ObservableFromEvent } from "rxjs"; +import { share, flatMap, takeUntil, defaultIfEmpty, mergeWith } from "rxjs/operators"; +const asyncDone = util.promisify(asyncDoneSync); +import { createRequire } from "module"; +import esmRequire from "./esm-require.cjs" + +const require = createRequire(import.meta.url); const mainExport = `Arrow`; const npmPkgName = `apache-arrow`; @@ -47,15 +43,15 @@ const tasksToSkipPerTargetOrFormat = { cls: { test: true, package: true } }; const packageJSONFields = [ - `version`, `license`, `description`, - `author`, `homepage`, `repository`, - `bugs`, `keywords`, `dependencies`, - `bin` + `version`, `license`, `description`, + `author`, `homepage`, `repository`, + `bugs`, `keywords`, `dependencies`, + `bin` ]; const metadataFiles = [`LICENSE.txt`, `NOTICE.txt`, `README.md`].map((filename) => { - let prefixes = [`./`, `../`]; - let p = prefixes.find((prefix) => { + const prefixes = [`./`, `../`]; + const p = prefixes.find((prefix) => { try { fs.statSync(path.resolve(path.join(prefix, filename))); } catch (e) { return false; } @@ -70,12 +66,12 @@ const metadataFiles = [`LICENSE.txt`, 
`NOTICE.txt`, `README.md`].map((filename) // see: https://github.com/google/closure-compiler/blob/c1372b799d94582eaf4b507a4a22558ff26c403c/src/com/google/javascript/jscomp/CompilerOptions.java#L2988 const gCCLanguageNames = { es5: `ECMASCRIPT5`, - es2015: `ECMASCRIPT_2015`, - es2016: `ECMASCRIPT_2016`, - es2017: `ECMASCRIPT_2017`, - es2018: `ECMASCRIPT_2018`, - es2019: `ECMASCRIPT_2019`, - esnext: `ECMASCRIPT_NEXT` + es2015: `ECMASCRIPT_2015`, + es2016: `ECMASCRIPT_2016`, + es2017: `ECMASCRIPT_2017`, + es2018: `ECMASCRIPT_2018`, + es2019: `ECMASCRIPT_2019`, + esnext: `ECMASCRIPT_NEXT` }; function taskName(target, format) { @@ -105,16 +101,22 @@ function shouldRunInChildProcess(target, format) { const gulp = path.join(path.parse(require.resolve(`gulp`)).dir, `bin/gulp.js`); function spawnGulpCommandInChildProcess(command, target, format) { - const args = [gulp, command, '-t', target, '-m', format, `--silent`]; - const opts = { - stdio: [`ignore`, `inherit`, `inherit`], - env: { ...process.env, NODE_NO_WARNINGS: `1` } - }; - return asyncDone(() => child_process.spawn(`node`, args, opts)) - .catch((e) => { throw `Error in "${command}:${taskName(target, format)}" task`; }); + const err = []; + return asyncDone(() => { + const child = child_process.spawn( + `node`, + [gulp, command, '-t', target, '-m', format, `-L`], + { + stdio: [`ignore`, `ignore`, `pipe`], + env: { ...process.env, NODE_NO_WARNINGS: `1` } + }); + child.stderr.on('data', (line) => err.push(line)); + return child; + }).catch(() => Promise.reject(err.length > 0 ? err.join('\n') + : `Error in "${command}:${taskName(target, format)}" task.`)); } -const logAndDie = (e) => { if (e) { process.exit(1) } }; +const logAndDie = (e) => { if (e) { console.error(e); process.exit(1); } }; function observableFromStreams(...streams) { if (streams.length <= 0) { return ObservableEmpty(); } const pumped = streams.length <= 1 ? 
streams[0] : pump(...streams, logAndDie); @@ -153,48 +155,29 @@ function* combinations(_targets, _modules) { function known(known, values) { return values.includes(`all`) ? known - : values.includes(`src`) ? [`src`] - : Object.keys( - values.reduce((map, arg) => (( - (known.includes(arg)) && - (map[arg.toLowerCase()] = true) - || true) && map - ), {}) - ).sort((a, b) => known.indexOf(a) - known.indexOf(b)); + : values.includes(`src`) ? [`src`] + : Object.keys( + values.reduce((map, arg) => (( + (known.includes(arg)) && + (map[arg.toLowerCase()] = true) + || true) && map + ), {}) + ).sort((a, b) => known.indexOf(a) - known.indexOf(b)); } } const publicModulePaths = (dir) => [ `${dir}/${mainExport}.dom.js`, `${dir}/util/int.js`, - `${dir}/compute/predicate.js`, ]; -const esmRequire = require(`esm`)(module, { - mode: `auto`, - cjs: { - /* A boolean for storing ES modules in require.cache. */ - cache: true, - /* A boolean for respecting require.extensions in ESM. */ - extensions: true, - /* A boolean for __esModule interoperability. */ - interop: true, - /* A boolean for importing named exports of CJS modules. */ - namedExports: true, - /* A boolean for following CJS path rules in ESM. */ - paths: true, - /* A boolean for __dirname, __filename, and require in ESM. 
*/ - vars: true, - } -}); - -module.exports = { +export { mainExport, npmPkgName, npmOrgName, metadataFiles, packageJSONFields, knownTargets, knownModules, tasksToSkipPerTargetOrFormat, gCCLanguageNames, taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams, publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess, - - targetAndModuleCombinations: [...combinations(targets, modules)] }; + +export const targetAndModuleCombinations = [...combinations(targets, modules)]; diff --git a/js/gulpfile.js b/js/gulpfile.js index a257a2deff0a7..bf3de1acbf1fb 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -15,29 +15,23 @@ // specific language governing permissions and limitations // under the License. -const del = require('del'); -const gulp = require('gulp'); -const { targets } = require('./gulp/argv'); -const { - from: ObservableFrom, - bindNodeCallback: ObservableBindNodeCallback -} = require('rxjs'); -const { flatMap } = require('rxjs/operators'); -const cleanTask = require('./gulp/clean-task'); -const compileTask = require('./gulp/compile-task'); -const packageTask = require('./gulp/package-task'); -const { testTask, createTestData, cleanTestData } = require('./gulp/test-task'); -const { - taskName, combinations, - targetDir, knownTargets, - npmPkgName, tasksToSkipPerTargetOrFormat, - targetAndModuleCombinations -} = require('./gulp/util'); +import del from "del"; +import os from "os"; +import gulp from "gulp"; +import { targets } from "./gulp/argv.js"; +import { from as ObservableFrom, bindNodeCallback as ObservableBindNodeCallback } from "rxjs"; +import { mergeMap } from "rxjs/operators"; +import cleanTask from "./gulp/clean-task.js"; +import compileTask from "./gulp/compile-task.js"; +import packageTask from "./gulp/package-task.js"; +import { testTask, createTestData, cleanTestData } from "./gulp/test-task.js"; +import { esbuildTask, rollupTask, webpackTask, execBundleTask } from "./gulp/bundle-task.js"; 
+import { taskName, combinations, targetDir, knownTargets, npmPkgName, tasksToSkipPerTargetOrFormat, targetAndModuleCombinations } from "./gulp/util.js"; for (const [target, format] of combinations([`all`], [`all`])) { const task = taskName(target, format); gulp.task(`clean:${task}`, cleanTask(target, format)); - gulp.task(`test:${task}`, testTask(target, format)); + gulp.task(`test:${task}`, testTask(target, format)); gulp.task(`compile:${task}`, compileTask(target, format)); gulp.task(`package:${task}`, packageTask(target, format)); gulp.task(`build:${task}`, gulp.series( @@ -85,11 +79,20 @@ gulp.task(`clean`, gulp.parallel(getTasks(`clean`))); gulp.task(`build`, gulpConcurrent(getTasks(`build`))); gulp.task(`compile`, gulpConcurrent(getTasks(`compile`))); gulp.task(`package`, gulpConcurrent(getTasks(`package`))); -gulp.task(`default`, gulp.series(`clean`, `build`, `test`)); +gulp.task(`default`, gulp.series(`clean`, `build`, `test`)); -function gulpConcurrent(tasks, numCPUs = Math.max(1, require('os').cpus().length * 0.5) | 0) { +gulp.task(`bundle:esbuild`, esbuildTask()); +gulp.task(`bundle:rollup`, rollupTask()); +gulp.task(`bundle:webpack`, webpackTask()); +gulp.task(`bundle:webpack:analyze`, webpackTask({ analyze: true })); +gulp.task(`bundle:clean`, () => del(`test/bundle/**/*-bundle.js`)); +gulp.task(`bundle:exec`, execBundleTask()); + +gulp.task(`bundle`, gulp.series(`bundle:clean`, `bundle:esbuild`, `bundle:rollup`, `bundle:webpack`, `bundle:exec`)); + +function gulpConcurrent(tasks, numCPUs = Math.max(1, os.cpus().length * 0.5) | 0) { return () => ObservableFrom(tasks.map((task) => gulp.series(task))) - .pipe(flatMap((task) => ObservableBindNodeCallback(task)(), numCPUs || 1)); + .pipe(mergeMap((task) => ObservableBindNodeCallback(task)(), numCPUs || 1)); } function getTasks(name) { diff --git a/js/index.js b/js/index.cjs similarity index 100% rename from js/index.js rename to js/index.cjs diff --git a/js/index.mjs b/js/index.mjs index 
304353712424e..163f250e61641 100644 --- a/js/index.mjs +++ b/js/index.mjs @@ -15,4 +15,4 @@ // specific language governing permissions and limitations // under the License. -export * from './targets/apache-arrow'; \ No newline at end of file +export * from './targets/apache-arrow/Arrow.mjs'; diff --git a/js/jest.config.js b/js/jest.config.js index fb3f97c44097f..d30090a00ee45 100644 --- a/js/jest.config.js +++ b/js/jest.config.js @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -module.exports = { +export default { verbose: false, testEnvironment: "node", globals: { @@ -31,7 +31,7 @@ module.exports = { moduleFileExtensions: ["mjs", "js", "ts"], coverageReporters: ["lcov", "json"], coveragePathIgnorePatterns: [ - "fb\\/(File|Message|Schema|Tensor)\\.(js|ts)$", + "fb\\/.*\\.(js|ts)$", "test\\/.*\\.(ts|js)$", "/node_modules/", ], @@ -48,6 +48,6 @@ module.exports = { moduleNameMapper: { "^apache-arrow$": "/src/Arrow.node", "^apache-arrow(.*)": "/src$1", - flatbuffers: "flatbuffers/js/flatbuffers.mjs", + "^(\\.{1,2}/.*)\\.js$": "$1", }, }; diff --git a/js/jestconfigs/jest.apache-arrow.config.js b/js/jestconfigs/jest.apache-arrow.config.js index 103dc5a92c274..e945d944e2b81 100644 --- a/js/jestconfigs/jest.apache-arrow.config.js +++ b/js/jestconfigs/jest.apache-arrow.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.apache-arrow.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.apache-arrow.json", + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/apache-arrow$1", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/apache-arrow$1", - }, }; diff --git a/js/jestconfigs/jest.coverage.config.js b/js/jestconfigs/jest.coverage.config.js index 3b0b6a1c6c427..3e7444b736add 100644 --- a/js/jestconfigs/jest.coverage.config.js +++ b/js/jestconfigs/jest.coverage.config.js @@ -15,16 +15,18 @@ // specific language governing permissions and limitations // under the License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - collectCoverage: true, - reporters: undefined, - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.coverage.json", - useESM: true, +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + collectCoverage: true, + reporters: undefined, + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.coverage.json", + useESM: true, + }, }, - }, }; diff --git a/js/jestconfigs/jest.es2015.cjs.config.js b/js/jestconfigs/jest.es2015.cjs.config.js index 1d56767612954..260aa39b9ae73 100644 --- a/js/jestconfigs/jest.es2015.cjs.config.js +++ b/js/jestconfigs/jest.es2015.cjs.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.es2015.cjs.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.es2015.cjs.json", + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/es2015/cjs$1", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/es2015/cjs$1", - }, }; diff --git a/js/jestconfigs/jest.es2015.esm.config.js b/js/jestconfigs/jest.es2015.esm.config.js index cf564fb234aa6..b5978f6a1d3f6 100644 --- a/js/jestconfigs/jest.es2015.esm.config.js +++ b/js/jestconfigs/jest.es2015.esm.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-module.exports = { - ...require("../jest.config"), - rootDir: "../", - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.es2015.esm.json", - useESM: true, +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.es2015.esm.json", + useESM: true, + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/es2015/esm$1", + tslib: "tslib/tslib.es6.js", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/es2015/esm$1", - tslib: "tslib/tslib.es6.js" - }, }; diff --git a/js/jestconfigs/jest.es2015.umd.config.js b/js/jestconfigs/jest.es2015.umd.config.js index 21f27872d9120..5c67846e0efcb 100644 --- a/js/jestconfigs/jest.es2015.umd.config.js +++ b/js/jestconfigs/jest.es2015.umd.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.es2015.umd.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.es2015.umd.json", + }, + }, + moduleNameMapper: { + "^apache-arrow": "/targets/es2015/umd/Arrow.js", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/es2015/umd/Arrow.js", - }, }; diff --git a/js/jestconfigs/jest.es5.cjs.config.js b/js/jestconfigs/jest.es5.cjs.config.js index ae3e9bb423021..61ff88f3d9a94 100644 --- a/js/jestconfigs/jest.es5.cjs.config.js +++ b/js/jestconfigs/jest.es5.cjs.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // 
under the License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.es5.cjs.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.es5.cjs.json", + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/es5/cjs$1", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/es5/cjs$1", - }, }; diff --git a/js/jestconfigs/jest.es5.esm.config.js b/js/jestconfigs/jest.es5.esm.config.js index 0a0a21b761c06..aaa73b212714e 100644 --- a/js/jestconfigs/jest.es5.esm.config.js +++ b/js/jestconfigs/jest.es5.esm.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.es5.esm.json", - useESM: true, +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.es5.esm.json", + useESM: true, + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/es5/esm$1", + tslib: "tslib/tslib.es6.js", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/es5/esm$1", - tslib: "tslib/tslib.es6.js" - }, }; diff --git a/js/jestconfigs/jest.es5.umd.config.js b/js/jestconfigs/jest.es5.umd.config.js index f52af07bc8a81..280575eac1c2b 100644 --- a/js/jestconfigs/jest.es5.umd.config.js +++ b/js/jestconfigs/jest.es5.umd.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.es5.umd.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.es5.umd.json", + }, + }, + moduleNameMapper: { + "^apache-arrow": "/targets/es5/umd/Arrow.js", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/es5/umd/Arrow.js", - }, }; diff --git a/js/jestconfigs/jest.esnext.cjs.config.js b/js/jestconfigs/jest.esnext.cjs.config.js index 8be999e3d38cb..febfe6de9634e 100644 --- a/js/jestconfigs/jest.esnext.cjs.config.js +++ b/js/jestconfigs/jest.esnext.cjs.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.esnext.cjs.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.esnext.cjs.json", + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/esnext/cjs$1", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/esnext/cjs$1", - }, }; diff --git a/js/jestconfigs/jest.esnext.esm.config.js b/js/jestconfigs/jest.esnext.esm.config.js index aca4c52080593..6513637881bf3 100644 --- a/js/jestconfigs/jest.esnext.esm.config.js +++ b/js/jestconfigs/jest.esnext.esm.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-module.exports = { - ...require("../jest.config"), - rootDir: "../", - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.esnext.esm.json", - useESM: true, +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.esnext.esm.json", + useESM: true, + }, + }, + moduleNameMapper: { + "^apache-arrow(.*)": "/targets/esnext/esm$1", + tslib: "tslib/tslib.es6.js", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/esnext/esm$1", - tslib: "tslib/tslib.es6.js" - }, }; diff --git a/js/jestconfigs/jest.esnext.umd.config.js b/js/jestconfigs/jest.esnext.umd.config.js index 5013d45e03a53..a0f4f13335d61 100644 --- a/js/jestconfigs/jest.esnext.umd.config.js +++ b/js/jestconfigs/jest.esnext.umd.config.js @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - preset: "ts-jest", - moduleFileExtensions: ["js", "ts"], - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.esnext.umd.json", +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + preset: "ts-jest", + moduleFileExtensions: ["js", "ts"], + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.esnext.umd.json", + }, + }, + moduleNameMapper: { + "^apache-arrow": "/targets/esnext/umd/Arrow.js", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/esnext/umd/Arrow.js", - }, }; diff --git a/js/jestconfigs/jest.src.config.js b/js/jestconfigs/jest.src.config.js index 08ccad061ba70..18766713e38df 100644 --- a/js/jestconfigs/jest.src.config.js +++ b/js/jestconfigs/jest.src.config.js @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the 
License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.src.json", - useESM: true, +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.src.json", + useESM: true, + }, }, - }, }; diff --git a/js/jestconfigs/jest.ts.config.js b/js/jestconfigs/jest.ts.config.js index e56161b8b4c79..3d4eccd9ee76d 100644 --- a/js/jestconfigs/jest.ts.config.js +++ b/js/jestconfigs/jest.ts.config.js @@ -15,17 +15,21 @@ // specific language governing permissions and limitations // under the License. -module.exports = { - ...require("../jest.config"), - rootDir: "../", - globals: { - "ts-jest": { - diagnostics: false, - tsconfig: "/test/tsconfig/tsconfig.ts.json", - useESM: true, +import config from "../jest.config.js"; + +export default { + ...config, + rootDir: "../", + globals: { + "ts-jest": { + diagnostics: false, + tsconfig: "/test/tsconfig/tsconfig.ts.json", + useESM: true, + }, + }, + moduleNameMapper: { + "^apache-arrow$": "/targets/ts/Arrow.node", + "^apache-arrow(.*)": "/targets/ts$1", + "^(\\.{1,2}/.*)\\.js$": "$1", }, - }, - moduleNameMapper: { - "^apache-arrow(.*)": "/targets/ts$1" - }, }; diff --git a/js/package.json b/js/package.json index 918e6e9825d41..d1c5c409360b4 100644 --- a/js/package.json +++ b/js/package.json @@ -4,20 +4,22 @@ "bin": { "arrow2csv": "bin/arrow2csv.js" }, + "type": "module", "scripts": { "lerna": "lerna", "test": "cross-env NODE_NO_WARNINGS=1 gulp test", "build": "cross-env NODE_NO_WARNINGS=1 gulp build", "clean": "cross-env NODE_NO_WARNINGS=1 gulp clean", "debug": "cross-env NODE_NO_WARNINGS=1 gulp debug", - "perf": "ts-node-transpile-only ./perf/index.ts", + "perf": "node --loader ts-node/esm/transpile-only ./perf/index.ts", "test:integration": "node ./bin/integration.js --mode validate", "release": 
"./npm-release.sh", "clean:all": "yarn clean && yarn clean:testdata", "clean:testdata": "gulp clean:testdata", "create:testdata": "gulp create:testdata", "test:coverage": "gulp test -t src --coverage", - "doc": "del-cli ./doc && typedoc --options typedoc.js", + "test:bundle": "gulp bundle", + "doc": "del-cli ./doc && typedoc", "lint": "eslint src test --fix", "lint:ci": "eslint src test", "prepublishOnly": "echo \"Error: do 'yarn release' instead of 'npm publish'\" && exit 1" @@ -50,51 +52,70 @@ "jest.config.js" ], "dependencies": { - "@types/flatbuffers": "^1.10.0", - "@types/node": "^16.4.0", - "command-line-args": "5.1.3", + "@types/command-line-args": "5.2.0", + "@types/command-line-usage": "5.0.2", + "@types/node": "^17.0.8", + "@types/pad-left": "2.1.1", + "command-line-args": "5.2.0", "command-line-usage": "6.1.1", - "flatbuffers": "1.12.0", + "flatbuffers": "2.0.4", "json-bignum": "^0.0.3", "pad-left": "^2.1.0", - "tslib": "^2.3.0" + "tslib": "^2.3.1" }, "devDependencies": { - "@openpgp/web-stream-tools": "0.0.6", - "@types/glob": "7.1.4", - "@types/jest": "26.0.24", - "@types/randomatic": "3.1.2", - "@typescript-eslint/eslint-plugin": "4.28.4", - "@typescript-eslint/parser": "4.28.4", + "@openpgp/web-stream-tools": "0.0.8", + "@rollup/plugin-alias": "3.1.9", + "@rollup/plugin-node-resolve": "13.1.3", + "@rollup/stream": "2.0.0", + "@types/benchmark": "2.1.1", + "@types/glob": "7.2.0", + "@types/jest": "27.4.0", + "@types/randomatic": "3.1.3", + "@typescript-eslint/eslint-plugin": "5.9.1", + "@typescript-eslint/parser": "5.9.1", "async-done": "1.3.2", - "benny": "3.6.15", - "cpy": "8.1.2", + "benny": "3.7.1", "cross-env": "7.0.3", "del-cli": "4.0.1", - "eslint": "7.31.0", - "eslint-plugin-jest": "24.3.7", + "esbuild": "0.14.11", + "esbuild-plugin-alias": "0.2.1", + "eslint": "8.6.0", + "eslint-plugin-jest": "25.7.0", + "eslint-plugin-unicorn": "40.0.0", "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz", - "glob": 
"7.1.7", - "google-closure-compiler": "20210601.0.0", + "glob": "7.2.0", + "google-closure-compiler": "20211006.0.0", "gulp": "4.0.2", + "gulp-esbuild": "0.10.0", "gulp-json-transform": "0.4.7", "gulp-rename": "2.0.0", + "gulp-replace": "1.1.3", "gulp-sourcemaps": "3.0.0", + "gulp-terser": "2.1.0", "gulp-typescript": "5.0.1", - "ix": "4.4.1", - "jest": "27.0.6", + "gulp-vinyl-size": "1.0.1", + "ix": "4.5.2", + "jest": "27.4.7", "jest-silent-reporter": "0.5.0", "lerna": "4.0.0", - "memfs": "3.2.2", + "memfs": "3.4.1", "mkdirp": "1.0.4", "multistream": "4.1.0", "randomatic": "3.1.1", - "rxjs": "7.2.0", - "ts-jest": "27.0.3", - "ts-node": "10.1.0", - "typedoc": "0.21.4", - "typescript": "4.0.2", - "web-streams-polyfill": "3.0.3", + "rollup": "2.64.0", + "rxjs": "7.5.2", + "ts-jest": "27.1.3", + "ts-node": "10.4.0", + "typedoc": "0.22.10", + "typescript": "4.5.4", + "vinyl-buffer": "1.0.1", + "vinyl-named": "1.1.0", + "vinyl-source-stream": "2.0.0", + "web-streams-polyfill": "3.2.0", + "webpack": "5.66.0", + "webpack-bundle-analyzer": "4.5.0", + "webpack-stream": "7.0.0", "xml2js": "0.4.23" }, "engines": { diff --git a/js/perf/config.ts b/js/perf/config.ts index 08ea9ecc1d525..d5a0707558440 100644 --- a/js/perf/config.ts +++ b/js/perf/config.ts @@ -15,23 +15,24 @@ // specific language governing permissions and limitations // under the License. 
-import * as Arrow from '../src/Arrow.dom'; +import { Arrow } from './index.js'; // from https://stackoverflow.com/a/19303725/214950 let seed = 1; function random() { - const x = Math.sin(seed++) * 10000; + const x = Math.sin(seed++) * 10_000; return x - Math.floor(x); } console.time('Prepare Data'); -const LENGTH = 100000; +const LENGTH = 100_000; const NUM_BATCHES = 10; +const cities = ['Charlottesville', 'New York', 'San Francisco', 'Seattle', 'Terre Haute', 'Washington, DC']; -const values = Arrow.Utf8Vector.from(['Charlottesville', 'New York', 'San Francisco', 'Seattle', 'Terre Haute', 'Washington, DC']); +const values = Arrow.vectorFromArray(cities, new Arrow.Utf8).memoize(); -const batches = Array.from({length: NUM_BATCHES}).map(() => { +const batches = Array.from({ length: NUM_BATCHES }).map(() => { const lat = Float32Array.from( { length: LENGTH }, () => ((random() - 0.5) * 2 * 90)); @@ -49,28 +50,55 @@ const batches = Array.from({length: NUM_BATCHES}).map(() => { const originType = new Arrow.Dictionary(values.type, new Arrow.Int8, 0, false); const destinationType = new Arrow.Dictionary(values.type, new Arrow.Int8, 0, false); - return Arrow.RecordBatch.new({ - 'lat': Arrow.Float32Vector.from(lat), - 'lng': Arrow.Float32Vector.from(lng), - 'origin': Arrow.Vector.new(Arrow.Data.Dictionary(originType, 0, origin.length, 0, null, origin, values)), - 'destination': Arrow.Vector.new(Arrow.Data.Dictionary(destinationType, 0, destination.length, 0, null, destination, values)), + return new Arrow.RecordBatch({ + 'lat': Arrow.makeData({ type: new Arrow.Float32, data: lat }), + 'lng': Arrow.makeData({ type: new Arrow.Float32, data: lng }), + 'origin': Arrow.makeData({ type: originType, length: origin.length, nullCount: 0, data: origin, dictionary: values }), + 'destination': Arrow.makeData({ type: destinationType, length: destination.length, nullCount: 0, data: destination, dictionary: values }), }); }); -const tracks = new Arrow.DataFrame(batches[0].schema, batches); 
+export const typedArrays = { + uint8Array: Uint8Array.from({ length: LENGTH }, () => random() * 255), + uint16Array: Uint16Array.from({ length: LENGTH }, () => random() * 255), + uint32Array: Uint32Array.from({ length: LENGTH }, () => random() * 255), + uint64Array: BigUint64Array.from({ length: LENGTH }, () => 42n), + + int8Array: Int8Array.from({ length: LENGTH }, () => random() * 255), + int16Array: Int16Array.from({ length: LENGTH }, () => random() * 255), + int32Array: Int32Array.from({ length: LENGTH }, () => random() * 255), + int64Array: BigInt64Array.from({ length: LENGTH }, () => 42n), + + float32Array: Float32Array.from({ length: LENGTH }, () => random() * 255), + float64Array: Float64Array.from({ length: LENGTH }, () => random() * 255) +}; + +export const arrays = { + numbers: Array.from({ length: LENGTH }, () => random() * 255), + booleans: Array.from({ length: LENGTH }, () => random() > 0.5), + dictionary: Array.from({ length: LENGTH }, () => cities[Math.floor(random() * cities.length)]) +}; + +export const vectors: { [k: string]: Arrow.Vector } = Object.fromEntries([ + ...Object.entries(typedArrays).map(([name, array]) => [name, Arrow.makeVector(array)]), + ...Object.entries(arrays).map(([name, array]) => [name, Arrow.vectorFromArray(array)]), + ['string', Arrow.vectorFromArray(arrays.dictionary, new Arrow.Utf8)], +]); + +const tracks = new Arrow.Table(batches[0].schema, batches); console.timeEnd('Prepare Data'); export default [ { name: 'tracks', - df: tracks, - ipc: tracks.serialize(), + table: tracks, + ipc: Arrow.RecordBatchStreamWriter.writeAll(tracks).toUint8Array(true), countBys: ['origin', 'destination'], counts: [ - {column: 'lat', test: 'gt' as 'gt' | 'eq', value: 0 }, - {column: 'lng', test: 'gt' as 'gt' | 'eq', value: 0 }, - {column: 'origin', test: 'eq' as 'gt' | 'eq', value: 'Seattle'}, + { column: 'lat', test: 'gt' as 'gt' | 'eq', value: 0 }, + { column: 'lng', test: 'gt' as 'gt' | 'eq', value: 0 }, + { column: 'origin', test: 'eq' as 
'gt' | 'eq', value: 'Seattle' }, ], } ]; diff --git a/js/perf/index.ts b/js/perf/index.ts index 9f6cb8f79a093..40225a6d8ae8c 100644 --- a/js/perf/index.ts +++ b/js/perf/index.ts @@ -16,28 +16,35 @@ // under the License. // Alternatively, use bundles for performance tests -// import * as Arrow from '../targets/es5/umd'; -// import * as Arrow from '../targets/es5/cjs'; -// import * as Arrow from '../targets/es2015/umd'; -// import * as Arrow from '../targets/es2015/cjs'; - -import * as Arrow from '../src/Arrow'; - -import config from './config'; +// import * as Arrow from '../targets/es5/umd/Arrow.js'; +// import * as Arrow from '../targets/es5/esm/Arrow.js'; +// import * as Arrow from '../targets/es5/cjs/Arrow.js'; +// import * as Arrow from '../targets/es2015/umd/Arrow.js'; +// import * as Arrow from '../targets/es2015/esm/Arrow.js'; +// import * as Arrow from '../targets/es2015/cjs/Arrow.js'; +// import * as Arrow from '../targets/esnext/umd/Arrow.js'; +// import * as Arrow from '../targets/esnext/esm/Arrow.js'; +// import * as Arrow from '../targets/esnext/cjs/Arrow.js'; + +import * as Arrow from '../src/Arrow.js'; + +import config, { arrays, typedArrays, vectors } from './config.js'; import b from 'benny'; import { CaseResult, Summary } from 'benny/lib/internal/common-types'; import kleur from 'kleur'; +export { Arrow }; -const { predicate, Table, RecordBatchReader } = Arrow; -const { col } = predicate; +const { RecordBatchReader, RecordBatchStreamWriter } = Arrow; const args = process.argv.slice(2); const json = args[0] === '--json'; +if (json) console.log(kleur.red('JSON output is on!')); + const formatter = new Intl.NumberFormat(); function formatNumber(number: number, precision = 0) { - const rounded = number > precision * 10 ? Math.round(number) : parseFloat((number).toPrecision(precision)); + const rounded = number > precision * 10 ? 
Math.round(number) : Number.parseFloat((number).toPrecision(precision)); return formatter.format(rounded); } @@ -46,59 +53,130 @@ const results: CaseResult[] = []; function cycle(result: CaseResult, _summary: Summary) { const duration = result.details.median * 1000; if (json) { - result.suite = _summary.name; + (result).suite = _summary.name; results.push(result); } - console.log( - `${kleur.cyan(result.name)} ${formatNumber(result.ops, 3)} ops/s ±${result.margin.toPrecision(2)}%, ${formatNumber(duration, 2)} ms, ${kleur.gray(result.samples + ' samples')}`, + + const numbers = `${`${formatNumber(result.ops, 3)} ops/s ±${`${result.margin.toPrecision(2)}%,`.padEnd(6)}`.padStart(27) + } ${formatNumber(duration, 2).padStart(4)} ms, ${kleur.gray(`${result.samples} samples`.padStart(10))}`; + console.log(result.name.length >= 30 ? + `${kleur.cyan(result.name)} \n${numbers}` : + `${kleur.cyan(result.name.padEnd(20))} ${numbers}`, ); } -for (const { name, ipc, df } of config) { +b.suite( + `makeVector`, + + ...Object.entries(typedArrays).map(([name, array]) => + b.add.skip(`from ${name}`, () => { + Arrow.makeVector(array); + })), + + b.cycle(cycle) +); + +b.suite( + `vectorFromArray`, + + ...Object.entries(arrays).map(([name, array]) => + b.add(`from: ${name}`, () => { + Arrow.vectorFromArray(array as any); + })), + + b.cycle(cycle), +); + +b.suite( + `Iterate Vector`, + + ...Object.entries(vectors).map(([name, vector]) => + b.add(`from: ${name}`, () => { + for (const _value of vector) { } + })), + + b.cycle(cycle), +); + +b.suite( + `Spread Vector`, + + ...Object.entries(vectors).map(([name, vector]) => + b.add(`from: ${name}`, () => { + [...vector]; + })), + + b.cycle(cycle) +); + +b.suite( + `toArray Vector`, + + ...Object.entries(vectors).map(([name, vector]) => + b.add(`from: ${name}`, () => { + vector.toArray(); + })), + + b.cycle(cycle) +); + +b.suite( + `get Vector`, + + ...Object.entries(vectors).map(([name, vector]) => + b.add(`from: ${name}`, () => { + for (let 
i = -1, n = vector.length; ++i < n;) { + vector.get(i); + } + })), + + b.cycle(cycle) +); + +for (const { name, ipc, table } of config) { b.suite( `Parse`, - b.add(`dataset: ${name}, function: Table.from`, () => { - Table.from(ipc); - }), - - b.add(`dataset: ${name}, function: readBatches`, () => { - for (const _recordBatch of RecordBatchReader.from(ipc)) {} + b.add(`dataset: ${name}, function: read recordBatches`, () => { + for (const _recordBatch of RecordBatchReader.from(ipc)) { } }), - b.add(`dataset: ${name}, function: serialize`, () => { - df.serialize(); + b.add(`dataset: ${name}, function: write recordBatches`, () => { + RecordBatchStreamWriter.writeAll(table).toUint8Array(true); }), b.cycle(cycle) ); - const schema = df.schema; + const schema = table.schema; const suites = [{ - suite_name: `Get values by index`, - fn(vector: Arrow.Column) { - for (let i = -1, n = vector.length; ++i < n;) { - vector.get(i); - } + suite_name: `Get values by index`, + fn(vector: Arrow.Vector) { + for (let i = -1, n = vector.length; ++i < n;) { + vector.get(i); } - }, { - suite_name: `Iterate vectors`, - fn(vector: Arrow.Column) { for (const _value of vector) {} } - }, { - suite_name: `Slice toArray vectors`, - fn(vector: Arrow.Column) { vector.slice().toArray(); } - }, { - suite_name: `Slice vectors`, - fn(vector: Arrow.Column) { vector.slice(); } - }]; - - for (const {suite_name, fn} of suites) { + } + }, { + suite_name: `Iterate vectors`, + fn(vector: Arrow.Vector) { for (const _value of vector) { } } + }, { + suite_name: `Slice toArray vectors`, + fn(vector: Arrow.Vector) { vector.slice().toArray(); } + }, { + suite_name: `Slice vectors`, + fn(vector: Arrow.Vector) { vector.slice(); } + }, { + suite_name: `Spread vectors`, + fn(vector: Arrow.Vector) { [...vector]; } + }]; + + for (const { suite_name, fn } of suites) { b.suite( suite_name, ...schema.fields.map((f, i) => { - const vector = df.getColumnAt(i)!; + const vector = table.getChildAt(i)!; return b.add(`dataset: 
${name}, column: ${f.name}, length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => { fn(vector); }); @@ -110,93 +188,50 @@ for (const { name, ipc, df } of config) { } -for (const { name, df, countBys, counts } of config) { +for (const { name, table, counts } of config) { b.suite( - `DataFrame Iterate`, + `Table`, - b.add(`dataset: ${name}, length: ${formatNumber(df.length)}`, () => { - for (const _value of df) {} + b.add(`Iterate, dataset: ${name}, numRows: ${formatNumber(table.numRows)}`, () => { + for (const _value of table) { } }), - b.cycle(cycle) - ); - - b.suite( - `DataFrame Count By`, - - ...countBys.map((column: string) => b.add( - `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}`, - () => df.countBy(column) - )), - - b.cycle(cycle) - ); - - b.suite( - `DataFrame Filter-Scan Count`, - - ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add( - `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`, - () => { - let filteredDf: Arrow.FilteredDataFrame; - if (test == 'gt') { - filteredDf = df.filter(col(column).gt(value)); - } else if (test == 'eq') { - filteredDf = df.filter(col(column).eq(value)); - } else { - throw new Error(`Unrecognized test "${test}"`); - } - - return () => filteredDf.count(); - } - )), - - b.cycle(cycle) - ); - - b.suite( - `DataFrame Filter-Iterate`, + b.add(`Spread, dataset: ${name}, numRows: ${formatNumber(table.numRows)}`, () => { + [...table]; + }), - ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add( - `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`, - () => { - let filteredDf: 
Arrow.FilteredDataFrame; - if (test == 'gt') { - filteredDf = df.filter(col(column).gt(value)); - } else if (test == 'eq') { - filteredDf = df.filter(col(column).eq(value)); - } else { - throw new Error(`Unrecognized test "${test}"`); - } + b.add(`toArray, dataset: ${name}, numRows: ${formatNumber(table.numRows)}`, () => { + table.toArray(); + }), - return () => { - for (const _value of filteredDf) {} - }; + b.add(`get, dataset: ${name}, numRows: ${formatNumber(table.numRows)}`, () => { + for (let i = -1, n = table.numRows; ++i < n;) { + table.get(i); } - )), + }), b.cycle(cycle) ); b.suite( - `DataFrame Direct Count`, + `Table Direct Count`, - ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add( - `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`, + ...counts.map(({ column, test, value }: { column: string; test: 'gt' | 'eq'; value: number | string }) => b.add( + `dataset: ${name}, column: ${column}, numRows: ${formatNumber(table.numRows)}, type: ${table.schema.fields.find((c) => c.name === column)!.type}, test: ${test}, value: ${value}`, () => { - const colidx = df.schema.fields.findIndex((c)=> c.name === column); + const colidx = table.schema.fields.findIndex((c) => c.name === column); if (test == 'gt') { return () => { let sum = 0; - const batches = df.chunks; + const batches = table.batches; const numBatches = batches.length; for (let batchIndex = -1; ++batchIndex < numBatches;) { // load batches const batch = batches[batchIndex]; const vector = batch.getChildAt(colidx)!; // yield all indices - for (let index = -1, length = batch.length; ++index < length;) { + for (let index = -1, length = batch.numRows; ++index < length;) { sum += (vector.get(index) >= value) ? 
1 : 0; } } @@ -205,15 +240,15 @@ for (const { name, df, countBys, counts } of config) { } else if (test == 'eq') { return () => { let sum = 0; - const batches = df.chunks; + const batches = table.batches; const numBatches = batches.length; for (let batchIndex = -1; ++batchIndex < numBatches;) { // load batches const batch = batches[batchIndex]; const vector = batch.getChildAt(colidx)!; // yield all indices - for (let index = -1, length = batch.length; ++index < length;) { - sum += (vector.get(index) === value) ? 1 : 0; + for (let index = -1, length = batch.numRows; ++index < length;) { + sum += (vector.get(index) === value) ? 1 : 0; } } return sum; @@ -229,6 +264,6 @@ for (const { name, df, countBys, counts } of config) { b.complete(() => { // last benchmark finished json && process.stderr.write(JSON.stringify(results, null, 2)); - }) + }), ); } diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts index 07f0c8b8e06e0..75efb3d22bd06 100644 --- a/js/src/Arrow.dom.ts +++ b/js/src/Arrow.dom.ts @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-import streamAdapters from './io/adapters'; -import { Builder } from './builder/index'; -import { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, } from './ipc/reader'; -import { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, } from './ipc/writer'; -import { toDOMStream } from './io/whatwg/iterable'; -import { builderThroughDOMStream } from './io/whatwg/builder'; -import { recordBatchReaderThroughDOMStream } from './io/whatwg/reader'; -import { recordBatchWriterThroughDOMStream } from './io/whatwg/writer'; +import streamAdapters from './io/adapters.js'; +import { Builder } from './builder.js'; +import { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, } from './ipc/reader.js'; +import { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, } from './ipc/writer.js'; +import { toDOMStream } from './io/whatwg/iterable.js'; +import { builderThroughDOMStream } from './io/whatwg/builder.js'; +import { recordBatchReaderThroughDOMStream } from './io/whatwg/reader.js'; +import { recordBatchWriterThroughDOMStream } from './io/whatwg/writer.js'; streamAdapters.toDOMStream = toDOMStream; Builder['throughDOM'] = builderThroughDOMStream; @@ -33,9 +33,15 @@ RecordBatchWriter['throughDOM'] = recordBatchWriterThroughDOMStream; RecordBatchFileWriter['throughDOM'] = recordBatchWriterThroughDOMStream; RecordBatchStreamWriter['throughDOM'] = recordBatchWriterThroughDOMStream; +export type { + TypeMap, + ReadableSource, WritableSink, + ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions, +} from './Arrow.js'; + export { DateUnit, IntervalUnit, MessageHeader, MetadataVersion, Precision, TimeUnit, Type, UnionMode, BufferType, - Data, + Data, makeData, DataType, Null, Bool, @@ -49,48 +55,28 @@ export { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, Decimal, List, - Struct, + Struct, StructRow, Union, DenseUnion, SparseUnion, Dictionary, Interval, 
IntervalDayTime, IntervalYearMonth, FixedSizeList, - Map_, - Table, - Column, + Map_, MapRow, + Table, makeTable, tableFromArrays, Schema, Field, Visitor, - Vector, - BaseVector, - BinaryVector, - BoolVector, - Chunked, - DateVector, DateDayVector, DateMillisecondVector, - DecimalVector, - DictionaryVector, - FixedSizeBinaryVector, - FixedSizeListVector, - FloatVector, Float16Vector, Float32Vector, Float64Vector, - IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector, - IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, - ListVector, - MapVector, - NullVector, - StructVector, - TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector, - TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector, - UnionVector, DenseUnionVector, SparseUnionVector, - Utf8Vector, - ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink, + Vector, makeVector, vectorFromArray, + ByteStream, AsyncByteStream, AsyncByteQueue, RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader, RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter, + tableFromIPC, tableToIPC, MessageReader, AsyncMessageReader, JSONMessageReader, Message, RecordBatch, - ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions, - DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc, - predicate, util, - Builder, + Builder, makeBuilder, builderThroughIterable, builderThroughAsyncIterable, +} from './Arrow.js'; + +export { BinaryBuilder, BoolBuilder, DateBuilder, DateDayBuilder, DateMillisecondBuilder, @@ -109,5 +95,4 @@ export { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder, UnionBuilder, DenseUnionBuilder, SparseUnionBuilder, 
Utf8Builder, - isTypedArray, -} from './Arrow'; +} from './Arrow.js'; diff --git a/js/src/Arrow.node.ts b/js/src/Arrow.node.ts index 44221f613d745..d47f0880ff48b 100644 --- a/js/src/Arrow.node.ts +++ b/js/src/Arrow.node.ts @@ -15,18 +15,18 @@ // specific language governing permissions and limitations // under the License. -import streamAdapters from './io/adapters'; -import { Builder } from './builder/index'; -import { RecordBatchReader } from './ipc/reader'; -import { RecordBatchWriter } from './ipc/writer'; -import { toNodeStream } from './io/node/iterable'; -import { builderThroughNodeStream } from './io/node/builder'; -import { recordBatchReaderThroughNodeStream } from './io/node/reader'; -import { recordBatchWriterThroughNodeStream } from './io/node/writer'; +import streamAdapters from './io/adapters.js'; +import { Builder } from './builder.js'; +import { RecordBatchReader } from './ipc/reader.js'; +import { RecordBatchWriter } from './ipc/writer.js'; +import { toNodeStream } from './io/node/iterable.js'; +import { builderThroughNodeStream } from './io/node/builder.js'; +import { recordBatchReaderThroughNodeStream } from './io/node/reader.js'; +import { recordBatchWriterThroughNodeStream } from './io/node/writer.js'; streamAdapters.toNodeStream = toNodeStream; Builder['throughNode'] = builderThroughNodeStream; RecordBatchReader['throughNode'] = recordBatchReaderThroughNodeStream; RecordBatchWriter['throughNode'] = recordBatchWriterThroughNodeStream; -export * from './Arrow.dom'; +export * from './Arrow.dom.js'; diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index 8bf29631039ae..bf87ed617733f 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -15,20 +15,21 @@ // specific language governing permissions and limitations // under the License. 
+export { MessageHeader } from './fb/message-header.js'; + export { + Type, + BufferType, DateUnit, TimeUnit, Precision, UnionMode, IntervalUnit, MetadataVersion, -} from './fb/Schema'; - -export { MessageHeader } from './fb/Message'; +} from './enum.js'; -export { Type, BufferType } from './enum'; - -export { Data } from './data'; +export { Data, makeData } from './data.js'; +export type { TypeMap } from './type.js'; export { DataType, Null, @@ -48,78 +49,57 @@ export { Dictionary, Interval, IntervalDayTime, IntervalYearMonth, FixedSizeList, - Map_, -} from './type'; + Map_ +} from './type.js'; -export { Table } from './table'; -export { Column } from './column'; -export { Visitor } from './visitor'; -export { Schema, Field } from './schema'; -export { - Vector, - BaseVector, - BinaryVector, - BoolVector, - Chunked, - DateVector, DateDayVector, DateMillisecondVector, - DecimalVector, - DictionaryVector, - FixedSizeBinaryVector, - FixedSizeListVector, - FloatVector, Float16Vector, Float32Vector, Float64Vector, - IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector, - IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, - ListVector, - MapVector, - NullVector, - StructVector, - TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector, - TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector, - UnionVector, DenseUnionVector, SparseUnionVector, - Utf8Vector, -} from './vector/index'; +export { Table, makeTable, tableFromArrays } from './table.js'; +export { Vector, makeVector } from './vector.js'; +export { Visitor } from './visitor.js'; +export { Schema, Field } from './schema.js'; -export { - Builder, - BinaryBuilder, - BoolBuilder, - DateBuilder, DateDayBuilder, DateMillisecondBuilder, - DecimalBuilder, - DictionaryBuilder, - FixedSizeBinaryBuilder, - FixedSizeListBuilder, - 
FloatBuilder, Float16Builder, Float32Builder, Float64Builder, - IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder, - IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder, - ListBuilder, - MapBuilder, - NullBuilder, - StructBuilder, - TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder, - TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder, - UnionBuilder, DenseUnionBuilder, SparseUnionBuilder, - Utf8Builder, -} from './builder/index'; +export { MapRow } from './row/map.js'; +export { StructRow } from './row/struct.js'; + +export { Builder } from './builder.js'; +export { makeBuilder, vectorFromArray, builderThroughIterable, builderThroughAsyncIterable } from './factories.js'; +export type { BuilderOptions } from './builder.js'; +export { BoolBuilder } from './builder/bool.js'; +export { NullBuilder } from './builder/null.js'; +export { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from './builder/date.js'; +export { DecimalBuilder } from './builder/decimal.js'; +export { DictionaryBuilder } from './builder/dictionary.js'; +export { FixedSizeBinaryBuilder } from './builder/fixedsizebinary.js'; +export { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from './builder/float.js'; +export { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from './builder/int.js'; +export { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './builder/time.js'; +export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './builder/timestamp.js'; +export { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from 
'./builder/interval.js'; +export { Utf8Builder } from './builder/utf8.js'; +export { BinaryBuilder } from './builder/binary.js'; +export { ListBuilder } from './builder/list.js'; +export { FixedSizeListBuilder } from './builder/fixedsizelist.js'; +export { MapBuilder } from './builder/map.js'; +export { StructBuilder } from './builder/struct.js'; +export { UnionBuilder, SparseUnionBuilder, DenseUnionBuilder } from './builder/union.js'; -export { ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink } from './io/stream'; -export { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader } from './ipc/reader'; -export { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter } from './ipc/writer'; -export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message'; -export { Message } from './ipc/metadata/message'; -export { RecordBatch } from './recordbatch'; -export { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces'; -export { DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc } from './compute/dataframe'; +export { ByteStream, AsyncByteStream, AsyncByteQueue } from './io/stream.js'; +export type { ReadableSource, WritableSink } from './io/stream.js'; +export { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader } from './ipc/reader.js'; +export { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter } from './ipc/writer.js'; +export { tableToIPC, tableFromIPC } from './ipc/serialization.js'; +export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message.js'; +export { Message } from './ipc/metadata/message.js'; +export { RecordBatch } from './recordbatch.js'; +export type { ArrowJSONLike, FileHandle, Readable, Writable, 
ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces.js'; -import * as util_bn_ from './util/bn'; -import * as util_int_ from './util/int'; -import * as util_bit_ from './util/bit'; -import * as util_math_ from './util/math'; -import * as util_buffer_ from './util/buffer'; -import * as util_vector_ from './util/vector'; -import * as predicate from './compute/predicate'; -import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator'; +import * as util_bn_ from './util/bn.js'; +import * as util_int_ from './util/int.js'; +import * as util_bit_ from './util/bit.js'; +import * as util_math_ from './util/math.js'; +import * as util_buffer_ from './util/buffer.js'; +import * as util_vector_ from './util/vector.js'; +import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator.js'; -export { predicate }; /** @ignore */ export const util = { ...util_bn_, @@ -132,5 +112,3 @@ export const util = { compareFields, compareTypes, }; - -export { isTypedArray } from './util/args'; diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts index d5803cce02c6c..41b928fc4d70b 100644 --- a/js/src/bin/arrow2csv.ts +++ b/js/src/bin/arrow2csv.ts @@ -17,26 +17,30 @@ // specific language governing permissions and limitations // under the License. 
+/* eslint-disable unicorn/no-array-for-each */ + import * as fs from 'fs'; import * as stream from 'stream'; -import { valueToString } from '../util/pretty'; -import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node'; +import { valueToString } from '../util/pretty.js'; +import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node.js'; -/* eslint-disable @typescript-eslint/no-require-imports */ +import commandLineUsage from 'command-line-usage'; +import commandLineArgs from 'command-line-args'; +import padLeft from 'pad-left'; +// @ts-ignore +import { parse as bignumJSONParse } from 'json-bignum'; -const padLeft = require('pad-left'); -const bignumJSONParse = require('json-bignum').parse; -const argv = require(`command-line-args`)(cliOpts(), { partial: true }); +const argv = commandLineArgs(cliOpts(), { partial: true }); const files = argv.help ? [] : [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); const state = { ...argv, closed: false, maxColWidths: [10] }; type ToStringState = { - hr: string; - sep: string; - schema: any; - closed: boolean; - metadata: boolean; + hr?: string; + sep?: string; + schema?: any; + closed?: boolean; + metadata?: boolean; maxColWidths: number[]; }; @@ -65,12 +69,12 @@ type ToStringState = { return hasReaders ? 
0 : print_usage(); })() -.then((x) => +x || 0, (err) => { - if (err) { - console.error(`${err?.stack || err}`); - } - return process.exitCode || 1; -}).then((code) => process.exit(code)); + .then((x) => +x || 0, (err) => { + if (err) { + console.error(`${err?.stack || err}`); + } + return process.exitCode || 1; + }).then((code) => process.exit(code)); function pipeTo(source: NodeJS.ReadableStream, sink: NodeJS.WritableStream, opts?: { end: boolean }) { return new Promise((resolve, reject) => { @@ -87,7 +91,7 @@ function pipeTo(source: NodeJS.ReadableStream, sink: NodeJS.WritableStream, opts }); } -async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStream) { +async function* recordBatchReaders(createSourceStream: () => NodeJS.ReadableStream): AsyncGenerator { const json = new AsyncByteQueue(); const stream = new AsyncByteQueue(); @@ -97,14 +101,14 @@ async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStre // tee the input source, just in case it's JSON source.on('end', () => [stream, json].forEach((y) => y.close())) .on('data', (x) => [stream, json].forEach((y) => y.write(x))) - .on('error', (e) => [stream, json].forEach((y) => y.abort(e))); + .on('error', (e) => [stream, json].forEach((y) => y.abort(e))); try { for await (reader of RecordBatchReader.readAll(stream)) { reader && (yield reader); } if (reader) return; - } catch (e) { readers = null; } + } catch { readers = null; } if (!readers) { await json.closed; @@ -114,7 +118,7 @@ async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStre for await (reader of RecordBatchReader.readAll(bignumJSONParse(await json.toString()))) { reader && (yield reader); } - } catch (e) { readers = null; } + } catch { readers = null; } } } @@ -123,9 +127,9 @@ function batchesToString(state: ToStringState, schema: Schema) { let rowId = 0; let batchId = -1; let maxColWidths = [10]; - const { hr, sep } = state; + const { hr, sep, metadata } = state; - const header 
= ['row_id', ...schema.fields.map((f) => `${f}`)].map(valueToString); + const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => valueToString(val)); state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length)); @@ -138,7 +142,7 @@ function batchesToString(state: ToStringState, schema: Schema) { if (batchId === -1) { hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`); this.push(`${formatRow(header, maxColWidths, sep)}\n`); - if (state.metadata && schema.metadata.size > 0) { + if (metadata && schema.metadata.size > 0) { this.push(`metadata:\n${formatMetadata(schema.metadata)}\n`); } } @@ -147,7 +151,7 @@ function batchesToString(state: ToStringState, schema: Schema) { }, transform(batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) { - batch = !state.schema?.length ? batch : batch.select(...state.schema); + batch = !state.schema?.length ? batch : batch.select(state.schema); if (state.closed) { return cb(undefined, null); } @@ -157,16 +161,16 @@ function batchesToString(state: ToStringState, schema: Schema) { // If this is the first batch in a stream, print a top horizontal rule, schema metadata, and if (++batchId === 0) { hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`); - if (state.metadata && batch.schema.metadata.size > 0) { + if (metadata && batch.schema.metadata.size > 0) { this.push(`metadata:\n${formatMetadata(batch.schema.metadata)}\n`); hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`); } - if (batch.length <= 0 || batch.numCols <= 0) { + if (batch.numRows <= 0 || batch.numCols <= 0) { this.push(`${formatRow(header, maxColWidths = state.maxColWidths, sep)}\n`); } } - if (batch.length > 0 && batch.numCols > 0) { + if (batch.numRows > 0 && batch.numCols > 0) { // If any of the column widths changed, print the header again if (rowId % 350 !== 0 && JSON.stringify(state.maxColWidths) !== JSON.stringify(maxColWidths)) { 
this.push(`${formatRow(header, state.maxColWidths, sep)}\n`); @@ -177,7 +181,7 @@ function batchesToString(state: ToStringState, schema: Schema) { if (rowId++ % 350 === 0) { this.push(`${formatRow(header, maxColWidths, sep)}\n`); } - this.push(`${formatRow([rowId, ...row.toArray()].map(valueToString), maxColWidths, sep)}\n`); + this.push(`${formatRow([rowId, ...row.toArray()].map(v => valueToString(v)), maxColWidths, sep)}\n`); } } cb(); @@ -193,19 +197,19 @@ function formatRow(row: string[] = [], maxColWidths: number[] = [], sep = ' | ') return `${row.map((x, j) => padLeft(x, maxColWidths[j])).join(sep)}`; } +function formatMetadataValue(value = '') { + let parsed = value; + try { + parsed = JSON.stringify(JSON.parse(value), null, 2); + } catch { parsed = value; } + return valueToString(parsed).split('\n').join('\n '); +} + function formatMetadata(metadata: Map) { return [...metadata].map(([key, val]) => ` ${key}: ${formatMetadataValue(val)}` ).join(', \n'); - - function formatMetadataValue(value = '') { - let parsed = value; - try { - parsed = JSON.stringify(JSON.parse(value), null, 2); - } catch (e) { parsed = value; } - return valueToString(parsed).split('\n').join('\n '); - } } function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) { @@ -299,7 +303,7 @@ function cliOpts() { } function print_usage() { - console.log(require('command-line-usage')([ + console.log(commandLineUsage([ { header: 'arrow2csv', content: 'Print a CSV from an Arrow file' diff --git a/js/src/builder.ts b/js/src/builder.ts index 86db953065ac1..6f84154935f7b 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -15,18 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-import { Vector } from './vector'; -import { BufferType } from './enum'; -import { Data, Buffers } from './data'; -import { createIsValidFunction } from './builder/valid'; -import { BuilderType as B, VectorType as V} from './interfaces'; -import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer'; +import { Vector } from './vector.js'; +import { Data, makeData } from './data.js'; +import { MapRow, kKeys } from './row/map.js'; import { DataType, strideForType, Float, Int, Decimal, FixedSizeBinary, Date_, Time, Timestamp, Interval, - Utf8, Binary, List, Map_ -} from './type'; + Utf8, Binary, List, Map_, +} from './type.js'; +import { createIsValidFunction } from './builder/valid.js'; +import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js'; /** * A set of options required to create a `Builder` instance for a given `DataType`. @@ -38,19 +37,6 @@ export interface BuilderOptions { children?: { [key: string]: BuilderOptions } | BuilderOptions[]; } -/** - * A set of options to create an Iterable or AsyncIterable `Builder` transform function. - * @see {@link Builder.throughIterable} - * @see {@link Builder.throughAsyncIterable} - */ - -export interface IterableBuilderOptions extends BuilderOptions { - highWaterMark?: number; - queueingStrategy?: 'bytes' | 'count'; - dictionaryHashFunction?: (value: any) => string | number; - valueToChildTypeId?: (builder: Builder, value: any, offset: number) => number; -} - /** * An abstract base class for types that construct Arrow Vectors from arbitrary JavaScript values. * @@ -59,7 +45,7 @@ export interface IterableBuilderOptions e * for each DataType, creating or resizing the underlying ArrayBuffers as necessary. * * The `Builder` for each Arrow `DataType` handles converting and appending - * values for a given `DataType`. The high-level {@link Builder.new `Builder.new()`} convenience + * values for a given `DataType`. 
The high-level {@link makeBuilder `makeBuilder()`} convenience * method creates the specific `Builder` subclass for the supplied `DataType`. * * Once created, `Builder` instances support both appending values to the end @@ -84,10 +70,11 @@ export interface IterableBuilderOptions e * because this is when it flushes the values that have been enqueued in its internal * dictionary's `Builder`, and creates the `dictionaryVector` for the `Dictionary` `DataType`. * + * @example * ```ts * import { Builder, Utf8 } from 'apache-arrow'; * - * const utf8Builder = Builder.new({ + * const utf8Builder = makeBuilder({ * type: new Utf8(), * nullValues: [null, 'n/a'] * }); @@ -109,18 +96,6 @@ export interface IterableBuilderOptions e */ export abstract class Builder { - /** - * Create a `Builder` instance based on the `type` property of the supplied `options` object. - * @param {BuilderOptions} options An object with a required `DataType` instance - * and other optional parameters to be passed to the `Builder` subclass for the given `type`. - * - * @typeparam T The `DataType` of the `Builder` to create. - * @typeparam TNull The type(s) of values which will be considered null-value sentinels. - * @nocollapse - */ - // @ts-ignore - public static new(options: BuilderOptions): B {} - /** @nocollapse */ // @ts-ignore public static throughNode(options: import('./io/node/builder').BuilderDuplexOptions): import('stream').Duplex { @@ -132,63 +107,6 @@ export abstract class Builder { throw new Error(`"throughDOM" not available in this environment`); } - /** - * Transform a synchronous `Iterable` of arbitrary JavaScript values into a - * sequence of Arrow Vector following the chunking semantics defined in - * the supplied `options` argument. - * - * This function returns a function that accepts an `Iterable` of values to - * transform. When called, this function returns an Iterator of `Vector`. 
- * - * The resulting `Iterator>` yields Vectors based on the - * `queueingStrategy` and `highWaterMark` specified in the `options` argument. - * - * * If `queueingStrategy` is `"count"` (or omitted), The `Iterator>` - * will flush the underlying `Builder` (and yield a new `Vector`) once the - * Builder's `length` reaches or exceeds the supplied `highWaterMark`. - * * If `queueingStrategy` is `"bytes"`, the `Iterator>` will flush - * the underlying `Builder` (and yield a new `Vector`) once its `byteLength` - * reaches or exceeds the supplied `highWaterMark`. - * - * @param {IterableBuilderOptions} options An object of properties which determine the `Builder` to create and the chunking semantics to use. - * @returns A function which accepts a JavaScript `Iterable` of values to - * write, and returns an `Iterator` that yields Vectors according - * to the chunking semantics defined in the `options` argument. - * @nocollapse - */ - public static throughIterable(options: IterableBuilderOptions) { - return throughIterable(options); - } - - /** - * Transform an `AsyncIterable` of arbitrary JavaScript values into a - * sequence of Arrow Vector following the chunking semantics defined in - * the supplied `options` argument. - * - * This function returns a function that accepts an `AsyncIterable` of values to - * transform. When called, this function returns an AsyncIterator of `Vector`. - * - * The resulting `AsyncIterator>` yields Vectors based on the - * `queueingStrategy` and `highWaterMark` specified in the `options` argument. - * - * * If `queueingStrategy` is `"count"` (or omitted), The `AsyncIterator>` - * will flush the underlying `Builder` (and yield a new `Vector`) once the - * Builder's `length` reaches or exceeds the supplied `highWaterMark`. - * * If `queueingStrategy` is `"bytes"`, the `AsyncIterator>` will flush - * the underlying `Builder` (and yield a new `Vector`) once its `byteLength` - * reaches or exceeds the supplied `highWaterMark`. 
- * - * @param {IterableBuilderOptions} options An object of properties which determine the `Builder` to create and the chunking semantics to use. - * @returns A function which accepts a JavaScript `AsyncIterable` of values - * to write, and returns an `AsyncIterator` that yields Vectors - * according to the chunking semantics defined in the `options` - * argument. - * @nocollapse - */ - public static throughAsyncIterable(options: IterableBuilderOptions) { - return throughAsyncIterable(options); - } - /** * Construct a builder with the given Arrow DataType with optional null values, * which will be interpreted as "null" when set or appended to the `Builder`. @@ -242,7 +160,7 @@ export abstract class Builder { * Flush the `Builder` and return a `Vector`. * @returns {Vector} A `Vector` of the flushed values. */ - public toVector() { return Vector.new(this.flush()); } + public toVector() { return new Vector([this.flush()]); } public get ArrayType() { return this.type.ArrayType; } public get nullCount() { return this._nulls.numInvalid; } @@ -253,11 +171,12 @@ export abstract class Builder { */ public get byteLength(): number { let size = 0; - this._offsets && (size += this._offsets.byteLength); - this._values && (size += this._values.byteLength); - this._nulls && (size += this._nulls.byteLength); - this._typeIds && (size += this._typeIds.byteLength); - return this.children.reduce((size, child) => size + child.byteLength, size); + const { _offsets, _values, _nulls, _typeIds, children } = this; + _offsets && (size += _offsets.byteLength); + _values && (size += _values.byteLength); + _nulls && (size += _nulls.byteLength); + _typeIds && (size += _typeIds.byteLength); + return children.reduce((size, child) => size + child.byteLength, size); } /** @@ -279,20 +198,20 @@ export abstract class Builder { return this.children.reduce((size, child) => size + child.reservedByteLength, size); } - protected _offsets!: DataBufferBuilder; + declare protected _offsets: DataBufferBuilder; 
public get valueOffsets() { return this._offsets ? this._offsets.buffer : null; } - protected _values!: BufferBuilder; + declare protected _values: BufferBuilder; public get values() { return this._values ? this._values.buffer : null; } - protected _nulls: BitmapBufferBuilder; + declare protected _nulls: BitmapBufferBuilder; public get nullBitmap() { return this._nulls ? this._nulls.buffer : null; } - protected _typeIds!: DataBufferBuilder; + declare protected _typeIds: DataBufferBuilder; public get typeIds() { return this._typeIds ? this._typeIds.buffer : null; } - protected _isValid!: (value: T['TValue'] | TNull) => boolean; - protected _setValue!: (inst: Builder, index: number, value: T['TValue']) => void; + declare protected _isValid: (value: T['TValue'] | TNull) => boolean; + declare protected _setValue: (inst: Builder, index: number, value: T['TValue']) => void; /** * Appends a value (or null) to this `Builder`. @@ -355,37 +274,39 @@ export abstract class Builder { * Commit all the values that have been written to their underlying * ArrayBuffers, including any child Builders if applicable, and reset * the internal `Builder` state. - * @returns A `Data` of the buffers and childData representing the values written. + * @returns A `Data` of the buffers and children representing the values written. 
*/ - public flush() { + public flush(): Data { - const buffers: any = []; - const values = this._values; - const offsets = this._offsets; - const typeIds = this._typeIds; - const { length, nullCount } = this; + let data; + let typeIds; + let nullBitmap; + let valueOffsets; + const { type, length, nullCount, _typeIds, _offsets, _values, _nulls } = this; - if (typeIds) { /* Unions */ - buffers[BufferType.TYPE] = typeIds.flush(length); + if (typeIds = _typeIds?.flush(length)) { // Unions // DenseUnions - offsets && (buffers[BufferType.OFFSET] = offsets.flush(length)); - } else if (offsets) { /* Variable-width primitives (Binary, Utf8) and Lists */ + valueOffsets = _offsets?.flush(length); + } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8), and Lists // Binary, Utf8 - values && (buffers[BufferType.DATA] = values.flush(offsets.last())); - buffers[BufferType.OFFSET] = offsets.flush(length); - } else if (values) { /* Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, and Interval) */ - buffers[BufferType.DATA] = values.flush(length); + data = _values?.flush(_offsets.last()); + } else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, and Interval) + data = _values?.flush(length); } - nullCount > 0 && (buffers[BufferType.VALIDITY] = this._nulls.flush(length)); + if (nullCount > 0) { + nullBitmap = _nulls?.flush(length); + } - const data = Data.new( - this.type, 0, length, nullCount, buffers as Buffers, - this.children.map((child) => child.flush())) as Data; + const children = this.children.map((child) => child.flush()); this.clear(); - return data; + return makeData({ + type, length, nullCount, + children, 'child': children[0], + data, typeIds, nullBitmap, valueOffsets, + }) as Data; } /** @@ -394,7 +315,7 @@ export abstract class Builder { */ public finish() { this.finished = true; - this.children.forEach((child) => child.finish()); + for (const child of this.children) child.finish(); return this; 
} @@ -404,11 +325,11 @@ export abstract class Builder { */ public clear() { this.length = 0; - this._offsets && (this._offsets.clear()); - this._values && (this._values.clear()); - this._nulls && (this._nulls.clear()); - this._typeIds && (this._typeIds.clear()); - this.children.forEach((child) => child.clear()); + this._nulls?.clear(); + this._values?.clear(); + this._offsets?.clear(); + this._typeIds?.clear(); + for (const child of this.children) child.clear(); return this; } } @@ -446,7 +367,7 @@ export abstract class VariableWidthBuilder, pendingLength: number): void; } - -/** @ignore */ -type ThroughIterable = (source: Iterable) => IterableIterator>; - -/** @ignore */ -function throughIterable(options: IterableBuilderOptions) { - const { ['queueingStrategy']: queueingStrategy = 'count' } = options; - const { ['highWaterMark']: highWaterMark = queueingStrategy !== 'bytes' ? 1000 : 2 ** 14 } = options; - const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength'; - return function*(source: Iterable) { - let numChunks = 0; - const builder = Builder.new(options); - for (const value of source) { - if (builder.append(value)[sizeProperty] >= highWaterMark) { - ++numChunks && (yield builder.toVector()); - } - } - if (builder.finish().length > 0 || numChunks === 0) { - yield builder.toVector(); - } - } as ThroughIterable; -} - -/** @ignore */ -type ThroughAsyncIterable = (source: Iterable | AsyncIterable) => AsyncIterableIterator>; - -/** @ignore */ -function throughAsyncIterable(options: IterableBuilderOptions) { - const { ['queueingStrategy']: queueingStrategy = 'count' } = options; - const { ['highWaterMark']: highWaterMark = queueingStrategy !== 'bytes' ? 1000 : 2 ** 14 } = options; - const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 
'length' : 'byteLength'; - return async function* (source: Iterable | AsyncIterable) { - let numChunks = 0; - const builder = Builder.new(options); - for await (const value of source) { - if (builder.append(value)[sizeProperty] >= highWaterMark) { - ++numChunks && (yield builder.toVector()); - } - } - if (builder.finish().length > 0 || numChunks === 0) { - yield builder.toVector(); - } - } as ThroughAsyncIterable; -} diff --git a/js/src/builder/binary.ts b/js/src/builder/binary.ts index 829da5c9764d2..3c12ddf34abb0 100644 --- a/js/src/builder/binary.ts +++ b/js/src/builder/binary.ts @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -import { Binary } from '../type'; -import { toUint8Array } from '../util/buffer'; -import { BufferBuilder } from './buffer'; -import { VariableWidthBuilder, BuilderOptions } from '../builder'; +import { Binary } from '../type.js'; +import { toUint8Array } from '../util/buffer.js'; +import { BufferBuilder } from './buffer.js'; +import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; /** @ignore */ export class BinaryBuilder extends VariableWidthBuilder { @@ -39,12 +39,12 @@ export class BinaryBuilder extends VariableWidthBuilder, pendingLength: number) { const offsets = this._offsets; const data = this._values.reserve(pendingLength).buffer; - let index = 0, length = 0, offset = 0, value: Uint8Array | undefined; - for ([index, value] of pending) { + let offset = 0; + for (const [index, value] of pending) { if (value === undefined) { offsets.set(index, 0); } else { - length = value.length; + const length = value.length; data.set(value, offset); offsets.set(index, length); offset += length; diff --git a/js/src/builder/bool.ts b/js/src/builder/bool.ts index 5c0e0950e16dc..8289d0bcd6dd8 100644 --- a/js/src/builder/bool.ts +++ b/js/src/builder/bool.ts @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-import { Bool } from '../type'; -import { BitmapBufferBuilder } from './buffer'; -import { Builder, BuilderOptions } from '../builder'; +import { Bool } from '../type.js'; +import { BitmapBufferBuilder } from './buffer.js'; +import { Builder, BuilderOptions } from '../builder.js'; /** @ignore */ export class BoolBuilder extends Builder { diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 3c20cc001b33d..e65cdc430e821 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -15,17 +15,16 @@ // specific language governing permissions and limitations // under the License. -import { memcpy } from '../util/buffer'; -import { BigIntAvailable, BigInt64Array, BigUint64Array } from '../util/compat'; +import { memcpy } from '../util/buffer.js'; import { TypedArray, TypedArrayConstructor, BigIntArray, BigIntArrayConstructor -} from '../interfaces'; +} from '../interfaces.js'; /** @ignore */ type DataValue = T extends TypedArray ? number : T extends BigIntArray ? WideValue : T; /** @ignore */ type WideValue = T extends BigIntArray ? bigint | Int32Array | Uint32Array : never; /** @ignore */ type ArrayCtor = - T extends TypedArray ? TypedArrayConstructor : + T extends TypedArray ? TypedArrayConstructor : T extends BigIntArray ? 
BigIntArrayConstructor : any; @@ -49,7 +48,7 @@ export class BufferBuilder; - this._resize(this.length = buffer.length / stride | 0); + this._resize(this.length = Math.trunc(buffer.length / stride)); } public buffer: T; @@ -58,7 +57,7 @@ export class BufferBuilder; public readonly BYTES_PER_ELEMENT: number; - public get byteLength() { return this.length * this.stride * this.BYTES_PER_ELEMENT | 0; } + public get byteLength() { return Math.trunc(this.length * this.stride * this.BYTES_PER_ELEMENT); } public get reservedLength() { return this.buffer.length / this.stride; } public get reservedByteLength() { return this.buffer.byteLength; } @@ -92,7 +91,7 @@ export class BufferBuilder memcpy(new this.ArrayType(newLength), this.buffer); + return this.buffer = memcpy(new this.ArrayType(newLength), this.buffer); } } @@ -122,7 +121,7 @@ export class BitmapBufferBuilder extends DataBufferBuilder { const byte = idx >> 3, bit = idx % 8, cur = buffer[byte] >> bit & 1; // If `val` is truthy and the current bit is 0, flip it to 1 and increment `numValid`. // If `val` is falsey and the current bit is 1, flip it to 0 and decrement `numValid`. - val ? cur === 0 && ((buffer[byte] |= (1 << bit)), ++this.numValid) + val ? cur === 0 && ((buffer[byte] |= (1 << bit)), ++this.numValid) : cur === 1 && ((buffer[byte] &= ~(1 << bit)), --this.numValid); return this; } @@ -155,28 +154,28 @@ export class OffsetsBufferBuilder extends DataBufferBuilder { } } -/** @ignore */ -export class WideBufferBuilder extends BufferBuilder> { - public buffer64!: R; - protected _ArrayType64!: BigIntArrayConstructor; - public get ArrayType64() { - return this._ArrayType64 || (this._ArrayType64 = > (this.buffer instanceof Int32Array ? 
BigInt64Array : BigUint64Array)); - } - public set(index: number, value: DataValue) { - this.reserve(index - this.length + 1); - switch (typeof value) { - case 'bigint': this.buffer64[index] = value; break; - case 'number': this.buffer[index * this.stride] = value; break; - default: this.buffer.set(value as TypedArray, index * this.stride); - } - return this; - } - protected _resize(newLength: number) { - const data = super._resize(newLength); - const length = data.byteLength / (this.BYTES_PER_ELEMENT * this.stride); - if (BigIntAvailable) { - this.buffer64 = new this.ArrayType64(data.buffer, data.byteOffset, length); - } - return data; - } -} +// /** @ignore */ +// export class WideBufferBuilder extends BufferBuilder> { +// public buffer64!: R; +// protected _ArrayType64!: BigIntArrayConstructor; +// public get ArrayType64() { +// return this._ArrayType64 || (this._ArrayType64 = > (this.buffer instanceof Int32Array ? BigInt64Array : BigUint64Array)); +// } +// public set(index: number, value: DataValue) { +// this.reserve(index - this.length + 1); +// switch (typeof value) { +// case 'bigint': this.buffer64[index] = value; break; +// case 'number': this.buffer[index * this.stride] = value; break; +// default: this.buffer.set(value as TypedArray, index * this.stride); +// } +// return this; +// } +// protected _resize(newLength: number) { +// const data = super._resize(newLength); +// const length = data.byteLength / (this.BYTES_PER_ELEMENT * this.stride); +// if (BigIntAvailable) { +// this.buffer64 = new this.ArrayType64(data.buffer, data.byteOffset, length); +// } +// return data; +// } +// } diff --git a/js/src/builder/date.ts b/js/src/builder/date.ts index e9748b58c39f1..56a231c1faaad 100644 --- a/js/src/builder/date.ts +++ b/js/src/builder/date.ts @@ -15,12 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-import { FixedWidthBuilder } from '../builder'; -import { Date_, DateDay, DateMillisecond } from '../type'; +import { FixedWidthBuilder } from '../builder.js'; +import { Date_, DateDay, DateMillisecond } from '../type.js'; +import { setDate, setDateDay, setDateMillisecond } from '../visitor/set.js'; /** @ignore */ -export class DateBuilder extends FixedWidthBuilder {} +export class DateBuilder extends FixedWidthBuilder { } + +(DateBuilder.prototype as any)._setValue = setDate; + /** @ignore */ -export class DateDayBuilder extends DateBuilder {} +export class DateDayBuilder extends DateBuilder { } + +(DateDayBuilder.prototype as any)._setValue = setDateDay; + /** @ignore */ -export class DateMillisecondBuilder extends DateBuilder {} +export class DateMillisecondBuilder extends DateBuilder { } + +(DateMillisecondBuilder.prototype as any)._setValue = setDateMillisecond; diff --git a/js/src/builder/decimal.ts b/js/src/builder/decimal.ts index 5814abd5b17ec..4554f055279e0 100644 --- a/js/src/builder/decimal.ts +++ b/js/src/builder/decimal.ts @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. -import { Decimal } from '../type'; -import { FixedWidthBuilder } from '../builder'; +import { Decimal } from '../type.js'; +import { FixedWidthBuilder } from '../builder.js'; +import { setDecimal } from '../visitor/set.js'; /** @ignore */ -export class DecimalBuilder extends FixedWidthBuilder {} +export class DecimalBuilder extends FixedWidthBuilder { } + +(DecimalBuilder.prototype as any)._setValue = setDecimal; diff --git a/js/src/builder/dictionary.ts b/js/src/builder/dictionary.ts index 6602825dd164e..022afd1516f3c 100644 --- a/js/src/builder/dictionary.ts +++ b/js/src/builder/dictionary.ts @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-import { Vector } from '../vector'; -import { IntBuilder } from './int'; -import { Dictionary, DataType } from '../type'; -import { Builder, BuilderOptions } from '../builder'; +import { Vector } from '../vector.js'; +import { IntBuilder } from './int.js'; +import { Dictionary, DataType } from '../type.js'; +import { Builder, BuilderOptions } from '../builder.js'; +import { makeBuilder } from '../factories.js'; type DictionaryHashFunction = (x: any) => string | number; @@ -37,11 +38,11 @@ export class DictionaryBuilder extends Builde constructor({ 'type': type, 'nullValues': nulls, 'dictionaryHashFunction': hashFn }: DictionaryBuilderOptions) { super({ type: new Dictionary(type.dictionary, type.indices, type.id, type.isOrdered) as T }); - this._nulls = null; + this._nulls = null; this._dictionaryOffset = 0; this._keysToIndices = Object.create(null); - this.indices = Builder.new({ 'type': this.type.indices, 'nullValues': nulls }) as IntBuilder; - this.dictionary = Builder.new({ 'type': this.type.dictionary, 'nullValues': null }) as Builder; + this.indices = makeBuilder({ 'type': this.type.indices, 'nullValues': nulls }) as IntBuilder; + this.dictionary = makeBuilder({ 'type': this.type.dictionary, 'nullValues': null }) as Builder; if (typeof hashFn === 'function') { this.valueToKey = hashFn; } diff --git a/js/src/builder/fixedsizebinary.ts b/js/src/builder/fixedsizebinary.ts index 99aaf46a385a3..917e790251b59 100644 --- a/js/src/builder/fixedsizebinary.ts +++ b/js/src/builder/fixedsizebinary.ts @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-import { FixedSizeBinary } from '../type'; -import { FixedWidthBuilder } from '../builder'; +import { FixedSizeBinary } from '../type.js'; +import { FixedWidthBuilder } from '../builder.js'; +import { setFixedSizeBinary } from '../visitor/set.js'; /** @ignore */ -export class FixedSizeBinaryBuilder extends FixedWidthBuilder {} +export class FixedSizeBinaryBuilder extends FixedWidthBuilder { } + +(FixedSizeBinaryBuilder.prototype as any)._setValue = setFixedSizeBinary; diff --git a/js/src/builder/fixedsizelist.ts b/js/src/builder/fixedsizelist.ts index cc20f5ba2135d..f4b4b95df2ef7 100644 --- a/js/src/builder/fixedsizelist.ts +++ b/js/src/builder/fixedsizelist.ts @@ -15,16 +15,18 @@ // specific language governing permissions and limitations // under the License. -import { Run } from './run'; -import { Field } from '../schema'; -import { Builder } from '../builder'; -import { DataType, FixedSizeList } from '../type'; +import { Field } from '../schema.js'; +import { Builder } from '../builder.js'; +import { DataType, FixedSizeList } from '../type.js'; /** @ignore */ export class FixedSizeListBuilder extends Builder, TNull> { - protected _run = new Run(); public setValue(index: number, value: T['TValue']) { - super.setValue(index, this._run.bind(value)); + const [child] = this.children; + const start = index * this.stride; + for (let i = -1, n = value.length; ++i < n;) { + child.set(start + i, value[i]); + } } public addChild(child: Builder, name = '0') { if (this.numChildren > 0) { @@ -34,8 +36,4 @@ export class FixedSizeListBuilder extends this.type = new FixedSizeList(this.type.listSize, new Field(name, child.type, true)); return childIndex; } - public clear() { - this._run.clear(); - return super.clear(); - } } diff --git a/js/src/builder/float.ts b/js/src/builder/float.ts index dbf4c0d0646ee..d67832f93e62c 100644 --- a/js/src/builder/float.ts +++ b/js/src/builder/float.ts @@ -15,31 +15,27 @@ // specific language governing permissions and limitations // under the 
License. -import { float64ToUint16 } from '../util/math'; -import { FixedWidthBuilder } from '../builder'; -import { Float, Float16, Float32, Float64 } from '../type'; +import { float64ToUint16 } from '../util/math.js'; +import { FixedWidthBuilder } from '../builder.js'; +import { Float, Float16, Float32, Float64 } from '../type.js'; /** @ignore */ -export class FloatBuilder extends FixedWidthBuilder {} +export class FloatBuilder extends FixedWidthBuilder { + public setValue(index: number, value: number) { + this._values.set(index, value); + } +} /** @ignore */ export class Float16Builder extends FloatBuilder { public setValue(index: number, value: number) { // convert JS float64 to a uint16 - this._values.set(index, float64ToUint16(value)); + super.setValue(index, float64ToUint16(value)); } } /** @ignore */ -export class Float32Builder extends FloatBuilder { - public setValue(index: number, value: number) { - this._values.set(index, value); - } -} +export class Float32Builder extends FloatBuilder { } /** @ignore */ -export class Float64Builder extends FloatBuilder { - public setValue(index: number, value: number) { - this._values.set(index, value); - } -} +export class Float64Builder extends FloatBuilder { } diff --git a/js/src/builder/index.ts b/js/src/builder/index.ts deleted file mode 100644 index dfd9d54f1f5eb..0000000000000 --- a/js/src/builder/index.ts +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/** @ignore */ -export { Builder, BuilderOptions } from '../builder'; -export { BoolBuilder } from './bool'; -export { NullBuilder } from './null'; -export { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from './date'; -export { DecimalBuilder } from './decimal'; -export { DictionaryBuilder } from './dictionary'; -export { FixedSizeBinaryBuilder } from './fixedsizebinary'; -export { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from './float'; -export { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from './int'; -export { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './time'; -export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './timestamp'; -export { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './interval'; -export { Utf8Builder } from './utf8'; -export { BinaryBuilder } from './binary'; -export { ListBuilder } from './list'; -export { FixedSizeListBuilder } from './fixedsizelist'; -export { MapBuilder } from './map'; -export { StructBuilder } from './struct'; -export { UnionBuilder, SparseUnionBuilder, DenseUnionBuilder } from './union'; - -import { Type } from '../enum'; -import { Field } from '../schema'; -import { DataType } from '../type'; -import { Utf8Builder } from './utf8'; -import { BuilderType as B } from '../interfaces'; -import { Builder, 
BuilderOptions } from '../builder'; -import { instance as setVisitor } from '../visitor/set'; -import { instance as getBuilderConstructor } from '../visitor/builderctor'; - -/** @nocollapse */ -Builder.new = newBuilder; - -function newBuilder(options: BuilderOptions): B { - - const type = options.type; - const builder = new (getBuilderConstructor.getVisitFn(type)())(options) as Builder; - - if (type.children && type.children.length > 0) { - - const children = options['children'] || [] as BuilderOptions[]; - const defaultOptions = { 'nullValues': options['nullValues'] }; - const getChildOptions = Array.isArray(children) - ? ((_: Field, i: number) => children[i] || defaultOptions) - : (({ name }: Field) => children[name] || defaultOptions); - - type.children.forEach((field, index) => { - const { type } = field; - const opts = getChildOptions(field, index); - builder.children.push(newBuilder({ ...opts, type })); - }); - } - - return builder as B; -} - -(Object.keys(Type) as any[]) - .map((T: any) => Type[T] as any) - .filter((T: any): T is Type => typeof T === 'number' && T !== Type.NONE) - .forEach((typeId) => { - const BuilderCtor = getBuilderConstructor.visit(typeId); - BuilderCtor.prototype._setValue = setVisitor.getVisitFn(typeId); - }); - -(Utf8Builder.prototype as any)._setValue = setVisitor.visitBinary; diff --git a/js/src/builder/int.ts b/js/src/builder/int.ts index 5777bd1258a29..f7641e7c9e523 100644 --- a/js/src/builder/int.ts +++ b/js/src/builder/int.ts @@ -15,11 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-import { bignumToBigInt } from '../util/bn'; -import { WideBufferBuilder } from './buffer'; -import { BigInt64Array } from '../util/compat'; -import { FixedWidthBuilder, BuilderOptions } from '../builder'; -import { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64 } from '../type'; +import { FixedWidthBuilder } from '../builder.js'; +import { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64 } from '../type.js'; /** @ignore */ export class IntBuilder extends FixedWidthBuilder { @@ -29,52 +26,19 @@ export class IntBuilder extends FixedWidthBuil } /** @ignore */ -export class Int8Builder extends IntBuilder {} +export class Int8Builder extends IntBuilder { } /** @ignore */ -export class Int16Builder extends IntBuilder {} +export class Int16Builder extends IntBuilder { } /** @ignore */ -export class Int32Builder extends IntBuilder {} +export class Int32Builder extends IntBuilder { } /** @ignore */ -export class Int64Builder extends IntBuilder { - protected _values: WideBufferBuilder; - constructor(options: BuilderOptions) { - if (options['nullValues']) { - options['nullValues'] = (options['nullValues'] as TNull[]).map(toBigInt); - } - super(options); - this._values = new WideBufferBuilder(new Int32Array(0), 2); - } - public get values64() { return this._values.buffer64; } - public isValid(value: Int32Array | bigint | TNull) { return super.isValid(toBigInt(value)); } -} +export class Int64Builder extends IntBuilder { } /** @ignore */ -export class Uint8Builder extends IntBuilder {} +export class Uint8Builder extends IntBuilder { } /** @ignore */ -export class Uint16Builder extends IntBuilder {} +export class Uint16Builder extends IntBuilder { } /** @ignore */ -export class Uint32Builder extends IntBuilder {} +export class Uint32Builder extends IntBuilder { } /** @ignore */ -export class Uint64Builder extends IntBuilder { - protected _values: WideBufferBuilder; - constructor(options: BuilderOptions) { - if (options['nullValues']) { - 
options['nullValues'] = (options['nullValues'] as TNull[]).map(toBigInt); - } - super(options); - this._values = new WideBufferBuilder(new Uint32Array(0), 2); - } - public get values64() { return this._values.buffer64; } - public isValid(value: Uint32Array | bigint | TNull) { return super.isValid(toBigInt(value)); } -} - -const toBigInt = ((memo: any) => (value: any) => { - if (ArrayBuffer.isView(value)) { - memo.buffer = value.buffer; - memo.byteOffset = value.byteOffset; - memo.byteLength = value.byteLength; - value = bignumToBigInt(memo); - memo.buffer = null; - } - return value; -})({ 'BigIntArray': BigInt64Array }); +export class Uint64Builder extends IntBuilder { } diff --git a/js/src/builder/interval.ts b/js/src/builder/interval.ts index 3742282156d9e..31525953af954 100644 --- a/js/src/builder/interval.ts +++ b/js/src/builder/interval.ts @@ -15,12 +15,21 @@ // specific language governing permissions and limitations // under the License. -import { FixedWidthBuilder } from '../builder'; -import { Interval, IntervalDayTime, IntervalYearMonth } from '../type'; +import { FixedWidthBuilder } from '../builder.js'; +import { Interval, IntervalDayTime, IntervalYearMonth } from '../type.js'; +import { setIntervalValue, setIntervalDayTime, setIntervalYearMonth } from '../visitor/set.js'; /** @ignore */ -export class IntervalBuilder extends FixedWidthBuilder {} +export class IntervalBuilder extends FixedWidthBuilder { } + +(IntervalBuilder.prototype as any)._setValue = setIntervalValue; + /** @ignore */ -export class IntervalDayTimeBuilder extends IntervalBuilder {} +export class IntervalDayTimeBuilder extends IntervalBuilder { } + +(IntervalDayTimeBuilder.prototype as any)._setValue = setIntervalDayTime; + /** @ignore */ -export class IntervalYearMonthBuilder extends IntervalBuilder {} +export class IntervalYearMonthBuilder extends IntervalBuilder { } + +(IntervalYearMonthBuilder.prototype as any)._setValue = setIntervalYearMonth; diff --git a/js/src/builder/list.ts 
b/js/src/builder/list.ts index 844681eae0673..e0ff0df052804 100644 --- a/js/src/builder/list.ts +++ b/js/src/builder/list.ts @@ -15,15 +15,13 @@ // specific language governing permissions and limitations // under the License. -import { Run } from './run'; -import { Field } from '../schema'; -import { DataType, List } from '../type'; -import { OffsetsBufferBuilder } from './buffer'; -import { Builder, BuilderOptions, VariableWidthBuilder } from '../builder'; +import { Field } from '../schema.js'; +import { DataType, List } from '../type.js'; +import { OffsetsBufferBuilder } from './buffer.js'; +import { Builder, BuilderOptions, VariableWidthBuilder } from '../builder.js'; /** @ignore */ export class ListBuilder extends VariableWidthBuilder, TNull> { - protected _run = new Run(); protected _offsets: OffsetsBufferBuilder; constructor(opts: BuilderOptions, TNull>) { super(opts); @@ -37,21 +35,18 @@ export class ListBuilder extends Variable this.type = new List(new Field(name, child.type, true)); return this.numChildren - 1; } - public clear() { - this._run.clear(); - return super.clear(); - } protected _flushPending(pending: Map) { - const run = this._run; const offsets = this._offsets; - const setValue = this._setValue; - let index = 0, value: Uint8Array | undefined; - for ([index, value] of pending) { + const [child] = this.children; + for (const [index, value] of pending) { if (value === undefined) { offsets.set(index, 0); } else { - offsets.set(index, value.length); - setValue(this, index, run.bind(value)); + const n = value.length; + const start = offsets.set(index, n).buffer[index]; + for (let i = -1; ++i < n;) { + child.set(start + i, value[i]); + } } } } diff --git a/js/src/builder/map.ts b/js/src/builder/map.ts index 25affef2c77fc..4c73c4f2f98d4 100644 --- a/js/src/builder/map.ts +++ b/js/src/builder/map.ts @@ -15,29 +15,29 @@ // specific language governing permissions and limitations // under the License. 
-import { Field } from '../schema'; -import { DataType, Map_, Struct } from '../type'; -import { Builder, VariableWidthBuilder } from '../builder'; +import { Field } from '../schema.js'; +import { DataType, Map_, Struct } from '../type.js'; +import { Builder, VariableWidthBuilder } from '../builder.js'; /** @ignore */ type MapValue = Map_['TValue']; /** @ignore */ type MapValues = Map | undefined>; -/** @ignore */ type MapValueExt = MapValue | { [key: string]: V } | { [key: number]: V } ; +/** @ignore */ type MapValueExt = MapValue | { [key: string]: V } | { [key: number]: V }; /** @ignore */ export class MapBuilder extends VariableWidthBuilder, TNull> { - protected _pending: MapValues | undefined; + declare protected _pending: MapValues | undefined; public set(index: number, value: MapValueExt | TNull) { return super.set(index, value as MapValue | TNull); } public setValue(index: number, value: MapValueExt) { - value = value instanceof Map ? value : new Map(Object.entries(value)); + const row = (value instanceof Map ? 
value : new Map(Object.entries(value))) as MapValue; const pending = this._pending || (this._pending = new Map() as MapValues); - const current = pending.get(index); + const current = pending.get(index) as Map | undefined; current && (this._pendingLength -= current.size); - this._pendingLength += value.size; - pending.set(index, value); + this._pendingLength += row.size; + pending.set(index, row); } public addChild(child: Builder>, name = `${this.numChildren}`) { @@ -51,14 +51,20 @@ export class MapBuilder) { const offsets = this._offsets; - const setValue = this._setValue; - pending.forEach((value, index) => { + const [child] = this.children; + for (const [index, value] of pending) { if (value === undefined) { offsets.set(index, 0); } else { - offsets.set(index, value.size); - setValue(this, index, value); + let { + [index]: idx, + [index + 1]: end + } = offsets.set(index, value.size).buffer; + for (const val of value.entries()) { + child.set(idx, val); + if (++idx >= end) break; + } } - }); + } } } diff --git a/js/src/builder/null.ts b/js/src/builder/null.ts index 4be3f063b24b9..fb57bb7b96e82 100644 --- a/js/src/builder/null.ts +++ b/js/src/builder/null.ts @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. -import { Null } from '../type'; -import { Builder } from '../builder'; +import { Null } from '../type.js'; +import { Builder } from '../builder.js'; /** @ignore */ export class NullBuilder extends Builder { // @ts-ignore - public setValue(index: number, value: null) {} + public setValue(index: number, value: null) { } public setValid(index: number, valid: boolean) { this.length = Math.max(index + 1, this.length); return valid; diff --git a/js/src/builder/run.ts b/js/src/builder/run.ts deleted file mode 100644 index 5239f51f29318..0000000000000 --- a/js/src/builder/run.ts +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { Vector } from '../vector'; -import { DataType } from '../type'; - -/** @ignore */ -export class Run { - protected _values!: ArrayLike; - public get length() { return this._values.length; } - public get(index: number) { return this._values[index]; } - public clear() { this._values = null; return this; } - public bind(values: Vector | ArrayLike) { - if (values instanceof Vector) { - return values; - } - this._values = values; - return this as any; - } -} diff --git a/js/src/builder/struct.ts b/js/src/builder/struct.ts index 4d12336ceacf1..cf288cdcef397 100644 --- a/js/src/builder/struct.ts +++ b/js/src/builder/struct.ts @@ -15,12 +15,22 @@ // specific language governing permissions and limitations // under the License. 
-import { Field } from '../schema'; -import { Builder } from '../builder'; -import { DataType, Struct } from '../type'; +/* eslint-disable unicorn/no-array-for-each */ + +import { Field } from '../schema.js'; +import { Builder } from '../builder.js'; +import { Struct, TypeMap } from '../type.js'; /** @ignore */ -export class StructBuilder extends Builder, TNull> { +export class StructBuilder extends Builder, TNull> { + public setValue(index: number, value: Struct['TValue']) { + const children = this.children; + switch (Array.isArray(value) || value.constructor) { + case true: return this.type.children.forEach((_, i) => children[i].set(index, value[i])); + case Map: return this.type.children.forEach((f, i) => children[i].set(index, value.get(f.name))); + default: return this.type.children.forEach((f, i) => children[i].set(index, value[f.name])); + } + } public addChild(child: Builder, name = `${this.numChildren}`) { const childIndex = this.children.push(child); this.type = new Struct([...this.type.children, new Field(name, child.type, true)]); diff --git a/js/src/builder/time.ts b/js/src/builder/time.ts index bfa71d2b5d6c1..ad1d052ae77d7 100644 --- a/js/src/builder/time.ts +++ b/js/src/builder/time.ts @@ -15,16 +15,31 @@ // specific language governing permissions and limitations // under the License. 
-import { FixedWidthBuilder } from '../builder'; -import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from '../type'; +import { FixedWidthBuilder } from '../builder.js'; +import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from '../type.js'; +import { setTime, setTimeSecond, setTimeMillisecond, setTimeMicrosecond, setTimeNanosecond } from '../visitor/set.js'; /** @ignore */ -export class TimeBuilder extends FixedWidthBuilder {} +export class TimeBuilder extends FixedWidthBuilder { } + +(TimeBuilder.prototype as any)._setValue = setTime; + /** @ignore */ -export class TimeSecondBuilder extends TimeBuilder {} +export class TimeSecondBuilder extends TimeBuilder { } + +(TimeSecondBuilder.prototype as any)._setValue = setTimeSecond; + /** @ignore */ -export class TimeMillisecondBuilder extends TimeBuilder {} +export class TimeMillisecondBuilder extends TimeBuilder { } + +(TimeMillisecondBuilder.prototype as any)._setValue = setTimeMillisecond; + /** @ignore */ -export class TimeMicrosecondBuilder extends TimeBuilder {} +export class TimeMicrosecondBuilder extends TimeBuilder { } + +(TimeMicrosecondBuilder.prototype as any)._setValue = setTimeMicrosecond; + /** @ignore */ -export class TimeNanosecondBuilder extends TimeBuilder {} +export class TimeNanosecondBuilder extends TimeBuilder { } + +(TimeNanosecondBuilder.prototype as any)._setValue = setTimeNanosecond; diff --git a/js/src/builder/timestamp.ts b/js/src/builder/timestamp.ts index 49741e9ba67e6..2dbf9ee878e93 100644 --- a/js/src/builder/timestamp.ts +++ b/js/src/builder/timestamp.ts @@ -15,16 +15,31 @@ // specific language governing permissions and limitations // under the License. 
-import { FixedWidthBuilder } from '../builder'; -import { Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond } from '../type'; +import { FixedWidthBuilder } from '../builder.js'; +import { Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond } from '../type.js'; +import { setTimestamp, setTimestampSecond, setTimestampMillisecond, setTimestampMicrosecond, setTimestampNanosecond } from '../visitor/set.js'; /** @ignore */ -export class TimestampBuilder extends FixedWidthBuilder {} +export class TimestampBuilder extends FixedWidthBuilder { } + +(TimestampBuilder.prototype as any)._setValue = setTimestamp; + /** @ignore */ -export class TimestampSecondBuilder extends TimestampBuilder {} +export class TimestampSecondBuilder extends TimestampBuilder { } + +(TimestampSecondBuilder.prototype as any)._setValue = setTimestampSecond; + /** @ignore */ -export class TimestampMillisecondBuilder extends TimestampBuilder {} +export class TimestampMillisecondBuilder extends TimestampBuilder { } + +(TimestampMillisecondBuilder.prototype as any)._setValue = setTimestampMillisecond; + /** @ignore */ -export class TimestampMicrosecondBuilder extends TimestampBuilder {} +export class TimestampMicrosecondBuilder extends TimestampBuilder { } + +(TimestampMicrosecondBuilder.prototype as any)._setValue = setTimestampMicrosecond; + /** @ignore */ -export class TimestampNanosecondBuilder extends TimestampBuilder {} +export class TimestampNanosecondBuilder extends TimestampBuilder { } + +(TimestampNanosecondBuilder.prototype as any)._setValue = setTimestampNanosecond; diff --git a/js/src/builder/union.ts b/js/src/builder/union.ts index 18ac05bf69ed4..1e668f9423e7f 100644 --- a/js/src/builder/union.ts +++ b/js/src/builder/union.ts @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-import { Field } from '../schema'; -import { DataBufferBuilder } from './buffer'; -import { Builder, BuilderOptions } from '../builder'; -import { Union, SparseUnion, DenseUnion } from '../type'; +import { Field } from '../schema.js'; +import { DataBufferBuilder } from './buffer.js'; +import { Builder, BuilderOptions } from '../builder.js'; +import { Union, SparseUnion, DenseUnion } from '../type.js'; export interface UnionBuilderOptions extends BuilderOptions { valueToChildTypeId?: (builder: UnionBuilder, value: any, offset: number) => number; @@ -55,14 +55,16 @@ export abstract class UnionBuilder extends Builder public setValue(index: number, value: T['TValue'], childTypeId?: number) { this._typeIds.set(index, childTypeId!); - super.setValue(index, value); + const childIndex = this.type.typeIdToChildIndex[childTypeId!]; + const child = this.children[childIndex]; + child?.set(index, value); } public addChild(child: Builder, name = `${this.children.length}`) { const childTypeId = this.children.push(child); const { type: { children, mode, typeIds } } = this; const fields = [...children, new Field(name, child.type)]; - this.type = new Union(mode, [...typeIds, childTypeId], fields); + this.type = new Union(mode, [...typeIds, childTypeId], fields); return childTypeId; } @@ -76,7 +78,7 @@ or supply a \`valueToChildTypeId\` function as part of the UnionBuilder construc } /** @ignore */ -export class SparseUnionBuilder extends UnionBuilder {} +export class SparseUnionBuilder extends UnionBuilder { } /** @ignore */ export class DenseUnionBuilder extends UnionBuilder { @@ -89,8 +91,9 @@ export class DenseUnionBuilder extends UnionB /** @ignore */ public setValue(index: number, value: T['TValue'], childTypeId?: number) { - const childIndex = this.type.typeIdToChildIndex[childTypeId!]; - this._offsets.set(index, this.getChildAt(childIndex)!.length); - return super.setValue(index, value, childTypeId); + const id = this._typeIds.set(index, childTypeId!).buffer[index]; + const 
child = this.getChildAt(this.type.typeIdToChildIndex[id])!; + const denseIndex = this._offsets.set(index, child.length).buffer[index]; + child?.set(denseIndex, value); } } diff --git a/js/src/builder/utf8.ts b/js/src/builder/utf8.ts index 7564cdad6d5d6..53b8306cbaffd 100644 --- a/js/src/builder/utf8.ts +++ b/js/src/builder/utf8.ts @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -import { Utf8 } from '../type'; -import { encodeUtf8 } from '../util/utf8'; -import { BinaryBuilder } from './binary'; -import { BufferBuilder } from './buffer'; -import { VariableWidthBuilder, BuilderOptions } from '../builder'; +import { Utf8 } from '../type.js'; +import { encodeUtf8 } from '../util/utf8.js'; +import { BinaryBuilder } from './binary.js'; +import { BufferBuilder } from './buffer.js'; +import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; /** @ignore */ export class Utf8Builder extends VariableWidthBuilder { @@ -38,7 +38,7 @@ export class Utf8Builder extends VariableWidthBuilder return super.setValue(index, encodeUtf8(value) as any); } // @ts-ignore - protected _flushPending(pending: Map, pendingLength: number): void {} + protected _flushPending(pending: Map, pendingLength: number): void { } } (Utf8Builder.prototype as any)._flushPending = (BinaryBuilder.prototype as any)._flushPending; diff --git a/js/src/builder/valid.ts b/js/src/builder/valid.ts index ae5b799fb0631..c38af7208b625 100644 --- a/js/src/builder/valid.ts +++ b/js/src/builder/valid.ts @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-import { DataType } from '../type'; -import { valueToString } from '../util/pretty'; -import { BigIntAvailable } from '../util/compat'; +import { DataType } from '../type.js'; +import { valueToString } from '../util/pretty.js'; +import { BigIntAvailable } from '../util/compat.js'; /** * Dynamically compile the null values into an `isValid()` function whose diff --git a/js/src/column.ts b/js/src/column.ts deleted file mode 100644 index 48b40e5a1b3c1..0000000000000 --- a/js/src/column.ts +++ /dev/null @@ -1,136 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Data } from './data'; -import { Field } from './schema'; -import { DataType } from './type'; -import { Vector } from './vector'; -import { Clonable, Sliceable, Applicative } from './vector'; -import { VectorCtorArgs, VectorType as V } from './interfaces'; -import { Chunked, SearchContinuation } from './vector/chunked'; - -export interface Column { - concat(...others: Vector[]): Column; - slice(begin?: number, end?: number): Column; - clone(chunks?: Vector[], offsets?: Uint32Array): Column; -} - -export class Column - extends Chunked - implements Clonable>, - Sliceable>, - Applicative> { - - public static new(data: Data, ...args: VectorCtorArgs>): Column; - public static new(field: string | Field, ...chunks: (Vector | Vector[])[]): Column; - public static new(field: string | Field, data: Data, ...args: VectorCtorArgs>): Column; - /** @nocollapse */ - public static new(...args: any[]) { - - let [field, data, ...rest] = args as [ - string | Field, - Data | Vector | (Data | Vector)[], - ...any[] - ]; - - if (typeof field !== 'string' && !(field instanceof Field)) { - data = | Vector | (Data | Vector)[]> field; - field = ''; - } - - const chunks = Chunked.flatten( - Array.isArray(data) ? [...data, ...rest] : - data instanceof Vector ? 
[data, ...rest] : - [Vector.new(data, ...rest)] - ); - - if (typeof field === 'string') { - const type = chunks[0].data.type; - field = new Field(field, type, true); - } else if (!field.nullable && chunks.some(({ nullCount }) => nullCount > 0)) { - field = field.clone({ nullable: true }); - } - return new Column(field, chunks); - } - - constructor(field: Field, vectors: Vector[] = [], offsets?: Uint32Array) { - vectors = Chunked.flatten(...vectors); - super(field.type, vectors, offsets); - this._field = field; - if (vectors.length === 1 && !(this instanceof SingleChunkColumn)) { - return new SingleChunkColumn(field, vectors[0], this._chunkOffsets); - } - } - - protected _field: Field; - protected _children?: Column[]; - - public get field() { return this._field; } - public get name() { return this._field.name; } - public get nullable() { return this._field.nullable; } - public get metadata() { return this._field.metadata; } - - public clone(chunks = this._chunks) { - return new Column(this._field, chunks); - } - - public getChildAt(index: number): Column | null { - - if (index < 0 || index >= this.numChildren) { return null; } - - const columns = this._children || (this._children = []); - let column: Column, field: Field, chunks: Vector[]; - - if (column = columns[index]) { return column; } - if (field = ((this.type.children || [])[index] as Field)) { - chunks = this._chunks - .map((vector) => vector.getChildAt(index)) - .filter((vec): vec is Vector => vec != null); - if (chunks.length > 0) { - return (columns[index] = new Column(field, chunks)); - } - } - - return null; - } -} - -/** @ignore */ -class SingleChunkColumn extends Column { - protected _chunk: Vector; - constructor(field: Field, vector: Vector, offsets?: Uint32Array) { - super(field, [vector], offsets); - this._chunk = vector; - } - public search(index: number): [number, number] | null; - public search>>(index: number, then?: N): ReturnType; - public search>>(index: number, then?: N) { - return then ? 
then(this, 0, index) : [0, index]; - } - public isValid(index: number): boolean { - return this._chunk.isValid(index); - } - public get(index: number): T['TValue'] | null { - return this._chunk.get(index); - } - public set(index: number, value: T['TValue'] | null): void { - this._chunk.set(index, value); - } - public indexOf(element: T['TValue'], offset?: number): number { - return this._chunk.indexOf(element, offset); - } -} diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts deleted file mode 100644 index e9df3719490f0..0000000000000 --- a/js/src/compute/dataframe.ts +++ /dev/null @@ -1,288 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Table } from '../table'; -import { Vector } from '../vector'; -import { IntVector } from '../vector/int'; -import { Field, Schema } from '../schema'; -import { Predicate, Col, PredicateFunc } from './predicate'; -import { RecordBatch } from '../recordbatch'; -import { VectorType as V } from '../interfaces'; -import { DataType, Int, Struct, Dictionary } from '../type'; - -/** @ignore */ -export type BindFunc = (batch: RecordBatch) => void; -/** @ignore */ -export type NextFunc = (idx: number, batch: RecordBatch) => void; - -/** - * `DataFrame` extends {@link Table} with support for predicate filtering. - * - * You can construct `DataFrames` like tables or convert a `Table` to a `DataFrame` - * with the constructor. - * - * ```ts - * const df = new DataFrame(table); - * ``` - */ -export class DataFrame extends Table { - public filter(predicate: Predicate): FilteredDataFrame { - return new FilteredDataFrame(this.chunks, predicate); - } - public scan(next: NextFunc, bind?: BindFunc) { - const batches = this.chunks, numBatches = batches.length; - for (let batchIndex = -1; ++batchIndex < numBatches;) { - // load batches - const batch = batches[batchIndex]; - if (bind) { bind(batch); } - // yield all indices - for (let index = -1, numRows = batch.length; ++index < numRows;) { - next(index, batch); - } - } - } - public scanReverse(next: NextFunc, bind?: BindFunc) { - const batches = this.chunks, numBatches = batches.length; - for (let batchIndex = numBatches; --batchIndex >= 0;) { - // load batches - const batch = batches[batchIndex]; - if (bind) { bind(batch); } - // yield all indices - for (let index = batch.length; --index >= 0;) { - next(index, batch); - } - } - } - public countBy(name: Col | string) { - const batches = this.chunks, numBatches = batches.length; - const count_by = typeof name === 'string' ? 
new Col(name) : name as Col; - // Assume that all dictionary batches are deltas, which means that the - // last record batch has the most complete dictionary - count_by.bind(batches[numBatches - 1]); - const vector = count_by.vector as V; - if (!DataType.isDictionary(vector.type)) { - throw new Error('countBy currently only supports dictionary-encoded columns'); - } - - const countByteLength = Math.ceil(Math.log(vector.length) / Math.log(256)); - const CountsArrayType = countByteLength == 4 ? Uint32Array : - countByteLength >= 2 ? Uint16Array : Uint8Array; - - const counts = new CountsArrayType(vector.dictionary.length); - for (let batchIndex = -1; ++batchIndex < numBatches;) { - // load batches - const batch = batches[batchIndex]; - // rebind the countBy Col - count_by.bind(batch); - const keys = (count_by.vector as V).indices; - // yield all indices - for (let index = -1, numRows = batch.length; ++index < numRows;) { - const key = keys.get(index); - if (key !== null) { counts[key]++; } - } - } - return new CountByResult(vector.dictionary, IntVector.from(counts)); - } -} - -/** @ignore */ -export class CountByResult extends Table<{ values: T; counts: TCount }> { - constructor(values: Vector, counts: V) { - type R = { values: T; counts: TCount }; - const schema = new Schema([ - new Field('values', values.type), - new Field('counts', counts.type) - ]); - super(new RecordBatch(schema, counts.length, [values, counts])); - } - public toJSON(): Record { - const values = this.getColumnAt(0)!; - const counts = this.getColumnAt(1)!; - const result = {} as { [k: string]: number | null }; - for (let i = -1; ++i < this.length;) { - result[values.get(i)] = counts.get(i); - } - return result; - } -} - -/** @ignore */ -class FilteredBatchIterator implements IterableIterator['TValue']> { - private batchIndex = 0; - private batch: RecordBatch; - private index = 0; - private predicateFunc: PredicateFunc; - - constructor( - private batches: RecordBatch[], - private predicate: 
Predicate - ) { - // TODO: bind batches lazily - // If predicate doesn't match anything in the batch we don't need - // to bind the callback - this.batch = this.batches[this.batchIndex]; - this.predicateFunc = this.predicate.bind(this.batch); - } - - next(): IteratorResult['TValue']> { - while (this.batchIndex < this.batches.length) { - while (this.index < this.batch.length) { - if (this.predicateFunc(this.index, this.batch)) { - return { - value: this.batch.get(this.index++) as any, - }; - } - this.index++; - } - - if (++this.batchIndex < this.batches.length) { - this.index = 0; - this.batch = this.batches[this.batchIndex]; - this.predicateFunc = this.predicate.bind(this.batch); - } - } - - return {done: true, value: null}; - } - - [Symbol.iterator]() { - return this; - } -} - -/** @ignore */ -export class FilteredDataFrame extends DataFrame { - private _predicate: Predicate; - constructor (batches: RecordBatch[], predicate: Predicate) { - super(batches); - this._predicate = predicate; - } - public scan(next: NextFunc, bind?: BindFunc) { - // inlined version of this: - // this.parent.scan((idx, columns) => { - // if (this.predicate(idx, columns)) next(idx, columns); - // }); - const batches = this._chunks; - const numBatches = batches.length; - for (let batchIndex = -1; ++batchIndex < numBatches;) { - // load batches - const batch = batches[batchIndex]; - const predicate = this._predicate.bind(batch); - let isBound = false; - // yield all indices - for (let index = -1, numRows = batch.length; ++index < numRows;) { - if (predicate(index, batch)) { - // bind batches lazily - if predicate doesn't match anything - // in the batch we don't need to call bind on the batch - if (bind && !isBound) { - bind(batch); - isBound = true; - } - next(index, batch); - } - } - } - } - public scanReverse(next: NextFunc, bind?: BindFunc) { - const batches = this._chunks; - const numBatches = batches.length; - for (let batchIndex = numBatches; --batchIndex >= 0;) { - // load batches - 
const batch = batches[batchIndex]; - const predicate = this._predicate.bind(batch); - let isBound = false; - // yield all indices - for (let index = batch.length; --index >= 0;) { - if (predicate(index, batch)) { - // bind batches lazily - if predicate doesn't match anything - // in the batch we don't need to call bind on the batch - if (bind && !isBound) { - bind(batch); - isBound = true; - } - next(index, batch); - } - } - } - } - public count(): number { - // inlined version of this: - // let sum = 0; - // this.parent.scan((idx, columns) => { - // if (this.predicate(idx, columns)) ++sum; - // }); - // return sum; - let sum = 0; - const batches = this._chunks; - const numBatches = batches.length; - for (let batchIndex = -1; ++batchIndex < numBatches;) { - // load batches - const batch = batches[batchIndex]; - const predicate = this._predicate.bind(batch); - for (let index = -1, numRows = batch.length; ++index < numRows;) { - if (predicate(index, batch)) { ++sum; } - } - } - return sum; - } - - public [Symbol.iterator](): IterableIterator['TValue']> { - // inlined version of this: - // this.parent.scan((idx, columns) => { - // if (this.predicate(idx, columns)) next(idx, columns); - // }); - return new FilteredBatchIterator(this._chunks, this._predicate); - } - public filter(predicate: Predicate): FilteredDataFrame { - return new FilteredDataFrame( - this._chunks, - this._predicate.and(predicate) - ); - } - public countBy(name: Col | string) { - const batches = this._chunks, numBatches = batches.length; - const count_by = typeof name === 'string' ? 
new Col(name) : name as Col; - // Assume that all dictionary batches are deltas, which means that the - // last record batch has the most complete dictionary - count_by.bind(batches[numBatches - 1]); - const vector = count_by.vector as V; - if (!DataType.isDictionary(vector.type)) { - throw new Error('countBy currently only supports dictionary-encoded columns'); - } - - const countByteLength = Math.ceil(Math.log(vector.length) / Math.log(256)); - const CountsArrayType = countByteLength == 4 ? Uint32Array : - countByteLength >= 2 ? Uint16Array : Uint8Array; - - const counts = new CountsArrayType(vector.dictionary.length); - - for (let batchIndex = -1; ++batchIndex < numBatches;) { - // load batches - const batch = batches[batchIndex]; - const predicate = this._predicate.bind(batch); - // rebind the countBy Col - count_by.bind(batch); - const keys = (count_by.vector as V).indices; - // yield all indices - for (let index = -1, numRows = batch.length; ++index < numRows;) { - const key = keys.get(index); - if (key !== null && predicate(index, batch)) { counts[key]++; } - } - } - return new CountByResult(vector.dictionary, IntVector.from(counts)); - } -} diff --git a/js/src/compute/predicate.ts b/js/src/compute/predicate.ts deleted file mode 100644 index 52030763dc3db..0000000000000 --- a/js/src/compute/predicate.ts +++ /dev/null @@ -1,292 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { Vector } from '../vector'; -import { RecordBatch } from '../recordbatch'; -import { DictionaryVector } from '../vector/dictionary'; - -/** @ignore */ -export type ValueFunc = (idx: number, cols: RecordBatch) => T | null; -/** @ignore */ -export type PredicateFunc = (idx: number, cols: RecordBatch) => boolean; - -/** @ignore */ -export abstract class Value { - eq(other: Value | T): Predicate { - if (!(other instanceof Value)) { other = new Literal(other); } - return new Equals(this, other); - } - le(other: Value | T): Predicate { - if (!(other instanceof Value)) { other = new Literal(other); } - return new LTeq(this, other); - } - ge(other: Value | T): Predicate { - if (!(other instanceof Value)) { other = new Literal(other); } - return new GTeq(this, other); - } - lt(other: Value | T): Predicate { - return new Not(this.ge(other)); - } - gt(other: Value | T): Predicate { - return new Not(this.le(other)); - } - ne(other: Value | T): Predicate { - return new Not(this.eq(other)); - } -} - -/** @ignore */ -export class Literal extends Value { - constructor(public v: T) { super(); } -} - -/** @ignore */ -export class Col extends Value { - public vector!: Vector; - public colidx!: number; - - constructor(public name: string) { super(); } - bind(batch: RecordBatch): (idx: number, batch?: RecordBatch) => any { - if (!this.colidx) { - // Assume column index doesn't change between calls to bind - //this.colidx = cols.findIndex(v => v.name.indexOf(this.name) != -1); - this.colidx = -1; - const fields = batch.schema.fields; - for (let idx = -1; 
++idx < fields.length;) { - if (fields[idx].name === this.name) { - this.colidx = idx; - break; - } - } - if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); } - } - - const vec = this.vector = batch.getChildAt(this.colidx)!; - return (idx: number) => vec.get(idx); - } -} - -/** @ignore */ -export abstract class Predicate { - abstract bind(batch: RecordBatch): PredicateFunc; - and(...expr: Predicate[]): And { return new And(this, ...expr); } - or(...expr: Predicate[]): Or { return new Or(this, ...expr); } - not(): Predicate { return new Not(this); } -} - -/** @ignore */ -export abstract class ComparisonPredicate extends Predicate { - constructor(public readonly left: Value, public readonly right: Value) { - super(); - } - - bind(batch: RecordBatch) { - if (this.left instanceof Literal) { - if (this.right instanceof Literal) { - return this._bindLitLit(batch, this.left, this.right); - } else { // right is a Col - - return this._bindLitCol(batch, this.left, this.right as Col); - } - } else { // left is a Col - if (this.right instanceof Literal) { - return this._bindColLit(batch, this.left as Col, this.right); - } else { // right is a Col - return this._bindColCol(batch, this.left as Col, this.right as Col); - } - } - } - - protected abstract _bindLitLit(batch: RecordBatch, left: Literal, right: Literal): PredicateFunc; - protected abstract _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc; - protected abstract _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc; - protected abstract _bindLitCol(batch: RecordBatch, lit: Literal, col: Col): PredicateFunc; -} - -/** @ignore */ -export abstract class CombinationPredicate extends Predicate { - readonly children: Predicate[]; - constructor(...children: Predicate[]) { - super(); - this.children = children; - } -} -// add children to prototype so it doesn't get mangled in es2015/umd -( CombinationPredicate.prototype).children = Object.freeze([]); // freeze for 
safety - -/** @ignore */ -export class And extends CombinationPredicate { - constructor(...children: Predicate[]) { - // Flatten any Ands - children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => { - return accum.concat(p instanceof And ? p.children : p); - }, []); - super(...children); - } - bind(batch: RecordBatch) { - const bound = this.children.map((p) => p.bind(batch)); - return (idx: number, batch: RecordBatch) => bound.every((p) => p(idx, batch)); - } -} - -/** @ignore */ -export class Or extends CombinationPredicate { - constructor(...children: Predicate[]) { - // Flatten any Ors - children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => { - return accum.concat(p instanceof Or ? p.children : p); - }, []); - super(...children); - } - bind(batch: RecordBatch) { - const bound = this.children.map((p) => p.bind(batch)); - return (idx: number, batch: RecordBatch) => bound.some((p) => p(idx, batch)); - } -} - -/** @ignore */ -export class Equals extends ComparisonPredicate { - // Helpers used to cache dictionary reverse lookups between calls to bind - private lastDictionary: Vector|undefined; - private lastKey: number|undefined; - - protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { - const rtrn: boolean = left.v == right.v; - return () => rtrn; - } - - protected _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc { - const left_func = left.bind(batch); - const right_func = right.bind(batch); - return (idx: number, batch: RecordBatch) => left_func(idx, batch) == right_func(idx, batch); - } - - protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc { - const col_func = col.bind(batch); - if (col.vector instanceof DictionaryVector) { - let key: any; - const vector = col.vector as DictionaryVector; - if (vector.dictionary !== this.lastDictionary) { - key = vector.reverseLookup(lit.v); - this.lastDictionary = vector.dictionary; - this.lastKey 
= key; - } else { - key = this.lastKey; - } - - if (key === -1) { - // the value doesn't exist in the dictionary - always return - // false - // TODO: special-case of PredicateFunc that encapsulates this - // "always false" behavior. That way filtering operations don't - // have to bother checking - return () => false; - } else { - return (idx: number) => { - return vector.getKey(idx) === key; - }; - } - } else { - return (idx: number, cols: RecordBatch) => col_func(idx, cols) == lit.v; - } - } - - protected _bindLitCol(batch: RecordBatch, lit: Literal, col: Col) { - // Equals is commutative - return this._bindColLit(batch, col, lit); - } -} - -/** @ignore */ -export class LTeq extends ComparisonPredicate { - protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { - const rtrn: boolean = left.v <= right.v; - return () => rtrn; - } - - protected _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc { - const left_func = left.bind(batch); - const right_func = right.bind(batch); - return (idx: number, cols: RecordBatch) => left_func(idx, cols) <= right_func(idx, cols); - } - - protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc { - const col_func = col.bind(batch); - return (idx: number, cols: RecordBatch) => col_func(idx, cols) <= lit.v; - } - - protected _bindLitCol(batch: RecordBatch, lit: Literal, col: Col) { - const col_func = col.bind(batch); - return (idx: number, cols: RecordBatch) => lit.v <= col_func(idx, cols); - } -} - -/** @ignore */ -export class GTeq extends ComparisonPredicate { - protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { - const rtrn: boolean = left.v >= right.v; - return () => rtrn; - } - - protected _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc { - const left_func = left.bind(batch); - const right_func = right.bind(batch); - return (idx: number, cols: RecordBatch) => left_func(idx, cols) >= 
right_func(idx, cols); - } - - protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc { - const col_func = col.bind(batch); - return (idx: number, cols: RecordBatch) => col_func(idx, cols) >= lit.v; - } - - protected _bindLitCol(batch: RecordBatch, lit: Literal, col: Col) { - const col_func = col.bind(batch); - return (idx: number, cols: RecordBatch) => lit.v >= col_func(idx, cols); - } -} - -/** @ignore */ -export class Not extends Predicate { - constructor(public readonly child: Predicate) { - super(); - } - - bind(batch: RecordBatch) { - const func = this.child.bind(batch); - return (idx: number, batch: RecordBatch) => !func(idx, batch); - } -} - -/** @ignore */ -export class CustomPredicate extends Predicate { - constructor(private next: PredicateFunc, private bind_: (batch: RecordBatch) => void) { - super(); - } - - bind(batch: RecordBatch) { - this.bind_(batch); - return this.next; - } -} - -export function lit(v: any): Value { return new Literal(v); } -export function col(n: string): Col { return new Col(n); } -export function and(...p: Predicate[]): And { return new And(...p); } -export function or(...p: Predicate[]): Or { return new Or(...p); } -export function custom(next: PredicateFunc, bind: (batch: RecordBatch) => void) { - return new CustomPredicate(next, bind); -} diff --git a/js/src/data.ts b/js/src/data.ts index 2a549088c65cb..b6f53b6d0e131 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -15,19 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-import { Vector } from './vector'; -import { truncateBitmap } from './util/bit'; -import { popcnt_bit_range } from './util/bit'; -import { BufferType, UnionMode, Type } from './enum'; -import { DataType, SparseUnion, DenseUnion, strideForType } from './type'; -import { toArrayBufferView, toUint8Array, toInt32Array } from './util/buffer'; -import { - Dictionary, - Null, Int, Float, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from './type'; +import { Vector } from './vector.js'; +import { BufferType, Type } from './enum.js'; +import { DataType, strideForType } from './type.js'; +import { popcnt_bit_range, truncateBitmap } from './util/bit.js'; // When slicing, we do not know the null count of the sliced range without // doing some computation. To avoid doing this eagerly, we set the null count @@ -37,16 +28,16 @@ import { /** @ignore */ export const kUnknownNullCount = -1; /** @ignore */ export type NullBuffer = Uint8Array | null | undefined; -/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike | Iterable | undefined; -/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike | Iterable | undefined; +/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike | Iterable | undefined; +/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike | Iterable | undefined; /** @ignore */ export type DataBuffer = T['TArray'] | ArrayLike | Iterable | undefined; /** @ignore */ export interface Buffers { - [BufferType.OFFSET]: Int32Array; - [BufferType.DATA]: T['TArray']; + [BufferType.OFFSET]: Int32Array; + [BufferType.DATA]: T['TArray']; [BufferType.VALIDITY]: Uint8Array; - [BufferType.TYPE]: T['TArray']; + [BufferType.TYPE]: T['TArray']; } /** @ignore */ @@ -56,24 +47,27 @@ export interface Data { readonly TValue: T['TValue']; } -/** @ignore */ +/** + * Data structure underlying {@link Vector}s. Use the convenience method {@link makeData}. 
+ */ export class Data { - public readonly type: T; - public readonly length: number; - public readonly offset: number; - public readonly stride: number; - public readonly childData: Data[]; + declare public readonly type: T; + declare public readonly length: number; + declare public readonly offset: number; + declare public readonly stride: number; + declare public readonly nullable: boolean; + declare public readonly children: Data[]; /** * The dictionary for this Vector, if any. Only used for Dictionary type. */ - public dictionary?: Vector; + declare public dictionary?: Vector; - public readonly values!: Buffers[BufferType.DATA]; - public readonly typeIds!: Buffers[BufferType.TYPE]; - public readonly nullBitmap!: Buffers[BufferType.VALIDITY]; - public readonly valueOffsets!: Buffers[BufferType.OFFSET]; + declare public readonly values: Buffers[BufferType.DATA]; + declare public readonly typeIds: Buffers[BufferType.TYPE]; + declare public readonly nullBitmap: Buffers[BufferType.VALIDITY]; + declare public readonly valueOffsets: Buffers[BufferType.OFFSET]; public get typeId(): T['TType'] { return this.type.typeId; } public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; } @@ -84,10 +78,10 @@ export class Data { let byteLength = 0; const { valueOffsets, values, nullBitmap, typeIds } = this; valueOffsets && (byteLength += valueOffsets.byteLength); - values && (byteLength += values.byteLength); - nullBitmap && (byteLength += nullBitmap.byteLength); - typeIds && (byteLength += typeIds.byteLength); - return this.childData.reduce((byteLength, child) => byteLength + child.byteLength, byteLength); + values && (byteLength += values.byteLength); + nullBitmap && (byteLength += nullBitmap.byteLength); + typeIds && (byteLength += typeIds.byteLength); + return this.children.reduce((byteLength, child) => byteLength + child.byteLength, byteLength); } protected _nullCount: number | kUnknownNullCount; @@ -101,13 +95,13 @@ export class Data { return nullCount; } - 
constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial> | Data, childData?: (Data | Vector)[], dictionary?: Vector) { + constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial> | Data, children: Data[] = [], dictionary?: Vector) { this.type = type; + this.children = children; this.dictionary = dictionary; this.offset = Math.floor(Math.max(offset || 0, 0)); this.length = Math.floor(Math.max(length || 0, 0)); this._nullCount = Math.floor(Math.max(nullCount || 0, -1)); - this.childData = (childData || []).map((x) => x instanceof Data ? x : x.data) as Data[]; let buffer: Buffers[keyof Buffers]; if (buffers instanceof Data) { this.stride = buffers.stride; @@ -124,14 +118,43 @@ export class Data { (buffer = (buffers as Buffers)[3]) && (this.typeIds = buffer); } } + this.nullable = this._nullCount !== 0 && this.nullBitmap && this.nullBitmap.byteLength > 0; } - public clone(type: R, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers = this, childData: (Data | Vector)[] = this.childData) { - return new Data(type, offset, length, nullCount, buffers, childData, this.dictionary); + public getValid(index: number) { + if (this.nullable && this.nullCount > 0) { + const pos = this.offset + index; + const val = this.nullBitmap[pos >> 3]; + return (val & (1 << (pos % 8))) !== 0; + } + return true; + } + + public setValid(index: number, value: boolean) { + // Don't interact w/ nullBitmap if not nullable + if (!this.nullable) { return value; } + // If no null bitmap, initialize one on the fly + if (!this.nullBitmap || this.nullBitmap.byteLength <= (index >> 3)) { + const { nullBitmap } = this._changeLengthAndBackfillNullBitmap(this.length); + Object.assign(this, { nullBitmap, _nullCount: 0 }); + } + const { nullBitmap, offset } = this; + const pos = (offset + index) >> 3; + const bit = (offset + index) % 8; + const val = (nullBitmap[pos] >> bit) & 1; + // If `val` is truthy 
and the current bit is 0, flip it to 1 and increment `_nullCount`. + // If `val` is falsey and the current bit is 1, flip it to 0 and decrement `_nullCount`. + value ? val === 0 && ((nullBitmap[pos] |= (1 << bit)), (this._nullCount = this.nullCount + 1)) + : val === 1 && ((nullBitmap[pos] &= ~(1 << bit)), (this._nullCount = this.nullCount - 1)); + return value; + } + + public clone(type: R = this.type as any, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers = this, children: Data[] = this.children) { + return new Data(type, offset, length, nullCount, buffers, children, this.dictionary); } public slice(offset: number, length: number): Data { - const { stride, typeId, childData } = this; + const { stride, typeId, children } = this; // +true === 1, +false === 0, so this means // we keep nullCount at 0 if it's already 0, // otherwise set to the invalidated flag -1 @@ -140,7 +163,7 @@ export class Data { const buffers = this._sliceBuffers(offset, length, stride, typeId); return this.clone(this.type, this.offset + offset, length, nullCount, buffers, // Don't slice children if we have value offsets (the variable-width types) - (!childData.length || this.valueOffsets) ? childData : this._sliceChildren(childData, childStride * offset, childStride * length)); + (children.length === 0 || this.valueOffsets) ? children : this._sliceChildren(children, childStride * offset, childStride * length)); } public _changeLengthAndBackfillNullBitmap(newLength: number): Data { @@ -168,128 +191,249 @@ export class Data { (arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length)); // If offsets exist, only slice the offsets buffer (arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1)) || - // Otherwise if no offsets, slice the data buffer. 
Don't slice the data vector for Booleans, since the offset goes by bits not bytes - (arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? arr : arr.subarray(stride * offset, stride * (offset + length))); + // Otherwise if no offsets, slice the data buffer. Don't slice the data vector for Booleans, since the offset goes by bits not bytes + (arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? arr : arr.subarray(stride * offset, stride * (offset + length))); return buffers; } - protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] { - return childData.map((child) => child.slice(offset, length)); + protected _sliceChildren(children: Data[], offset: number, length: number): Data[] { + return children.map((child) => child.slice(offset, length)); } +} - // - // Convenience methods for creating Data instances for each of the Arrow Vector types - // - /** @nocollapse */ - public static new(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial> | Data, childData?: (Data | Vector)[], dictionary?: Vector): Data { - if (buffers instanceof Data) { buffers = buffers.buffers; } else if (!buffers) { buffers = [] as Partial>; } - switch (type.typeId) { - case Type.Null: return Data.Null( type as Null, offset, length) as Data; - case Type.Int: return Data.Int( type as Int, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Dictionary: return Data.Dictionary( type as Dictionary, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || [], dictionary!) 
as Data; - case Type.Float: return Data.Float( type as Float, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Bool: return Data.Bool( type as Bool, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Decimal: return Data.Decimal( type as Decimal, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Date: return Data.Date( type as Date_, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Time: return Data.Time( type as Time, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Timestamp: return Data.Timestamp( type as Timestamp, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Interval: return Data.Interval( type as Interval, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.FixedSizeBinary: return Data.FixedSizeBinary( type as FixedSizeBinary, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data; - case Type.Binary: return Data.Binary( type as Binary, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], buffers[BufferType.DATA] || []) as Data; - case Type.Utf8: return Data.Utf8( type as Utf8, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], buffers[BufferType.DATA] || []) as Data; - case Type.List: return Data.List( type as List, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], (childData || [])[0]) as Data; - case Type.FixedSizeList: return Data.FixedSizeList( type as FixedSizeList, offset, length, nullCount || 0, 
buffers[BufferType.VALIDITY], (childData || [])[0]) as Data; - case Type.Struct: return Data.Struct( type as Struct, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], childData || []) as Data; - case Type.Map: return Data.Map( type as Map_, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], (childData || [])[0]) as Data; - case Type.Union: return Data.Union( type as Union, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.TYPE] || [], buffers[BufferType.OFFSET] || childData, childData) as Data; - } - throw new Error(`Unrecognized typeId ${type.typeId}`); - } +(Data.prototype as any).children = Object.freeze([]); - /** @nocollapse */ - public static Null(type: T, offset: number, length: number) { +import { + Dictionary, + Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Float, + Int, + Date_, + Interval, + Time, + Timestamp, + Union, DenseUnion, SparseUnion, +} from './type.js'; + +import { Visitor } from './visitor.js'; +import { toArrayBufferView, toInt32Array, toUint8Array } from './util/buffer.js'; + +class MakeDataVisitor extends Visitor { + public visit(props: any): Data { + return this.getVisitFn(props['type']).call(this, props); + } + public visitNull(props: NullDataProps) { + const { + ['type']: type, + ['offset']: offset = 0, + ['length']: length = 0, + } = props; return new Data(type, offset, length, 0); } - /** @nocollapse */ - public static Int(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitBool(props: BoolDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = 
data.length >> 3, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Dictionary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer, dictionary: Vector) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.indices.ArrayType, data), toUint8Array(nullBitmap)], [], dictionary); + public visitInt(props: IntDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Float(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitFloat(props: FloatDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length, ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Bool(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitUtf8(props: Utf8DataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const data = toUint8Array(props['data']); + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toInt32Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); } - /** @nocollapse */ - public static Decimal(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitBinary(props: BinaryDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const data = toUint8Array(props['data']); + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toInt32Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0 } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); } - /** @nocollapse */ - public static Date(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitFixedSizeBinary(props: FixedSizeBinaryDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Time(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitDate(props: Date_DataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Timestamp(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitTimestamp(props: TimestampDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Interval(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitTime(props: TimeDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static FixedSizeBinary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]); + public visitDecimal(props: DecimalDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Binary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), toUint8Array(data), toUint8Array(nullBitmap)]); + public visitList(props: ListDataProps) { + const { ['type']: type, ['offset']: offset = 0, ['child']: child } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toInt32Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0 } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap], [child]); } - /** @nocollapse */ - public static Utf8(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer) { - return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), toUint8Array(data), toUint8Array(nullBitmap)]); + public visitStruct(props: StructDataProps) { + const { ['type']: type, ['offset']: offset = 0, ['children']: children = [] } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const { + length = children.reduce((len, { length }) => Math.max(len, length), 0), + nullCount = props['nullBitmap'] ? -1 : 0 + } = props; + return new Data(type, offset, length, nullCount, [undefined, undefined, nullBitmap], children); } - /** @nocollapse */ - public static List(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data | Vector) { - return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], child ? [child] : []); + public visitUnion(props: UnionDataProps) { + const { ['type']: type, ['offset']: offset = 0, ['children']: children = [] } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const typeIds = toArrayBufferView(type.ArrayType, props['typeIds']); + const { ['length']: length = typeIds.length, ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0, } = props; + if (DataType.isSparseUnion(type)) { + return new Data(type, offset, length, nullCount, [undefined, undefined, nullBitmap, typeIds], children); + } + const valueOffsets = toInt32Array(props['valueOffsets']); + return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap, typeIds], children); } - /** @nocollapse */ - public static FixedSizeList(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data | Vector) { - return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], child ? [child] : []); + public visitDictionary(props: DictionaryDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.indices.ArrayType, props['data']); + const { ['dictionary']: dictionary = new Vector([new MakeDataVisitor().visit({ type: type.dictionary })]) } = props; + const { ['length']: length = data.length, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap], [], dictionary); } - /** @nocollapse */ - public static Struct(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) { - return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], children); + public visitInterval(props: IntervalDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const data = toArrayBufferView(type.ArrayType, props['data']); + const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? 
-1 : 0, } = props; + return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]); } - /** @nocollapse */ - public static Map(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: (Data | Vector)) { - return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], child ? [child] : []); + public visitFixedSizeList(props: FixedSizeListDataProps) { + const { ['type']: type, ['offset']: offset = 0, ['child']: child = new MakeDataVisitor().visit({ type: type.valueType }) } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const { ['length']: length = child.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; + return new Data(type, offset, length, nullCount, [undefined, undefined, nullBitmap], [child]); } - public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[], _?: any): Data; - public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data; - public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]): Data; - /** @nocollapse */ - public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) { - const buffers = [ - undefined, undefined, - toUint8Array(nullBitmap), - toArrayBufferView(type.ArrayType, typeIds) - ] as Partial>; - if (type.mode === UnionMode.Sparse) { - return new Data(type, offset, length, nullCount, 
buffers, valueOffsetsOrChildren as (Data | Vector)[]); - } - buffers[BufferType.OFFSET] = toInt32Array( valueOffsetsOrChildren); - return new Data(type, offset, length, nullCount, buffers, children); + public visitMap(props: Map_DataProps) { + const { ['type']: type, ['offset']: offset = 0, ['child']: child = new MakeDataVisitor().visit({ type: type.childType }) } = props; + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toInt32Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap], [child]); } } -(Data.prototype as any).childData = Object.freeze([]); +/** @ignore */ +interface DataProps_ { + type: T; + offset?: number; + length?: number; + nullCount?: number; + nullBitmap?: NullBuffer; +} + +interface NullDataProps { type: T; offset?: number; length?: number } +interface IntDataProps extends DataProps_ { data?: DataBuffer } +interface DictionaryDataProps extends DataProps_ { data?: DataBuffer; dictionary?: Vector } +interface FloatDataProps extends DataProps_ { data?: DataBuffer } +interface BoolDataProps extends DataProps_ { data?: DataBuffer } +interface DecimalDataProps extends DataProps_ { data?: DataBuffer } +interface Date_DataProps extends DataProps_ { data?: DataBuffer } +interface TimeDataProps extends DataProps_ { data?: DataBuffer } +interface TimestampDataProps extends DataProps_ { data?: DataBuffer } +interface IntervalDataProps extends DataProps_ { data?: DataBuffer } +interface FixedSizeBinaryDataProps extends DataProps_ { data?: DataBuffer } +interface BinaryDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } +interface Utf8DataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } +interface ListDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; child: Data } 
+interface FixedSizeListDataProps extends DataProps_ { child: Data } +interface StructDataProps extends DataProps_ { children: Data[] } +interface Map_DataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; child: Data } +interface SparseUnionDataProps extends DataProps_ { typeIds: TypeIdsBuffer; children: Data[] } +interface DenseUnionDataProps extends DataProps_ { typeIds: TypeIdsBuffer; children: Data[]; valueOffsets: ValueOffsetsBuffer } +interface UnionDataProps extends DataProps_ { typeIds: TypeIdsBuffer; children: Data[]; valueOffsets?: ValueOffsetsBuffer } + +export type DataProps = ( + T extends Null /* */ ? NullDataProps : + T extends Int /* */ ? IntDataProps : + T extends Dictionary /* */ ? DictionaryDataProps : + T extends Float /* */ ? FloatDataProps : + T extends Bool /* */ ? BoolDataProps : + T extends Decimal /* */ ? DecimalDataProps : + T extends Date_ /* */ ? Date_DataProps : + T extends Time /* */ ? TimeDataProps : + T extends Timestamp /* */ ? TimestampDataProps : + T extends Interval /* */ ? IntervalDataProps : + T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps : + T extends Binary /* */ ? BinaryDataProps : + T extends Utf8 /* */ ? Utf8DataProps : + T extends List /* */ ? ListDataProps : + T extends FixedSizeList /* */ ? FixedSizeListDataProps : + T extends Struct /* */ ? StructDataProps : + T extends Map_ /* */ ? Map_DataProps : + T extends SparseUnion /* */ ? SparseUnionDataProps : + T extends DenseUnion /* */ ? DenseUnionDataProps : + T extends Union /* */ ? 
UnionDataProps : + /* */ DataProps_ +); + +export function makeData(props: NullDataProps): Data; +export function makeData(props: IntDataProps): Data; +export function makeData(props: DictionaryDataProps): Data; +export function makeData(props: FloatDataProps): Data; +export function makeData(props: BoolDataProps): Data; +export function makeData(props: DecimalDataProps): Data; +export function makeData(props: Date_DataProps): Data; +export function makeData(props: TimeDataProps): Data; +export function makeData(props: TimestampDataProps): Data; +export function makeData(props: IntervalDataProps): Data; +export function makeData(props: FixedSizeBinaryDataProps): Data; +export function makeData(props: BinaryDataProps): Data; +export function makeData(props: Utf8DataProps): Data; +export function makeData(props: ListDataProps): Data; +export function makeData(props: FixedSizeListDataProps): Data; +export function makeData(props: StructDataProps): Data; +export function makeData(props: Map_DataProps): Data; +export function makeData(props: SparseUnionDataProps): Data; +export function makeData(props: DenseUnionDataProps): Data; +export function makeData(props: UnionDataProps): Data; +export function makeData(props: DataProps_): Data; +export function makeData(props: any) { + return new MakeDataVisitor().visit(props); +} diff --git a/js/src/enum.ts b/js/src/enum.ts index 517aa27e8824d..f5856bc06afbe 100644 --- a/js/src/enum.ts +++ b/js/src/enum.ts @@ -15,16 +15,119 @@ // specific language governing permissions and limitations // under the License. -export { - DateUnit, - TimeUnit, - Precision, - UnionMode, - IntervalUnit, - MetadataVersion, -} from './fb/Schema'; +//// +// +// A few enums copied from `fb/Schema.ts` and `fb/Message.ts` because Webpack +// v4 doesn't seem to be able to tree-shake the rest of those exports. +// +// We will have to keep these enums in sync when we re-generate the flatbuffers +// code from the shchemas. 
See js/DEVELOP.md for info on how to run flatbuffers +// code generation. +// +//// -export { MessageHeader } from './fb/Message'; +/** + * Logical types, vector layouts, and schemas + * + * @enum {number} + */ +export enum MetadataVersion { + /** + * 0.1.0 (October 2016). + */ + V1 = 0, + + /** + * 0.2.0 (February 2017). Non-backwards compatible with V1. + */ + V2 = 1, + + /** + * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + */ + V3 = 2, + + /** + * >= 0.8.0 (December 2017). Non-backwards compatible with V3. + */ + V4 = 3, + + /** + * >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + * metadata and IPC messages). Implementations are recommended to provide a + * V4 compatibility mode with V5 format changes disabled. + * + * Incompatible changes between V4 and V5: + * - Union buffer layout has changed. In V5, Unions don't have a validity + * bitmap buffer. + */ + V5 = 4 +} + +/** + * @enum {number} + */ +export enum UnionMode { + Sparse = 0, + Dense = 1 +} + +/** + * @enum {number} + */ +export enum Precision { + HALF = 0, + SINGLE = 1, + DOUBLE = 2 +} + +/** + * @enum {number} + */ +export enum DateUnit { + DAY = 0, + MILLISECOND = 1 +} + +/** + * @enum {number} + */ +export enum TimeUnit { + SECOND = 0, + MILLISECOND = 1, + MICROSECOND = 2, + NANOSECOND = 3 +} + +/** + * @enum {number} + */ +export enum IntervalUnit { + YEAR_MONTH = 0, + DAY_TIME = 1, + MONTH_DAY_NANO = 2 +} + +/** + * ---------------------------------------------------------------------- + * The root Message type + * This union enables us to easily send different message types without + * redundant storage, and in the future we can easily add new message types. + * + * Arrow implementations do not need to implement all of the message types, + * which may include experimental metadata types. 
For maximum compatibility, + * it is best to send data using RecordBatch + * + * @enum {number} + */ +export enum MessageHeader { + NONE = 0, + Schema = 1, + DictionaryBatch = 2, + RecordBatch = 3, + Tensor = 4, + SparseTensor = 5 +} /** * Main data type enumeration. @@ -53,70 +156,51 @@ export { MessageHeader } from './fb/Message'; * to the flatbuffers metadata of serialized Arrow IPC payloads. */ export enum Type { - /** The default placeholder type */ - NONE = 0, - /** A NULL type having no physical storage */ - Null = 1, - /** Signed or unsigned 8, 16, 32, or 64-bit little-endian integer */ - Int = 2, - /** 2, 4, or 8-byte floating point value */ - Float = 3, - /** Variable-length bytes (no guarantee of UTF8-ness) */ - Binary = 4, - /** UTF8 variable-length string as List */ - Utf8 = 5, - /** Boolean as 1 bit, LSB bit-packed ordering */ - Bool = 6, - /** Precision-and-scale-based decimal type. Storage type depends on the parameters. */ - Decimal = 7, - /** int32_t days or int64_t milliseconds since the UNIX epoch */ - Date = 8, - /** Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight since midnight */ - Time = 9, - /** Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) */ - Timestamp = 10, - /** YEAR_MONTH or DAY_TIME interval in SQL style */ - Interval = 11, - /** A list of some logical data type */ - List = 12, - /** Struct of logical types */ - Struct = 13, - /** Union of logical types */ - Union = 14, - /** Fixed-size binary. Each value occupies the same number of bytes */ - FixedSizeBinary = 15, - /** Fixed-size list. 
Each value occupies the same number of bytes */ - FixedSizeList = 16, - /** Map of named logical types */ - Map = 17, - - /** Dictionary aka Category type */ - Dictionary = -1, - Int8 = -2, - Int16 = -3, - Int32 = -4, - Int64 = -5, - Uint8 = -6, - Uint16 = -7, - Uint32 = -8, - Uint64 = -9, - Float16 = -10, - Float32 = -11, - Float64 = -12, - DateDay = -13, - DateMillisecond = -14, - TimestampSecond = -15, - TimestampMillisecond = -16, - TimestampMicrosecond = -17, - TimestampNanosecond = -18, - TimeSecond = -19, - TimeMillisecond = -20, - TimeMicrosecond = -21, - TimeNanosecond = -22, - DenseUnion = -23, - SparseUnion = -24, - IntervalDayTime = -25, - IntervalYearMonth = -26, + NONE = 0, /** The default placeholder type */ + Null = 1, /** A NULL type having no physical storage */ + Int = 2, /** Signed or unsigned 8, 16, 32, or 64-bit little-endian integer */ + Float = 3, /** 2, 4, or 8-byte floating point value */ + Binary = 4, /** Variable-length bytes (no guarantee of UTF8-ness) */ + Utf8 = 5, /** UTF8 variable-length string as List */ + Bool = 6, /** Boolean as 1 bit, LSB bit-packed ordering */ + Decimal = 7, /** Precision-and-scale-based decimal type. Storage type depends on the parameters. */ + Date = 8, /** int32_t days or int64_t milliseconds since the UNIX epoch */ + Time = 9, /** Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight since midnight */ + Timestamp = 10, /** Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) */ + Interval = 11, /** YEAR_MONTH or DAY_TIME interval in SQL style */ + List = 12, /** A list of some logical data type */ + Struct = 13, /** Struct of logical types */ + Union = 14, /** Union of logical types */ + FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */ + FixedSizeList = 16, /** Fixed-size list. 
Each value occupies the same number of bytes */ + Map = 17, /** Map of named logical types */ + + Dictionary = -1, /** Dictionary aka Category type */ + Int8 = -2, + Int16 = -3, + Int32 = -4, + Int64 = -5, + Uint8 = -6, + Uint16 = -7, + Uint32 = -8, + Uint64 = -9, + Float16 = -10, + Float32 = -11, + Float64 = -12, + DateDay = -13, + DateMillisecond = -14, + TimestampSecond = -15, + TimestampMillisecond = -16, + TimestampMicrosecond = -17, + TimestampNanosecond = -18, + TimeSecond = -19, + TimeMillisecond = -20, + TimeMicrosecond = -21, + TimeNanosecond = -22, + DenseUnion = -23, + SparseUnion = -24, + IntervalDayTime = -25, + IntervalYearMonth = -26, } export enum BufferType { @@ -139,4 +223,4 @@ export enum BufferType { * Type vector used in Union type */ TYPE = 3 - } +} diff --git a/js/src/factories.ts b/js/src/factories.ts new file mode 100644 index 0000000000000..6032339e8f9d2 --- /dev/null +++ b/js/src/factories.ts @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { Field } from './schema.js'; +import * as dtypes from './type.js'; +import { Data, DataProps } from './data.js'; +import { BuilderType } from './interfaces.js'; +import { Vector, makeVector } from './vector.js'; +import { Builder, BuilderOptions } from './builder.js'; +import { instance as getBuilderConstructor } from './visitor/builderctor.js'; +import { ArrayDataType, BigIntArray, JavaScriptArrayDataType, TypedArray, TypedArrayDataType } from './interfaces.js'; + +export function makeBuilder(options: BuilderOptions): BuilderType { + + const type = options.type; + const builder = new (getBuilderConstructor.getVisitFn(type)())(options) as Builder; + + if (type.children && type.children.length > 0) { + + const children = options['children'] || [] as BuilderOptions[]; + const defaultOptions = { 'nullValues': options['nullValues'] }; + const getChildOptions = Array.isArray(children) + ? ((_: Field, i: number) => children[i] || defaultOptions) + : (({ name }: Field) => children[name] || defaultOptions); + + for (const [index, field] of type.children.entries()) { + const { type } = field; + const opts = getChildOptions(field, index); + builder.children.push(makeBuilder({ ...opts, type })); + } + } + + return builder as BuilderType; +} + +/** + * Creates a Vector from a JavaScript array via a {@link Builder}. + * Use {@link makeVector} if you only want to create a vector from a typed array. 
+ * + * @example + * ```ts + * const vf64 = vectorFromArray([1, 2, 3]); + * const vi8 = vectorFromArray([1, 2, 3], new Int8); + * const vdict = vectorFromArray(['foo', 'bar']); + * ``` + */ +export function vectorFromArray(values: readonly (null | undefined)[], type?: dtypes.Null): Vector; +export function vectorFromArray(values: readonly (null | undefined | boolean)[], type?: dtypes.Bool): Vector; +export function vectorFromArray = dtypes.Dictionary>(values: readonly (null | undefined | string)[], type?: T): Vector; +export function vectorFromArray(values: readonly (null | undefined | Date)[], type?: T): Vector; +export function vectorFromArray(values: readonly (null | undefined | number)[], type: T): Vector; +export function vectorFromArray(values: readonly (null | undefined | bigint)[], type?: T): Vector; +export function vectorFromArray(values: readonly (null | undefined | number)[], type?: T): Vector; +export function vectorFromArray(values: readonly (unknown)[], type: T): Vector; +export function vectorFromArray(values: T): Vector>; +/** Creates a Vector from a typed array via {@link makeVector}. */ +export function vectorFromArray(data: T): Vector>; + +export function vectorFromArray(data: Data): Vector; +export function vectorFromArray(data: Vector): Vector; +export function vectorFromArray(data: DataProps): Vector; +export function vectorFromArray(data: T): Vector>; + +export function vectorFromArray(init: any, type?: dtypes.DataType) { + if (init instanceof Data || init instanceof Vector || init.type instanceof dtypes.DataType || ArrayBuffer.isView(init)) { + return makeVector(init as any); + } + const options: IterableBuilderOptions = { type: type ?? inferType(init), nullValues: [null] }; + const chunks = [...builderThroughIterable(options)(init)]; + const vector = chunks.length === 1 ? 
chunks[0] : chunks.reduce((a, b) => a.concat(b)); + if (dtypes.DataType.isDictionary(vector.type)) { + return vector.memoize(); + } + return vector; +} + +/** @ignore */ +function inferType(value: readonly unknown[]): dtypes.DataType { + if (value.length === 0) { return new dtypes.Null; } + let nullsCount = 0; + // @ts-ignore + let arraysCount = 0; + // @ts-ignore + let objectsCount = 0; + let numbersCount = 0; + let stringsCount = 0; + let bigintsCount = 0; + let booleansCount = 0; + let datesCount = 0; + + for (const val of value) { + if (val == null) { ++nullsCount; continue; } + switch (typeof val) { + case 'bigint': ++bigintsCount; continue; + case 'boolean': ++booleansCount; continue; + case 'number': ++numbersCount; continue; + case 'string': ++stringsCount; continue; + case 'object': + if (Array.isArray(val)) { + ++arraysCount; + } else if (Object.prototype.toString.call(val) === '[object Date]') { + ++datesCount; + } else { + ++objectsCount; + } + continue; + } + throw new TypeError('Unable to infer Vector type from input values, explicit type declaration expected'); + } + + if (numbersCount + nullsCount === value.length) { + return new dtypes.Float64; + } else if (stringsCount + nullsCount === value.length) { + return new dtypes.Dictionary(new dtypes.Utf8, new dtypes.Int32); + } else if (bigintsCount + nullsCount === value.length) { + return new dtypes.Int64; + } else if (booleansCount + nullsCount === value.length) { + return new dtypes.Bool; + } else if (datesCount + nullsCount === value.length) { + return new dtypes.DateMillisecond; + } + // TODO: add more types to infererence + + throw new TypeError('Unable to infer Vector type from input values, explicit type declaration expected'); +} + +/** + * A set of options to create an Iterable or AsyncIterable `Builder` transform function. 
+ * @see {@link builderThroughIterable} + * @see {@link builderThroughAsyncIterable} + */ + +export interface IterableBuilderOptions extends BuilderOptions { + highWaterMark?: number; + queueingStrategy?: 'bytes' | 'count'; + dictionaryHashFunction?: (value: any) => string | number; + valueToChildTypeId?: (builder: Builder, value: any, offset: number) => number; +} + +/** @ignore */ +type ThroughIterable = (source: Iterable) => IterableIterator>; + +/** + * Transform a synchronous `Iterable` of arbitrary JavaScript values into a + * sequence of Arrow Vector following the chunking semantics defined in + * the supplied `options` argument. + * + * This function returns a function that accepts an `Iterable` of values to + * transform. When called, this function returns an Iterator of `Vector`. + * + * The resulting `Iterator>` yields Vectors based on the + * `queueingStrategy` and `highWaterMark` specified in the `options` argument. + * + * * If `queueingStrategy` is `"count"` (or omitted), The `Iterator>` + * will flush the underlying `Builder` (and yield a new `Vector`) once the + * Builder's `length` reaches or exceeds the supplied `highWaterMark`. + * * If `queueingStrategy` is `"bytes"`, the `Iterator>` will flush + * the underlying `Builder` (and yield a new `Vector`) once its `byteLength` + * reaches or exceeds the supplied `highWaterMark`. + * + * @param {IterableBuilderOptions} options An object of properties which determine the `Builder` to create and the chunking semantics to use. + * @returns A function which accepts a JavaScript `Iterable` of values to + * write, and returns an `Iterator` that yields Vectors according + * to the chunking semantics defined in the `options` argument. + * @nocollapse + */ +export function builderThroughIterable(options: IterableBuilderOptions) { + const { ['queueingStrategy']: queueingStrategy = 'count' } = options; + const { ['highWaterMark']: highWaterMark = queueingStrategy !== 'bytes' ? 
Number.POSITIVE_INFINITY : 2 ** 14 } = options; + const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength'; + return function* (source: Iterable) { + let numChunks = 0; + const builder = makeBuilder(options); + for (const value of source) { + if (builder.append(value)[sizeProperty] >= highWaterMark) { + ++numChunks && (yield builder.toVector()); + } + } + if (builder.finish().length > 0 || numChunks === 0) { + yield builder.toVector(); + } + } as ThroughIterable; +} + +/** @ignore */ +type ThroughAsyncIterable = (source: Iterable | AsyncIterable) => AsyncIterableIterator>; + +/** + * Transform an `AsyncIterable` of arbitrary JavaScript values into a + * sequence of Arrow Vector following the chunking semantics defined in + * the supplied `options` argument. + * + * This function returns a function that accepts an `AsyncIterable` of values to + * transform. When called, this function returns an AsyncIterator of `Vector`. + * + * The resulting `AsyncIterator>` yields Vectors based on the + * `queueingStrategy` and `highWaterMark` specified in the `options` argument. + * + * * If `queueingStrategy` is `"count"` (or omitted), The `AsyncIterator>` + * will flush the underlying `Builder` (and yield a new `Vector`) once the + * Builder's `length` reaches or exceeds the supplied `highWaterMark`. + * * If `queueingStrategy` is `"bytes"`, the `AsyncIterator>` will flush + * the underlying `Builder` (and yield a new `Vector`) once its `byteLength` + * reaches or exceeds the supplied `highWaterMark`. + * + * @param {IterableBuilderOptions} options An object of properties which determine the `Builder` to create and the chunking semantics to use. + * @returns A function which accepts a JavaScript `AsyncIterable` of values + * to write, and returns an `AsyncIterator` that yields Vectors + * according to the chunking semantics defined in the `options` + * argument. 
+ * @nocollapse + */ +export function builderThroughAsyncIterable(options: IterableBuilderOptions) { + const { ['queueingStrategy']: queueingStrategy = 'count' } = options; + const { ['highWaterMark']: highWaterMark = queueingStrategy !== 'bytes' ? Number.POSITIVE_INFINITY : 2 ** 14 } = options; + const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength'; + return async function* (source: Iterable | AsyncIterable) { + let numChunks = 0; + const builder = makeBuilder(options); + for await (const value of source) { + if (builder.append(value)[sizeProperty] >= highWaterMark) { + ++numChunks && (yield builder.toVector()); + } + } + if (builder.finish().length > 0 || numChunks === 0) { + yield builder.toVector(); + } + } as ThroughAsyncIterable; +} diff --git a/js/src/fb/.eslintrc.js b/js/src/fb/.eslintrc.cjs similarity index 85% rename from js/src/fb/.eslintrc.js rename to js/src/fb/.eslintrc.cjs index d448540e4afe0..eb0fc1c7cdccb 100644 --- a/js/src/fb/.eslintrc.js +++ b/js/src/fb/.eslintrc.cjs @@ -17,7 +17,8 @@ module.exports = { rules: { - "@typescript-eslint/no-require-imports": "off", - "@typescript-eslint/no-inferrable-types": "off" + "@typescript-eslint/type-annotation-spacing": "off", + "@typescript-eslint/semi": "off", + "unicorn/no-new-buffer": "off", }, }; \ No newline at end of file diff --git a/js/src/fb/File.ts b/js/src/fb/File.ts deleted file mode 100644 index 5746dd183a5ca..0000000000000 --- a/js/src/fb/File.ts +++ /dev/null @@ -1,300 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - -import { flatbuffers } from 'flatbuffers'; -import * as NS13596923344997147894 from './Schema'; -/** - * ---------------------------------------------------------------------- - * Arrow File metadata - * - * - * @constructor - */ -export class Footer { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Footer 
- */ - __init(i: number, bb: flatbuffers.ByteBuffer): Footer { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Footer= obj - * @returns Footer - */ - static getRootAsFooter(bb: flatbuffers.ByteBuffer, obj?: Footer): Footer { - return (obj || new Footer()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Footer= obj - * @returns Footer - */ - static getSizePrefixedRootAsFooter(bb: flatbuffers.ByteBuffer, obj?: Footer): Footer { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Footer()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns MetadataVersion - */ - version(): NS13596923344997147894.MetadataVersion { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : NS13596923344997147894.MetadataVersion.V1; - } - - /** - * @param Schema= obj - * @returns Schema|null - */ - schema(obj?: NS13596923344997147894.Schema): NS13596923344997147894.Schema | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? (obj || new NS13596923344997147894.Schema()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; - } - - /** - * @param number index - * @param Block= obj - * @returns Block - */ - dictionaries(index: number, obj?: Block): Block | null { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null; - } - - /** - * @returns number - */ - dictionariesLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? 
this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @param number index - * @param Block= obj - * @returns Block - */ - recordBatches(index: number, obj?: Block): Block | null { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null; - } - - /** - * @returns number - */ - recordBatchesLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * User-defined metadata - * - * @param number index - * @param KeyValue= obj - * @returns KeyValue - */ - customMetadata(index: number, obj?: NS13596923344997147894.KeyValue): NS13596923344997147894.KeyValue | null { - const offset = this.bb!.__offset(this.bb_pos, 12); - return offset ? (obj || new NS13596923344997147894.KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; - } - - /** - * @returns number - */ - customMetadataLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 12); - return offset ? 
this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @param flatbuffers.Builder builder - */ - static startFooter(builder: flatbuffers.Builder) { - builder.startObject(5); - } - - /** - * @param flatbuffers.Builder builder - * @param MetadataVersion version - */ - static addVersion(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion) { - builder.addFieldInt16(0, version, NS13596923344997147894.MetadataVersion.V1); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset schemaOffset - */ - static addSchema(builder: flatbuffers.Builder, schemaOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, schemaOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset dictionariesOffset - */ - static addDictionaries(builder: flatbuffers.Builder, dictionariesOffset: flatbuffers.Offset) { - builder.addFieldOffset(2, dictionariesOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startDictionariesVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(24, numElems, 8); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset recordBatchesOffset - */ - static addRecordBatches(builder: flatbuffers.Builder, recordBatchesOffset: flatbuffers.Offset) { - builder.addFieldOffset(3, recordBatchesOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startRecordBatchesVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(24, numElems, 8); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset customMetadataOffset - */ - static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) { - builder.addFieldOffset(4, customMetadataOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. 
data - * @returns flatbuffers.Offset - */ - static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endFooter(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset offset - */ - static finishFooterBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) { - builder.finish(offset); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset offset - */ - static finishSizePrefixedFooterBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) { - builder.finish(offset, undefined, true); - } - - static createFooter(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion, schemaOffset: flatbuffers.Offset, dictionariesOffset: flatbuffers.Offset, recordBatchesOffset: flatbuffers.Offset, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset { - Footer.startFooter(builder); - Footer.addVersion(builder, version); - Footer.addSchema(builder, schemaOffset); - Footer.addDictionaries(builder, dictionariesOffset); - Footer.addRecordBatches(builder, recordBatchesOffset); - Footer.addCustomMetadata(builder, customMetadataOffset); - return Footer.endFooter(builder); - } -} -/** - * @constructor - */ -export class Block { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Block - */ - 
__init(i: number, bb: flatbuffers.ByteBuffer): Block { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * Index to the start of the RecordBlock (note this is past the Message header) - * - * @returns flatbuffers.Long - */ - offset(): flatbuffers.Long { - return this.bb!.readInt64(this.bb_pos); - } - - /** - * Length of the metadata - * - * @returns number - */ - metaDataLength(): number { - return this.bb!.readInt32(this.bb_pos + 8); - } - - /** - * Length of the data (this is aligned so there can be a gap between this and - * the metadata). - * - * @returns flatbuffers.Long - */ - bodyLength(): flatbuffers.Long { - return this.bb!.readInt64(this.bb_pos + 16); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long offset - * @param number metaDataLength - * @param flatbuffers.Long bodyLength - * @returns flatbuffers.Offset - */ - static createBlock(builder: flatbuffers.Builder, offset: flatbuffers.Long, metaDataLength: number, bodyLength: flatbuffers.Long): flatbuffers.Offset { - builder.prep(8, 24); - builder.writeInt64(bodyLength); - builder.pad(4); - builder.writeInt32(metaDataLength); - builder.writeInt64(offset); - return builder.offset(); - } - -} diff --git a/js/src/fb/Message.ts b/js/src/fb/Message.ts deleted file mode 100644 index 973eb0425349d..0000000000000 --- a/js/src/fb/Message.ts +++ /dev/null @@ -1,709 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - -import { flatbuffers } from 'flatbuffers'; -import * as NS13596923344997147894 from './Schema'; -/** - * @enum {number} - */ -export enum CompressionType { - LZ4_FRAME = 0, - ZSTD = 1 -} - -/** - * Provided for forward compatibility in case we need to support different - * strategies for compressing the IPC message body (like whole-body - * compression rather than buffer-level) in the future - * - * @enum {number} - */ -export enum BodyCompressionMethod { - /** - * Each constituent buffer is first compressed with the indicated - * 
compressor, and then written with the uncompressed length in the first 8 - * bytes as a 64-bit little-endian signed integer followed by the compressed - * buffer bytes (and then padding as required by the protocol). The - * uncompressed length may be set to -1 to indicate that the data that - * follows is not compressed, which can be useful for cases where - * compression does not yield appreciable savings. - */ - BUFFER = 0 -} - -/** - * ---------------------------------------------------------------------- - * The root Message type - * This union enables us to easily send different message types without - * redundant storage, and in the future we can easily add new message types. - * - * Arrow implementations do not need to implement all of the message types, - * which may include experimental metadata types. For maximum compatibility, - * it is best to send data using RecordBatch - * - * @enum {number} - */ -export enum MessageHeader { - NONE = 0, - Schema = 1, - DictionaryBatch = 2, - RecordBatch = 3, - Tensor = 4, - SparseTensor = 5 -} - -/** - * ---------------------------------------------------------------------- - * Data structures for describing a table row batch (a collection of - * equal-length Arrow arrays) - * Metadata about a field at some level of a nested type tree (but not - * its children). 
- * - * For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] - * would have {length: 5, null_count: 2} for its List node, and {length: 6, - * null_count: 0} for its Int16 node, as separate FieldNode structs - * - * @constructor - */ -export class FieldNode { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns FieldNode - */ - __init(i: number, bb: flatbuffers.ByteBuffer): FieldNode { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * The number of value slots in the Arrow array at this level of a nested - * tree - * - * @returns flatbuffers.Long - */ - length(): flatbuffers.Long { - return this.bb!.readInt64(this.bb_pos); - } - - /** - * The number of observed nulls. Fields with null_count == 0 may choose not - * to write their physical validity bitmap out as a materialized buffer, - * instead setting the length of the bitmap buffer to 0. - * - * @returns flatbuffers.Long - */ - nullCount(): flatbuffers.Long { - return this.bb!.readInt64(this.bb_pos + 8); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long length - * @param flatbuffers.Long null_count - * @returns flatbuffers.Offset - */ - static createFieldNode(builder: flatbuffers.Builder, length: flatbuffers.Long, null_count: flatbuffers.Long): flatbuffers.Offset { - builder.prep(8, 16); - builder.writeInt64(null_count); - builder.writeInt64(length); - return builder.offset(); - } - -} -/** - * Optional compression for the memory buffers constituting IPC message - * bodies. 
Intended for use with RecordBatch but could be used for other - * message types - * - * @constructor - */ -export class BodyCompression { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns BodyCompression - */ - __init(i: number, bb: flatbuffers.ByteBuffer): BodyCompression { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param BodyCompression= obj - * @returns BodyCompression - */ - static getRootAsBodyCompression(bb: flatbuffers.ByteBuffer, obj?: BodyCompression): BodyCompression { - return (obj || new BodyCompression()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param BodyCompression= obj - * @returns BodyCompression - */ - static getSizePrefixedRootAsBodyCompression(bb: flatbuffers.ByteBuffer, obj?: BodyCompression): BodyCompression { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new BodyCompression()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * Compressor library - * - * @returns CompressionType - */ - codec(): CompressionType { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt8(this.bb_pos + offset)) : CompressionType.LZ4_FRAME; - } - - /** - * Indicates the way the record batch body was compressed - * - * @returns BodyCompressionMethod - */ - method(): BodyCompressionMethod { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? 
/** */ (this.bb!.readInt8(this.bb_pos + offset)) : BodyCompressionMethod.BUFFER; - } - - /** - * @param flatbuffers.Builder builder - */ - static startBodyCompression(builder: flatbuffers.Builder) { - builder.startObject(2); - } - - /** - * @param flatbuffers.Builder builder - * @param CompressionType codec - */ - static addCodec(builder: flatbuffers.Builder, codec: CompressionType) { - builder.addFieldInt8(0, codec, CompressionType.LZ4_FRAME); - } - - /** - * @param flatbuffers.Builder builder - * @param BodyCompressionMethod method - */ - static addMethod(builder: flatbuffers.Builder, method: BodyCompressionMethod) { - builder.addFieldInt8(1, method, BodyCompressionMethod.BUFFER); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endBodyCompression(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createBodyCompression(builder: flatbuffers.Builder, codec: CompressionType, method: BodyCompressionMethod): flatbuffers.Offset { - BodyCompression.startBodyCompression(builder); - BodyCompression.addCodec(builder, codec); - BodyCompression.addMethod(builder, method); - return BodyCompression.endBodyCompression(builder); - } -} -/** - * A data header describing the shared memory layout of a "record" or "row" - * batch. Some systems call this a "row batch" internally and others a "record - * batch". 
- * - * @constructor - */ -export class RecordBatch { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns RecordBatch - */ - __init(i: number, bb: flatbuffers.ByteBuffer): RecordBatch { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param RecordBatch= obj - * @returns RecordBatch - */ - static getRootAsRecordBatch(bb: flatbuffers.ByteBuffer, obj?: RecordBatch): RecordBatch { - return (obj || new RecordBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param RecordBatch= obj - * @returns RecordBatch - */ - static getSizePrefixedRootAsRecordBatch(bb: flatbuffers.ByteBuffer, obj?: RecordBatch): RecordBatch { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new RecordBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * number of records / rows. The arrays in the batch should all have this - * length - * - * @returns flatbuffers.Long - */ - length(): flatbuffers.Long { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); - } - - /** - * Nodes correspond to the pre-ordered flattened logical schema - * - * @param number index - * @param FieldNode= obj - * @returns FieldNode - */ - nodes(index: number, obj?: FieldNode): FieldNode | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? (obj || new FieldNode()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null; - } - - /** - * @returns number - */ - nodesLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? 
this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * Buffers correspond to the pre-ordered flattened buffer tree - * - * The number of buffers appended to this list depends on the schema. For - * example, most primitive arrays will have 2 buffers, 1 for the validity - * bitmap and 1 for the values. For struct arrays, there will only be a - * single buffer for the validity (nulls) bitmap - * - * @param number index - * @param Buffer= obj - * @returns Buffer - */ - buffers(index: number, obj?: NS13596923344997147894.Buffer): NS13596923344997147894.Buffer | null { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? (obj || new NS13596923344997147894.Buffer()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null; - } - - /** - * @returns number - */ - buffersLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * Optional compression of the message body - * - * @param BodyCompression= obj - * @returns BodyCompression|null - */ - compression(obj?: BodyCompression): BodyCompression | null { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? (obj || new BodyCompression()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) 
: null; - } - - /** - * @param flatbuffers.Builder builder - */ - static startRecordBatch(builder: flatbuffers.Builder) { - builder.startObject(4); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long length - */ - static addLength(builder: flatbuffers.Builder, length: flatbuffers.Long) { - builder.addFieldInt64(0, length, builder.createLong(0, 0)); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset nodesOffset - */ - static addNodes(builder: flatbuffers.Builder, nodesOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, nodesOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startNodesVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(16, numElems, 8); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset buffersOffset - */ - static addBuffers(builder: flatbuffers.Builder, buffersOffset: flatbuffers.Offset) { - builder.addFieldOffset(2, buffersOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startBuffersVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(16, numElems, 8); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset compressionOffset - */ - static addCompression(builder: flatbuffers.Builder, compressionOffset: flatbuffers.Offset) { - builder.addFieldOffset(3, compressionOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endRecordBatch(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createRecordBatch(builder: flatbuffers.Builder, length: flatbuffers.Long, nodesOffset: flatbuffers.Offset, buffersOffset: flatbuffers.Offset, compressionOffset: flatbuffers.Offset): flatbuffers.Offset { - RecordBatch.startRecordBatch(builder); - RecordBatch.addLength(builder, 
length); - RecordBatch.addNodes(builder, nodesOffset); - RecordBatch.addBuffers(builder, buffersOffset); - RecordBatch.addCompression(builder, compressionOffset); - return RecordBatch.endRecordBatch(builder); - } -} -/** - * For sending dictionary encoding information. Any Field can be - * dictionary-encoded, but in this case none of its children may be - * dictionary-encoded. - * There is one vector / column per dictionary, but that vector / column - * may be spread across multiple dictionary batches by using the isDelta - * flag - * - * @constructor - */ -export class DictionaryBatch { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns DictionaryBatch - */ - __init(i: number, bb: flatbuffers.ByteBuffer): DictionaryBatch { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param DictionaryBatch= obj - * @returns DictionaryBatch - */ - static getRootAsDictionaryBatch(bb: flatbuffers.ByteBuffer, obj?: DictionaryBatch): DictionaryBatch { - return (obj || new DictionaryBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param DictionaryBatch= obj - * @returns DictionaryBatch - */ - static getSizePrefixedRootAsDictionaryBatch(bb: flatbuffers.ByteBuffer, obj?: DictionaryBatch): DictionaryBatch { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new DictionaryBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns flatbuffers.Long - */ - id(): flatbuffers.Long { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); - } - - /** - * @param RecordBatch= obj - * @returns RecordBatch|null - */ - data(obj?: RecordBatch): RecordBatch | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? 
(obj || new RecordBatch()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; - } - - /** - * If isDelta is true the values in the dictionary are to be appended to a - * dictionary with the indicated id. If isDelta is false this dictionary - * should replace the existing dictionary. - * - * @returns boolean - */ - isDelta(): boolean { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; - } - - /** - * @param flatbuffers.Builder builder - */ - static startDictionaryBatch(builder: flatbuffers.Builder) { - builder.startObject(3); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long id - */ - static addId(builder: flatbuffers.Builder, id: flatbuffers.Long) { - builder.addFieldInt64(0, id, builder.createLong(0, 0)); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset dataOffset - */ - static addData(builder: flatbuffers.Builder, dataOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, dataOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param boolean isDelta - */ - static addIsDelta(builder: flatbuffers.Builder, isDelta: boolean) { - builder.addFieldInt8(2, +isDelta, +false); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endDictionaryBatch(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createDictionaryBatch(builder: flatbuffers.Builder, id: flatbuffers.Long, dataOffset: flatbuffers.Offset, isDelta: boolean): flatbuffers.Offset { - DictionaryBatch.startDictionaryBatch(builder); - DictionaryBatch.addId(builder, id); - DictionaryBatch.addData(builder, dataOffset); - DictionaryBatch.addIsDelta(builder, isDelta); - return DictionaryBatch.endDictionaryBatch(builder); - } -} -/** - * @constructor - */ -export class Message { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; 
- /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Message - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Message { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Message= obj - * @returns Message - */ - static getRootAsMessage(bb: flatbuffers.ByteBuffer, obj?: Message): Message { - return (obj || new Message()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Message= obj - * @returns Message - */ - static getSizePrefixedRootAsMessage(bb: flatbuffers.ByteBuffer, obj?: Message): Message { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Message()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns MetadataVersion - */ - version(): NS13596923344997147894.MetadataVersion { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : NS13596923344997147894.MetadataVersion.V1; - } - - /** - * @returns MessageHeader - */ - headerType(): MessageHeader { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? /** */ (this.bb!.readUint8(this.bb_pos + offset)) : MessageHeader.NONE; - } - - /** - * @param flatbuffers.Table obj - * @returns ?flatbuffers.Table - */ - header(obj: T): T | null { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; - } - - /** - * @returns flatbuffers.Long - */ - bodyLength(): flatbuffers.Long { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? 
this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); - } - - /** - * @param number index - * @param KeyValue= obj - * @returns KeyValue - */ - customMetadata(index: number, obj?: NS13596923344997147894.KeyValue): NS13596923344997147894.KeyValue | null { - const offset = this.bb!.__offset(this.bb_pos, 12); - return offset ? (obj || new NS13596923344997147894.KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; - } - - /** - * @returns number - */ - customMetadataLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 12); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @param flatbuffers.Builder builder - */ - static startMessage(builder: flatbuffers.Builder) { - builder.startObject(5); - } - - /** - * @param flatbuffers.Builder builder - * @param MetadataVersion version - */ - static addVersion(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion) { - builder.addFieldInt16(0, version, NS13596923344997147894.MetadataVersion.V1); - } - - /** - * @param flatbuffers.Builder builder - * @param MessageHeader headerType - */ - static addHeaderType(builder: flatbuffers.Builder, headerType: MessageHeader) { - builder.addFieldInt8(1, headerType, MessageHeader.NONE); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset headerOffset - */ - static addHeader(builder: flatbuffers.Builder, headerOffset: flatbuffers.Offset) { - builder.addFieldOffset(2, headerOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long bodyLength - */ - static addBodyLength(builder: flatbuffers.Builder, bodyLength: flatbuffers.Long) { - builder.addFieldInt64(3, bodyLength, builder.createLong(0, 0)); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset customMetadataOffset - */ - static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: 
flatbuffers.Offset) { - builder.addFieldOffset(4, customMetadataOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. data - * @returns flatbuffers.Offset - */ - static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endMessage(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset offset - */ - static finishMessageBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) { - builder.finish(offset); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset offset - */ - static finishSizePrefixedMessageBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) { - builder.finish(offset, undefined, true); - } - - static createMessage(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion, headerType: MessageHeader, headerOffset: flatbuffers.Offset, bodyLength: flatbuffers.Long, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset { - Message.startMessage(builder); - Message.addVersion(builder, version); - Message.addHeaderType(builder, headerType); - Message.addHeader(builder, headerOffset); - Message.addBodyLength(builder, bodyLength); - Message.addCustomMetadata(builder, customMetadataOffset); - return Message.endMessage(builder); - } -} diff --git a/js/src/fb/Schema.ts b/js/src/fb/Schema.ts deleted file mode 100644 index 
f675bc2a062ba..0000000000000 --- a/js/src/fb/Schema.ts +++ /dev/null @@ -1,2658 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - -import { flatbuffers } from 'flatbuffers'; -/** - * Logical types, vector layouts, and schemas - * - * @enum {number} - */ -export enum MetadataVersion { - /** - * 0.1.0 (October 2016). - */ - V1 = 0, - - /** - * 0.2.0 (February 2017). Non-backwards compatible with V1. - */ - V2 = 1, - - /** - * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. - */ - V3 = 2, - - /** - * >= 0.8.0 (December 2017). Non-backwards compatible with V3. - */ - V4 = 3, - - /** - * >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 - * metadata and IPC messages). Implementations are recommended to provide a - * V4 compatibility mode with V5 format changes disabled. - * - * Incompatible changes between V4 and V5: - * - Union buffer layout has changed. In V5, Unions don't have a validity - * bitmap buffer. - */ - V5 = 4 -} - -/** - * Represents Arrow Features that might not have full support - * within implementations. This is intended to be used in - * two scenarios: - * 1. A mechanism for readers of Arrow Streams - * and files to understand that the stream or file makes - * use of a feature that isn't supported or unknown to - * the implementation (and therefore can meet the Arrow - * forward compatibility guarantees). - * 2. A means of negotiating between a client and server - * what features a stream is allowed to use. The enums - * values here are intented to represent higher level - * features, additional details maybe negotiated - * with key-value pairs specific to the protocol. - * - * Enums added to this list should be assigned power-of-two values - * to facilitate exchanging and comparing bitmaps for supported - * features. - * - * @enum {number} - */ -export enum Feature { - /** - * Needed to make flatbuffers happy. 
- */ - UNUSED = 0, - - /** - * The stream makes use of multiple full dictionaries with the - * same ID and assumes clients implement dictionary replacement - * correctly. - */ - DICTIONARY_REPLACEMENT = 1, - - /** - * The stream makes use of compressed bodies as described - * in Message.fbs. - */ - COMPRESSED_BODY = 2 -} - -/** - * @enum {number} - */ -export enum UnionMode { - Sparse = 0, - Dense = 1 -} - -/** - * @enum {number} - */ -export enum Precision { - HALF = 0, - SINGLE = 1, - DOUBLE = 2 -} - -/** - * @enum {number} - */ -export enum DateUnit { - DAY = 0, - MILLISECOND = 1 -} - -/** - * @enum {number} - */ -export enum TimeUnit { - SECOND = 0, - MILLISECOND = 1, - MICROSECOND = 2, - NANOSECOND = 3 -} - -/** - * @enum {number} - */ -export enum IntervalUnit { - YEAR_MONTH = 0, - DAY_TIME = 1 -} - -/** - * ---------------------------------------------------------------------- - * Top-level Type value, enabling extensible type-specific metadata. We can - * add new logical types to Type without breaking backwards compatibility - * - * @enum {number} - */ -export enum Type { - NONE = 0, - Null = 1, - Int = 2, - FloatingPoint = 3, - Binary = 4, - Utf8 = 5, - Bool = 6, - Decimal = 7, - Date = 8, - Time = 9, - Timestamp = 10, - Interval = 11, - List = 12, - Struct_ = 13, - Union = 14, - FixedSizeBinary = 15, - FixedSizeList = 16, - Map = 17, - Duration = 18, - LargeBinary = 19, - LargeUtf8 = 20, - LargeList = 21 -} - -/** - * ---------------------------------------------------------------------- - * Dictionary encoding metadata - * Maintained for forwards compatibility, in the future - * Dictionaries might be explicit maps between integers and values - * allowing for non-contiguous index values - * - * @enum {number} - */ -export enum DictionaryKind { - DenseArray = 0 -} - -/** - * ---------------------------------------------------------------------- - * Endianness of the platform producing the data - * - * @enum {number} - */ -export enum Endianness { - Little 
= 0, - Big = 1 -} - -/** - * These are stored in the flatbuffer in the Type union below - * - * @constructor - */ -export class Null { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Null - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Null { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Null= obj - * @returns Null - */ - static getRootAsNull(bb: flatbuffers.ByteBuffer, obj?: Null): Null { - return (obj || new Null()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Null= obj - * @returns Null - */ - static getSizePrefixedRootAsNull(bb: flatbuffers.ByteBuffer, obj?: Null): Null { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Null()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startNull(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endNull(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createNull(builder: flatbuffers.Builder): flatbuffers.Offset { - Null.startNull(builder); - return Null.endNull(builder); - } -} -/** - * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct - * (according to the physical memory layout). 
We used Struct_ here as - * Struct is a reserved word in Flatbuffers - * - * @constructor - */ -export class Struct_ { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Struct_ - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Struct_ { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Struct_= obj - * @returns Struct_ - */ - static getRootAsStruct_(bb: flatbuffers.ByteBuffer, obj?: Struct_): Struct_ { - return (obj || new Struct_()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Struct_= obj - * @returns Struct_ - */ - static getSizePrefixedRootAsStruct_(bb: flatbuffers.ByteBuffer, obj?: Struct_): Struct_ { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Struct_()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startStruct_(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endStruct_(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createStruct_(builder: flatbuffers.Builder): flatbuffers.Offset { - Struct_.startStruct_(builder); - return Struct_.endStruct_(builder); - } -} -/** - * @constructor - */ -export class List { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns List - */ - __init(i: number, bb: flatbuffers.ByteBuffer): List { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param List= obj - * @returns List - */ - static getRootAsList(bb: flatbuffers.ByteBuffer, obj?: List): List { - return (obj || 
new List()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param List= obj - * @returns List - */ - static getSizePrefixedRootAsList(bb: flatbuffers.ByteBuffer, obj?: List): List { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new List()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startList(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endList(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createList(builder: flatbuffers.Builder): flatbuffers.Offset { - List.startList(builder); - return List.endList(builder); - } -} -/** - * Same as List, but with 64-bit offsets, allowing to represent - * extremely large data values. - * - * @constructor - */ -export class LargeList { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns LargeList - */ - __init(i: number, bb: flatbuffers.ByteBuffer): LargeList { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param LargeList= obj - * @returns LargeList - */ - static getRootAsLargeList(bb: flatbuffers.ByteBuffer, obj?: LargeList): LargeList { - return (obj || new LargeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param LargeList= obj - * @returns LargeList - */ - static getSizePrefixedRootAsLargeList(bb: flatbuffers.ByteBuffer, obj?: LargeList): LargeList { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new LargeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder 
- */ - static startLargeList(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endLargeList(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createLargeList(builder: flatbuffers.Builder): flatbuffers.Offset { - LargeList.startLargeList(builder); - return LargeList.endLargeList(builder); - } -} -/** - * @constructor - */ -export class FixedSizeList { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns FixedSizeList - */ - __init(i: number, bb: flatbuffers.ByteBuffer): FixedSizeList { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param FixedSizeList= obj - * @returns FixedSizeList - */ - static getRootAsFixedSizeList(bb: flatbuffers.ByteBuffer, obj?: FixedSizeList): FixedSizeList { - return (obj || new FixedSizeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param FixedSizeList= obj - * @returns FixedSizeList - */ - static getSizePrefixedRootAsFixedSizeList(bb: flatbuffers.ByteBuffer, obj?: FixedSizeList): FixedSizeList { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new FixedSizeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * Number of list items per value - * - * @returns number - */ - listSize(): number { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 0; - } - - /** - * @param flatbuffers.Builder builder - */ - static startFixedSizeList(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param number listSize - */ - static addListSize(builder: flatbuffers.Builder, listSize: number) { - builder.addFieldInt32(0, listSize, 0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endFixedSizeList(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createFixedSizeList(builder: flatbuffers.Builder, listSize: number): flatbuffers.Offset { - FixedSizeList.startFixedSizeList(builder); - FixedSizeList.addListSize(builder, listSize); - return FixedSizeList.endFixedSizeList(builder); - } -} -/** - * A Map is a logical nested type that is represented as - * - * List> - * - * In this layout, the keys and values are each respectively contiguous. We do - * not constrain the key and value types, so the application is responsible - * for ensuring that the keys are hashable and unique. Whether the keys are sorted - * may be set in the metadata for this field. - * - * In a field with Map type, the field has a child Struct field, which then - * has two children: key type and the second the value type. The names of the - * child fields may be respectively "entries", "key", and "value", but this is - * not enforced. - * - * Map - * - child[0] entries: Struct - * - child[0] key: K - * - child[1] value: V - * - * Neither the "entries" field nor the "key" field may be nullable. - * - * The metadata is structured so that Arrow systems without special handling - * for Map can make Map an alias for List. The "layout" attribute for the Map - * field must have the same contents as a List. 
- * - * @constructor - */ -export class Map { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Map - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Map { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Map= obj - * @returns Map - */ - static getRootAsMap(bb: flatbuffers.ByteBuffer, obj?: Map): Map { - return (obj || new Map()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Map= obj - * @returns Map - */ - static getSizePrefixedRootAsMap(bb: flatbuffers.ByteBuffer, obj?: Map): Map { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Map()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * Set to true if the keys within each value are sorted - * - * @returns boolean - */ - keysSorted(): boolean { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? 
!!this.bb!.readInt8(this.bb_pos + offset) : false; - } - - /** - * @param flatbuffers.Builder builder - */ - static startMap(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param boolean keysSorted - */ - static addKeysSorted(builder: flatbuffers.Builder, keysSorted: boolean) { - builder.addFieldInt8(0, +keysSorted, +false); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endMap(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createMap(builder: flatbuffers.Builder, keysSorted: boolean): flatbuffers.Offset { - Map.startMap(builder); - Map.addKeysSorted(builder, keysSorted); - return Map.endMap(builder); - } -} -/** - * A union is a complex type with children in Field - * By default ids in the type vector refer to the offsets in the children - * optionally typeIds provides an indirection between the child offset and the type id - * for each child typeIds[offset] is the id used in the type vector - * - * @constructor - */ -export class Union { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Union - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Union { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Union= obj - * @returns Union - */ - static getRootAsUnion(bb: flatbuffers.ByteBuffer, obj?: Union): Union { - return (obj || new Union()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Union= obj - * @returns Union - */ - static getSizePrefixedRootAsUnion(bb: flatbuffers.ByteBuffer, obj?: Union): Union { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Union()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } 
- - /** - * @returns UnionMode - */ - mode(): UnionMode { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : UnionMode.Sparse; - } - - /** - * @param number index - * @returns number - */ - typeIds(index: number): number | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? this.bb!.readInt32(this.bb!.__vector(this.bb_pos + offset) + index * 4) : 0; - } - - /** - * @returns number - */ - typeIdsLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @returns Int32Array - */ - typeIdsArray(): Int32Array | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? new Int32Array(this.bb!.bytes().buffer, this.bb!.bytes().byteOffset + this.bb!.__vector(this.bb_pos + offset), this.bb!.__vector_len(this.bb_pos + offset)) : null; - } - - /** - * @param flatbuffers.Builder builder - */ - static startUnion(builder: flatbuffers.Builder) { - builder.startObject(2); - } - - /** - * @param flatbuffers.Builder builder - * @param UnionMode mode - */ - static addMode(builder: flatbuffers.Builder, mode: UnionMode) { - builder.addFieldInt16(0, mode, UnionMode.Sparse); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset typeIdsOffset - */ - static addTypeIds(builder: flatbuffers.Builder, typeIdsOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, typeIdsOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. 
data - * @returns flatbuffers.Offset - */ - static createTypeIdsVector(builder: flatbuffers.Builder, data: number[] | Int32Array): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addInt32(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startTypeIdsVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endUnion(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createUnion(builder: flatbuffers.Builder, mode: UnionMode, typeIdsOffset: flatbuffers.Offset): flatbuffers.Offset { - Union.startUnion(builder); - Union.addMode(builder, mode); - Union.addTypeIds(builder, typeIdsOffset); - return Union.endUnion(builder); - } -} -/** - * @constructor - */ -export class Int { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Int - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Int { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Int= obj - * @returns Int - */ - static getRootAsInt(bb: flatbuffers.ByteBuffer, obj?: Int): Int { - return (obj || new Int()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Int= obj - * @returns Int - */ - static getSizePrefixedRootAsInt(bb: flatbuffers.ByteBuffer, obj?: Int): Int { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Int()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns number - */ - bitWidth(): number { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 0; - } - - /** - * @returns boolean - */ - isSigned(): boolean { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; - } - - /** - * @param flatbuffers.Builder builder - */ - static startInt(builder: flatbuffers.Builder) { - builder.startObject(2); - } - - /** - * @param flatbuffers.Builder builder - * @param number bitWidth - */ - static addBitWidth(builder: flatbuffers.Builder, bitWidth: number) { - builder.addFieldInt32(0, bitWidth, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param boolean isSigned - */ - static addIsSigned(builder: flatbuffers.Builder, isSigned: boolean) { - builder.addFieldInt8(1, +isSigned, +false); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endInt(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createInt(builder: flatbuffers.Builder, bitWidth: number, isSigned: boolean): flatbuffers.Offset { - Int.startInt(builder); - Int.addBitWidth(builder, bitWidth); - Int.addIsSigned(builder, isSigned); - return Int.endInt(builder); - } -} -/** - * @constructor - */ -export class FloatingPoint { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns FloatingPoint - */ - __init(i: number, bb: flatbuffers.ByteBuffer): FloatingPoint { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param FloatingPoint= obj - * @returns FloatingPoint - */ - static getRootAsFloatingPoint(bb: flatbuffers.ByteBuffer, obj?: FloatingPoint): FloatingPoint { - return (obj || new FloatingPoint()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param FloatingPoint= obj - * @returns FloatingPoint - */ - static 
getSizePrefixedRootAsFloatingPoint(bb: flatbuffers.ByteBuffer, obj?: FloatingPoint): FloatingPoint { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new FloatingPoint()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns Precision - */ - precision(): Precision { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : Precision.HALF; - } - - /** - * @param flatbuffers.Builder builder - */ - static startFloatingPoint(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param Precision precision - */ - static addPrecision(builder: flatbuffers.Builder, precision: Precision) { - builder.addFieldInt16(0, precision, Precision.HALF); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endFloatingPoint(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createFloatingPoint(builder: flatbuffers.Builder, precision: Precision): flatbuffers.Offset { - FloatingPoint.startFloatingPoint(builder); - FloatingPoint.addPrecision(builder, precision); - return FloatingPoint.endFloatingPoint(builder); - } -} -/** - * Unicode with UTF-8 encoding - * - * @constructor - */ -export class Utf8 { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Utf8 - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Utf8 { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Utf8= obj - * @returns Utf8 - */ - static getRootAsUtf8(bb: flatbuffers.ByteBuffer, obj?: Utf8): Utf8 { - return (obj || new Utf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Utf8= obj - * @returns Utf8 - */ - 
static getSizePrefixedRootAsUtf8(bb: flatbuffers.ByteBuffer, obj?: Utf8): Utf8 { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Utf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startUtf8(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endUtf8(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createUtf8(builder: flatbuffers.Builder): flatbuffers.Offset { - Utf8.startUtf8(builder); - return Utf8.endUtf8(builder); - } -} -/** - * Opaque binary data - * - * @constructor - */ -export class Binary { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Binary - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Binary { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Binary= obj - * @returns Binary - */ - static getRootAsBinary(bb: flatbuffers.ByteBuffer, obj?: Binary): Binary { - return (obj || new Binary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Binary= obj - * @returns Binary - */ - static getSizePrefixedRootAsBinary(bb: flatbuffers.ByteBuffer, obj?: Binary): Binary { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Binary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startBinary(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endBinary(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - 
return offset; - } - - static createBinary(builder: flatbuffers.Builder): flatbuffers.Offset { - Binary.startBinary(builder); - return Binary.endBinary(builder); - } -} -/** - * Same as Utf8, but with 64-bit offsets, allowing to represent - * extremely large data values. - * - * @constructor - */ -export class LargeUtf8 { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns LargeUtf8 - */ - __init(i: number, bb: flatbuffers.ByteBuffer): LargeUtf8 { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param LargeUtf8= obj - * @returns LargeUtf8 - */ - static getRootAsLargeUtf8(bb: flatbuffers.ByteBuffer, obj?: LargeUtf8): LargeUtf8 { - return (obj || new LargeUtf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param LargeUtf8= obj - * @returns LargeUtf8 - */ - static getSizePrefixedRootAsLargeUtf8(bb: flatbuffers.ByteBuffer, obj?: LargeUtf8): LargeUtf8 { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new LargeUtf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startLargeUtf8(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endLargeUtf8(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createLargeUtf8(builder: flatbuffers.Builder): flatbuffers.Offset { - LargeUtf8.startLargeUtf8(builder); - return LargeUtf8.endLargeUtf8(builder); - } -} -/** - * Same as Binary, but with 64-bit offsets, allowing to represent - * extremely large data values. 
- * - * @constructor - */ -export class LargeBinary { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns LargeBinary - */ - __init(i: number, bb: flatbuffers.ByteBuffer): LargeBinary { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param LargeBinary= obj - * @returns LargeBinary - */ - static getRootAsLargeBinary(bb: flatbuffers.ByteBuffer, obj?: LargeBinary): LargeBinary { - return (obj || new LargeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param LargeBinary= obj - * @returns LargeBinary - */ - static getSizePrefixedRootAsLargeBinary(bb: flatbuffers.ByteBuffer, obj?: LargeBinary): LargeBinary { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new LargeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startLargeBinary(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endLargeBinary(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createLargeBinary(builder: flatbuffers.Builder): flatbuffers.Offset { - LargeBinary.startLargeBinary(builder); - return LargeBinary.endLargeBinary(builder); - } -} -/** - * @constructor - */ -export class FixedSizeBinary { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns FixedSizeBinary - */ - __init(i: number, bb: flatbuffers.ByteBuffer): FixedSizeBinary { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param FixedSizeBinary= obj - * @returns FixedSizeBinary - */ - static 
getRootAsFixedSizeBinary(bb: flatbuffers.ByteBuffer, obj?: FixedSizeBinary): FixedSizeBinary { - return (obj || new FixedSizeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param FixedSizeBinary= obj - * @returns FixedSizeBinary - */ - static getSizePrefixedRootAsFixedSizeBinary(bb: flatbuffers.ByteBuffer, obj?: FixedSizeBinary): FixedSizeBinary { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new FixedSizeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * Number of bytes per value - * - * @returns number - */ - byteWidth(): number { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0; - } - - /** - * @param flatbuffers.Builder builder - */ - static startFixedSizeBinary(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param number byteWidth - */ - static addByteWidth(builder: flatbuffers.Builder, byteWidth: number) { - builder.addFieldInt32(0, byteWidth, 0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endFixedSizeBinary(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createFixedSizeBinary(builder: flatbuffers.Builder, byteWidth: number): flatbuffers.Offset { - FixedSizeBinary.startFixedSizeBinary(builder); - FixedSizeBinary.addByteWidth(builder, byteWidth); - return FixedSizeBinary.endFixedSizeBinary(builder); - } -} -/** - * @constructor - */ -export class Bool { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Bool - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Bool { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * 
@param Bool= obj - * @returns Bool - */ - static getRootAsBool(bb: flatbuffers.ByteBuffer, obj?: Bool): Bool { - return (obj || new Bool()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Bool= obj - * @returns Bool - */ - static getSizePrefixedRootAsBool(bb: flatbuffers.ByteBuffer, obj?: Bool): Bool { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Bool()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.Builder builder - */ - static startBool(builder: flatbuffers.Builder) { - builder.startObject(0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endBool(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createBool(builder: flatbuffers.Builder): flatbuffers.Offset { - Bool.startBool(builder); - return Bool.endBool(builder); - } -} -/** - * Exact decimal value represented as an integer value in two's - * complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers - * are used. The representation uses the endianness indicated - * in the Schema. 
- * - * @constructor - */ -export class Decimal { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Decimal - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Decimal { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Decimal= obj - * @returns Decimal - */ - static getRootAsDecimal(bb: flatbuffers.ByteBuffer, obj?: Decimal): Decimal { - return (obj || new Decimal()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Decimal= obj - * @returns Decimal - */ - static getSizePrefixedRootAsDecimal(bb: flatbuffers.ByteBuffer, obj?: Decimal): Decimal { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Decimal()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * Total number of decimal digits - * - * @returns number - */ - precision(): number { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0; - } - - /** - * Number of digits after the decimal point "." - * - * @returns number - */ - scale(): number { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0; - } - - /** - * Number of bits per value. The only accepted widths are 128 and 256. - * We use bitWidth for consistency with Int::bitWidth. - * - * @returns number - */ - bitWidth(): number { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 128; - } - - /** - * @param flatbuffers.Builder builder - */ - static startDecimal(builder: flatbuffers.Builder) { - builder.startObject(3); - } - - /** - * @param flatbuffers.Builder builder - * @param number precision - */ - static addPrecision(builder: flatbuffers.Builder, precision: number) { - builder.addFieldInt32(0, precision, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param number scale - */ - static addScale(builder: flatbuffers.Builder, scale: number) { - builder.addFieldInt32(1, scale, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param number bitWidth - */ - static addBitWidth(builder: flatbuffers.Builder, bitWidth: number) { - builder.addFieldInt32(2, bitWidth, 128); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endDecimal(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createDecimal(builder: flatbuffers.Builder, precision: number, scale: number, bitWidth: number): flatbuffers.Offset { - Decimal.startDecimal(builder); - Decimal.addPrecision(builder, precision); - Decimal.addScale(builder, scale); - Decimal.addBitWidth(builder, bitWidth); - return Decimal.endDecimal(builder); - } -} -/** - * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX - * epoch (1970-01-01), stored in either of two units: - * - * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no - * leap seconds), where the values are evenly divisible by 86400000 - * * Days (32 bits) since the UNIX epoch - * - * @constructor - */ -export class Date { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Date - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Date { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb 
- * @param Date= obj - * @returns Date - */ - static getRootAsDate(bb: flatbuffers.ByteBuffer, obj?: Date): Date { - return (obj || new Date()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Date= obj - * @returns Date - */ - static getSizePrefixedRootAsDate(bb: flatbuffers.ByteBuffer, obj?: Date): Date { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Date()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns DateUnit - */ - unit(): DateUnit { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : DateUnit.MILLISECOND; - } - - /** - * @param flatbuffers.Builder builder - */ - static startDate(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param DateUnit unit - */ - static addUnit(builder: flatbuffers.Builder, unit: DateUnit) { - builder.addFieldInt16(0, unit, DateUnit.MILLISECOND); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endDate(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createDate(builder: flatbuffers.Builder, unit: DateUnit): flatbuffers.Offset { - Date.startDate(builder); - Date.addUnit(builder, unit); - return Date.endDate(builder); - } -} -/** - * Time type. 
The physical storage type depends on the unit - * - SECOND and MILLISECOND: 32 bits - * - MICROSECOND and NANOSECOND: 64 bits - * - * @constructor - */ -export class Time { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Time - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Time { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Time= obj - * @returns Time - */ - static getRootAsTime(bb: flatbuffers.ByteBuffer, obj?: Time): Time { - return (obj || new Time()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Time= obj - * @returns Time - */ - static getSizePrefixedRootAsTime(bb: flatbuffers.ByteBuffer, obj?: Time): Time { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Time()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns TimeUnit - */ - unit(): TimeUnit { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.MILLISECOND; - } - - /** - * @returns number - */ - bitWidth(): number { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 32; - } - - /** - * @param flatbuffers.Builder builder - */ - static startTime(builder: flatbuffers.Builder) { - builder.startObject(2); - } - - /** - * @param flatbuffers.Builder builder - * @param TimeUnit unit - */ - static addUnit(builder: flatbuffers.Builder, unit: TimeUnit) { - builder.addFieldInt16(0, unit, TimeUnit.MILLISECOND); - } - - /** - * @param flatbuffers.Builder builder - * @param number bitWidth - */ - static addBitWidth(builder: flatbuffers.Builder, bitWidth: number) { - builder.addFieldInt32(1, bitWidth, 32); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endTime(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createTime(builder: flatbuffers.Builder, unit: TimeUnit, bitWidth: number): flatbuffers.Offset { - Time.startTime(builder); - Time.addUnit(builder, unit); - Time.addBitWidth(builder, bitWidth); - return Time.endTime(builder); - } -} -/** - * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding - * leap seconds, as a 64-bit integer. Note that UNIX time does not include - * leap seconds. - * - * The Timestamp metadata supports both "time zone naive" and "time zone - * aware" timestamps. 
Read about the timezone attribute for more detail - * - * @constructor - */ -export class Timestamp { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Timestamp - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Timestamp { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Timestamp= obj - * @returns Timestamp - */ - static getRootAsTimestamp(bb: flatbuffers.ByteBuffer, obj?: Timestamp): Timestamp { - return (obj || new Timestamp()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Timestamp= obj - * @returns Timestamp - */ - static getSizePrefixedRootAsTimestamp(bb: flatbuffers.ByteBuffer, obj?: Timestamp): Timestamp { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Timestamp()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns TimeUnit - */ - unit(): TimeUnit { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.SECOND; - } - - /** - * The time zone is a string indicating the name of a time zone, one of: - * - * * As used in the Olson time zone database (the "tz database" or - * "tzdata"), such as "America/New_York" - * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - * - * Whether a timezone string is present indicates different semantics about - * the data: - * - * * If the time zone is null or equal to an empty string, the data is "time - * zone naive" and shall be displayed *as is* to the user, not localized - * to the locale of the user. 
This data can be though of as UTC but - * without having "UTC" as the time zone, it is not considered to be - * localized to any time zone - * - * * If the time zone is set to a valid value, values can be displayed as - * "localized" to that time zone, even though the underlying 64-bit - * integers are identical to the same data stored in UTC. Converting - * between time zones is a metadata-only operation and does not change the - * underlying values - * - * @param flatbuffers.Encoding= optionalEncoding - * @returns string|Uint8Array|null - */ - timezone(): string | null; - timezone(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null; - timezone(optionalEncoding?: any): string | Uint8Array | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; - } - - /** - * @param flatbuffers.Builder builder - */ - static startTimestamp(builder: flatbuffers.Builder) { - builder.startObject(2); - } - - /** - * @param flatbuffers.Builder builder - * @param TimeUnit unit - */ - static addUnit(builder: flatbuffers.Builder, unit: TimeUnit) { - builder.addFieldInt16(0, unit, TimeUnit.SECOND); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset timezoneOffset - */ - static addTimezone(builder: flatbuffers.Builder, timezoneOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, timezoneOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endTimestamp(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createTimestamp(builder: flatbuffers.Builder, unit: TimeUnit, timezoneOffset: flatbuffers.Offset): flatbuffers.Offset { - Timestamp.startTimestamp(builder); - Timestamp.addUnit(builder, unit); - Timestamp.addTimezone(builder, timezoneOffset); - return Timestamp.endTimestamp(builder); - } -} -/** - * @constructor - */ -export class 
Interval { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Interval - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Interval { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Interval= obj - * @returns Interval - */ - static getRootAsInterval(bb: flatbuffers.ByteBuffer, obj?: Interval): Interval { - return (obj || new Interval()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Interval= obj - * @returns Interval - */ - static getSizePrefixedRootAsInterval(bb: flatbuffers.ByteBuffer, obj?: Interval): Interval { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Interval()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns IntervalUnit - */ - unit(): IntervalUnit { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? 
/** */ (this.bb!.readInt16(this.bb_pos + offset)) : IntervalUnit.YEAR_MONTH; - } - - /** - * @param flatbuffers.Builder builder - */ - static startInterval(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param IntervalUnit unit - */ - static addUnit(builder: flatbuffers.Builder, unit: IntervalUnit) { - builder.addFieldInt16(0, unit, IntervalUnit.YEAR_MONTH); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endInterval(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createInterval(builder: flatbuffers.Builder, unit: IntervalUnit): flatbuffers.Offset { - Interval.startInterval(builder); - Interval.addUnit(builder, unit); - return Interval.endInterval(builder); - } -} -/** - * @constructor - */ -export class Duration { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Duration - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Duration { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Duration= obj - * @returns Duration - */ - static getRootAsDuration(bb: flatbuffers.ByteBuffer, obj?: Duration): Duration { - return (obj || new Duration()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Duration= obj - * @returns Duration - */ - static getSizePrefixedRootAsDuration(bb: flatbuffers.ByteBuffer, obj?: Duration): Duration { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Duration()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @returns TimeUnit - */ - unit(): TimeUnit { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? 
/** */ (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.MILLISECOND; - } - - /** - * @param flatbuffers.Builder builder - */ - static startDuration(builder: flatbuffers.Builder) { - builder.startObject(1); - } - - /** - * @param flatbuffers.Builder builder - * @param TimeUnit unit - */ - static addUnit(builder: flatbuffers.Builder, unit: TimeUnit) { - builder.addFieldInt16(0, unit, TimeUnit.MILLISECOND); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endDuration(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createDuration(builder: flatbuffers.Builder, unit: TimeUnit): flatbuffers.Offset { - Duration.startDuration(builder); - Duration.addUnit(builder, unit); - return Duration.endDuration(builder); - } -} -/** - * ---------------------------------------------------------------------- - * user defined key value pairs to add custom metadata to arrow - * key namespacing is the responsibility of the user - * - * @constructor - */ -export class KeyValue { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns KeyValue - */ - __init(i: number, bb: flatbuffers.ByteBuffer): KeyValue { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param KeyValue= obj - * @returns KeyValue - */ - static getRootAsKeyValue(bb: flatbuffers.ByteBuffer, obj?: KeyValue): KeyValue { - return (obj || new KeyValue()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param KeyValue= obj - * @returns KeyValue - */ - static getSizePrefixedRootAsKeyValue(bb: flatbuffers.ByteBuffer, obj?: KeyValue): KeyValue { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new KeyValue()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** 
- * @param flatbuffers.Encoding= optionalEncoding - * @returns string|Uint8Array|null - */ - key(): string | null; - key(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null; - key(optionalEncoding?: any): string | Uint8Array | null { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; - } - - /** - * @param flatbuffers.Encoding= optionalEncoding - * @returns string|Uint8Array|null - */ - value(): string | null; - value(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null; - value(optionalEncoding?: any): string | Uint8Array | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; - } - - /** - * @param flatbuffers.Builder builder - */ - static startKeyValue(builder: flatbuffers.Builder) { - builder.startObject(2); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset keyOffset - */ - static addKey(builder: flatbuffers.Builder, keyOffset: flatbuffers.Offset) { - builder.addFieldOffset(0, keyOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset valueOffset - */ - static addValue(builder: flatbuffers.Builder, valueOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, valueOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endKeyValue(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createKeyValue(builder: flatbuffers.Builder, keyOffset: flatbuffers.Offset, valueOffset: flatbuffers.Offset): flatbuffers.Offset { - KeyValue.startKeyValue(builder); - KeyValue.addKey(builder, keyOffset); - KeyValue.addValue(builder, valueOffset); - return KeyValue.endKeyValue(builder); - } -} -/** - * @constructor - */ -export class DictionaryEncoding { - bb: flatbuffers.ByteBuffer | null = 
null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns DictionaryEncoding - */ - __init(i: number, bb: flatbuffers.ByteBuffer): DictionaryEncoding { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param DictionaryEncoding= obj - * @returns DictionaryEncoding - */ - static getRootAsDictionaryEncoding(bb: flatbuffers.ByteBuffer, obj?: DictionaryEncoding): DictionaryEncoding { - return (obj || new DictionaryEncoding()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param DictionaryEncoding= obj - * @returns DictionaryEncoding - */ - static getSizePrefixedRootAsDictionaryEncoding(bb: flatbuffers.ByteBuffer, obj?: DictionaryEncoding): DictionaryEncoding { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new DictionaryEncoding()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * The known dictionary id in the application where this data is used. In - * the file or streaming formats, the dictionary ids are found in the - * DictionaryBatch messages - * - * @returns flatbuffers.Long - */ - id(): flatbuffers.Long { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); - } - - /** - * The dictionary indices are constrained to be non-negative integers. If - * this field is null, the indices must be signed int32. To maximize - * cross-language compatibility and performance, implementations are - * recommended to prefer signed integer types over unsigned integer types - * and to avoid uint64 indices unless they are required by an application. - * - * @param Int= obj - * @returns Int|null - */ - indexType(obj?: Int): Int | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? 
(obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; - } - - /** - * By default, dictionaries are not ordered, or the order does not have - * semantic meaning. In some statistical, applications, dictionary-encoding - * is used to represent ordered categorical data, and we provide a way to - * preserve that metadata here - * - * @returns boolean - */ - isOrdered(): boolean { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; - } - - /** - * @returns DictionaryKind - */ - dictionaryKind(): DictionaryKind { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : DictionaryKind.DenseArray; - } - - /** - * @param flatbuffers.Builder builder - */ - static startDictionaryEncoding(builder: flatbuffers.Builder) { - builder.startObject(4); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long id - */ - static addId(builder: flatbuffers.Builder, id: flatbuffers.Long) { - builder.addFieldInt64(0, id, builder.createLong(0, 0)); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset indexTypeOffset - */ - static addIndexType(builder: flatbuffers.Builder, indexTypeOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, indexTypeOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param boolean isOrdered - */ - static addIsOrdered(builder: flatbuffers.Builder, isOrdered: boolean) { - builder.addFieldInt8(2, +isOrdered, +false); - } - - /** - * @param flatbuffers.Builder builder - * @param DictionaryKind dictionaryKind - */ - static addDictionaryKind(builder: flatbuffers.Builder, dictionaryKind: DictionaryKind) { - builder.addFieldInt16(3, dictionaryKind, DictionaryKind.DenseArray); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endDictionaryEncoding(builder: flatbuffers.Builder): 
flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createDictionaryEncoding(builder: flatbuffers.Builder, id: flatbuffers.Long, indexTypeOffset: flatbuffers.Offset, isOrdered: boolean, dictionaryKind: DictionaryKind): flatbuffers.Offset { - DictionaryEncoding.startDictionaryEncoding(builder); - DictionaryEncoding.addId(builder, id); - DictionaryEncoding.addIndexType(builder, indexTypeOffset); - DictionaryEncoding.addIsOrdered(builder, isOrdered); - DictionaryEncoding.addDictionaryKind(builder, dictionaryKind); - return DictionaryEncoding.endDictionaryEncoding(builder); - } -} -/** - * ---------------------------------------------------------------------- - * A field represents a named column in a record / row batch or child of a - * nested type. - * - * @constructor - */ -export class Field { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Field - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Field { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Field= obj - * @returns Field - */ - static getRootAsField(bb: flatbuffers.ByteBuffer, obj?: Field): Field { - return (obj || new Field()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Field= obj - * @returns Field - */ - static getSizePrefixedRootAsField(bb: flatbuffers.ByteBuffer, obj?: Field): Field { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Field()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * Name is not required, in i.e. 
a List - * - * @param flatbuffers.Encoding= optionalEncoding - * @returns string|Uint8Array|null - */ - name(): string | null; - name(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null; - name(optionalEncoding?: any): string | Uint8Array | null { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; - } - - /** - * Whether or not this field can contain nulls. Should be true in general. - * - * @returns boolean - */ - nullable(): boolean { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; - } - - /** - * @returns Type - */ - typeType(): Type { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? /** */ (this.bb!.readUint8(this.bb_pos + offset)) : Type.NONE; - } - - /** - * This is the type of the decoded value if the field is dictionary encoded. - * - * @param flatbuffers.Table obj - * @returns ?flatbuffers.Table - */ - type(obj: T): T | null { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; - } - - /** - * Present only if the field is dictionary encoded. - * - * @param DictionaryEncoding= obj - * @returns DictionaryEncoding|null - */ - dictionary(obj?: DictionaryEncoding): DictionaryEncoding | null { - const offset = this.bb!.__offset(this.bb_pos, 12); - return offset ? (obj || new DictionaryEncoding()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; - } - - /** - * children apply only to nested data types like Struct, List and Union. For - * primitive types children will have length 0. - * - * @param number index - * @param Field= obj - * @returns Field - */ - children(index: number, obj?: Field): Field | null { - const offset = this.bb!.__offset(this.bb_pos, 14); - return offset ? 
(obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; - } - - /** - * @returns number - */ - childrenLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 14); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * User-defined metadata - * - * @param number index - * @param KeyValue= obj - * @returns KeyValue - */ - customMetadata(index: number, obj?: KeyValue): KeyValue | null { - const offset = this.bb!.__offset(this.bb_pos, 16); - return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; - } - - /** - * @returns number - */ - customMetadataLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 16); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @param flatbuffers.Builder builder - */ - static startField(builder: flatbuffers.Builder) { - builder.startObject(7); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset nameOffset - */ - static addName(builder: flatbuffers.Builder, nameOffset: flatbuffers.Offset) { - builder.addFieldOffset(0, nameOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param boolean nullable - */ - static addNullable(builder: flatbuffers.Builder, nullable: boolean) { - builder.addFieldInt8(1, +nullable, +false); - } - - /** - * @param flatbuffers.Builder builder - * @param Type typeType - */ - static addTypeType(builder: flatbuffers.Builder, typeType: Type) { - builder.addFieldInt8(2, typeType, Type.NONE); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset typeOffset - */ - static addType(builder: flatbuffers.Builder, typeOffset: flatbuffers.Offset) { - builder.addFieldOffset(3, typeOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset dictionaryOffset - */ - static addDictionary(builder: 
flatbuffers.Builder, dictionaryOffset: flatbuffers.Offset) { - builder.addFieldOffset(4, dictionaryOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset childrenOffset - */ - static addChildren(builder: flatbuffers.Builder, childrenOffset: flatbuffers.Offset) { - builder.addFieldOffset(5, childrenOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. data - * @returns flatbuffers.Offset - */ - static createChildrenVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startChildrenVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset customMetadataOffset - */ - static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) { - builder.addFieldOffset(6, customMetadataOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. 
data - * @returns flatbuffers.Offset - */ - static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endField(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - static createField(builder: flatbuffers.Builder, nameOffset: flatbuffers.Offset, nullable: boolean, typeType: Type, typeOffset: flatbuffers.Offset, dictionaryOffset: flatbuffers.Offset, childrenOffset: flatbuffers.Offset, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset { - Field.startField(builder); - Field.addName(builder, nameOffset); - Field.addNullable(builder, nullable); - Field.addTypeType(builder, typeType); - Field.addType(builder, typeOffset); - Field.addDictionary(builder, dictionaryOffset); - Field.addChildren(builder, childrenOffset); - Field.addCustomMetadata(builder, customMetadataOffset); - return Field.endField(builder); - } -} -/** - * ---------------------------------------------------------------------- - * A Buffer represents a single contiguous memory segment - * - * @constructor - */ -export class Buffer { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Buffer - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Buffer { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * The relative offset into the shared memory page where the bytes for this - * buffer starts - * - * @returns flatbuffers.Long - 
*/ - offset(): flatbuffers.Long { - return this.bb!.readInt64(this.bb_pos); - } - - /** - * The absolute length (in bytes) of the memory buffer. The memory is found - * from offset (inclusive) to offset + length (non-inclusive). When building - * messages using the encapsulated IPC message, padding bytes may be written - * after a buffer, but such padding bytes do not need to be accounted for in - * the size here. - * - * @returns flatbuffers.Long - */ - length(): flatbuffers.Long { - return this.bb!.readInt64(this.bb_pos + 8); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Long offset - * @param flatbuffers.Long length - * @returns flatbuffers.Offset - */ - static createBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Long, length: flatbuffers.Long): flatbuffers.Offset { - builder.prep(8, 16); - builder.writeInt64(length); - builder.writeInt64(offset); - return builder.offset(); - } - -} -/** - * ---------------------------------------------------------------------- - * A Schema describes the columns in a row batch - * - * @constructor - */ -export class Schema { - bb: flatbuffers.ByteBuffer | null = null; - - bb_pos: number = 0; - /** - * @param number i - * @param flatbuffers.ByteBuffer bb - * @returns Schema - */ - __init(i: number, bb: flatbuffers.ByteBuffer): Schema { - this.bb_pos = i; - this.bb = bb; - return this; - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Schema= obj - * @returns Schema - */ - static getRootAsSchema(bb: flatbuffers.ByteBuffer, obj?: Schema): Schema { - return (obj || new Schema()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * @param flatbuffers.ByteBuffer bb - * @param Schema= obj - * @returns Schema - */ - static getSizePrefixedRootAsSchema(bb: flatbuffers.ByteBuffer, obj?: Schema): Schema { - bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); - return (obj || new Schema()).__init(bb.readInt32(bb.position()) + bb.position(), bb); - } - - /** - * 
endianness of the buffer - * it is Little Endian by default - * if endianness doesn't match the underlying system then the vectors need to be converted - * - * @returns Endianness - */ - endianness(): Endianness { - const offset = this.bb!.__offset(this.bb_pos, 4); - return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : Endianness.Little; - } - - /** - * @param number index - * @param Field= obj - * @returns Field - */ - fields(index: number, obj?: Field): Field | null { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; - } - - /** - * @returns number - */ - fieldsLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 6); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @param number index - * @param KeyValue= obj - * @returns KeyValue - */ - customMetadata(index: number, obj?: KeyValue): KeyValue | null { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; - } - - /** - * @returns number - */ - customMetadataLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 8); - return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * Features used in the stream/file. - * - * @param number index - * @returns flatbuffers.Long - */ - features(index: number): flatbuffers.Long | null { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? /** */ (this.bb!.readInt64(this.bb!.__vector(this.bb_pos + offset) + index * 8)) : this.bb!.createLong(0, 0); - } - - /** - * @returns number - */ - featuresLength(): number { - const offset = this.bb!.__offset(this.bb_pos, 10); - return offset ? 
this.bb!.__vector_len(this.bb_pos + offset) : 0; - } - - /** - * @param flatbuffers.Builder builder - */ - static startSchema(builder: flatbuffers.Builder) { - builder.startObject(4); - } - - /** - * @param flatbuffers.Builder builder - * @param Endianness endianness - */ - static addEndianness(builder: flatbuffers.Builder, endianness: Endianness) { - builder.addFieldInt16(0, endianness, Endianness.Little); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset fieldsOffset - */ - static addFields(builder: flatbuffers.Builder, fieldsOffset: flatbuffers.Offset) { - builder.addFieldOffset(1, fieldsOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. data - * @returns flatbuffers.Offset - */ - static createFieldsVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startFieldsVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset customMetadataOffset - */ - static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) { - builder.addFieldOffset(2, customMetadataOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. 
data - * @returns flatbuffers.Offset - */ - static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset { - builder.startVector(4, data.length, 4); - for (let i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(4, numElems, 4); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset featuresOffset - */ - static addFeatures(builder: flatbuffers.Builder, featuresOffset: flatbuffers.Offset) { - builder.addFieldOffset(3, featuresOffset, 0); - } - - /** - * @param flatbuffers.Builder builder - * @param Array. data - * @returns flatbuffers.Offset - */ - static createFeaturesVector(builder: flatbuffers.Builder, data: flatbuffers.Long[]): flatbuffers.Offset { - builder.startVector(8, data.length, 8); - for (let i = data.length - 1; i >= 0; i--) { - builder.addInt64(data[i]); - } - return builder.endVector(); - } - - /** - * @param flatbuffers.Builder builder - * @param number numElems - */ - static startFeaturesVector(builder: flatbuffers.Builder, numElems: number) { - builder.startVector(8, numElems, 8); - } - - /** - * @param flatbuffers.Builder builder - * @returns flatbuffers.Offset - */ - static endSchema(builder: flatbuffers.Builder): flatbuffers.Offset { - const offset = builder.endObject(); - return offset; - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset offset - */ - static finishSchemaBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) { - builder.finish(offset); - } - - /** - * @param flatbuffers.Builder builder - * @param flatbuffers.Offset offset - */ - static finishSizePrefixedSchemaBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) { - builder.finish(offset, undefined, true); - } - - static 
createSchema(builder: flatbuffers.Builder, endianness: Endianness, fieldsOffset: flatbuffers.Offset, customMetadataOffset: flatbuffers.Offset, featuresOffset: flatbuffers.Offset): flatbuffers.Offset { - Schema.startSchema(builder); - Schema.addEndianness(builder, endianness); - Schema.addFields(builder, fieldsOffset); - Schema.addCustomMetadata(builder, customMetadataOffset); - Schema.addFeatures(builder, featuresOffset); - return Schema.endSchema(builder); - } -} diff --git a/js/src/fb/binary.ts b/js/src/fb/binary.ts new file mode 100644 index 0000000000000..55af5e8e8676c --- /dev/null +++ b/js/src/fb/binary.ts @@ -0,0 +1,39 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * Opaque binary data + */ +export class Binary { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Binary { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsBinary(bb:flatbuffers.ByteBuffer, obj?:Binary):Binary { + return (obj || new Binary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsBinary(bb:flatbuffers.ByteBuffer, obj?:Binary):Binary { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Binary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startBinary(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endBinary(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createBinary(builder:flatbuffers.Builder):flatbuffers.Offset { + Binary.startBinary(builder); + return Binary.endBinary(builder); +} +} diff --git a/js/src/fb/block.ts b/js/src/fb/block.ts new file mode 100644 index 0000000000000..590ad4efcb885 --- /dev/null +++ b/js/src/fb/block.ts @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as 
flatbuffers from 'flatbuffers'; + +export class Block { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Block { + this.bb_pos = i; + this.bb = bb; + return this; +} + +/** + * Index to the start of the RecordBlock (note this is past the Message header) + */ +offset():flatbuffers.Long { + return this.bb!.readInt64(this.bb_pos); +} + +/** + * Length of the metadata + */ +metaDataLength():number { + return this.bb!.readInt32(this.bb_pos + 8); +} + +/** + * Length of the data (this is aligned so there can be a gap between this and + * the metadata). + */ +bodyLength():flatbuffers.Long { + return this.bb!.readInt64(this.bb_pos + 16); +} + +static sizeOf():number { + return 24; +} + +static createBlock(builder:flatbuffers.Builder, offset: flatbuffers.Long, metaDataLength: number, bodyLength: flatbuffers.Long):flatbuffers.Offset { + builder.prep(8, 24); + builder.writeInt64(bodyLength); + builder.pad(4); + builder.writeInt32(metaDataLength); + builder.writeInt64(offset); + return builder.offset(); +} + +} diff --git a/js/src/fb/body-compression-method.ts b/js/src/fb/body-compression-method.ts new file mode 100644 index 0000000000000..ef6302de7e24e --- /dev/null +++ b/js/src/fb/body-compression-method.ts @@ -0,0 +1,20 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +/** + * Provided for forward compatibility in case we need to support different + * strategies for compressing the IPC message body (like whole-body + * compression rather than buffer-level) in the future + */ +export enum BodyCompressionMethod{ + /** + * Each constituent buffer is first compressed with the indicated + * compressor, and then written with the uncompressed length in the first 8 + * bytes as a 64-bit little-endian signed integer followed by the compressed + * buffer bytes (and then padding as required by the protocol). 
The + * uncompressed length may be set to -1 to indicate that the data that + * follows is not compressed, which can be useful for cases where + * compression does not yield appreciable savings. + */ + BUFFER = 0 +} + diff --git a/js/src/fb/body-compression.ts b/js/src/fb/body-compression.ts new file mode 100644 index 0000000000000..68a13972659f1 --- /dev/null +++ b/js/src/fb/body-compression.ts @@ -0,0 +1,72 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { BodyCompressionMethod } from './body-compression-method.js'; +import { CompressionType } from './compression-type.js'; + + +/** + * Optional compression for the memory buffers constituting IPC message + * bodies. Intended for use with RecordBatch but could be used for other + * message types + */ +export class BodyCompression { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):BodyCompression { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsBodyCompression(bb:flatbuffers.ByteBuffer, obj?:BodyCompression):BodyCompression { + return (obj || new BodyCompression()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsBodyCompression(bb:flatbuffers.ByteBuffer, obj?:BodyCompression):BodyCompression { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new BodyCompression()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Compressor library. + * For LZ4_FRAME, each compressed buffer must consist of a single frame. + */ +codec():CompressionType { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt8(this.bb_pos + offset) : CompressionType.LZ4_FRAME; +} + +/** + * Indicates the way the record batch body was compressed + */ +method():BodyCompressionMethod { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? 
this.bb!.readInt8(this.bb_pos + offset) : BodyCompressionMethod.BUFFER; +} + +static startBodyCompression(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addCodec(builder:flatbuffers.Builder, codec:CompressionType) { + builder.addFieldInt8(0, codec, CompressionType.LZ4_FRAME); +} + +static addMethod(builder:flatbuffers.Builder, method:BodyCompressionMethod) { + builder.addFieldInt8(1, method, BodyCompressionMethod.BUFFER); +} + +static endBodyCompression(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createBodyCompression(builder:flatbuffers.Builder, codec:CompressionType, method:BodyCompressionMethod):flatbuffers.Offset { + BodyCompression.startBodyCompression(builder); + BodyCompression.addCodec(builder, codec); + BodyCompression.addMethod(builder, method); + return BodyCompression.endBodyCompression(builder); +} +} diff --git a/js/src/fb/bool.ts b/js/src/fb/bool.ts new file mode 100644 index 0000000000000..9798bd547f1c3 --- /dev/null +++ b/js/src/fb/bool.ts @@ -0,0 +1,36 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +export class Bool { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Bool { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsBool(bb:flatbuffers.ByteBuffer, obj?:Bool):Bool { + return (obj || new Bool()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsBool(bb:flatbuffers.ByteBuffer, obj?:Bool):Bool { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Bool()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startBool(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endBool(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static 
createBool(builder:flatbuffers.Builder):flatbuffers.Offset { + Bool.startBool(builder); + return Bool.endBool(builder); +} +} diff --git a/js/src/fb/buffer.ts b/js/src/fb/buffer.ts new file mode 100644 index 0000000000000..c2dffb9898617 --- /dev/null +++ b/js/src/fb/buffer.ts @@ -0,0 +1,48 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * ---------------------------------------------------------------------- + * A Buffer represents a single contiguous memory segment + */ +export class Buffer { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Buffer { + this.bb_pos = i; + this.bb = bb; + return this; +} + +/** + * The relative offset into the shared memory page where the bytes for this + * buffer starts + */ +offset():flatbuffers.Long { + return this.bb!.readInt64(this.bb_pos); +} + +/** + * The absolute length (in bytes) of the memory buffer. The memory is found + * from offset (inclusive) to offset + length (non-inclusive). When building + * messages using the encapsulated IPC message, padding bytes may be written + * after a buffer, but such padding bytes do not need to be accounted for in + * the size here. 
+ */ +length():flatbuffers.Long { + return this.bb!.readInt64(this.bb_pos + 8); +} + +static sizeOf():number { + return 16; +} + +static createBuffer(builder:flatbuffers.Builder, offset: flatbuffers.Long, length: flatbuffers.Long):flatbuffers.Offset { + builder.prep(8, 16); + builder.writeInt64(length); + builder.writeInt64(offset); + return builder.offset(); +} + +} diff --git a/js/src/fb/compression-type.ts b/js/src/fb/compression-type.ts new file mode 100644 index 0000000000000..1acec18995809 --- /dev/null +++ b/js/src/fb/compression-type.ts @@ -0,0 +1,7 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum CompressionType{ + LZ4_FRAME = 0, + ZSTD = 1 +} + diff --git a/js/src/fb/date-unit.ts b/js/src/fb/date-unit.ts new file mode 100644 index 0000000000000..bb7ff3045db3b --- /dev/null +++ b/js/src/fb/date-unit.ts @@ -0,0 +1,7 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum DateUnit{ + DAY = 0, + MILLISECOND = 1 +} + diff --git a/js/src/fb/date.ts b/js/src/fb/date.ts new file mode 100644 index 0000000000000..fbc5292190d66 --- /dev/null +++ b/js/src/fb/date.ts @@ -0,0 +1,57 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { DateUnit } from './date-unit.js'; + + +/** + * Date is either a 32-bit or 64-bit signed integer type representing an + * elapsed time since UNIX epoch (1970-01-01), stored in either of two units: + * + * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no + * leap seconds), where the values are evenly divisible by 86400000 + * * Days (32 bits) since the UNIX epoch + */ +export class Date { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Date { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsDate(bb:flatbuffers.ByteBuffer, obj?:Date):Date { + return (obj || new 
Date()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsDate(bb:flatbuffers.ByteBuffer, obj?:Date):Date { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Date()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +unit():DateUnit { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : DateUnit.MILLISECOND; +} + +static startDate(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addUnit(builder:flatbuffers.Builder, unit:DateUnit) { + builder.addFieldInt16(0, unit, DateUnit.MILLISECOND); +} + +static endDate(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createDate(builder:flatbuffers.Builder, unit:DateUnit):flatbuffers.Offset { + Date.startDate(builder); + Date.addUnit(builder, unit); + return Date.endDate(builder); +} +} diff --git a/js/src/fb/decimal.ts b/js/src/fb/decimal.ts new file mode 100644 index 0000000000000..40a03e684fa67 --- /dev/null +++ b/js/src/fb/decimal.ts @@ -0,0 +1,82 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * Exact decimal value represented as an integer value in two's + * complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers + * are used. The representation uses the endianness indicated + * in the Schema. 
+ */ +export class Decimal { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Decimal { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsDecimal(bb:flatbuffers.ByteBuffer, obj?:Decimal):Decimal { + return (obj || new Decimal()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsDecimal(bb:flatbuffers.ByteBuffer, obj?:Decimal):Decimal { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Decimal()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Total number of decimal digits + */ +precision():number { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0; +} + +/** + * Number of digits after the decimal point "." + */ +scale():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0; +} + +/** + * Number of bits per value. The only accepted widths are 128 and 256. + * We use bitWidth for consistency with Int::bitWidth. + */ +bitWidth():number { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 128; +} + +static startDecimal(builder:flatbuffers.Builder) { + builder.startObject(3); +} + +static addPrecision(builder:flatbuffers.Builder, precision:number) { + builder.addFieldInt32(0, precision, 0); +} + +static addScale(builder:flatbuffers.Builder, scale:number) { + builder.addFieldInt32(1, scale, 0); +} + +static addBitWidth(builder:flatbuffers.Builder, bitWidth:number) { + builder.addFieldInt32(2, bitWidth, 128); +} + +static endDecimal(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createDecimal(builder:flatbuffers.Builder, precision:number, scale:number, bitWidth:number):flatbuffers.Offset { + Decimal.startDecimal(builder); + Decimal.addPrecision(builder, precision); + Decimal.addScale(builder, scale); + Decimal.addBitWidth(builder, bitWidth); + return Decimal.endDecimal(builder); +} +} diff --git a/js/src/fb/dictionary-batch.ts b/js/src/fb/dictionary-batch.ts new file mode 100644 index 0000000000000..6dcfb46679046 --- /dev/null +++ b/js/src/fb/dictionary-batch.ts @@ -0,0 +1,75 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { RecordBatch } from './record-batch.js'; + + +/** + * For sending dictionary encoding information. Any Field can be + * dictionary-encoded, but in this case none of its children may be + * dictionary-encoded. 
+ * There is one vector / column per dictionary, but that vector / column + * may be spread across multiple dictionary batches by using the isDelta + * flag + */ +export class DictionaryBatch { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):DictionaryBatch { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsDictionaryBatch(bb:flatbuffers.ByteBuffer, obj?:DictionaryBatch):DictionaryBatch { + return (obj || new DictionaryBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsDictionaryBatch(bb:flatbuffers.ByteBuffer, obj?:DictionaryBatch):DictionaryBatch { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new DictionaryBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +id():flatbuffers.Long { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); +} + +data(obj?:RecordBatch):RecordBatch|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new RecordBatch()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * If isDelta is true the values in the dictionary are to be appended to a + * dictionary with the indicated id. If isDelta is false this dictionary + * should replace the existing dictionary. + */ +isDelta():boolean { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? 
!!this.bb!.readInt8(this.bb_pos + offset) : false; +} + +static startDictionaryBatch(builder:flatbuffers.Builder) { + builder.startObject(3); +} + +static addId(builder:flatbuffers.Builder, id:flatbuffers.Long) { + builder.addFieldInt64(0, id, builder.createLong(0, 0)); +} + +static addData(builder:flatbuffers.Builder, dataOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, dataOffset, 0); +} + +static addIsDelta(builder:flatbuffers.Builder, isDelta:boolean) { + builder.addFieldInt8(2, +isDelta, +false); +} + +static endDictionaryBatch(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +} diff --git a/js/src/fb/dictionary-encoding.ts b/js/src/fb/dictionary-encoding.ts new file mode 100644 index 0000000000000..44364702acc98 --- /dev/null +++ b/js/src/fb/dictionary-encoding.ts @@ -0,0 +1,90 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { DictionaryKind } from './dictionary-kind.js'; +import { Int } from './int.js'; + + +export class DictionaryEncoding { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):DictionaryEncoding { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsDictionaryEncoding(bb:flatbuffers.ByteBuffer, obj?:DictionaryEncoding):DictionaryEncoding { + return (obj || new DictionaryEncoding()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsDictionaryEncoding(bb:flatbuffers.ByteBuffer, obj?:DictionaryEncoding):DictionaryEncoding { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new DictionaryEncoding()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * The known dictionary id in the application where this data is used. 
In + * the file or streaming formats, the dictionary ids are found in the + * DictionaryBatch messages + */ +id():flatbuffers.Long { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); +} + +/** + * The dictionary indices are constrained to be non-negative integers. If + * this field is null, the indices must be signed int32. To maximize + * cross-language compatibility and performance, implementations are + * recommended to prefer signed integer types over unsigned integer types + * and to avoid uint64 indices unless they are required by an application. + */ +indexType(obj?:Int):Int|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * By default, dictionaries are not ordered, or the order does not have + * semantic meaning. In some statistical, applications, dictionary-encoding + * is used to represent ordered categorical data, and we provide a way to + * preserve that metadata here + */ +isOrdered():boolean { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; +} + +dictionaryKind():DictionaryKind { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? 
this.bb!.readInt16(this.bb_pos + offset) : DictionaryKind.DenseArray; +} + +static startDictionaryEncoding(builder:flatbuffers.Builder) { + builder.startObject(4); +} + +static addId(builder:flatbuffers.Builder, id:flatbuffers.Long) { + builder.addFieldInt64(0, id, builder.createLong(0, 0)); +} + +static addIndexType(builder:flatbuffers.Builder, indexTypeOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, indexTypeOffset, 0); +} + +static addIsOrdered(builder:flatbuffers.Builder, isOrdered:boolean) { + builder.addFieldInt8(2, +isOrdered, +false); +} + +static addDictionaryKind(builder:flatbuffers.Builder, dictionaryKind:DictionaryKind) { + builder.addFieldInt16(3, dictionaryKind, DictionaryKind.DenseArray); +} + +static endDictionaryEncoding(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +} diff --git a/js/src/fb/dictionary-kind.ts b/js/src/fb/dictionary-kind.ts new file mode 100644 index 0000000000000..90d20a9b38366 --- /dev/null +++ b/js/src/fb/dictionary-kind.ts @@ -0,0 +1,13 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +/** + * ---------------------------------------------------------------------- + * Dictionary encoding metadata + * Maintained for forwards compatibility, in the future + * Dictionaries might be explicit maps between integers and values + * allowing for non-contiguous index values + */ +export enum DictionaryKind{ + DenseArray = 0 +} + diff --git a/js/src/fb/duration.ts b/js/src/fb/duration.ts new file mode 100644 index 0000000000000..547a7bc5e26b4 --- /dev/null +++ b/js/src/fb/duration.ts @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { TimeUnit } from './time-unit.js'; + + +export class Duration { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Duration { + this.bb_pos = i; + this.bb = bb; + return 
this; +} + +static getRootAsDuration(bb:flatbuffers.ByteBuffer, obj?:Duration):Duration { + return (obj || new Duration()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsDuration(bb:flatbuffers.ByteBuffer, obj?:Duration):Duration { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Duration()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +unit():TimeUnit { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : TimeUnit.MILLISECOND; +} + +static startDuration(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addUnit(builder:flatbuffers.Builder, unit:TimeUnit) { + builder.addFieldInt16(0, unit, TimeUnit.MILLISECOND); +} + +static endDuration(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createDuration(builder:flatbuffers.Builder, unit:TimeUnit):flatbuffers.Offset { + Duration.startDuration(builder); + Duration.addUnit(builder, unit); + return Duration.endDuration(builder); +} +} diff --git a/js/src/fb/endianness.ts b/js/src/fb/endianness.ts new file mode 100644 index 0000000000000..55f7e4b8ce022 --- /dev/null +++ b/js/src/fb/endianness.ts @@ -0,0 +1,11 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +/** + * ---------------------------------------------------------------------- + * Endianness of the platform producing the data + */ +export enum Endianness{ + Little = 0, + Big = 1 +} + diff --git a/js/src/fb/feature.ts b/js/src/fb/feature.ts new file mode 100644 index 0000000000000..41f307562f868 --- /dev/null +++ b/js/src/fb/feature.ts @@ -0,0 +1,41 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +/** + * Represents Arrow Features that might not have full support + * within implementations. This is intended to be used in + * two scenarios: + * 1. 
A mechanism for readers of Arrow Streams + * and files to understand that the stream or file makes + * use of a feature that isn't supported or unknown to + * the implementation (and therefore can meet the Arrow + * forward compatibility guarantees). + * 2. A means of negotiating between a client and server + * what features a stream is allowed to use. The enums + * values here are intented to represent higher level + * features, additional details maybe negotiated + * with key-value pairs specific to the protocol. + * + * Enums added to this list should be assigned power-of-two values + * to facilitate exchanging and comparing bitmaps for supported + * features. + */ +export enum Feature{ + /** + * Needed to make flatbuffers happy. + */ + UNUSED = 0, + + /** + * The stream makes use of multiple full dictionaries with the + * same ID and assumes clients implement dictionary replacement + * correctly. + */ + DICTIONARY_REPLACEMENT = 1, + + /** + * The stream makes use of compressed bodies as described + * in Message.fbs. + */ + COMPRESSED_BODY = 2 +} + diff --git a/js/src/fb/field-node.ts b/js/src/fb/field-node.ts new file mode 100644 index 0000000000000..7e83a7b9c177c --- /dev/null +++ b/js/src/fb/field-node.ts @@ -0,0 +1,53 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * ---------------------------------------------------------------------- + * Data structures for describing a table row batch (a collection of + * equal-length Arrow arrays) + * Metadata about a field at some level of a nested type tree (but not + * its children). 
+ * + * For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` + * would have {length: 5, null_count: 2} for its List node, and {length: 6, + * null_count: 0} for its Int16 node, as separate FieldNode structs + */ +export class FieldNode { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):FieldNode { + this.bb_pos = i; + this.bb = bb; + return this; +} + +/** + * The number of value slots in the Arrow array at this level of a nested + * tree + */ +length():flatbuffers.Long { + return this.bb!.readInt64(this.bb_pos); +} + +/** + * The number of observed nulls. Fields with null_count == 0 may choose not + * to write their physical validity bitmap out as a materialized buffer, + * instead setting the length of the bitmap buffer to 0. + */ +nullCount():flatbuffers.Long { + return this.bb!.readInt64(this.bb_pos + 8); +} + +static sizeOf():number { + return 16; +} + +static createFieldNode(builder:flatbuffers.Builder, length: flatbuffers.Long, null_count: flatbuffers.Long):flatbuffers.Offset { + builder.prep(8, 16); + builder.writeInt64(null_count); + builder.writeInt64(length); + return builder.offset(); +} + +} diff --git a/js/src/fb/field.ts b/js/src/fb/field.ts new file mode 100644 index 0000000000000..21215415fcbb2 --- /dev/null +++ b/js/src/fb/field.ts @@ -0,0 +1,161 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { DictionaryEncoding } from './dictionary-encoding.js'; +import { KeyValue } from './key-value.js'; +import { Type } from './type.js'; + + +/** + * ---------------------------------------------------------------------- + * A field represents a named column in a record / row batch or child of a + * nested type. 
+ */ +export class Field { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Field { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsField(bb:flatbuffers.ByteBuffer, obj?:Field):Field { + return (obj || new Field()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsField(bb:flatbuffers.ByteBuffer, obj?:Field):Field { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Field()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Name is not required, in i.e. a List + */ +name():string|null +name(optionalEncoding:flatbuffers.Encoding):string|Uint8Array|null +name(optionalEncoding?:any):string|Uint8Array|null { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; +} + +/** + * Whether or not this field can contain nulls. Should be true in general. + */ +nullable():boolean { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; +} + +typeType():Type { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.readUint8(this.bb_pos + offset) : Type.NONE; +} + +/** + * This is the type of the decoded value if the field is dictionary encoded. + */ +// @ts-ignore +type(obj:any):any|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; +} + +/** + * Present only if the field is dictionary encoded. + */ +dictionary(obj?:DictionaryEncoding):DictionaryEncoding|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? (obj || new DictionaryEncoding()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * children apply only to nested data types like Struct, List and Union. 
For + * primitive types children will have length 0. + */ +children(index: number, obj?:Field):Field|null { + const offset = this.bb!.__offset(this.bb_pos, 14); + return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +childrenLength():number { + const offset = this.bb!.__offset(this.bb_pos, 14); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * User-defined metadata + */ +customMetadata(index: number, obj?:KeyValue):KeyValue|null { + const offset = this.bb!.__offset(this.bb_pos, 16); + return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +customMetadataLength():number { + const offset = this.bb!.__offset(this.bb_pos, 16); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +static startField(builder:flatbuffers.Builder) { + builder.startObject(7); +} + +static addName(builder:flatbuffers.Builder, nameOffset:flatbuffers.Offset) { + builder.addFieldOffset(0, nameOffset, 0); +} + +static addNullable(builder:flatbuffers.Builder, nullable:boolean) { + builder.addFieldInt8(1, +nullable, +false); +} + +static addTypeType(builder:flatbuffers.Builder, typeType:Type) { + builder.addFieldInt8(2, typeType, Type.NONE); +} + +static addType(builder:flatbuffers.Builder, typeOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, typeOffset, 0); +} + +static addDictionary(builder:flatbuffers.Builder, dictionaryOffset:flatbuffers.Offset) { + builder.addFieldOffset(4, dictionaryOffset, 0); +} + +static addChildren(builder:flatbuffers.Builder, childrenOffset:flatbuffers.Offset) { + builder.addFieldOffset(5, childrenOffset, 0); +} + +static createChildrenVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } 
+ return builder.endVector(); +} + +static startChildrenVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static addCustomMetadata(builder:flatbuffers.Builder, customMetadataOffset:flatbuffers.Offset) { + builder.addFieldOffset(6, customMetadataOffset, 0); +} + +static createCustomMetadataVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startCustomMetadataVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static endField(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +} diff --git a/js/src/fb/fixed-size-binary.ts b/js/src/fb/fixed-size-binary.ts new file mode 100644 index 0000000000000..897474e4e52dc --- /dev/null +++ b/js/src/fb/fixed-size-binary.ts @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +export class FixedSizeBinary { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):FixedSizeBinary { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsFixedSizeBinary(bb:flatbuffers.ByteBuffer, obj?:FixedSizeBinary):FixedSizeBinary { + return (obj || new FixedSizeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsFixedSizeBinary(bb:flatbuffers.ByteBuffer, obj?:FixedSizeBinary):FixedSizeBinary { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new FixedSizeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Number of bytes per value + */ +byteWidth():number { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 0; +} + +static startFixedSizeBinary(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addByteWidth(builder:flatbuffers.Builder, byteWidth:number) { + builder.addFieldInt32(0, byteWidth, 0); +} + +static endFixedSizeBinary(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createFixedSizeBinary(builder:flatbuffers.Builder, byteWidth:number):flatbuffers.Offset { + FixedSizeBinary.startFixedSizeBinary(builder); + FixedSizeBinary.addByteWidth(builder, byteWidth); + return FixedSizeBinary.endFixedSizeBinary(builder); +} +} diff --git a/js/src/fb/fixed-size-list.ts b/js/src/fb/fixed-size-list.ts new file mode 100644 index 0000000000000..8e369349a5b2b --- /dev/null +++ b/js/src/fb/fixed-size-list.ts @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +export class FixedSizeList { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):FixedSizeList { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsFixedSizeList(bb:flatbuffers.ByteBuffer, obj?:FixedSizeList):FixedSizeList { + return (obj || new FixedSizeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsFixedSizeList(bb:flatbuffers.ByteBuffer, obj?:FixedSizeList):FixedSizeList { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new FixedSizeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Number of list items per value + */ +listSize():number { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? 
this.bb!.readInt32(this.bb_pos + offset) : 0; +} + +static startFixedSizeList(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addListSize(builder:flatbuffers.Builder, listSize:number) { + builder.addFieldInt32(0, listSize, 0); +} + +static endFixedSizeList(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createFixedSizeList(builder:flatbuffers.Builder, listSize:number):flatbuffers.Offset { + FixedSizeList.startFixedSizeList(builder); + FixedSizeList.addListSize(builder, listSize); + return FixedSizeList.endFixedSizeList(builder); +} +} diff --git a/js/src/fb/floating-point.ts b/js/src/fb/floating-point.ts new file mode 100644 index 0000000000000..60532aa2bee6e --- /dev/null +++ b/js/src/fb/floating-point.ts @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Precision } from './precision.js'; + + +export class FloatingPoint { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):FloatingPoint { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsFloatingPoint(bb:flatbuffers.ByteBuffer, obj?:FloatingPoint):FloatingPoint { + return (obj || new FloatingPoint()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsFloatingPoint(bb:flatbuffers.ByteBuffer, obj?:FloatingPoint):FloatingPoint { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new FloatingPoint()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +precision():Precision { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? 
this.bb!.readInt16(this.bb_pos + offset) : Precision.HALF; +} + +static startFloatingPoint(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addPrecision(builder:flatbuffers.Builder, precision:Precision) { + builder.addFieldInt16(0, precision, Precision.HALF); +} + +static endFloatingPoint(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createFloatingPoint(builder:flatbuffers.Builder, precision:Precision):flatbuffers.Offset { + FloatingPoint.startFloatingPoint(builder); + FloatingPoint.addPrecision(builder, precision); + return FloatingPoint.endFloatingPoint(builder); +} +} diff --git a/js/src/fb/footer.ts b/js/src/fb/footer.ts new file mode 100644 index 0000000000000..f5ac80df14016 --- /dev/null +++ b/js/src/fb/footer.ts @@ -0,0 +1,134 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Block } from './block.js'; +import { KeyValue } from './key-value.js'; +import { MetadataVersion } from './metadata-version.js'; +import { Schema } from './schema.js'; + + +/** + * ---------------------------------------------------------------------- + * Arrow File metadata + * + */ +export class Footer { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Footer { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsFooter(bb:flatbuffers.ByteBuffer, obj?:Footer):Footer { + return (obj || new Footer()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsFooter(bb:flatbuffers.ByteBuffer, obj?:Footer):Footer { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Footer()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +version():MetadataVersion { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? 
this.bb!.readInt16(this.bb_pos + offset) : MetadataVersion.V1; +} + +schema(obj?:Schema):Schema|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new Schema()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +dictionaries(index: number, obj?:Block):Block|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null; +} + +dictionariesLength():number { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +recordBatches(index: number, obj?:Block):Block|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null; +} + +recordBatchesLength():number { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * User-defined metadata + */ +customMetadata(index: number, obj?:KeyValue):KeyValue|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +customMetadataLength():number { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? 
this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +static startFooter(builder:flatbuffers.Builder) { + builder.startObject(5); +} + +static addVersion(builder:flatbuffers.Builder, version:MetadataVersion) { + builder.addFieldInt16(0, version, MetadataVersion.V1); +} + +static addSchema(builder:flatbuffers.Builder, schemaOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, schemaOffset, 0); +} + +static addDictionaries(builder:flatbuffers.Builder, dictionariesOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, dictionariesOffset, 0); +} + +static startDictionariesVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(24, numElems, 8); +} + +static addRecordBatches(builder:flatbuffers.Builder, recordBatchesOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, recordBatchesOffset, 0); +} + +static startRecordBatchesVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(24, numElems, 8); +} + +static addCustomMetadata(builder:flatbuffers.Builder, customMetadataOffset:flatbuffers.Offset) { + builder.addFieldOffset(4, customMetadataOffset, 0); +} + +static createCustomMetadataVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startCustomMetadataVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static endFooter(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static finishFooterBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset); +} + +static finishSizePrefixedFooterBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset, undefined, true); +} + +} diff --git a/js/src/fb/int.ts b/js/src/fb/int.ts new file mode 100644 index 
0000000000000..52b00c982b803 --- /dev/null +++ b/js/src/fb/int.ts @@ -0,0 +1,56 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +export class Int { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Int { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsInt(bb:flatbuffers.ByteBuffer, obj?:Int):Int { + return (obj || new Int()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsInt(bb:flatbuffers.ByteBuffer, obj?:Int):Int { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Int()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +bitWidth():number { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0; +} + +isSigned():boolean { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? 
!!this.bb!.readInt8(this.bb_pos + offset) : false; +} + +static startInt(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addBitWidth(builder:flatbuffers.Builder, bitWidth:number) { + builder.addFieldInt32(0, bitWidth, 0); +} + +static addIsSigned(builder:flatbuffers.Builder, isSigned:boolean) { + builder.addFieldInt8(1, +isSigned, +false); +} + +static endInt(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createInt(builder:flatbuffers.Builder, bitWidth:number, isSigned:boolean):flatbuffers.Offset { + Int.startInt(builder); + Int.addBitWidth(builder, bitWidth); + Int.addIsSigned(builder, isSigned); + return Int.endInt(builder); +} +} diff --git a/js/src/fb/interval-unit.ts b/js/src/fb/interval-unit.ts new file mode 100644 index 0000000000000..844a213701fb9 --- /dev/null +++ b/js/src/fb/interval-unit.ts @@ -0,0 +1,8 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum IntervalUnit{ + YEAR_MONTH = 0, + DAY_TIME = 1, + MONTH_DAY_NANO = 2 +} + diff --git a/js/src/fb/interval.ts b/js/src/fb/interval.ts new file mode 100644 index 0000000000000..a98d586a5ebe7 --- /dev/null +++ b/js/src/fb/interval.ts @@ -0,0 +1,49 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { IntervalUnit } from './interval-unit.js'; + + +export class Interval { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Interval { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsInterval(bb:flatbuffers.ByteBuffer, obj?:Interval):Interval { + return (obj || new Interval()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsInterval(bb:flatbuffers.ByteBuffer, obj?:Interval):Interval { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new 
Interval()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +unit():IntervalUnit { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : IntervalUnit.YEAR_MONTH; +} + +static startInterval(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addUnit(builder:flatbuffers.Builder, unit:IntervalUnit) { + builder.addFieldInt16(0, unit, IntervalUnit.YEAR_MONTH); +} + +static endInterval(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createInterval(builder:flatbuffers.Builder, unit:IntervalUnit):flatbuffers.Offset { + Interval.startInterval(builder); + Interval.addUnit(builder, unit); + return Interval.endInterval(builder); +} +} diff --git a/js/src/fb/key-value.ts b/js/src/fb/key-value.ts new file mode 100644 index 0000000000000..e67a6af908449 --- /dev/null +++ b/js/src/fb/key-value.ts @@ -0,0 +1,65 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * ---------------------------------------------------------------------- + * user defined key value pairs to add custom metadata to arrow + * key namespacing is the responsibility of the user + */ +export class KeyValue { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):KeyValue { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsKeyValue(bb:flatbuffers.ByteBuffer, obj?:KeyValue):KeyValue { + return (obj || new KeyValue()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsKeyValue(bb:flatbuffers.ByteBuffer, obj?:KeyValue):KeyValue { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new KeyValue()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +key():string|null +key(optionalEncoding:flatbuffers.Encoding):string|Uint8Array|null 
+key(optionalEncoding?:any):string|Uint8Array|null { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; +} + +value():string|null +value(optionalEncoding:flatbuffers.Encoding):string|Uint8Array|null +value(optionalEncoding?:any):string|Uint8Array|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; +} + +static startKeyValue(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addKey(builder:flatbuffers.Builder, keyOffset:flatbuffers.Offset) { + builder.addFieldOffset(0, keyOffset, 0); +} + +static addValue(builder:flatbuffers.Builder, valueOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, valueOffset, 0); +} + +static endKeyValue(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createKeyValue(builder:flatbuffers.Builder, keyOffset:flatbuffers.Offset, valueOffset:flatbuffers.Offset):flatbuffers.Offset { + KeyValue.startKeyValue(builder); + KeyValue.addKey(builder, keyOffset); + KeyValue.addValue(builder, valueOffset); + return KeyValue.endKeyValue(builder); +} +} diff --git a/js/src/fb/large-binary.ts b/js/src/fb/large-binary.ts new file mode 100644 index 0000000000000..50003b26cd6fd --- /dev/null +++ b/js/src/fb/large-binary.ts @@ -0,0 +1,40 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * Same as Binary, but with 64-bit offsets, allowing to represent + * extremely large data values. 
+ */ +export class LargeBinary { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):LargeBinary { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsLargeBinary(bb:flatbuffers.ByteBuffer, obj?:LargeBinary):LargeBinary { + return (obj || new LargeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsLargeBinary(bb:flatbuffers.ByteBuffer, obj?:LargeBinary):LargeBinary { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new LargeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startLargeBinary(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endLargeBinary(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createLargeBinary(builder:flatbuffers.Builder):flatbuffers.Offset { + LargeBinary.startLargeBinary(builder); + return LargeBinary.endLargeBinary(builder); +} +} diff --git a/js/src/fb/large-list.ts b/js/src/fb/large-list.ts new file mode 100644 index 0000000000000..2a39d585a9ff6 --- /dev/null +++ b/js/src/fb/large-list.ts @@ -0,0 +1,40 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * Same as List, but with 64-bit offsets, allowing to represent + * extremely large data values. 
+ */ +export class LargeList { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):LargeList { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsLargeList(bb:flatbuffers.ByteBuffer, obj?:LargeList):LargeList { + return (obj || new LargeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsLargeList(bb:flatbuffers.ByteBuffer, obj?:LargeList):LargeList { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new LargeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startLargeList(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endLargeList(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createLargeList(builder:flatbuffers.Builder):flatbuffers.Offset { + LargeList.startLargeList(builder); + return LargeList.endLargeList(builder); +} +} diff --git a/js/src/fb/large-utf8.ts b/js/src/fb/large-utf8.ts new file mode 100644 index 0000000000000..974acc18c010f --- /dev/null +++ b/js/src/fb/large-utf8.ts @@ -0,0 +1,40 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * Same as Utf8, but with 64-bit offsets, allowing to represent + * extremely large data values. 
+ */ +export class LargeUtf8 { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):LargeUtf8 { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsLargeUtf8(bb:flatbuffers.ByteBuffer, obj?:LargeUtf8):LargeUtf8 { + return (obj || new LargeUtf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsLargeUtf8(bb:flatbuffers.ByteBuffer, obj?:LargeUtf8):LargeUtf8 { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new LargeUtf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startLargeUtf8(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endLargeUtf8(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createLargeUtf8(builder:flatbuffers.Builder):flatbuffers.Offset { + LargeUtf8.startLargeUtf8(builder); + return LargeUtf8.endLargeUtf8(builder); +} +} diff --git a/js/src/fb/list.ts b/js/src/fb/list.ts new file mode 100644 index 0000000000000..872d0b14def4f --- /dev/null +++ b/js/src/fb/list.ts @@ -0,0 +1,36 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +export class List { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):List { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsList(bb:flatbuffers.ByteBuffer, obj?:List):List { + return (obj || new List()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsList(bb:flatbuffers.ByteBuffer, obj?:List):List { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new List()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startList(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static 
endList(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createList(builder:flatbuffers.Builder):flatbuffers.Offset { + List.startList(builder); + return List.endList(builder); +} +} diff --git a/js/src/fb/map.ts b/js/src/fb/map.ts new file mode 100644 index 0000000000000..b47b51dbbb1ca --- /dev/null +++ b/js/src/fb/map.ts @@ -0,0 +1,76 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * A Map is a logical nested type that is represented as + * + * List> + * + * In this layout, the keys and values are each respectively contiguous. We do + * not constrain the key and value types, so the application is responsible + * for ensuring that the keys are hashable and unique. Whether the keys are sorted + * may be set in the metadata for this field. + * + * In a field with Map type, the field has a child Struct field, which then + * has two children: key type and the second the value type. The names of the + * child fields may be respectively "entries", "key", and "value", but this is + * not enforced. + * + * Map + * ```text + * - child[0] entries: Struct + * - child[0] key: K + * - child[1] value: V + * ``` + * Neither the "entries" field nor the "key" field may be nullable. + * + * The metadata is structured so that Arrow systems without special handling + * for Map can make Map an alias for List. The "layout" attribute for the Map + * field must have the same contents as a List. 
+ */ +export class Map { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Map { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsMap(bb:flatbuffers.ByteBuffer, obj?:Map):Map { + return (obj || new Map()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsMap(bb:flatbuffers.ByteBuffer, obj?:Map):Map { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Map()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Set to true if the keys within each value are sorted + */ +keysSorted():boolean { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false; +} + +static startMap(builder:flatbuffers.Builder) { + builder.startObject(1); +} + +static addKeysSorted(builder:flatbuffers.Builder, keysSorted:boolean) { + builder.addFieldInt8(0, +keysSorted, +false); +} + +static endMap(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createMap(builder:flatbuffers.Builder, keysSorted:boolean):flatbuffers.Offset { + Map.startMap(builder); + Map.addKeysSorted(builder, keysSorted); + return Map.endMap(builder); +} +} diff --git a/js/src/fb/message-header.ts b/js/src/fb/message-header.ts new file mode 100644 index 0000000000000..9357013d78e6e --- /dev/null +++ b/js/src/fb/message-header.ts @@ -0,0 +1,59 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import { DictionaryBatch } from './dictionary-batch.js'; +import { RecordBatch } from './record-batch.js'; +import { Schema } from './schema.js'; +import { SparseTensor } from './sparse-tensor.js'; +import { Tensor } from './tensor.js'; + + +/** + * ---------------------------------------------------------------------- + * The root Message type + * This union enables us to easily send different message types without + 
* redundant storage, and in the future we can easily add new message types. + * + * Arrow implementations do not need to implement all of the message types, + * which may include experimental metadata types. For maximum compatibility, + * it is best to send data using RecordBatch + */ +export enum MessageHeader{ + NONE = 0, + Schema = 1, + DictionaryBatch = 2, + RecordBatch = 3, + Tensor = 4, + SparseTensor = 5 +} + +export function unionToMessageHeader( + type: MessageHeader, + accessor: (obj:DictionaryBatch|RecordBatch|Schema|SparseTensor|Tensor) => DictionaryBatch|RecordBatch|Schema|SparseTensor|Tensor|null +): DictionaryBatch|RecordBatch|Schema|SparseTensor|Tensor|null { + switch(MessageHeader[type]) { + case 'NONE': return null; + case 'Schema': return accessor(new Schema())! as Schema; + case 'DictionaryBatch': return accessor(new DictionaryBatch())! as DictionaryBatch; + case 'RecordBatch': return accessor(new RecordBatch())! as RecordBatch; + case 'Tensor': return accessor(new Tensor())! as Tensor; + case 'SparseTensor': return accessor(new SparseTensor())! as SparseTensor; + default: return null; + } +} + +export function unionListToMessageHeader( + type: MessageHeader, + accessor: (index: number, obj:DictionaryBatch|RecordBatch|Schema|SparseTensor|Tensor) => DictionaryBatch|RecordBatch|Schema|SparseTensor|Tensor|null, + index: number +): DictionaryBatch|RecordBatch|Schema|SparseTensor|Tensor|null { + switch(MessageHeader[type]) { + case 'NONE': return null; + case 'Schema': return accessor(index, new Schema())! as Schema; + case 'DictionaryBatch': return accessor(index, new DictionaryBatch())! as DictionaryBatch; + case 'RecordBatch': return accessor(index, new RecordBatch())! as RecordBatch; + case 'Tensor': return accessor(index, new Tensor())! as Tensor; + case 'SparseTensor': return accessor(index, new SparseTensor())! 
as SparseTensor; + default: return null; + } +} + diff --git a/js/src/fb/message.ts b/js/src/fb/message.ts new file mode 100644 index 0000000000000..7b13c1d387f53 --- /dev/null +++ b/js/src/fb/message.ts @@ -0,0 +1,117 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { KeyValue } from './key-value.js'; +import { MessageHeader } from './message-header.js'; +import { MetadataVersion } from './metadata-version.js'; + + +export class Message { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Message { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsMessage(bb:flatbuffers.ByteBuffer, obj?:Message):Message { + return (obj || new Message()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsMessage(bb:flatbuffers.ByteBuffer, obj?:Message):Message { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Message()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +version():MetadataVersion { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : MetadataVersion.V1; +} + +headerType():MessageHeader { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.readUint8(this.bb_pos + offset) : MessageHeader.NONE; +} + +// @ts-ignore +header(obj:any):any|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; +} + +bodyLength():flatbuffers.Long { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); +} + +customMetadata(index: number, obj?:KeyValue):KeyValue|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? 
(obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +customMetadataLength():number { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +static startMessage(builder:flatbuffers.Builder) { + builder.startObject(5); +} + +static addVersion(builder:flatbuffers.Builder, version:MetadataVersion) { + builder.addFieldInt16(0, version, MetadataVersion.V1); +} + +static addHeaderType(builder:flatbuffers.Builder, headerType:MessageHeader) { + builder.addFieldInt8(1, headerType, MessageHeader.NONE); +} + +static addHeader(builder:flatbuffers.Builder, headerOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, headerOffset, 0); +} + +static addBodyLength(builder:flatbuffers.Builder, bodyLength:flatbuffers.Long) { + builder.addFieldInt64(3, bodyLength, builder.createLong(0, 0)); +} + +static addCustomMetadata(builder:flatbuffers.Builder, customMetadataOffset:flatbuffers.Offset) { + builder.addFieldOffset(4, customMetadataOffset, 0); +} + +static createCustomMetadataVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startCustomMetadataVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static endMessage(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static finishMessageBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset); +} + +static finishSizePrefixedMessageBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset, undefined, true); +} + +static createMessage(builder:flatbuffers.Builder, version:MetadataVersion, headerType:MessageHeader, 
headerOffset:flatbuffers.Offset, bodyLength:flatbuffers.Long, customMetadataOffset:flatbuffers.Offset):flatbuffers.Offset { + Message.startMessage(builder); + Message.addVersion(builder, version); + Message.addHeaderType(builder, headerType); + Message.addHeader(builder, headerOffset); + Message.addBodyLength(builder, bodyLength); + Message.addCustomMetadata(builder, customMetadataOffset); + return Message.endMessage(builder); +} +} diff --git a/js/src/fb/metadata-version.ts b/js/src/fb/metadata-version.ts new file mode 100644 index 0000000000000..18d4bc80f0f28 --- /dev/null +++ b/js/src/fb/metadata-version.ts @@ -0,0 +1,42 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +/** + * Logical types, vector layouts, and schemas + * Format Version History. + * Version 1.0 - Forward and backwards compatibility guaranteed. + * Version 1.1 - Add Decimal256 (No format release). + * Version 1.2 (Pending)- Add Interval MONTH_DAY_NANO + */ +export enum MetadataVersion{ + /** + * 0.1.0 (October 2016). + */ + V1 = 0, + + /** + * 0.2.0 (February 2017). Non-backwards compatible with V1. + */ + V2 = 1, + + /** + * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + */ + V3 = 2, + + /** + * >= 0.8.0 (December 2017). Non-backwards compatible with V3. + */ + V4 = 3, + + /** + * >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + * metadata and IPC messages). Implementations are recommended to provide a + * V4 compatibility mode with V5 format changes disabled. + * + * Incompatible changes between V4 and V5: + * - Union buffer layout has changed. In V5, Unions don't have a validity + * bitmap buffer. 
+ */ + V5 = 4 +} + diff --git a/js/src/fb/null.ts b/js/src/fb/null.ts new file mode 100644 index 0000000000000..82403dd6ef4b8 --- /dev/null +++ b/js/src/fb/null.ts @@ -0,0 +1,39 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * These are stored in the flatbuffer in the Type union below + */ +export class Null { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Null { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsNull(bb:flatbuffers.ByteBuffer, obj?:Null):Null { + return (obj || new Null()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsNull(bb:flatbuffers.ByteBuffer, obj?:Null):Null { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Null()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startNull(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endNull(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createNull(builder:flatbuffers.Builder):flatbuffers.Offset { + Null.startNull(builder); + return Null.endNull(builder); +} +} diff --git a/js/src/fb/precision.ts b/js/src/fb/precision.ts new file mode 100644 index 0000000000000..76d3f8d191822 --- /dev/null +++ b/js/src/fb/precision.ts @@ -0,0 +1,8 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum Precision{ + HALF = 0, + SINGLE = 1, + DOUBLE = 2 +} + diff --git a/js/src/fb/record-batch.ts b/js/src/fb/record-batch.ts new file mode 100644 index 0000000000000..e2666cd161f25 --- /dev/null +++ b/js/src/fb/record-batch.ts @@ -0,0 +1,114 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { BodyCompression } from './body-compression.js'; +import { Buffer } from 
'./buffer.js'; +import { FieldNode } from './field-node.js'; + + +/** + * A data header describing the shared memory layout of a "record" or "row" + * batch. Some systems call this a "row batch" internally and others a "record + * batch". + */ +export class RecordBatch { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):RecordBatch { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsRecordBatch(bb:flatbuffers.ByteBuffer, obj?:RecordBatch):RecordBatch { + return (obj || new RecordBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsRecordBatch(bb:flatbuffers.ByteBuffer, obj?:RecordBatch):RecordBatch { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new RecordBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * number of records / rows. The arrays in the batch should all have this + * length + */ +length():flatbuffers.Long { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); +} + +/** + * Nodes correspond to the pre-ordered flattened logical schema + */ +nodes(index: number, obj?:FieldNode):FieldNode|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new FieldNode()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null; +} + +nodesLength():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * Buffers correspond to the pre-ordered flattened buffer tree + * + * The number of buffers appended to this list depends on the schema. For + * example, most primitive arrays will have 2 buffers, 1 for the validity + * bitmap and 1 for the values. 
For struct arrays, there will only be a + * single buffer for the validity (nulls) bitmap + */ +buffers(index: number, obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new Buffer()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null; +} + +buffersLength():number { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * Optional compression of the message body + */ +compression(obj?:BodyCompression):BodyCompression|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? (obj || new BodyCompression()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +static startRecordBatch(builder:flatbuffers.Builder) { + builder.startObject(4); +} + +static addLength(builder:flatbuffers.Builder, length:flatbuffers.Long) { + builder.addFieldInt64(0, length, builder.createLong(0, 0)); +} + +static addNodes(builder:flatbuffers.Builder, nodesOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, nodesOffset, 0); +} + +static startNodesVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(16, numElems, 8); +} + +static addBuffers(builder:flatbuffers.Builder, buffersOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, buffersOffset, 0); +} + +static startBuffersVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(16, numElems, 8); +} + +static addCompression(builder:flatbuffers.Builder, compressionOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, compressionOffset, 0); +} + +static endRecordBatch(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +} diff --git a/js/src/fb/schema.ts b/js/src/fb/schema.ts new file mode 100644 index 0000000000000..c832c2b652a9d --- /dev/null +++ b/js/src/fb/schema.ts @@ -0,0 +1,152 @@ +// automatically generated by the FlatBuffers 
compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Endianness } from './endianness.js'; +import { Field } from './field.js'; +import { KeyValue } from './key-value.js'; + + +/** + * ---------------------------------------------------------------------- + * A Schema describes the columns in a row batch + */ +export class Schema { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Schema { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsSchema(bb:flatbuffers.ByteBuffer, obj?:Schema):Schema { + return (obj || new Schema()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsSchema(bb:flatbuffers.ByteBuffer, obj?:Schema):Schema { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Schema()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * endianness of the buffer + * it is Little Endian by default + * if endianness doesn't match the underlying system then the vectors need to be converted + */ +endianness():Endianness { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : Endianness.Little; +} + +fields(index: number, obj?:Field):Field|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +fieldsLength():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +customMetadata(index: number, obj?:KeyValue):KeyValue|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) 
: null; +} + +customMetadataLength():number { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * Features used in the stream/file. + */ +features(index: number):flatbuffers.Long|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.readInt64(this.bb!.__vector(this.bb_pos + offset) + index * 8) : this.bb!.createLong(0, 0); +} + +featuresLength():number { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +static startSchema(builder:flatbuffers.Builder) { + builder.startObject(4); +} + +static addEndianness(builder:flatbuffers.Builder, endianness:Endianness) { + builder.addFieldInt16(0, endianness, Endianness.Little); +} + +static addFields(builder:flatbuffers.Builder, fieldsOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, fieldsOffset, 0); +} + +static createFieldsVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startFieldsVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static addCustomMetadata(builder:flatbuffers.Builder, customMetadataOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, customMetadataOffset, 0); +} + +static createCustomMetadataVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startCustomMetadataVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static addFeatures(builder:flatbuffers.Builder, featuresOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, 
featuresOffset, 0); +} + +static createFeaturesVector(builder:flatbuffers.Builder, data:flatbuffers.Long[]):flatbuffers.Offset { + builder.startVector(8, data.length, 8); + for (let i = data.length - 1; i >= 0; i--) { + builder.addInt64(data[i]!); + } + return builder.endVector(); +} + +static startFeaturesVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(8, numElems, 8); +} + +static endSchema(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static finishSchemaBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset); +} + +static finishSizePrefixedSchemaBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset, undefined, true); +} + +static createSchema(builder:flatbuffers.Builder, endianness:Endianness, fieldsOffset:flatbuffers.Offset, customMetadataOffset:flatbuffers.Offset, featuresOffset:flatbuffers.Offset):flatbuffers.Offset { + Schema.startSchema(builder); + Schema.addEndianness(builder, endianness); + Schema.addFields(builder, fieldsOffset); + Schema.addCustomMetadata(builder, customMetadataOffset); + Schema.addFeatures(builder, featuresOffset); + return Schema.endSchema(builder); +} +} diff --git a/js/src/fb/sparse-matrix-compressed-axis.ts b/js/src/fb/sparse-matrix-compressed-axis.ts new file mode 100644 index 0000000000000..e1dc200c66a8a --- /dev/null +++ b/js/src/fb/sparse-matrix-compressed-axis.ts @@ -0,0 +1,7 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum SparseMatrixCompressedAxis{ + Row = 0, + Column = 1 +} + diff --git a/js/src/fb/sparse-matrix-index-c-s-x.ts b/js/src/fb/sparse-matrix-index-c-s-x.ts new file mode 100644 index 0000000000000..77510ee052784 --- /dev/null +++ b/js/src/fb/sparse-matrix-index-c-s-x.ts @@ -0,0 +1,134 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import 
{ Buffer } from './buffer.js'; +import { Int } from './int.js'; +import { SparseMatrixCompressedAxis } from './sparse-matrix-compressed-axis.js'; + + +/** + * Compressed Sparse format, that is matrix-specific. + */ +export class SparseMatrixIndexCSX { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):SparseMatrixIndexCSX { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsSparseMatrixIndexCSX(bb:flatbuffers.ByteBuffer, obj?:SparseMatrixIndexCSX):SparseMatrixIndexCSX { + return (obj || new SparseMatrixIndexCSX()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsSparseMatrixIndexCSX(bb:flatbuffers.ByteBuffer, obj?:SparseMatrixIndexCSX):SparseMatrixIndexCSX { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new SparseMatrixIndexCSX()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Which axis, row or column, is compressed + */ +compressedAxis():SparseMatrixCompressedAxis { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : SparseMatrixCompressedAxis.Row; +} + +/** + * The type of values in indptrBuffer + */ +indptrType(obj?:Int):Int|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * indptrBuffer stores the location and size of indptr array that + * represents the range of the rows. + * The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + * The length of this array is 1 + (the number of rows), and the type + * of index value is long. + * + * For example, let X be the following 6x4 matrix: + * ```text + * X := [[0, 1, 2, 0], + * [0, 0, 3, 0], + * [0, 4, 0, 5], + * [0, 0, 0, 0], + * [6, 0, 7, 8], + * [0, 9, 0, 0]]. 
+ * ``` + * The array of non-zero values in X is: + * ```text + * values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. + * ``` + * And the indptr of X is: + * ```text + * indptr(X) = [0, 2, 3, 5, 5, 8, 10]. + * ``` + */ +indptrBuffer(obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new Buffer()).__init(this.bb_pos + offset, this.bb!) : null; +} + +/** + * The type of values in indicesBuffer + */ +indicesType(obj?:Int):Int|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * indicesBuffer stores the location and size of the array that + * contains the column indices of the corresponding non-zero values. + * The type of index value is long. + * + * For example, the indices of the above X is: + * ```text + * indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. + * ``` + * Note that the indices are sorted in lexicographical order for each row. + */ +indicesBuffer(obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? (obj || new Buffer()).__init(this.bb_pos + offset, this.bb!) 
: null; +} + +static startSparseMatrixIndexCSX(builder:flatbuffers.Builder) { + builder.startObject(5); +} + +static addCompressedAxis(builder:flatbuffers.Builder, compressedAxis:SparseMatrixCompressedAxis) { + builder.addFieldInt16(0, compressedAxis, SparseMatrixCompressedAxis.Row); +} + +static addIndptrType(builder:flatbuffers.Builder, indptrTypeOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, indptrTypeOffset, 0); +} + +static addIndptrBuffer(builder:flatbuffers.Builder, indptrBufferOffset:flatbuffers.Offset) { + builder.addFieldStruct(2, indptrBufferOffset, 0); +} + +static addIndicesType(builder:flatbuffers.Builder, indicesTypeOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, indicesTypeOffset, 0); +} + +static addIndicesBuffer(builder:flatbuffers.Builder, indicesBufferOffset:flatbuffers.Offset) { + builder.addFieldStruct(4, indicesBufferOffset, 0); +} + +static endSparseMatrixIndexCSX(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + builder.requiredField(offset, 6) // indptrType + builder.requiredField(offset, 8) // indptrBuffer + builder.requiredField(offset, 10) // indicesType + builder.requiredField(offset, 12) // indicesBuffer + return offset; +} + +} diff --git a/js/src/fb/sparse-tensor-index-c-o-o.ts b/js/src/fb/sparse-tensor-index-c-o-o.ts new file mode 100644 index 0000000000000..0de7f1e2c2675 --- /dev/null +++ b/js/src/fb/sparse-tensor-index-c-o-o.ts @@ -0,0 +1,142 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Buffer } from './buffer.js'; +import { Int } from './int.js'; + + +/** + * ---------------------------------------------------------------------- + * EXPERIMENTAL: Data structures for sparse tensors + * Coordinate (COO) format of sparse tensor index. 
+ * + * COO's index list are represented as a NxM matrix, + * where N is the number of non-zero values, + * and M is the number of dimensions of a sparse tensor. + * + * indicesBuffer stores the location and size of the data of this indices + * matrix. The value type and the stride of the indices matrix is + * specified in indicesType and indicesStrides fields. + * + * For example, let X be a 2x3x4x5 tensor, and it has the following + * 6 non-zero values: + * ```text + * X[0, 1, 2, 0] := 1 + * X[1, 1, 2, 3] := 2 + * X[0, 2, 1, 0] := 3 + * X[0, 1, 3, 0] := 4 + * X[0, 1, 2, 1] := 5 + * X[1, 2, 0, 4] := 6 + * ``` + * In COO format, the index matrix of X is the following 4x6 matrix: + * ```text + * [[0, 0, 0, 0, 1, 1], + * [1, 1, 1, 2, 1, 2], + * [2, 2, 3, 1, 2, 0], + * [0, 1, 0, 0, 3, 4]] + * ``` + * When isCanonical is true, the indices is sorted in lexicographical order + * (row-major order), and it does not have duplicated entries. Otherwise, + * the indices may not be sorted, or may have duplicated entries. + */ +export class SparseTensorIndexCOO { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):SparseTensorIndexCOO { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsSparseTensorIndexCOO(bb:flatbuffers.ByteBuffer, obj?:SparseTensorIndexCOO):SparseTensorIndexCOO { + return (obj || new SparseTensorIndexCOO()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsSparseTensorIndexCOO(bb:flatbuffers.ByteBuffer, obj?:SparseTensorIndexCOO):SparseTensorIndexCOO { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new SparseTensorIndexCOO()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * The type of values in indicesBuffer + */ +indicesType(obj?:Int):Int|null { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) 
: null; +} + +/** + * Non-negative byte offsets to advance one value cell along each dimension + * If omitted, default to row-major order (C-like). + */ +indicesStrides(index: number):flatbuffers.Long|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.readInt64(this.bb!.__vector(this.bb_pos + offset) + index * 8) : this.bb!.createLong(0, 0); +} + +indicesStridesLength():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * The location and size of the indices matrix's data + */ +indicesBuffer(obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new Buffer()).__init(this.bb_pos + offset, this.bb!) : null; +} + +/** + * This flag is true if and only if the indices matrix is sorted in + * row-major order, and does not have duplicated entries. + * This sort order is the same as of Tensorflow's SparseTensor, + * but it is inverse order of SciPy's canonical coo_matrix + * (SciPy employs column-major order for its coo_matrix). + */ +isCanonical():boolean { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? 
!!this.bb!.readInt8(this.bb_pos + offset) : false; +} + +static startSparseTensorIndexCOO(builder:flatbuffers.Builder) { + builder.startObject(4); +} + +static addIndicesType(builder:flatbuffers.Builder, indicesTypeOffset:flatbuffers.Offset) { + builder.addFieldOffset(0, indicesTypeOffset, 0); +} + +static addIndicesStrides(builder:flatbuffers.Builder, indicesStridesOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, indicesStridesOffset, 0); +} + +static createIndicesStridesVector(builder:flatbuffers.Builder, data:flatbuffers.Long[]):flatbuffers.Offset { + builder.startVector(8, data.length, 8); + for (let i = data.length - 1; i >= 0; i--) { + builder.addInt64(data[i]!); + } + return builder.endVector(); +} + +static startIndicesStridesVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(8, numElems, 8); +} + +static addIndicesBuffer(builder:flatbuffers.Builder, indicesBufferOffset:flatbuffers.Offset) { + builder.addFieldStruct(2, indicesBufferOffset, 0); +} + +static addIsCanonical(builder:flatbuffers.Builder, isCanonical:boolean) { + builder.addFieldInt8(3, +isCanonical, +false); +} + +static endSparseTensorIndexCOO(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + builder.requiredField(offset, 4) // indicesType + builder.requiredField(offset, 8) // indicesBuffer + return offset; +} + +} diff --git a/js/src/fb/sparse-tensor-index-c-s-f.ts b/js/src/fb/sparse-tensor-index-c-s-f.ts new file mode 100644 index 0000000000000..ad23d0af76839 --- /dev/null +++ b/js/src/fb/sparse-tensor-index-c-s-f.ts @@ -0,0 +1,207 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Buffer } from './buffer.js'; +import { Int } from './int.js'; + + +/** + * Compressed Sparse Fiber (CSF) sparse tensor index. 
+ */ +export class SparseTensorIndexCSF { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):SparseTensorIndexCSF { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsSparseTensorIndexCSF(bb:flatbuffers.ByteBuffer, obj?:SparseTensorIndexCSF):SparseTensorIndexCSF { + return (obj || new SparseTensorIndexCSF()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsSparseTensorIndexCSF(bb:flatbuffers.ByteBuffer, obj?:SparseTensorIndexCSF):SparseTensorIndexCSF { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new SparseTensorIndexCSF()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * CSF is a generalization of compressed sparse row (CSR) index. + * See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) + * + * CSF index recursively compresses each dimension of a tensor into a set + * of prefix trees. Each path from a root to leaf forms one tensor + * non-zero index. CSF is implemented with two arrays of buffers and one + * arrays of integers. + * + * For example, let X be a 2x3x4x5 tensor and let it have the following + * 8 non-zero values: + * ```text + * X[0, 0, 0, 1] := 1 + * X[0, 0, 0, 2] := 2 + * X[0, 1, 0, 0] := 3 + * X[0, 1, 0, 2] := 4 + * X[0, 1, 1, 0] := 5 + * X[1, 1, 1, 0] := 6 + * X[1, 1, 1, 1] := 7 + * X[1, 1, 1, 2] := 8 + * ``` + * As a prefix tree this would be represented as: + * ```text + * 0 1 + * / \ | + * 0 1 1 + * / / \ | + * 0 0 1 1 + * /| /| | /| | + * 1 2 0 2 0 0 1 2 + * ``` + * The type of values in indptrBuffers + */ +indptrType(obj?:Int):Int|null { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * indptrBuffers stores the sparsity structure. + * Each two consecutive dimensions in a tensor correspond to a buffer in + * indptrBuffers. 
A pair of consecutive values at `indptrBuffers[dim][i]` + * and `indptrBuffers[dim][i + 1]` signify a range of nodes in + * `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. + * + * For example, the indptrBuffers for the above X is: + * ```text + * indptrBuffer(X) = [ + * [0, 2, 3], + * [0, 1, 3, 4], + * [0, 2, 4, 5, 8] + * ]. + * ``` + */ +indptrBuffers(index: number, obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? (obj || new Buffer()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null; +} + +indptrBuffersLength():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * The type of values in indicesBuffers + */ +indicesType(obj?:Int):Int|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null; +} + +/** + * indicesBuffers stores values of nodes. + * Each tensor dimension corresponds to a buffer in indicesBuffers. + * For example, the indicesBuffers for the above X is: + * ```text + * indicesBuffer(X) = [ + * [0, 1], + * [0, 1, 1], + * [0, 0, 1, 1], + * [1, 2, 0, 2, 0, 0, 1, 2] + * ]. + * ``` + */ +indicesBuffers(index: number, obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? (obj || new Buffer()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null; +} + +indicesBuffersLength():number { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * axisOrder stores the sequence in which dimensions were traversed to + * produce the prefix tree. + * For example, the axisOrder for the above X is: + * ```text + * axisOrder(X) = [0, 1, 2, 3]. 
+ * ``` + */ +axisOrder(index: number):number|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? this.bb!.readInt32(this.bb!.__vector(this.bb_pos + offset) + index * 4) : 0; +} + +axisOrderLength():number { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +axisOrderArray():Int32Array|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? new Int32Array(this.bb!.bytes().buffer, this.bb!.bytes().byteOffset + this.bb!.__vector(this.bb_pos + offset), this.bb!.__vector_len(this.bb_pos + offset)) : null; +} + +static startSparseTensorIndexCSF(builder:flatbuffers.Builder) { + builder.startObject(5); +} + +static addIndptrType(builder:flatbuffers.Builder, indptrTypeOffset:flatbuffers.Offset) { + builder.addFieldOffset(0, indptrTypeOffset, 0); +} + +static addIndptrBuffers(builder:flatbuffers.Builder, indptrBuffersOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, indptrBuffersOffset, 0); +} + +static startIndptrBuffersVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(16, numElems, 8); +} + +static addIndicesType(builder:flatbuffers.Builder, indicesTypeOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, indicesTypeOffset, 0); +} + +static addIndicesBuffers(builder:flatbuffers.Builder, indicesBuffersOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, indicesBuffersOffset, 0); +} + +static startIndicesBuffersVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(16, numElems, 8); +} + +static addAxisOrder(builder:flatbuffers.Builder, axisOrderOffset:flatbuffers.Offset) { + builder.addFieldOffset(4, axisOrderOffset, 0); +} + +static createAxisOrderVector(builder:flatbuffers.Builder, data:number[]|Int32Array):flatbuffers.Offset; +/** + * @deprecated This Uint8Array overload will be removed in the future. 
+ */ +static createAxisOrderVector(builder:flatbuffers.Builder, data:number[]|Uint8Array):flatbuffers.Offset; +static createAxisOrderVector(builder:flatbuffers.Builder, data:number[]|Int32Array|Uint8Array):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addInt32(data[i]!); + } + return builder.endVector(); +} + +static startAxisOrderVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static endSparseTensorIndexCSF(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + builder.requiredField(offset, 4) // indptrType + builder.requiredField(offset, 6) // indptrBuffers + builder.requiredField(offset, 8) // indicesType + builder.requiredField(offset, 10) // indicesBuffers + builder.requiredField(offset, 12) // axisOrder + return offset; +} + +} diff --git a/js/src/fb/sparse-tensor-index.ts b/js/src/fb/sparse-tensor-index.ts new file mode 100644 index 0000000000000..a9a3553d3f93a --- /dev/null +++ b/js/src/fb/sparse-tensor-index.ts @@ -0,0 +1,41 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import { SparseMatrixIndexCSX } from './sparse-matrix-index-c-s-x.js'; +import { SparseTensorIndexCOO } from './sparse-tensor-index-c-o-o.js'; +import { SparseTensorIndexCSF } from './sparse-tensor-index-c-s-f.js'; + + +export enum SparseTensorIndex{ + NONE = 0, + SparseTensorIndexCOO = 1, + SparseMatrixIndexCSX = 2, + SparseTensorIndexCSF = 3 +} + +export function unionToSparseTensorIndex( + type: SparseTensorIndex, + accessor: (obj:SparseMatrixIndexCSX|SparseTensorIndexCOO|SparseTensorIndexCSF) => SparseMatrixIndexCSX|SparseTensorIndexCOO|SparseTensorIndexCSF|null +): SparseMatrixIndexCSX|SparseTensorIndexCOO|SparseTensorIndexCSF|null { + switch(SparseTensorIndex[type]) { + case 'NONE': return null; + case 'SparseTensorIndexCOO': return accessor(new SparseTensorIndexCOO())! 
as SparseTensorIndexCOO; + case 'SparseMatrixIndexCSX': return accessor(new SparseMatrixIndexCSX())! as SparseMatrixIndexCSX; + case 'SparseTensorIndexCSF': return accessor(new SparseTensorIndexCSF())! as SparseTensorIndexCSF; + default: return null; + } +} + +export function unionListToSparseTensorIndex( + type: SparseTensorIndex, + accessor: (index: number, obj:SparseMatrixIndexCSX|SparseTensorIndexCOO|SparseTensorIndexCSF) => SparseMatrixIndexCSX|SparseTensorIndexCOO|SparseTensorIndexCSF|null, + index: number +): SparseMatrixIndexCSX|SparseTensorIndexCOO|SparseTensorIndexCSF|null { + switch(SparseTensorIndex[type]) { + case 'NONE': return null; + case 'SparseTensorIndexCOO': return accessor(index, new SparseTensorIndexCOO())! as SparseTensorIndexCOO; + case 'SparseMatrixIndexCSX': return accessor(index, new SparseMatrixIndexCSX())! as SparseMatrixIndexCSX; + case 'SparseTensorIndexCSF': return accessor(index, new SparseTensorIndexCSF())! as SparseTensorIndexCSF; + default: return null; + } +} + diff --git a/js/src/fb/sparse-tensor.ts b/js/src/fb/sparse-tensor.ts new file mode 100644 index 0000000000000..99a429e31eb8d --- /dev/null +++ b/js/src/fb/sparse-tensor.ts @@ -0,0 +1,149 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Buffer } from './buffer.js'; +import { SparseTensorIndex } from './sparse-tensor-index.js'; +import { TensorDim } from './tensor-dim.js'; +import { Type } from './type.js'; + + +export class SparseTensor { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):SparseTensor { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsSparseTensor(bb:flatbuffers.ByteBuffer, obj?:SparseTensor):SparseTensor { + return (obj || new SparseTensor()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsSparseTensor(bb:flatbuffers.ByteBuffer, 
obj?:SparseTensor):SparseTensor { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new SparseTensor()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +typeType():Type { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readUint8(this.bb_pos + offset) : Type.NONE; +} + +/** + * The type of data contained in a value cell. + * Currently only fixed-width value types are supported, + * no strings or nested types. + */ +// @ts-ignore +type(obj:any):any|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; +} + +/** + * The dimensions of the tensor, optionally named. + */ +shape(index: number, obj?:TensorDim):TensorDim|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new TensorDim()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +shapeLength():number { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * The number of non-zero values in a sparse tensor. + */ +nonZeroLength():flatbuffers.Long { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); +} + +sparseIndexType():SparseTensorIndex { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? this.bb!.readUint8(this.bb_pos + offset) : SparseTensorIndex.NONE; +} + +/** + * Sparse tensor index + */ +// @ts-ignore +sparseIndex(obj:any):any|null { + const offset = this.bb!.__offset(this.bb_pos, 14); + return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; +} + +/** + * The location and size of the tensor's data + */ +data(obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 16); + return offset ? (obj || new Buffer()).__init(this.bb_pos + offset, this.bb!) 
: null; +} + +static startSparseTensor(builder:flatbuffers.Builder) { + builder.startObject(7); +} + +static addTypeType(builder:flatbuffers.Builder, typeType:Type) { + builder.addFieldInt8(0, typeType, Type.NONE); +} + +static addType(builder:flatbuffers.Builder, typeOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, typeOffset, 0); +} + +static addShape(builder:flatbuffers.Builder, shapeOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, shapeOffset, 0); +} + +static createShapeVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startShapeVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static addNonZeroLength(builder:flatbuffers.Builder, nonZeroLength:flatbuffers.Long) { + builder.addFieldInt64(3, nonZeroLength, builder.createLong(0, 0)); +} + +static addSparseIndexType(builder:flatbuffers.Builder, sparseIndexType:SparseTensorIndex) { + builder.addFieldInt8(4, sparseIndexType, SparseTensorIndex.NONE); +} + +static addSparseIndex(builder:flatbuffers.Builder, sparseIndexOffset:flatbuffers.Offset) { + builder.addFieldOffset(5, sparseIndexOffset, 0); +} + +static addData(builder:flatbuffers.Builder, dataOffset:flatbuffers.Offset) { + builder.addFieldStruct(6, dataOffset, 0); +} + +static endSparseTensor(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + builder.requiredField(offset, 6) // type + builder.requiredField(offset, 8) // shape + builder.requiredField(offset, 14) // sparseIndex + builder.requiredField(offset, 16) // data + return offset; +} + +static finishSparseTensorBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset); +} + +static finishSizePrefixedSparseTensorBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { 
+ builder.finish(offset, undefined, true); +} + +} diff --git a/js/src/fb/struct_.ts b/js/src/fb/struct_.ts new file mode 100644 index 0000000000000..84cdf041652f5 --- /dev/null +++ b/js/src/fb/struct_.ts @@ -0,0 +1,41 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct + * (according to the physical memory layout). We used Struct_ here as + * Struct is a reserved word in Flatbuffers + */ +export class Struct_ { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Struct_ { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsStruct_(bb:flatbuffers.ByteBuffer, obj?:Struct_):Struct_ { + return (obj || new Struct_()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsStruct_(bb:flatbuffers.ByteBuffer, obj?:Struct_):Struct_ { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Struct_()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startStruct_(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endStruct_(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createStruct_(builder:flatbuffers.Builder):flatbuffers.Offset { + Struct_.startStruct_(builder); + return Struct_.endStruct_(builder); +} +} diff --git a/js/src/fb/tensor-dim.ts b/js/src/fb/tensor-dim.ts new file mode 100644 index 0000000000000..cef6da1752138 --- /dev/null +++ b/js/src/fb/tensor-dim.ts @@ -0,0 +1,69 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * ---------------------------------------------------------------------- + * Data structures for dense tensors + * Shape data for a single axis in a tensor + */ +export class 
TensorDim { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):TensorDim { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsTensorDim(bb:flatbuffers.ByteBuffer, obj?:TensorDim):TensorDim { + return (obj || new TensorDim()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsTensorDim(bb:flatbuffers.ByteBuffer, obj?:TensorDim):TensorDim { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new TensorDim()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +/** + * Length of dimension + */ +size():flatbuffers.Long { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0); +} + +/** + * Name of the dimension, optional + */ +name():string|null +name(optionalEncoding:flatbuffers.Encoding):string|Uint8Array|null +name(optionalEncoding?:any):string|Uint8Array|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? 
this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; +} + +static startTensorDim(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addSize(builder:flatbuffers.Builder, size:flatbuffers.Long) { + builder.addFieldInt64(0, size, builder.createLong(0, 0)); +} + +static addName(builder:flatbuffers.Builder, nameOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, nameOffset, 0); +} + +static endTensorDim(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createTensorDim(builder:flatbuffers.Builder, size:flatbuffers.Long, nameOffset:flatbuffers.Offset):flatbuffers.Offset { + TensorDim.startTensorDim(builder); + TensorDim.addSize(builder, size); + TensorDim.addName(builder, nameOffset); + return TensorDim.endTensorDim(builder); +} +} diff --git a/js/src/fb/tensor.ts b/js/src/fb/tensor.ts new file mode 100644 index 0000000000000..01f4f6d88ec4e --- /dev/null +++ b/js/src/fb/tensor.ts @@ -0,0 +1,142 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { Buffer } from './buffer.js'; +import { TensorDim } from './tensor-dim.js'; +import { Type } from './type.js'; + + +export class Tensor { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Tensor { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsTensor(bb:flatbuffers.ByteBuffer, obj?:Tensor):Tensor { + return (obj || new Tensor()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsTensor(bb:flatbuffers.ByteBuffer, obj?:Tensor):Tensor { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Tensor()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +typeType():Type { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? 
this.bb!.readUint8(this.bb_pos + offset) : Type.NONE; +} + +/** + * The type of data contained in a value cell. Currently only fixed-width + * value types are supported, no strings or nested types + */ +// @ts-ignore +type(obj:any):any|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null; +} + +/** + * The dimensions of the tensor, optionally named + */ +shape(index: number, obj?:TensorDim):TensorDim|null { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? (obj || new TensorDim()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null; +} + +shapeLength():number { + const offset = this.bb!.__offset(this.bb_pos, 8); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * Non-negative byte offsets to advance one value cell along each dimension + * If omitted, default to row-major order (C-like). + */ +strides(index: number):flatbuffers.Long|null { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.readInt64(this.bb!.__vector(this.bb_pos + offset) + index * 8) : this.bb!.createLong(0, 0); +} + +stridesLength():number { + const offset = this.bb!.__offset(this.bb_pos, 10); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +/** + * The location and size of the tensor's data + */ +data(obj?:Buffer):Buffer|null { + const offset = this.bb!.__offset(this.bb_pos, 12); + return offset ? (obj || new Buffer()).__init(this.bb_pos + offset, this.bb!) 
: null; +} + +static startTensor(builder:flatbuffers.Builder) { + builder.startObject(5); +} + +static addTypeType(builder:flatbuffers.Builder, typeType:Type) { + builder.addFieldInt8(0, typeType, Type.NONE); +} + +static addType(builder:flatbuffers.Builder, typeOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, typeOffset, 0); +} + +static addShape(builder:flatbuffers.Builder, shapeOffset:flatbuffers.Offset) { + builder.addFieldOffset(2, shapeOffset, 0); +} + +static createShapeVector(builder:flatbuffers.Builder, data:flatbuffers.Offset[]):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addOffset(data[i]!); + } + return builder.endVector(); +} + +static startShapeVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static addStrides(builder:flatbuffers.Builder, stridesOffset:flatbuffers.Offset) { + builder.addFieldOffset(3, stridesOffset, 0); +} + +static createStridesVector(builder:flatbuffers.Builder, data:flatbuffers.Long[]):flatbuffers.Offset { + builder.startVector(8, data.length, 8); + for (let i = data.length - 1; i >= 0; i--) { + builder.addInt64(data[i]!); + } + return builder.endVector(); +} + +static startStridesVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(8, numElems, 8); +} + +static addData(builder:flatbuffers.Builder, dataOffset:flatbuffers.Offset) { + builder.addFieldStruct(4, dataOffset, 0); +} + +static endTensor(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + builder.requiredField(offset, 6) // type + builder.requiredField(offset, 8) // shape + builder.requiredField(offset, 12) // data + return offset; +} + +static finishTensorBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset); +} + +static finishSizePrefixedTensorBuffer(builder:flatbuffers.Builder, offset:flatbuffers.Offset) { + builder.finish(offset, 
undefined, true); +} + +} diff --git a/js/src/fb/time-unit.ts b/js/src/fb/time-unit.ts new file mode 100644 index 0000000000000..67041d58bbfa2 --- /dev/null +++ b/js/src/fb/time-unit.ts @@ -0,0 +1,9 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum TimeUnit{ + SECOND = 0, + MILLISECOND = 1, + MICROSECOND = 2, + NANOSECOND = 3 +} + diff --git a/js/src/fb/time.ts b/js/src/fb/time.ts new file mode 100644 index 0000000000000..53b77d61ba600 --- /dev/null +++ b/js/src/fb/time.ts @@ -0,0 +1,75 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { TimeUnit } from './time-unit.js'; + + +/** + * Time is either a 32-bit or 64-bit signed integer type representing an + * elapsed time since midnight, stored in either of four units: seconds, + * milliseconds, microseconds or nanoseconds. + * + * The integer `bitWidth` depends on the `unit` and must be one of the following: + * * SECOND and MILLISECOND: 32 bits + * * MICROSECOND and NANOSECOND: 64 bits + * + * The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds + * (exclusive), adjusted for the time unit (for example, up to 86400000 + * exclusive for the MILLISECOND unit). + * This definition doesn't allow for leap seconds. Time values from + * measurements with leap seconds will need to be corrected when ingesting + * into Arrow (for example by replacing the value 86400 with 86399). 
+ */ +export class Time { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Time { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsTime(bb:flatbuffers.ByteBuffer, obj?:Time):Time { + return (obj || new Time()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsTime(bb:flatbuffers.ByteBuffer, obj?:Time):Time { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Time()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +unit():TimeUnit { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : TimeUnit.MILLISECOND; +} + +bitWidth():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.readInt32(this.bb_pos + offset) : 32; +} + +static startTime(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addUnit(builder:flatbuffers.Builder, unit:TimeUnit) { + builder.addFieldInt16(0, unit, TimeUnit.MILLISECOND); +} + +static addBitWidth(builder:flatbuffers.Builder, bitWidth:number) { + builder.addFieldInt32(1, bitWidth, 32); +} + +static endTime(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createTime(builder:flatbuffers.Builder, unit:TimeUnit, bitWidth:number):flatbuffers.Offset { + Time.startTime(builder); + Time.addUnit(builder, unit); + Time.addBitWidth(builder, bitWidth); + return Time.endTime(builder); +} +} diff --git a/js/src/fb/timestamp.ts b/js/src/fb/timestamp.ts new file mode 100644 index 0000000000000..01ec36d24b87a --- /dev/null +++ b/js/src/fb/timestamp.ts @@ -0,0 +1,180 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { TimeUnit } from './time-unit.js'; + + +/** + * Timestamp is a 64-bit signed integer representing an elapsed time since a + * 
fixed epoch, stored in either of four units: seconds, milliseconds, + * microseconds or nanoseconds, and is optionally annotated with a timezone. + * + * Timestamp values do not include any leap seconds (in other words, all + * days are considered 86400 seconds long). + * + * Timestamps with a non-empty timezone + * ------------------------------------ + * + * If a Timestamp column has a non-empty timezone value, its epoch is + * 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone + * (the Unix epoch), regardless of the Timestamp's own timezone. + * + * Therefore, timestamp values with a non-empty timezone correspond to + * physical points in time together with some additional information about + * how the data was obtained and/or how to display it (the timezone). + * + * For example, the timestamp value 0 with the timezone string "Europe/Paris" + * corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the + * application may prefer to display it as "January 1st 1970, 01h00" in + * the Europe/Paris timezone (which is the same physical point in time). + * + * One consequence is that timestamp values with a non-empty timezone + * can be compared and ordered directly, since they all share the same + * well-known point of reference (the Unix epoch). + * + * Timestamps with an unset / empty timezone + * ----------------------------------------- + * + * If a Timestamp column has no timezone value, its epoch is + * 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. + * + * Therefore, timestamp values without a timezone cannot be meaningfully + * interpreted as physical points in time, but only as calendar / clock + * indications ("wall clock time") in an unspecified timezone. + * + * For example, the timestamp value 0 with an empty timezone string + * corresponds to "January 1st 1970, 00h00" in an unknown timezone: there + * is not enough information to interpret it as a well-defined physical + * point in time. 
+ * + * One consequence is that timestamp values without a timezone cannot + * be reliably compared or ordered, since they may have different points of + * reference. In particular, it is *not* possible to interpret an unset + * or empty timezone as the same as "UTC". + * + * Conversion between timezones + * ---------------------------- + * + * If a Timestamp column has a non-empty timezone, changing the timezone + * to a different non-empty value is a metadata-only operation: + * the timestamp values need not change as their point of reference remains + * the same (the Unix epoch). + * + * However, if a Timestamp column has no timezone value, changing it to a + * non-empty value requires to think about the desired semantics. + * One possibility is to assume that the original timestamp values are + * relative to the epoch of the timezone being set; timestamp values should + * then adjusted to the Unix epoch (for example, changing the timezone from + * empty to "Europe/Paris" would require converting the timestamp values + * from "Europe/Paris" to "UTC", which seems counter-intuitive but is + * nevertheless correct). + * + * Guidelines for encoding data from external libraries + * ---------------------------------------------------- + * + * Date & time libraries often have multiple different data types for temporal + * data. In order to ease interoperability between different implementations the + * Arrow project has some recommendations for encoding these types into a Timestamp + * column. + * + * An "instant" represents a physical point in time that has no relevant timezone + * (for example, astronomical data). To encode an instant, use a Timestamp with + * the timezone string set to "UTC", and make sure the Timestamp values + * are relative to the UTC epoch (January 1st 1970, midnight). + * + * A "zoned date-time" represents a physical point in time annotated with an + * informative timezone (for example, the timezone in which the data was + * recorded). 
To encode a zoned date-time, use a Timestamp with the timezone + * string set to the name of the timezone, and make sure the Timestamp values + * are relative to the UTC epoch (January 1st 1970, midnight). + * + * (There is some ambiguity between an instant and a zoned date-time with the + * UTC timezone. Both of these are stored the same in Arrow. Typically, + * this distinction does not matter. If it does, then an application should + * use custom metadata or an extension type to distinguish between the two cases.) + * + * An "offset date-time" represents a physical point in time combined with an + * explicit offset from UTC. To encode an offset date-time, use a Timestamp + * with the timezone string set to the numeric timezone offset string + * (e.g. "+03:00"), and make sure the Timestamp values are relative to + * the UTC epoch (January 1st 1970, midnight). + * + * A "naive date-time" (also called "local date-time" in some libraries) + * represents a wall clock time combined with a calendar date, but with + * no indication of how to map this information to a physical point in time. + * Naive date-times must be handled with care because of this missing + * information, and also because daylight saving time (DST) may make + * some values ambiguous or non-existent. A naive date-time may be + * stored as a struct with Date and Time fields. However, it may also be + * encoded into a Timestamp column with an empty timezone. The timestamp + * values should be computed "as if" the timezone of the date-time values + * was UTC; for example, the naive date-time "January 1st 1970, 00h00" would + * be encoded as timestamp value 0. 
+ */ +export class Timestamp { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Timestamp { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsTimestamp(bb:flatbuffers.ByteBuffer, obj?:Timestamp):Timestamp { + return (obj || new Timestamp()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsTimestamp(bb:flatbuffers.ByteBuffer, obj?:Timestamp):Timestamp { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Timestamp()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +unit():TimeUnit { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : TimeUnit.SECOND; +} + +/** + * The timezone is an optional string indicating the name of a timezone, + * one of: + * + * * As used in the Olson timezone database (the "tz database" or + * "tzdata"), such as "America/New_York". + * * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", + * such as "+07:30". + * + * Whether a timezone string is present indicates different semantics about + * the data (see above). + */ +timezone():string|null +timezone(optionalEncoding:flatbuffers.Encoding):string|Uint8Array|null +timezone(optionalEncoding?:any):string|Uint8Array|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? 
this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null; +} + +static startTimestamp(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addUnit(builder:flatbuffers.Builder, unit:TimeUnit) { + builder.addFieldInt16(0, unit, TimeUnit.SECOND); +} + +static addTimezone(builder:flatbuffers.Builder, timezoneOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, timezoneOffset, 0); +} + +static endTimestamp(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createTimestamp(builder:flatbuffers.Builder, unit:TimeUnit, timezoneOffset:flatbuffers.Offset):flatbuffers.Offset { + Timestamp.startTimestamp(builder); + Timestamp.addUnit(builder, unit); + Timestamp.addTimezone(builder, timezoneOffset); + return Timestamp.endTimestamp(builder); +} +} diff --git a/js/src/fb/type.ts b/js/src/fb/type.ts new file mode 100644 index 0000000000000..c4691fb81ae52 --- /dev/null +++ b/js/src/fb/type.ts @@ -0,0 +1,118 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import { Binary } from './binary.js'; +import { Bool } from './bool.js'; +import { Date } from './date.js'; +import { Decimal } from './decimal.js'; +import { Duration } from './duration.js'; +import { FixedSizeBinary } from './fixed-size-binary.js'; +import { FixedSizeList } from './fixed-size-list.js'; +import { FloatingPoint } from './floating-point.js'; +import { Int } from './int.js'; +import { Interval } from './interval.js'; +import { LargeBinary } from './large-binary.js'; +import { LargeList } from './large-list.js'; +import { LargeUtf8 } from './large-utf8.js'; +import { List } from './list.js'; +import { Map } from './map.js'; +import { Null } from './null.js'; +import { Struct_ } from './struct_.js'; +import { Time } from './time.js'; +import { Timestamp } from './timestamp.js'; +import { Union } from './union.js'; +import { Utf8 } from './utf8.js'; + + +/** + * 
---------------------------------------------------------------------- + * Top-level Type value, enabling extensible type-specific metadata. We can + * add new logical types to Type without breaking backwards compatibility + */ +export enum Type{ + NONE = 0, + Null = 1, + Int = 2, + FloatingPoint = 3, + Binary = 4, + Utf8 = 5, + Bool = 6, + Decimal = 7, + Date = 8, + Time = 9, + Timestamp = 10, + Interval = 11, + List = 12, + Struct_ = 13, + Union = 14, + FixedSizeBinary = 15, + FixedSizeList = 16, + Map = 17, + Duration = 18, + LargeBinary = 19, + LargeUtf8 = 20, + LargeList = 21 +} + +export function unionToType( + type: Type, + accessor: (obj:Binary|Bool|Date|Decimal|Duration|FixedSizeBinary|FixedSizeList|FloatingPoint|Int|Interval|LargeBinary|LargeList|LargeUtf8|List|Map|Null|Struct_|Time|Timestamp|Union|Utf8) => Binary|Bool|Date|Decimal|Duration|FixedSizeBinary|FixedSizeList|FloatingPoint|Int|Interval|LargeBinary|LargeList|LargeUtf8|List|Map|Null|Struct_|Time|Timestamp|Union|Utf8|null +): Binary|Bool|Date|Decimal|Duration|FixedSizeBinary|FixedSizeList|FloatingPoint|Int|Interval|LargeBinary|LargeList|LargeUtf8|List|Map|Null|Struct_|Time|Timestamp|Union|Utf8|null { + switch(Type[type]) { + case 'NONE': return null; + case 'Null': return accessor(new Null())! as Null; + case 'Int': return accessor(new Int())! as Int; + case 'FloatingPoint': return accessor(new FloatingPoint())! as FloatingPoint; + case 'Binary': return accessor(new Binary())! as Binary; + case 'Utf8': return accessor(new Utf8())! as Utf8; + case 'Bool': return accessor(new Bool())! as Bool; + case 'Decimal': return accessor(new Decimal())! as Decimal; + case 'Date': return accessor(new Date())! as Date; + case 'Time': return accessor(new Time())! as Time; + case 'Timestamp': return accessor(new Timestamp())! as Timestamp; + case 'Interval': return accessor(new Interval())! as Interval; + case 'List': return accessor(new List())! as List; + case 'Struct_': return accessor(new Struct_())! 
as Struct_; + case 'Union': return accessor(new Union())! as Union; + case 'FixedSizeBinary': return accessor(new FixedSizeBinary())! as FixedSizeBinary; + case 'FixedSizeList': return accessor(new FixedSizeList())! as FixedSizeList; + case 'Map': return accessor(new Map())! as Map; + case 'Duration': return accessor(new Duration())! as Duration; + case 'LargeBinary': return accessor(new LargeBinary())! as LargeBinary; + case 'LargeUtf8': return accessor(new LargeUtf8())! as LargeUtf8; + case 'LargeList': return accessor(new LargeList())! as LargeList; + default: return null; + } +} + +export function unionListToType( + type: Type, + accessor: (index: number, obj:Binary|Bool|Date|Decimal|Duration|FixedSizeBinary|FixedSizeList|FloatingPoint|Int|Interval|LargeBinary|LargeList|LargeUtf8|List|Map|Null|Struct_|Time|Timestamp|Union|Utf8) => Binary|Bool|Date|Decimal|Duration|FixedSizeBinary|FixedSizeList|FloatingPoint|Int|Interval|LargeBinary|LargeList|LargeUtf8|List|Map|Null|Struct_|Time|Timestamp|Union|Utf8|null, + index: number +): Binary|Bool|Date|Decimal|Duration|FixedSizeBinary|FixedSizeList|FloatingPoint|Int|Interval|LargeBinary|LargeList|LargeUtf8|List|Map|Null|Struct_|Time|Timestamp|Union|Utf8|null { + switch(Type[type]) { + case 'NONE': return null; + case 'Null': return accessor(index, new Null())! as Null; + case 'Int': return accessor(index, new Int())! as Int; + case 'FloatingPoint': return accessor(index, new FloatingPoint())! as FloatingPoint; + case 'Binary': return accessor(index, new Binary())! as Binary; + case 'Utf8': return accessor(index, new Utf8())! as Utf8; + case 'Bool': return accessor(index, new Bool())! as Bool; + case 'Decimal': return accessor(index, new Decimal())! as Decimal; + case 'Date': return accessor(index, new Date())! as Date; + case 'Time': return accessor(index, new Time())! as Time; + case 'Timestamp': return accessor(index, new Timestamp())! as Timestamp; + case 'Interval': return accessor(index, new Interval())! 
as Interval; + case 'List': return accessor(index, new List())! as List; + case 'Struct_': return accessor(index, new Struct_())! as Struct_; + case 'Union': return accessor(index, new Union())! as Union; + case 'FixedSizeBinary': return accessor(index, new FixedSizeBinary())! as FixedSizeBinary; + case 'FixedSizeList': return accessor(index, new FixedSizeList())! as FixedSizeList; + case 'Map': return accessor(index, new Map())! as Map; + case 'Duration': return accessor(index, new Duration())! as Duration; + case 'LargeBinary': return accessor(index, new LargeBinary())! as LargeBinary; + case 'LargeUtf8': return accessor(index, new LargeUtf8())! as LargeUtf8; + case 'LargeList': return accessor(index, new LargeList())! as LargeList; + default: return null; + } +} + diff --git a/js/src/fb/union-mode.ts b/js/src/fb/union-mode.ts new file mode 100644 index 0000000000000..04c1d448bf0d0 --- /dev/null +++ b/js/src/fb/union-mode.ts @@ -0,0 +1,7 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +export enum UnionMode{ + Sparse = 0, + Dense = 1 +} + diff --git a/js/src/fb/union.ts b/js/src/fb/union.ts new file mode 100644 index 0000000000000..4403f46194b19 --- /dev/null +++ b/js/src/fb/union.ts @@ -0,0 +1,92 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +import { UnionMode } from './union-mode.js'; + + +/** + * A union is a complex type with children in Field + * By default ids in the type vector refer to the offsets in the children + * optionally typeIds provides an indirection between the child offset and the type id + * for each child `typeIds[offset]` is the id used in the type vector + */ +export class Union { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Union { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsUnion(bb:flatbuffers.ByteBuffer, obj?:Union):Union { + return (obj || new 
Union()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsUnion(bb:flatbuffers.ByteBuffer, obj?:Union):Union { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Union()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +mode():UnionMode { + const offset = this.bb!.__offset(this.bb_pos, 4); + return offset ? this.bb!.readInt16(this.bb_pos + offset) : UnionMode.Sparse; +} + +typeIds(index: number):number|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.readInt32(this.bb!.__vector(this.bb_pos + offset) + index * 4) : 0; +} + +typeIdsLength():number { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0; +} + +typeIdsArray():Int32Array|null { + const offset = this.bb!.__offset(this.bb_pos, 6); + return offset ? new Int32Array(this.bb!.bytes().buffer, this.bb!.bytes().byteOffset + this.bb!.__vector(this.bb_pos + offset), this.bb!.__vector_len(this.bb_pos + offset)) : null; +} + +static startUnion(builder:flatbuffers.Builder) { + builder.startObject(2); +} + +static addMode(builder:flatbuffers.Builder, mode:UnionMode) { + builder.addFieldInt16(0, mode, UnionMode.Sparse); +} + +static addTypeIds(builder:flatbuffers.Builder, typeIdsOffset:flatbuffers.Offset) { + builder.addFieldOffset(1, typeIdsOffset, 0); +} + +static createTypeIdsVector(builder:flatbuffers.Builder, data:number[]|Int32Array):flatbuffers.Offset; +/** + * @deprecated This Uint8Array overload will be removed in the future. 
+ */ +static createTypeIdsVector(builder:flatbuffers.Builder, data:number[]|Uint8Array):flatbuffers.Offset; +static createTypeIdsVector(builder:flatbuffers.Builder, data:number[]|Int32Array|Uint8Array):flatbuffers.Offset { + builder.startVector(4, data.length, 4); + for (let i = data.length - 1; i >= 0; i--) { + builder.addInt32(data[i]!); + } + return builder.endVector(); +} + +static startTypeIdsVector(builder:flatbuffers.Builder, numElems:number) { + builder.startVector(4, numElems, 4); +} + +static endUnion(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + +static createUnion(builder:flatbuffers.Builder, mode:UnionMode, typeIdsOffset:flatbuffers.Offset):flatbuffers.Offset { + Union.startUnion(builder); + Union.addMode(builder, mode); + Union.addTypeIds(builder, typeIdsOffset); + return Union.endUnion(builder); +} +} diff --git a/js/src/fb/utf8.ts b/js/src/fb/utf8.ts new file mode 100644 index 0000000000000..87e68c090ed29 --- /dev/null +++ b/js/src/fb/utf8.ts @@ -0,0 +1,39 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +import * as flatbuffers from 'flatbuffers'; + +/** + * Unicode with UTF-8 encoding + */ +export class Utf8 { + bb: flatbuffers.ByteBuffer|null = null; + bb_pos = 0; +__init(i:number, bb:flatbuffers.ByteBuffer):Utf8 { + this.bb_pos = i; + this.bb = bb; + return this; +} + +static getRootAsUtf8(bb:flatbuffers.ByteBuffer, obj?:Utf8):Utf8 { + return (obj || new Utf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static getSizePrefixedRootAsUtf8(bb:flatbuffers.ByteBuffer, obj?:Utf8):Utf8 { + bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH); + return (obj || new Utf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb); +} + +static startUtf8(builder:flatbuffers.Builder) { + builder.startObject(0); +} + +static endUtf8(builder:flatbuffers.Builder):flatbuffers.Offset { + const offset = builder.endObject(); + return offset; +} + 
+static createUtf8(builder:flatbuffers.Builder):flatbuffers.Offset { + Utf8.startUtf8(builder); + return Utf8.endUtf8(builder); +} +} diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts index 43977ca7af5df..3f85e16e0337d 100644 --- a/js/src/interfaces.ts +++ b/js/src/interfaces.ts @@ -15,13 +15,29 @@ // specific language governing permissions and limitations // under the License. -import { Data } from './data'; -import { Type } from './enum'; -import * as type from './type'; -import { DataType } from './type'; -import * as vecs from './vector/index'; -import * as builders from './builder/index'; -import { BuilderOptions } from './builder/index'; +import * as type from './type.js'; +import type { Type } from './enum.js'; +import type { DataType } from './type.js'; +import type { Builder } from './builder.js'; +import type { BuilderOptions } from './builder.js'; +import type { BoolBuilder } from './builder/bool.js'; +import type { NullBuilder } from './builder/null.js'; +import type { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from './builder/date.js'; +import type { DecimalBuilder } from './builder/decimal.js'; +import type { DictionaryBuilder } from './builder/dictionary.js'; +import type { FixedSizeBinaryBuilder } from './builder/fixedsizebinary.js'; +import type { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from './builder/float.js'; +import type { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from './builder/int.js'; +import type { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './builder/time.js'; +import type { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './builder/timestamp.js'; +import type { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './builder/interval.js'; 
+import type { Utf8Builder } from './builder/utf8.js'; +import type { BinaryBuilder } from './builder/binary.js'; +import type { ListBuilder } from './builder/list.js'; +import type { FixedSizeListBuilder } from './builder/fixedsizelist.js'; +import type { MapBuilder } from './builder/map.js'; +import type { StructBuilder } from './builder/struct.js'; +import type { UnionBuilder, SparseUnionBuilder, DenseUnionBuilder } from './builder/union.js'; /** @ignore */ type FloatArray = Float32Array | Float64Array; /** @ignore */ type IntArray = Int8Array | Int16Array | Int32Array; @@ -81,23 +97,14 @@ export interface BigIntArrayConstructor { from(arrayLike: ArrayLike, mapfn: (v: U, k: number) => bigint, thisArg?: any): T; } -/** @ignore */ -export type VectorCtorArgs< - T extends VectorType, - R extends DataType = any, - TArgs extends any[] = any[], - TCtor extends new (data: Data, ...args: TArgs) => T = - new (data: Data, ...args: TArgs) => T -> = TCtor extends new (data: Data, ...args: infer TArgs) => T ? TArgs : never; - /** @ignore */ export type BuilderCtorArgs< T extends BuilderType, R extends DataType = any, TArgs extends any[] = any[], TCtor extends new (type: R, ...args: TArgs) => T = - new (type: R, ...args: TArgs) => T -> = TCtor extends new (type: R, ...args: infer TArgs) => T ? TArgs : never; + new (type: R, ...args: TArgs) => T + > = TCtor extends new (type: R, ...args: infer TArgs) => T ? TArgs : never; /** * Obtain the constructor function of an instance type @@ -106,69 +113,46 @@ export type BuilderCtorArgs< export type ConstructorType< T, TCtor extends new (...args: any[]) => T = - new (...args: any[]) => T -> = TCtor extends new (...args: any[]) => T ? 
TCtor : never; - -/** @ignore */ -export type VectorCtorType< - T extends VectorType, - R extends DataType = any, - TCtor extends new (type: R, data?: Data[], offsets?: Uint32Array) => T = - new (type: R, data?: Data[], offsets?: Uint32Array) => T -> = TCtor extends new (type: R, data?: Data[], offsets?: Uint32Array) => T ? TCtor : never; + new (...args: any[]) => T + > = TCtor extends new (...args: any[]) => T ? TCtor : never; /** @ignore */ export type BuilderCtorType< T extends BuilderType, R extends DataType = any, TCtor extends new (options: BuilderOptions) => T = - new (options: BuilderOptions) => T -> = TCtor extends new (options: BuilderOptions) => T ? TCtor : never; - -/** @ignore */ -export type VectorType = - T extends Type ? TypeToVector : - T extends DataType ? DataTypeToVector : - vecs.BaseVector - ; + new (options: BuilderOptions) => T + > = TCtor extends new (options: BuilderOptions) => T ? TCtor : never; /** @ignore */ export type BuilderType = - T extends Type ? TypeToBuilder : - T extends DataType ? DataTypeToBuilder : - builders.Builder - ; - -/** @ignore */ -export type VectorCtor = - T extends VectorType ? VectorCtorType> : - T extends Type ? VectorCtorType> : - T extends DataType ? VectorCtorType> : - VectorCtorType> + T extends Type ? TypeToBuilder : + T extends DataType ? DataTypeToBuilder : + Builder ; /** @ignore */ export type BuilderCtor = - T extends Type ? BuilderCtorType> : - T extends DataType ? BuilderCtorType> : - BuilderCtorType + T extends Type ? BuilderCtorType> : + T extends DataType ? BuilderCtorType> : + BuilderCtorType ; /** @ignore */ -export type DataTypeCtor = - T extends DataType ? ConstructorType : - T extends VectorType ? ConstructorType : - T extends Type ? ConstructorType> : - never +export type DataTypeCtor = + T extends DataType ? ConstructorType : + T extends Type ? ConstructorType> : + never ; /** @ignore */ -export type TypedArrayDataType | BigIntArray> = +export type TypedArrayDataType = T extends Int8Array ? 
type.Int8 : T extends Int16Array ? type.Int16 : T extends Int32Array ? type.Int32 : T extends BigInt64Array ? type.Int64 : T extends Uint8Array ? type.Uint8 : + T extends Uint8ClampedArray ? type.Uint8 : T extends Uint16Array ? type.Uint16 : T extends Uint32Array ? type.Uint32 : T extends BigUint64Array ? type.Uint64 : @@ -177,241 +161,161 @@ export type TypedArrayDataType never; /** @ignore */ -type TypeToVector = { - [key: number ]: vecs.Vector ; - [Type.Null ]: vecs.NullVector ; - [Type.Bool ]: vecs.BoolVector ; - [Type.Int8 ]: vecs.Int8Vector ; - [Type.Int16 ]: vecs.Int16Vector ; - [Type.Int32 ]: vecs.Int32Vector ; - [Type.Int64 ]: vecs.Int64Vector ; - [Type.Uint8 ]: vecs.Uint8Vector ; - [Type.Uint16 ]: vecs.Uint16Vector ; - [Type.Uint32 ]: vecs.Uint32Vector ; - [Type.Uint64 ]: vecs.Uint64Vector ; - [Type.Int ]: vecs.IntVector ; - [Type.Float16 ]: vecs.Float16Vector ; - [Type.Float32 ]: vecs.Float32Vector ; - [Type.Float64 ]: vecs.Float64Vector ; - [Type.Float ]: vecs.FloatVector ; - [Type.Utf8 ]: vecs.Utf8Vector ; - [Type.Binary ]: vecs.BinaryVector ; - [Type.FixedSizeBinary ]: vecs.FixedSizeBinaryVector ; - [Type.Date ]: vecs.DateVector ; - [Type.DateDay ]: vecs.DateDayVector ; - [Type.DateMillisecond ]: vecs.DateMillisecondVector ; - [Type.Timestamp ]: vecs.TimestampVector ; - [Type.TimestampSecond ]: vecs.TimestampSecondVector ; - [Type.TimestampMillisecond ]: vecs.TimestampMillisecondVector ; - [Type.TimestampMicrosecond ]: vecs.TimestampMicrosecondVector ; - [Type.TimestampNanosecond ]: vecs.TimestampNanosecondVector ; - [Type.Time ]: vecs.TimeVector ; - [Type.TimeSecond ]: vecs.TimeSecondVector ; - [Type.TimeMillisecond ]: vecs.TimeMillisecondVector ; - [Type.TimeMicrosecond ]: vecs.TimeMicrosecondVector ; - [Type.TimeNanosecond ]: vecs.TimeNanosecondVector ; - [Type.Decimal ]: vecs.DecimalVector ; - [Type.Union ]: vecs.UnionVector ; - [Type.DenseUnion ]: vecs.DenseUnionVector ; - [Type.SparseUnion ]: vecs.SparseUnionVector ; - [Type.Interval ]: 
vecs.IntervalVector ; - [Type.IntervalDayTime ]: vecs.IntervalDayTimeVector ; - [Type.IntervalYearMonth ]: vecs.IntervalYearMonthVector ; - [Type.Map ]: vecs.MapVector ; - [Type.List ]: vecs.ListVector ; - [Type.Struct ]: vecs.StructVector ; - [Type.Dictionary ]: vecs.DictionaryVector ; - [Type.FixedSizeList ]: vecs.FixedSizeListVector ; -}[T]; +export type JavaScriptArrayDataType = + T extends readonly (null | undefined)[] ? type.Null : + T extends readonly (null | undefined | boolean)[] ? type.Bool : + T extends readonly (null | undefined | string)[] ? type.Dictionary : + T extends readonly (null | undefined | Date)[] ? type.Date_ : + T extends readonly (null | undefined | bigint)[] ? type.Int64 : + T extends readonly (null | undefined | number)[] ? type.Float64 : + never; /** @ignore */ -type DataTypeToVector = { - [key: number ]: vecs.Vector ; - [Type.Null ]: T extends type.Null ? vecs.NullVector : never ; - [Type.Bool ]: T extends type.Bool ? vecs.BoolVector : never ; - [Type.Int8 ]: T extends type.Int8 ? vecs.Int8Vector : never ; - [Type.Int16 ]: T extends type.Int16 ? vecs.Int16Vector : never ; - [Type.Int32 ]: T extends type.Int32 ? vecs.Int32Vector : never ; - [Type.Int64 ]: T extends type.Int64 ? vecs.Int64Vector : never ; - [Type.Uint8 ]: T extends type.Uint8 ? vecs.Uint8Vector : never ; - [Type.Uint16 ]: T extends type.Uint16 ? vecs.Uint16Vector : never ; - [Type.Uint32 ]: T extends type.Uint32 ? vecs.Uint32Vector : never ; - [Type.Uint64 ]: T extends type.Uint64 ? vecs.Uint64Vector : never ; - [Type.Int ]: T extends type.Int ? vecs.IntVector : never ; - [Type.Float16 ]: T extends type.Float16 ? vecs.Float16Vector : never ; - [Type.Float32 ]: T extends type.Float32 ? vecs.Float32Vector : never ; - [Type.Float64 ]: T extends type.Float64 ? vecs.Float64Vector : never ; - [Type.Float ]: T extends type.Float ? vecs.FloatVector : never ; - [Type.Utf8 ]: T extends type.Utf8 ? vecs.Utf8Vector : never ; - [Type.Binary ]: T extends type.Binary ? 
vecs.BinaryVector : never ; - [Type.FixedSizeBinary ]: T extends type.FixedSizeBinary ? vecs.FixedSizeBinaryVector : never ; - [Type.Date ]: T extends type.Date_ ? vecs.DateVector : never ; - [Type.DateDay ]: T extends type.DateDay ? vecs.DateDayVector : never ; - [Type.DateMillisecond ]: T extends type.DateMillisecond ? vecs.DateMillisecondVector : never ; - [Type.Timestamp ]: T extends type.Timestamp ? vecs.TimestampVector : never ; - [Type.TimestampSecond ]: T extends type.TimestampSecond ? vecs.TimestampSecondVector : never ; - [Type.TimestampMillisecond ]: T extends type.TimestampMillisecond ? vecs.TimestampMillisecondVector : never ; - [Type.TimestampMicrosecond ]: T extends type.TimestampMicrosecond ? vecs.TimestampMicrosecondVector : never ; - [Type.TimestampNanosecond ]: T extends type.TimestampNanosecond ? vecs.TimestampNanosecondVector : never ; - [Type.Time ]: T extends type.Time ? vecs.TimeVector : never ; - [Type.TimeSecond ]: T extends type.TimeSecond ? vecs.TimeSecondVector : never ; - [Type.TimeMillisecond ]: T extends type.TimeMillisecond ? vecs.TimeMillisecondVector : never ; - [Type.TimeMicrosecond ]: T extends type.TimeMicrosecond ? vecs.TimeMicrosecondVector : never ; - [Type.TimeNanosecond ]: T extends type.TimeNanosecond ? vecs.TimeNanosecondVector : never ; - [Type.Decimal ]: T extends type.Decimal ? vecs.DecimalVector : never ; - [Type.Union ]: T extends type.Union ? vecs.UnionVector : never ; - [Type.DenseUnion ]: T extends type.DenseUnion ? vecs.DenseUnionVector : never ; - [Type.SparseUnion ]: T extends type.SparseUnion ? vecs.SparseUnionVector : never ; - [Type.Interval ]: T extends type.Interval ? vecs.IntervalVector : never ; - [Type.IntervalDayTime ]: T extends type.IntervalDayTime ? vecs.IntervalDayTimeVector : never ; - [Type.IntervalYearMonth ]: T extends type.IntervalYearMonth ? vecs.IntervalYearMonthVector : never ; - [Type.Map ]: T extends type.Map_ ? vecs.MapVector : never ; - [Type.List ]: T extends type.List ? 
vecs.ListVector : never ; - [Type.Struct ]: T extends type.Struct ? vecs.StructVector : never ; - [Type.Dictionary ]: T extends type.Dictionary ? vecs.DictionaryVector : never ; - [Type.FixedSizeList ]: T extends type.FixedSizeList ? vecs.FixedSizeListVector : never ; -}[T['TType']]; +export type ArrayDataType = + T extends TypedArray | BigIntArray ? TypedArrayDataType : + T extends readonly unknown[] ? JavaScriptArrayDataType : + never; /** @ignore */ export type TypeToDataType = { - [key: number ]: type.DataType ; - [Type.Null ]: type.Null ; - [Type.Bool ]: type.Bool ; - [Type.Int ]: type.Int ; - [Type.Int16 ]: type.Int16 ; - [Type.Int32 ]: type.Int32 ; - [Type.Int64 ]: type.Int64 ; - [Type.Uint8 ]: type.Uint8 ; - [Type.Uint16 ]: type.Uint16 ; - [Type.Uint32 ]: type.Uint32 ; - [Type.Uint64 ]: type.Uint64 ; - [Type.Int8 ]: type.Int8 ; - [Type.Float16 ]: type.Float16 ; - [Type.Float32 ]: type.Float32 ; - [Type.Float64 ]: type.Float64 ; - [Type.Float ]: type.Float ; - [Type.Utf8 ]: type.Utf8 ; - [Type.Binary ]: type.Binary ; - [Type.FixedSizeBinary ]: type.FixedSizeBinary ; - [Type.Date ]: type.Date_ ; - [Type.DateDay ]: type.DateDay ; - [Type.DateMillisecond ]: type.DateMillisecond ; - [Type.Timestamp ]: type.Timestamp ; - [Type.TimestampSecond ]: type.TimestampSecond ; - [Type.TimestampMillisecond ]: type.TimestampMillisecond ; - [Type.TimestampMicrosecond ]: type.TimestampMicrosecond ; - [Type.TimestampNanosecond ]: type.TimestampNanosecond ; - [Type.Time ]: type.Time ; - [Type.TimeSecond ]: type.TimeSecond ; - [Type.TimeMillisecond ]: type.TimeMillisecond ; - [Type.TimeMicrosecond ]: type.TimeMicrosecond ; - [Type.TimeNanosecond ]: type.TimeNanosecond ; - [Type.Decimal ]: type.Decimal ; - [Type.Union ]: type.Union ; - [Type.DenseUnion ]: type.DenseUnion ; - [Type.SparseUnion ]: type.SparseUnion ; - [Type.Interval ]: type.Interval ; - [Type.IntervalDayTime ]: type.IntervalDayTime ; - [Type.IntervalYearMonth ]: type.IntervalYearMonth ; - [Type.Map ]: type.Map_ ; - 
[Type.List ]: type.List ; - [Type.Struct ]: type.Struct ; - [Type.Dictionary ]: type.Dictionary ; - [Type.FixedSizeList ]: type.FixedSizeList ; + [key: number]: type.DataType; + [Type.Null]: type.Null; + [Type.Bool]: type.Bool; + [Type.Int]: type.Int; + [Type.Int16]: type.Int16; + [Type.Int32]: type.Int32; + [Type.Int64]: type.Int64; + [Type.Uint8]: type.Uint8; + [Type.Uint16]: type.Uint16; + [Type.Uint32]: type.Uint32; + [Type.Uint64]: type.Uint64; + [Type.Int8]: type.Int8; + [Type.Float16]: type.Float16; + [Type.Float32]: type.Float32; + [Type.Float64]: type.Float64; + [Type.Float]: type.Float; + [Type.Utf8]: type.Utf8; + [Type.Binary]: type.Binary; + [Type.FixedSizeBinary]: type.FixedSizeBinary; + [Type.Date]: type.Date_; + [Type.DateDay]: type.DateDay; + [Type.DateMillisecond]: type.DateMillisecond; + [Type.Timestamp]: type.Timestamp; + [Type.TimestampSecond]: type.TimestampSecond; + [Type.TimestampMillisecond]: type.TimestampMillisecond; + [Type.TimestampMicrosecond]: type.TimestampMicrosecond; + [Type.TimestampNanosecond]: type.TimestampNanosecond; + [Type.Time]: type.Time; + [Type.TimeSecond]: type.TimeSecond; + [Type.TimeMillisecond]: type.TimeMillisecond; + [Type.TimeMicrosecond]: type.TimeMicrosecond; + [Type.TimeNanosecond]: type.TimeNanosecond; + [Type.Decimal]: type.Decimal; + [Type.Union]: type.Union; + [Type.DenseUnion]: type.DenseUnion; + [Type.SparseUnion]: type.SparseUnion; + [Type.Interval]: type.Interval; + [Type.IntervalDayTime]: type.IntervalDayTime; + [Type.IntervalYearMonth]: type.IntervalYearMonth; + [Type.Map]: type.Map_; + [Type.List]: type.List; + [Type.Struct]: type.Struct; + [Type.Dictionary]: type.Dictionary; + [Type.FixedSizeList]: type.FixedSizeList; }[T]; /** @ignore */ type TypeToBuilder = { - [key: number ]: builders.Builder ; - [Type.Null ]: builders.NullBuilder ; - [Type.Bool ]: builders.BoolBuilder ; - [Type.Int8 ]: builders.Int8Builder ; - [Type.Int16 ]: builders.Int16Builder ; - [Type.Int32 ]: builders.Int32Builder ; - 
[Type.Int64 ]: builders.Int64Builder ; - [Type.Uint8 ]: builders.Uint8Builder ; - [Type.Uint16 ]: builders.Uint16Builder ; - [Type.Uint32 ]: builders.Uint32Builder ; - [Type.Uint64 ]: builders.Uint64Builder ; - [Type.Int ]: builders.IntBuilder ; - [Type.Float16 ]: builders.Float16Builder ; - [Type.Float32 ]: builders.Float32Builder ; - [Type.Float64 ]: builders.Float64Builder ; - [Type.Float ]: builders.FloatBuilder ; - [Type.Utf8 ]: builders.Utf8Builder ; - [Type.Binary ]: builders.BinaryBuilder ; - [Type.FixedSizeBinary ]: builders.FixedSizeBinaryBuilder ; - [Type.Date ]: builders.DateBuilder ; - [Type.DateDay ]: builders.DateDayBuilder ; - [Type.DateMillisecond ]: builders.DateMillisecondBuilder ; - [Type.Timestamp ]: builders.TimestampBuilder ; - [Type.TimestampSecond ]: builders.TimestampSecondBuilder ; - [Type.TimestampMillisecond ]: builders.TimestampMillisecondBuilder ; - [Type.TimestampMicrosecond ]: builders.TimestampMicrosecondBuilder ; - [Type.TimestampNanosecond ]: builders.TimestampNanosecondBuilder ; - [Type.Time ]: builders.TimeBuilder ; - [Type.TimeSecond ]: builders.TimeSecondBuilder ; - [Type.TimeMillisecond ]: builders.TimeMillisecondBuilder ; - [Type.TimeMicrosecond ]: builders.TimeMicrosecondBuilder ; - [Type.TimeNanosecond ]: builders.TimeNanosecondBuilder ; - [Type.Decimal ]: builders.DecimalBuilder ; - [Type.Union ]: builders.UnionBuilder ; - [Type.DenseUnion ]: builders.DenseUnionBuilder ; - [Type.SparseUnion ]: builders.SparseUnionBuilder ; - [Type.Interval ]: builders.IntervalBuilder ; - [Type.IntervalDayTime ]: builders.IntervalDayTimeBuilder ; - [Type.IntervalYearMonth ]: builders.IntervalYearMonthBuilder ; - [Type.Map ]: builders.MapBuilder ; - [Type.List ]: builders.ListBuilder ; - [Type.Struct ]: builders.StructBuilder ; - [Type.Dictionary ]: builders.DictionaryBuilder ; - [Type.FixedSizeList ]: builders.FixedSizeListBuilder ; + [key: number]: Builder; + [Type.Null]: NullBuilder; + [Type.Bool]: BoolBuilder; + [Type.Int8]: 
Int8Builder; + [Type.Int16]: Int16Builder; + [Type.Int32]: Int32Builder; + [Type.Int64]: Int64Builder; + [Type.Uint8]: Uint8Builder; + [Type.Uint16]: Uint16Builder; + [Type.Uint32]: Uint32Builder; + [Type.Uint64]: Uint64Builder; + [Type.Int]: IntBuilder; + [Type.Float16]: Float16Builder; + [Type.Float32]: Float32Builder; + [Type.Float64]: Float64Builder; + [Type.Float]: FloatBuilder; + [Type.Utf8]: Utf8Builder; + [Type.Binary]: BinaryBuilder; + [Type.FixedSizeBinary]: FixedSizeBinaryBuilder; + [Type.Date]: DateBuilder; + [Type.DateDay]: DateDayBuilder; + [Type.DateMillisecond]: DateMillisecondBuilder; + [Type.Timestamp]: TimestampBuilder; + [Type.TimestampSecond]: TimestampSecondBuilder; + [Type.TimestampMillisecond]: TimestampMillisecondBuilder; + [Type.TimestampMicrosecond]: TimestampMicrosecondBuilder; + [Type.TimestampNanosecond]: TimestampNanosecondBuilder; + [Type.Time]: TimeBuilder; + [Type.TimeSecond]: TimeSecondBuilder; + [Type.TimeMillisecond]: TimeMillisecondBuilder; + [Type.TimeMicrosecond]: TimeMicrosecondBuilder; + [Type.TimeNanosecond]: TimeNanosecondBuilder; + [Type.Decimal]: DecimalBuilder; + [Type.Union]: UnionBuilder; + [Type.DenseUnion]: DenseUnionBuilder; + [Type.SparseUnion]: SparseUnionBuilder; + [Type.Interval]: IntervalBuilder; + [Type.IntervalDayTime]: IntervalDayTimeBuilder; + [Type.IntervalYearMonth]: IntervalYearMonthBuilder; + [Type.Map]: MapBuilder; + [Type.List]: ListBuilder; + [Type.Struct]: StructBuilder; + [Type.Dictionary]: DictionaryBuilder; + [Type.FixedSizeList]: FixedSizeListBuilder; }[T]; /** @ignore */ type DataTypeToBuilder = { - [key: number ]: builders.Builder ; - [Type.Null ]: T extends type.Null ? builders.NullBuilder : never ; - [Type.Bool ]: T extends type.Bool ? builders.BoolBuilder : never ; - [Type.Int8 ]: T extends type.Int8 ? builders.Int8Builder : never ; - [Type.Int16 ]: T extends type.Int16 ? builders.Int16Builder : never ; - [Type.Int32 ]: T extends type.Int32 ? 
builders.Int32Builder : never ; - [Type.Int64 ]: T extends type.Int64 ? builders.Int64Builder : never ; - [Type.Uint8 ]: T extends type.Uint8 ? builders.Uint8Builder : never ; - [Type.Uint16 ]: T extends type.Uint16 ? builders.Uint16Builder : never ; - [Type.Uint32 ]: T extends type.Uint32 ? builders.Uint32Builder : never ; - [Type.Uint64 ]: T extends type.Uint64 ? builders.Uint64Builder : never ; - [Type.Int ]: T extends type.Int ? builders.IntBuilder : never ; - [Type.Float16 ]: T extends type.Float16 ? builders.Float16Builder : never ; - [Type.Float32 ]: T extends type.Float32 ? builders.Float32Builder : never ; - [Type.Float64 ]: T extends type.Float64 ? builders.Float64Builder : never ; - [Type.Float ]: T extends type.Float ? builders.FloatBuilder : never ; - [Type.Utf8 ]: T extends type.Utf8 ? builders.Utf8Builder : never ; - [Type.Binary ]: T extends type.Binary ? builders.BinaryBuilder : never ; - [Type.FixedSizeBinary ]: T extends type.FixedSizeBinary ? builders.FixedSizeBinaryBuilder : never ; - [Type.Date ]: T extends type.Date_ ? builders.DateBuilder : never ; - [Type.DateDay ]: T extends type.DateDay ? builders.DateDayBuilder : never ; - [Type.DateMillisecond ]: T extends type.DateMillisecond ? builders.DateMillisecondBuilder : never ; - [Type.Timestamp ]: T extends type.Timestamp ? builders.TimestampBuilder : never ; - [Type.TimestampSecond ]: T extends type.TimestampSecond ? builders.TimestampSecondBuilder : never ; - [Type.TimestampMillisecond ]: T extends type.TimestampMillisecond ? builders.TimestampMillisecondBuilder : never ; - [Type.TimestampMicrosecond ]: T extends type.TimestampMicrosecond ? builders.TimestampMicrosecondBuilder : never ; - [Type.TimestampNanosecond ]: T extends type.TimestampNanosecond ? builders.TimestampNanosecondBuilder : never ; - [Type.Time ]: T extends type.Time ? builders.TimeBuilder : never ; - [Type.TimeSecond ]: T extends type.TimeSecond ? 
builders.TimeSecondBuilder : never ; - [Type.TimeMillisecond ]: T extends type.TimeMillisecond ? builders.TimeMillisecondBuilder : never ; - [Type.TimeMicrosecond ]: T extends type.TimeMicrosecond ? builders.TimeMicrosecondBuilder : never ; - [Type.TimeNanosecond ]: T extends type.TimeNanosecond ? builders.TimeNanosecondBuilder : never ; - [Type.Decimal ]: T extends type.Decimal ? builders.DecimalBuilder : never ; - [Type.Union ]: T extends type.Union ? builders.UnionBuilder : never ; - [Type.DenseUnion ]: T extends type.DenseUnion ? builders.DenseUnionBuilder : never ; - [Type.SparseUnion ]: T extends type.SparseUnion ? builders.SparseUnionBuilder : never ; - [Type.Interval ]: T extends type.Interval ? builders.IntervalBuilder : never ; - [Type.IntervalDayTime ]: T extends type.IntervalDayTime ? builders.IntervalDayTimeBuilder : never ; - [Type.IntervalYearMonth ]: T extends type.IntervalYearMonth ? builders.IntervalYearMonthBuilder : never ; - [Type.Map ]: T extends type.Map_ ? builders.MapBuilder : never ; - [Type.List ]: T extends type.List ? builders.ListBuilder : never ; - [Type.Struct ]: T extends type.Struct ? builders.StructBuilder : never ; - [Type.Dictionary ]: T extends type.Dictionary ? builders.DictionaryBuilder : never ; - [Type.FixedSizeList ]: T extends type.FixedSizeList ? builders.FixedSizeListBuilder : never ; + [key: number]: Builder; + [Type.Null]: T extends type.Null ? NullBuilder : never; + [Type.Bool]: T extends type.Bool ? BoolBuilder : never; + [Type.Int8]: T extends type.Int8 ? Int8Builder : never; + [Type.Int16]: T extends type.Int16 ? Int16Builder : never; + [Type.Int32]: T extends type.Int32 ? Int32Builder : never; + [Type.Int64]: T extends type.Int64 ? Int64Builder : never; + [Type.Uint8]: T extends type.Uint8 ? Uint8Builder : never; + [Type.Uint16]: T extends type.Uint16 ? Uint16Builder : never; + [Type.Uint32]: T extends type.Uint32 ? Uint32Builder : never; + [Type.Uint64]: T extends type.Uint64 ? 
Uint64Builder : never; + [Type.Int]: T extends type.Int ? IntBuilder : never; + [Type.Float16]: T extends type.Float16 ? Float16Builder : never; + [Type.Float32]: T extends type.Float32 ? Float32Builder : never; + [Type.Float64]: T extends type.Float64 ? Float64Builder : never; + [Type.Float]: T extends type.Float ? FloatBuilder : never; + [Type.Utf8]: T extends type.Utf8 ? Utf8Builder : never; + [Type.Binary]: T extends type.Binary ? BinaryBuilder : never; + [Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? FixedSizeBinaryBuilder : never; + [Type.Date]: T extends type.Date_ ? DateBuilder : never; + [Type.DateDay]: T extends type.DateDay ? DateDayBuilder : never; + [Type.DateMillisecond]: T extends type.DateMillisecond ? DateMillisecondBuilder : never; + [Type.Timestamp]: T extends type.Timestamp ? TimestampBuilder : never; + [Type.TimestampSecond]: T extends type.TimestampSecond ? TimestampSecondBuilder : never; + [Type.TimestampMillisecond]: T extends type.TimestampMillisecond ? TimestampMillisecondBuilder : never; + [Type.TimestampMicrosecond]: T extends type.TimestampMicrosecond ? TimestampMicrosecondBuilder : never; + [Type.TimestampNanosecond]: T extends type.TimestampNanosecond ? TimestampNanosecondBuilder : never; + [Type.Time]: T extends type.Time ? TimeBuilder : never; + [Type.TimeSecond]: T extends type.TimeSecond ? TimeSecondBuilder : never; + [Type.TimeMillisecond]: T extends type.TimeMillisecond ? TimeMillisecondBuilder : never; + [Type.TimeMicrosecond]: T extends type.TimeMicrosecond ? TimeMicrosecondBuilder : never; + [Type.TimeNanosecond]: T extends type.TimeNanosecond ? TimeNanosecondBuilder : never; + [Type.Decimal]: T extends type.Decimal ? DecimalBuilder : never; + [Type.Union]: T extends type.Union ? UnionBuilder : never; + [Type.DenseUnion]: T extends type.DenseUnion ? DenseUnionBuilder : never; + [Type.SparseUnion]: T extends type.SparseUnion ? SparseUnionBuilder : never; + [Type.Interval]: T extends type.Interval ? 
IntervalBuilder : never; + [Type.IntervalDayTime]: T extends type.IntervalDayTime ? IntervalDayTimeBuilder : never; + [Type.IntervalYearMonth]: T extends type.IntervalYearMonth ? IntervalYearMonthBuilder : never; + [Type.Map]: T extends type.Map_ ? MapBuilder : never; + [Type.List]: T extends type.List ? ListBuilder : never; + [Type.Struct]: T extends type.Struct ? StructBuilder : never; + [Type.Dictionary]: T extends type.Dictionary ? DictionaryBuilder : never; + [Type.FixedSizeList]: T extends type.FixedSizeList ? FixedSizeListBuilder : never; }[T['TType']]; diff --git a/js/src/io/adapters.ts b/js/src/io/adapters.ts index a83346ef74f42..69c63aaabaed2 100644 --- a/js/src/io/adapters.ts +++ b/js/src/io/adapters.ts @@ -21,11 +21,10 @@ import { ArrayBufferViewInput, toUint8ArrayIterator, toUint8ArrayAsyncIterator -} from '../util/buffer'; +} from '../util/buffer.js'; -import { ReadableDOMStreamOptions } from './interfaces'; +import { ReadableDOMStreamOptions } from './interfaces.js'; -interface ReadableStreamReadResult { done: boolean; value: T } type Uint8ArrayGenerator = Generator; type AsyncUint8ArrayGenerator = AsyncGenerator; @@ -72,7 +71,7 @@ function* fromIterable(source: Iterable | T): } // Yield so the caller can inject the read command before creating the source Iterator - ({ cmd, size } = yield null); + ({ cmd, size } = yield null); // initialize the iterator const it = toUint8ArrayIterator(source)[Symbol.iterator](); @@ -80,8 +79,8 @@ function* fromIterable(source: Iterable | T): try { do { // read the next value - ({ done, value: buffer } = isNaN(size - bufferLength) ? - it.next(undefined) : it.next(size - bufferLength)); + ({ done, value: buffer } = Number.isNaN(size - bufferLength) ? 
+ it.next() : it.next(size - bufferLength)); // if chunk is not null or empty, push it onto the queue if (!done && buffer.byteLength > 0) { buffers.push(buffer); @@ -118,7 +117,7 @@ async function* fromAsyncIterable(source: AsyncI } // Yield so the caller can inject the read command before creating the source AsyncIterator - ({ cmd, size } = (yield null)!); + ({ cmd, size } = (yield null)!); // initialize the iterator const it = toUint8ArrayAsyncIterator(source)[Symbol.asyncIterator](); @@ -126,8 +125,8 @@ async function* fromAsyncIterable(source: AsyncI try { do { // read the next value - ({ done, value: buffer } = isNaN(size - bufferLength) - ? await it.next(undefined) + ({ done, value: buffer } = Number.isNaN(size - bufferLength) + ? await it.next() : await it.next(size - bufferLength)); // if chunk is not null or empty, push it onto the queue if (!done && buffer.byteLength > 0) { @@ -168,7 +167,7 @@ async function* fromDOMStream(source: ReadableSt } // Yield so the caller can inject the read command before we establish the ReadableStream lock - ({ cmd, size } = yield null); + ({ cmd, size } = yield null); // initialize the reader and lock the stream const it = new AdaptiveByteReader(source); @@ -176,8 +175,8 @@ async function* fromDOMStream(source: ReadableSt try { do { // read the next value - ({ done, value: buffer } = isNaN(size - bufferLength) - ? await it['read'](undefined) + ({ done, value: buffer } = Number.isNaN(size - bufferLength) + ? 
await it['read']() : await it['read'](size - bufferLength)); // if chunk is not null or empty, push it onto the queue if (!done && buffer.byteLength > 0) { @@ -203,92 +202,43 @@ async function* fromDOMStream(source: ReadableSt /** @ignore */ class AdaptiveByteReader { - private supportsBYOB: boolean; - private byobReader: ReadableStreamBYOBReader | null = null; - private defaultReader: ReadableStreamDefaultReader | null = null; - private reader: ReadableStreamBYOBReader | ReadableStreamDefaultReader | null; + private reader: ReadableStreamDefaultReader | null = null; constructor(private source: ReadableStream) { - try { - this.supportsBYOB = !!(this.reader = this.getBYOBReader()); - } catch (e) { - this.supportsBYOB = !(this.reader = this.getDefaultReader()); - } + this.reader = this.source['getReader'](); + // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions + // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified + // about why these errors are raised, but I'm sure there's some important spec reason that + // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the + // only solution in this case :/ + this.reader['closed'].catch(() => { }); } get closed(): Promise { - return this.reader ? this.reader['closed'].catch(() => {}) : Promise.resolve(); + return this.reader ? 
this.reader['closed'].catch(() => { }) : Promise.resolve(); } releaseLock(): void { if (this.reader) { this.reader.releaseLock(); } - this.reader = this.byobReader = this.defaultReader = null; + this.reader = null; } async cancel(reason?: any): Promise { const { reader, source } = this; - reader && (await reader['cancel'](reason).catch(() => {})); + reader && (await reader['cancel'](reason).catch(() => { })); source && (source['locked'] && this.releaseLock()); } - async read(size?: number): Promise> { + async read(size?: number): Promise> { if (size === 0) { - return { done: this.reader == null, value: new Uint8Array(0) }; - } - const result = !this.supportsBYOB || typeof size !== 'number' - ? await this.getDefaultReader().read() - : await this.readFromBYOBReader(size); - !result.done && (result.value = toUint8Array(result as ReadableStreamReadResult)); - return result as ReadableStreamReadResult; - } - - private getDefaultReader() { - if (this.byobReader) { this.releaseLock(); } - if (!this.defaultReader) { - this.defaultReader = this.source['getReader'](); - // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions - // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified - // about why these errors are raised, but I'm sure there's some important spec reason that - // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the - // only solution in this case :/ - this.defaultReader['closed'].catch(() => {}); - } - return (this.reader = this.defaultReader); - } - - private getBYOBReader() { - if (this.defaultReader) { this.releaseLock(); } - if (!this.byobReader) { - this.byobReader = this.source['getReader']({ mode: 'byob' }); - // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions - // that seem to be raised when we call `releaseLock()` on this reader. 
I'm still mystified - // about why these errors are raised, but I'm sure there's some important spec reason that - // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the - // only solution in this case :/ - this.byobReader['closed'].catch(() => {}); + return { done: this.reader == null, value: new Uint8Array(0) } as ReadableStreamDefaultReadValueResult; } - return (this.reader = this.byobReader); - } - - // This strategy plucked from the example in the streams spec: - // https://streams.spec.whatwg.org/#example-manual-read-bytes - private async readFromBYOBReader(size: number) { - return await readInto(this.getBYOBReader(), new ArrayBuffer(size), 0, size); - } -} - -/** @ignore */ -async function readInto(reader: ReadableStreamBYOBReader, buffer: ArrayBufferLike, offset: number, size: number): Promise> { - if (offset >= size) { - return { done: false, value: new Uint8Array(buffer, 0, size) }; - } - const { done, value } = await reader.read(new Uint8Array(buffer, offset, size - offset)); - if (((offset += value!.byteLength) < size) && !done) { - return await readInto(reader, value!.buffer, offset, size); + const result = await this.reader!.read() as ReadableStreamDefaultReadValueResult; + !result.done && (result.value = toUint8Array(result)); + return result; } - return { done, value: new Uint8Array(value!.buffer, 0, offset) }; } /** @ignore */ @@ -298,7 +248,7 @@ type Event = [EventName, (_: any) => void, Promise<[EventName, Error | null]>]; /** @ignore */ const onEvent = (stream: NodeJS.ReadableStream, event: T) => { const handler = (_: any) => resolve([event, _]); - let resolve: (value?: [T, any] | PromiseLike<[T, any]>) => void; + let resolve: (value: [T, any] | PromiseLike<[T, any]>) => void; return [event, handler, new Promise<[T, any]>( (r) => (resolve = r) && stream['once'](event, handler) )] as Event; @@ -323,7 +273,7 @@ async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGe // Yield so the 
caller can inject the read command before we // add the listener for the source stream's 'readable' event. - ({ cmd, size } = yield null); + ({ cmd, size } = yield null); // ignore stdin if it's a TTY if ((stream as any)['isTTY']) { @@ -346,22 +296,22 @@ async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGe if (event === 'error') { break; } if (!(done = event === 'end')) { // If the size is NaN, request to read everything in the stream's internal buffer - if (!isFinite(size - bufferLength)) { - buffer = toUint8Array(stream['read'](undefined)); + if (!Number.isFinite(size - bufferLength)) { + buffer = toUint8Array(stream['read']()); } else { buffer = toUint8Array(stream['read'](size - bufferLength)); // If the byteLength is 0, then the requested amount is more than the stream has // in its internal buffer. In this case the stream needs a "kick" to tell it to // continue emitting readable events, so request to read everything the stream // has in its internal buffer right now. 
- if (buffer.byteLength < (size - bufferLength)) { - buffer = toUint8Array(stream['read'](undefined)); + if ((buffer as Uint8Array).byteLength < (size - bufferLength)) { + buffer = toUint8Array(stream['read']()); } } // if chunk is not null or empty, push it onto the queue - if (buffer.byteLength > 0) { - buffers.push(buffer); - bufferLength += buffer.byteLength; + if ((buffer as Uint8Array).byteLength > 0) { + buffers.push(buffer as Uint8Array); + bufferLength += (buffer as Uint8Array).byteLength; } } // If we have enough bytes in our buffer, yield chunks until we don't @@ -378,8 +328,8 @@ async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGe return null; function cleanup(events: Event[], err?: T) { - buffer = buffers = null; - return new Promise((resolve, reject) => { + buffer = buffers = null; + return new Promise((resolve, reject) => { for (const [evt, fn] of events) { stream['off'](evt, fn); } @@ -390,7 +340,7 @@ async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGe const destroy = (stream as any)['destroy']; destroy && destroy.call(stream, err); err = undefined; - } catch (e) { err = e || err; } finally { + } catch (e) { err = e as T || err; } finally { err != null ? reject(err) : resolve(); } }); diff --git a/js/src/io/file.ts b/js/src/io/file.ts index 20b7dbf02dfcb..01d1479cfaa91 100644 --- a/js/src/io/file.ts +++ b/js/src/io/file.ts @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-import { FileHandle } from './interfaces'; -import { ByteStream, AsyncByteStream } from './stream'; -import { ArrayBufferViewInput, toUint8Array } from '../util/buffer'; +import { FileHandle } from './interfaces.js'; +import { ByteStream, AsyncByteStream } from './stream.js'; +import { ArrayBufferViewInput, toUint8Array } from '../util/buffer.js'; /** @ignore */ export class RandomAccessFile extends ByteStream { @@ -27,7 +27,7 @@ export class RandomAccessFile extends ByteStream { constructor(buffer: ArrayBufferViewInput, byteLength?: number) { super(); this.buffer = toUint8Array(buffer); - this.size = typeof byteLength === 'undefined' ? this.buffer.byteLength : byteLength; + this.size = typeof byteLength === 'undefined' ? this.buffer!.byteLength : byteLength; } public readInt32(position: number) { const { buffer, byteOffset } = this.readAt(position, 4); @@ -40,9 +40,9 @@ export class RandomAccessFile extends ByteStream { public read(nBytes?: number | null) { const { buffer, size, position } = this; if (buffer && position < size) { - if (typeof nBytes !== 'number') { nBytes = Infinity; } + if (typeof nBytes !== 'number') { nBytes = Number.POSITIVE_INFINITY; } this.position = Math.min(size, - position + Math.min(size - position, nBytes)); + position + Math.min(size - position, nBytes)); return buffer.subarray(position, this.position); } return null; @@ -59,7 +59,7 @@ export class RandomAccessFile extends ByteStream { /** @ignore */ export class AsyncRandomAccessFile extends AsyncByteStream { - public size!: number; + declare public size: number; public position = 0; public _pending?: Promise; protected _handle: FileHandle | null; @@ -88,7 +88,7 @@ export class AsyncRandomAccessFile extends AsyncByteStream { this._pending && await this._pending; const { _handle: file, size, position } = this; if (file && position < size) { - if (typeof nBytes !== 'number') { nBytes = Infinity; } + if (typeof nBytes !== 'number') { nBytes = Number.POSITIVE_INFINITY; } let pos = 
position, offset = 0, bytesRead = 0; const end = Math.min(size, pos + Math.min(size - pos, nBytes)); const buffer = new Uint8Array(Math.max(0, (this.position = end) - pos)); diff --git a/js/src/io/interfaces.ts b/js/src/io/interfaces.ts index 4b5641ff13a8c..d69841bcbbb91 100644 --- a/js/src/io/interfaces.ts +++ b/js/src/io/interfaces.ts @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -import streamAdapters from './adapters'; +import streamAdapters from './adapters.js'; /** @ignore */ export const ITERATOR_DONE: any = Object.freeze({ done: true, value: void (0) }); @@ -29,7 +29,7 @@ export type ReadableDOMStreamOptions = { type: 'bytes' | undefined; autoAllocate /** @ignore */ export class ArrowJSON { - constructor(private _json: ArrowJSONLike) {} + constructor(private _json: ArrowJSONLike) { } public get schema(): any { return this._json['schema']; } public get batches(): any[] { return (this._json['batches'] || []) as any[]; } public get dictionaries(): any[] { return (this._json['dictionaries'] || []) as any[]; } @@ -75,8 +75,8 @@ export abstract class ReadableInterop { public pipe(writable: R, options?: { end?: boolean }) { return this._getNodeStream().pipe(writable, options); } - public pipeTo(writable: WritableStream, options?: PipeOptions) { return this._getDOMStream().pipeTo(writable, options); } - public pipeThrough>(duplex: { writable: WritableStream; readable: R }, options?: PipeOptions) { + public pipeTo(writable: WritableStream, options?: StreamPipeOptions) { return this._getDOMStream().pipeTo(writable, options); } + public pipeThrough>(duplex: { writable: WritableStream; readable: R }, options?: StreamPipeOptions) { return this._getDOMStream().pipeThrough(duplex, options); } @@ -92,7 +92,7 @@ export abstract class ReadableInterop { } /** @ignore */ -type Resolution = { resolve: (value?: T | PromiseLike) => void; reject: (reason?: any) => void }; +type Resolution = { resolve: (value: T | PromiseLike) => 
void; reject: (reason?: any) => void }; /** @ignore */ export class AsyncQueue extends ReadableInterop diff --git a/js/src/io/node/builder.ts b/js/src/io/node/builder.ts index eb95795365f9d..be289f447f5da 100644 --- a/js/src/io/node/builder.ts +++ b/js/src/io/node/builder.ts @@ -16,8 +16,9 @@ // under the License. import { Duplex } from 'stream'; -import { DataType } from '../../type'; -import { Builder, BuilderOptions } from '../../builder/index'; +import { DataType } from '../../type.js'; +import { Builder, BuilderOptions } from '../../builder.js'; +import { makeBuilder } from '../../factories.js'; /** @ignore */ export interface BuilderDuplexOptions extends BuilderOptions { @@ -30,7 +31,7 @@ export interface BuilderDuplexOptions ext /** @ignore */ export function builderThroughNodeStream(options: BuilderDuplexOptions) { - return new BuilderDuplex(Builder.new(options), options); + return new BuilderDuplex(makeBuilder(options), options); } /** @ignore */ diff --git a/js/src/io/node/iterable.ts b/js/src/io/node/iterable.ts index 457bc894dadc5..6698e7fa92915 100644 --- a/js/src/io/node/iterable.ts +++ b/js/src/io/node/iterable.ts @@ -16,7 +16,7 @@ // under the License. import { Readable } from 'stream'; -import { isIterable, isAsyncIterable } from '../../util/compat'; +import { isIterable, isAsyncIterable } from '../../util/compat.js'; /** @ignore */ type ReadableOptions = import('stream').ReadableOptions; diff --git a/js/src/io/node/reader.ts b/js/src/io/node/reader.ts index a51fb0b403612..e8bbf736aa651 100644 --- a/js/src/io/node/reader.ts +++ b/js/src/io/node/reader.ts @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-import { DataType } from '../../type'; import { Duplex, DuplexOptions } from 'stream'; -import { RecordBatch } from '../../recordbatch'; -import { AsyncByteQueue } from '../../io/stream'; -import { RecordBatchReader } from '../../ipc/reader'; +import { AsyncByteQueue } from '../../io/stream.js'; +import { RecordBatchReader } from '../../ipc/reader.js'; +import { RecordBatch } from '../../recordbatch.js'; +import { TypeMap } from '../../type.js'; /** @ignore */ -export function recordBatchReaderThroughNodeStream(options?: DuplexOptions & { autoDestroy: boolean }) { +export function recordBatchReaderThroughNodeStream(options?: DuplexOptions & { autoDestroy: boolean }) { return new RecordBatchReaderDuplex(options); } @@ -30,7 +30,7 @@ export function recordBatchReaderThroughNodeStream void; /** @ignore */ -class RecordBatchReaderDuplex extends Duplex { +class RecordBatchReaderDuplex extends Duplex { private _pulling = false; private _autoDestroy = true; private _reader: RecordBatchReader | null; diff --git a/js/src/io/node/writer.ts b/js/src/io/node/writer.ts index 79d61b9a315a3..5725ef7a5d726 100644 --- a/js/src/io/node/writer.ts +++ b/js/src/io/node/writer.ts @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-import { DataType } from '../../type'; import { Duplex, DuplexOptions } from 'stream'; -import { AsyncByteStream } from '../../io/stream'; -import { RecordBatchWriter } from '../../ipc/writer'; +import { AsyncByteStream } from '../../io/stream.js'; +import { RecordBatchWriter } from '../../ipc/writer.js'; +import { TypeMap } from '../../type.js'; /** @ignore */ -export function recordBatchWriterThroughNodeStream(this: typeof RecordBatchWriter, options?: DuplexOptions & { autoDestroy: boolean }) { +export function recordBatchWriterThroughNodeStream(this: typeof RecordBatchWriter, options?: DuplexOptions & { autoDestroy: boolean }) { return new RecordBatchWriterDuplex(new this(options)); } @@ -29,7 +29,7 @@ export function recordBatchWriterThroughNodeStream void; /** @ignore */ -class RecordBatchWriterDuplex extends Duplex { +class RecordBatchWriterDuplex extends Duplex { private _pulling = false; private _reader: AsyncByteStream | null; private _writer: RecordBatchWriter | null; diff --git a/js/src/io/stream.ts b/js/src/io/stream.ts index 2384ab0b96f4c..b011d5a6dc127 100644 --- a/js/src/io/stream.ts +++ b/js/src/io/stream.ts @@ -15,16 +15,16 @@ // specific language governing permissions and limitations // under the License. 
-import streamAdapters from './adapters'; -import { decodeUtf8 } from '../util/utf8'; -import { ITERATOR_DONE, Readable, Writable, AsyncQueue } from './interfaces'; -import { toUint8Array, joinUint8Arrays, ArrayBufferViewInput } from '../util/buffer'; +import streamAdapters from './adapters.js'; +import { decodeUtf8 } from '../util/utf8.js'; +import { ITERATOR_DONE, Readable, Writable, AsyncQueue } from './interfaces.js'; +import { toUint8Array, joinUint8Arrays, ArrayBufferViewInput } from '../util/buffer.js'; import { isPromise, isFetchResponse, isIterable, isAsyncIterable, isReadableDOMStream, isReadableNodeStream -} from '../util/compat'; +} from '../util/compat.js'; /** @ignore */ export type WritableSink = Writable | WritableStream | NodeJS.WritableStream | null; @@ -62,7 +62,7 @@ export class AsyncByteQueue extends /** @ignore */ export class ByteStream implements IterableIterator { - private source!: ByteStreamSource; + declare private source: ByteStreamSource; constructor(source?: Iterable | ArrayBufferViewInput) { if (source) { this.source = new ByteStreamSource(streamAdapters.fromIterable(source)); @@ -78,7 +78,7 @@ export class ByteStream implements IterableIterator { /** @ignore */ export class AsyncByteStream implements Readable, AsyncIterableIterator { - private source!: AsyncByteStreamSource; + declare private source: AsyncByteStreamSource; constructor(source?: PromiseLike | Response | ReadableStream | NodeJS.ReadableStream | AsyncIterable | Iterable) { if (source instanceof AsyncByteStream) { this.source = (source as AsyncByteStream).source; @@ -115,7 +115,7 @@ type AsyncByteStreamSourceIterator = AsyncGenerator { - constructor(protected source: ByteStreamSourceIterator) {} + constructor(protected source: ByteStreamSourceIterator) { } public cancel(reason?: any) { this.return(reason); } public peek(size?: number | null): T | null { return this.next(size, 'peek').value; } public read(size?: number | null): T | null { return this.next(size, 
'read').value; } @@ -129,7 +129,7 @@ class AsyncByteStreamSource implements Readable { private _closedPromise: Promise; private _closedPromiseResolve?: (value?: any) => void; - constructor (protected source: ByteStreamSourceIterator | AsyncByteStreamSourceIterator) { + constructor(protected source: ByteStreamSourceIterator | AsyncByteStreamSourceIterator) { this._closedPromise = new Promise((r) => this._closedPromiseResolve = r); } public async cancel(reason?: any) { await this.return(reason); } diff --git a/js/src/io/whatwg/builder.ts b/js/src/io/whatwg/builder.ts index c65511844b9f9..7b0ba8035987d 100644 --- a/js/src/io/whatwg/builder.ts +++ b/js/src/io/whatwg/builder.ts @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -import { DataType } from '../../type'; -import { Vector } from '../../vector'; -import { VectorType as V } from '../../interfaces'; -import { Builder, BuilderOptions } from '../../builder/index'; +import { DataType } from '../../type.js'; +import { Vector } from '../../vector.js'; +import { Builder, BuilderOptions } from '../../builder.js'; +import { makeBuilder } from '../../factories.js'; /** @ignore */ export interface BuilderTransformOptions extends BuilderOptions { @@ -37,9 +37,9 @@ export function builderThroughDOMStream(o /** @ignore */ export class BuilderTransform { - public readable: ReadableStream>; + public readable: ReadableStream>; public writable: WritableStream; - public _controller: ReadableStreamDefaultController> | null; + public _controller: ReadableStreamDefaultController> | null; private _numChunks = 0; private _finished = false; @@ -59,14 +59,14 @@ export class BuilderTransform { } = options; this._controller = null; - this._builder = Builder.new(builderOptions); + this._builder = makeBuilder(builderOptions); this._getSize = queueingStrategy !== 'bytes' ? chunkLength : chunkByteLength; const { ['highWaterMark']: readableHighWaterMark = queueingStrategy === 'bytes' ? 
2 ** 14 : 1000 } = { ...readableStrategy }; const { ['highWaterMark']: writableHighWaterMark = queueingStrategy === 'bytes' ? 2 ** 14 : 1000 } = { ...writableStrategy }; - this['readable'] = new ReadableStream>({ - ['cancel']: () => { this._builder.clear(); }, + this['readable'] = new ReadableStream>({ + ['cancel']: () => { this._builder.clear(); }, ['pull']: (c) => { this._maybeFlush(this._builder, this._controller = c); }, ['start']: (c) => { this._maybeFlush(this._builder, this._controller = c); }, }, { @@ -90,8 +90,8 @@ export class BuilderTransform { return this._bufferedSize - bufferedSize; } - private _maybeFlush(builder: Builder, controller: ReadableStreamDefaultController> | null) { - if (controller === null) { return; } + private _maybeFlush(builder: Builder, controller: ReadableStreamDefaultController> | null) { + if (controller == null) { return; } if (this._bufferedSize >= controller.desiredSize!) { ++this._numChunks && this._enqueue(controller, builder.toVector()); } @@ -105,12 +105,12 @@ export class BuilderTransform { } } - private _enqueue(controller: ReadableStreamDefaultController>, chunk: V | null) { + private _enqueue(controller: ReadableStreamDefaultController>, chunk: Vector | null) { this._bufferedSize = 0; this._controller = null; - chunk === null ? controller.close() : controller.enqueue(chunk); + chunk == null ? controller.close() : controller.enqueue(chunk); } } -/** @ignore */ const chunkLength = (chunk: Vector | Builder) => chunk.length; -/** @ignore */ const chunkByteLength = (chunk: Vector | Builder) => chunk.byteLength; +/** @ignore */ const chunkLength = (chunk?: Vector | Builder) => chunk?.length ?? 0; +/** @ignore */ const chunkByteLength = (chunk?: Vector | Builder) => chunk?.byteLength ?? 
0; diff --git a/js/src/io/whatwg/iterable.ts b/js/src/io/whatwg/iterable.ts index ce9e97369f137..508a10b89bb01 100644 --- a/js/src/io/whatwg/iterable.ts +++ b/js/src/io/whatwg/iterable.ts @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -import { toUint8Array } from '../../util/buffer'; -import { ReadableDOMStreamOptions } from '../../io/interfaces'; -import { isIterable, isAsyncIterable } from '../../util/compat'; +import { toUint8Array } from '../../util/buffer.js'; +import { ReadableDOMStreamOptions } from '../../io/interfaces.js'; +import { isIterable, isAsyncIterable } from '../../util/compat.js'; /** @ignore */ type SourceIterator = Generator; @@ -53,7 +53,7 @@ function iterableAsReadableDOMStream(source: Iterable, options?: ReadableD while (!(r = it.next(bm ? size : null)).done) { if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) { size != null && bm && (size = size - buf.byteLength + 1); - r.value = buf; + r.value = buf; } controller.enqueue(r.value); if (size != null && --size <= 0) { return; } @@ -83,7 +83,7 @@ function asyncIterableAsReadableDOMStream(source: AsyncIterable, options?: while (!(r = await it.next(bm ? size : null)).done) { if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) { size != null && bm && (size = size - buf.byteLength + 1); - r.value = buf; + r.value = buf; } controller.enqueue(r.value); if (size != null && --size <= 0) { return; } diff --git a/js/src/io/whatwg/reader.ts b/js/src/io/whatwg/reader.ts index 9e19bac53f606..7c4b2ec3974bb 100644 --- a/js/src/io/whatwg/reader.ts +++ b/js/src/io/whatwg/reader.ts @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-import { DataType } from '../../type'; -import { RecordBatch } from '../../recordbatch'; -import { AsyncByteQueue } from '../../io/stream'; -import { RecordBatchReader } from '../../ipc/reader'; +import { TypeMap } from '../../type.js'; +import { RecordBatch } from '../../recordbatch.js'; +import { AsyncByteQueue } from '../../io/stream.js'; +import { RecordBatchReader } from '../../ipc/reader.js'; /** @ignore */ -export function recordBatchReaderThroughDOMStream(writableStrategy?: ByteLengthQueuingStrategy, readableStrategy?: { autoDestroy: boolean }) { +export function recordBatchReaderThroughDOMStream(writableStrategy?: ByteLengthQueuingStrategy, readableStrategy?: { autoDestroy: boolean }) { const queue = new AsyncByteQueue(); let reader: RecordBatchReader | null = null; diff --git a/js/src/io/whatwg/writer.ts b/js/src/io/whatwg/writer.ts index 49789bdd33a2a..d7711201e7636 100644 --- a/js/src/io/whatwg/writer.ts +++ b/js/src/io/whatwg/writer.ts @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-import { DataType } from '../../type'; -import { RecordBatch } from '../../recordbatch'; -import { AsyncByteStream } from '../../io/stream'; -import { RecordBatchWriter } from '../../ipc/writer'; +import { TypeMap } from '../../type.js'; +import { RecordBatch } from '../../recordbatch.js'; +import { AsyncByteStream } from '../../io/stream.js'; +import { RecordBatchWriter } from '../../ipc/writer.js'; /** @ignore */ -export function recordBatchWriterThroughDOMStream( +export function recordBatchWriterThroughDOMStream( this: typeof RecordBatchWriter, writableStrategy?: QueuingStrategy> & { autoDestroy: boolean }, readableStrategy?: { highWaterMark?: number; size?: any } @@ -30,7 +30,7 @@ export function recordBatchWriterThroughDOMStream(writableStrategy); const reader = new AsyncByteStream(writer); const readable = new ReadableStream({ - type: 'bytes', + // type: 'bytes', async cancel() { await reader.cancel(); }, async pull(controller) { await next(controller); }, async start(controller) { await next(controller); }, diff --git a/js/src/ipc/message.ts b/js/src/ipc/message.ts index 34c0aa3082af6..678e6e5f26e12 100644 --- a/js/src/ipc/message.ts +++ b/js/src/ipc/message.ts @@ -15,19 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-import { MessageHeader } from '../enum'; -import { flatbuffers } from 'flatbuffers'; -import ByteBuffer = flatbuffers.ByteBuffer; -import { Message } from './metadata/message'; -import { isFileHandle } from '../util/compat'; -import { AsyncRandomAccessFile } from '../io/file'; -import { toUint8Array, ArrayBufferViewInput } from '../util/buffer'; -import { ByteStream, ReadableSource, AsyncByteStream } from '../io/stream'; -import { ArrowJSON, ArrowJSONLike, ITERATOR_DONE, FileHandle } from '../io/interfaces'; +import { MessageHeader } from '../enum.js'; +import { ByteBuffer } from 'flatbuffers'; +import { Message } from './metadata/message.js'; +import { isFileHandle } from '../util/compat.js'; +import { AsyncRandomAccessFile } from '../io/file.js'; +import { toUint8Array, ArrayBufferViewInput } from '../util/buffer.js'; +import { ByteStream, ReadableSource, AsyncByteStream } from '../io/stream.js'; +import { ArrowJSON, ArrowJSONLike, ITERATOR_DONE, FileHandle } from '../io/interfaces.js'; -/** @ignore */ const invalidMessageType = (type: MessageHeader) => `Expected ${MessageHeader[type]} Message in stream, but was null or length 0.`; -/** @ignore */ const nullMessage = (type: MessageHeader) => `Header pointer of flatbuffer-encoded ${MessageHeader[type]} Message is null or length 0.`; -/** @ignore */ const invalidMessageMetadata = (expected: number, actual: number) => `Expected to read ${expected} metadata bytes, but only read ${actual}.`; +/** @ignore */ const invalidMessageType = (type: MessageHeader) => `Expected ${MessageHeader[type]} Message in stream, but was null or length 0.`; +/** @ignore */ const nullMessage = (type: MessageHeader) => `Header pointer of flatbuffer-encoded ${MessageHeader[type]} Message is null or length 0.`; +/** @ignore */ const invalidMessageMetadata = (expected: number, actual: number) => `Expected to read ${expected} metadata bytes, but only read ${actual}.`; /** @ignore */ const invalidMessageBodyLength = (expected: number, actual: 
number) => `Expected to read ${expected} bytes for message body, but only read ${actual}.`; /** @ignore */ @@ -46,7 +45,7 @@ export class MessageReader implements IterableIterator { if ((r.value === -1) && (r = this.readMetadataLength()).done) { return ITERATOR_DONE; } if ((r = this.readMetadata(r.value)).done) { return ITERATOR_DONE; } - return ( r) as IteratorResult; + return (r) as IteratorResult; } public throw(value?: any) { return this.source.throw(value); } public return(value?: any) { return this.source.return(value); } @@ -102,8 +101,8 @@ export class AsyncMessageReader implements AsyncIterableIterator { constructor(source: any, byteLength?: number) { this.source = source instanceof AsyncByteStream ? source : isFileHandle(source) - ? new AsyncRandomAccessFile(source, byteLength!) - : new AsyncByteStream(source); + ? new AsyncRandomAccessFile(source, byteLength!) + : new AsyncByteStream(source); } public [Symbol.asyncIterator](): AsyncIterableIterator { return this as AsyncIterableIterator; } public async next(): Promise> { @@ -115,7 +114,7 @@ export class AsyncMessageReader implements AsyncIterableIterator { if ((r.value === -1) && (r = await this.readMetadataLength()).done) { return ITERATOR_DONE; } if ((r = await this.readMetadata(r.value)).done) { return ITERATOR_DONE; } - return ( r) as IteratorResult; + return (r) as IteratorResult; } public async throw(value?: any) { return await this.source.throw(value); } public async return(value?: any) { return await this.source.return(value); } @@ -235,8 +234,8 @@ export const MAGIC_STR = 'ARROW1'; /** @ignore */ export const MAGIC = new Uint8Array(MAGIC_STR.length); -for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { - MAGIC[i] = MAGIC_STR.charCodeAt(i); +for (let i = 0; i < MAGIC_STR.length; i += 1) { + MAGIC[i] = MAGIC_STR.codePointAt(i)!; } /** @ignore */ diff --git a/js/src/ipc/metadata/file.ts b/js/src/ipc/metadata/file.ts index 5a1be844e158b..d0fe0aa81f504 100644 --- a/js/src/ipc/metadata/file.ts +++ 
b/js/src/ipc/metadata/file.ts @@ -17,21 +17,19 @@ /* eslint-disable @typescript-eslint/naming-convention */ -import { - Block as _Block, - Footer as _Footer -} from '../../fb/File'; +import { Block as _Block } from '../../fb/block.js'; +import { Footer as _Footer } from '../../fb/footer.js'; -import { flatbuffers } from 'flatbuffers'; +import * as flatbuffers from 'flatbuffers'; import Long = flatbuffers.Long; import Builder = flatbuffers.Builder; import ByteBuffer = flatbuffers.ByteBuffer; -import { Schema } from '../../schema'; -import { MetadataVersion } from '../../enum'; -import { toUint8Array } from '../../util/buffer'; -import { ArrayBufferViewInput } from '../../util/buffer'; +import { Schema } from '../../schema.js'; +import { MetadataVersion } from '../../enum.js'; +import { toUint8Array } from '../../util/buffer.js'; +import { ArrayBufferViewInput } from '../../util/buffer.js'; /** @ignore */ class Footer_ { @@ -51,11 +49,15 @@ class Footer_ { const schemaOffset = Schema.encode(b, footer.schema); _Footer.startRecordBatchesVector(b, footer.numRecordBatches); - [...footer.recordBatches()].slice().reverse().forEach((rb) => FileBlock.encode(b, rb)); + for (const rb of [...footer.recordBatches()].slice().reverse()) { + FileBlock.encode(b, rb); + } const recordBatchesOffset = b.endVector(); _Footer.startDictionariesVector(b, footer.numDictionaries); - [...footer.dictionaryBatches()].slice().reverse().forEach((db) => FileBlock.encode(b, db)); + for (const db of [...footer.dictionaryBatches()].slice().reverse()) { + FileBlock.encode(b, db); + } const dictionaryBatchesOffset = b.endVector(); @@ -69,14 +71,14 @@ class Footer_ { return b.asUint8Array(); } - protected _recordBatches!: FileBlock[]; - protected _dictionaryBatches!: FileBlock[]; + declare protected _recordBatches: FileBlock[]; + declare protected _dictionaryBatches: FileBlock[]; public get numRecordBatches() { return this._recordBatches.length; } public get numDictionaries() { return 
this._dictionaryBatches.length; } constructor(public schema: Schema, - public version: MetadataVersion = MetadataVersion.V4, - recordBatches?: FileBlock[], dictionaryBatches?: FileBlock[]) { + public version: MetadataVersion = MetadataVersion.V4, + recordBatches?: FileBlock[], dictionaryBatches?: FileBlock[]) { recordBatches && (this._recordBatches = recordBatches); dictionaryBatches && (this._dictionaryBatches = dictionaryBatches); } diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts index 399615c31d4d0..0fc1ca0801926 100644 --- a/js/src/ipc/metadata/json.ts +++ b/js/src/ipc/metadata/json.ts @@ -17,16 +17,16 @@ /* eslint-disable brace-style */ -import { Schema, Field } from '../../schema'; +import { Schema, Field } from '../../schema.js'; import { DataType, Dictionary, TimeBitWidth, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, -} from '../../type'; +} from '../../type.js'; -import { DictionaryBatch, RecordBatch, FieldNode, BufferRegion } from './message'; -import { TimeUnit, Precision, IntervalUnit, UnionMode, DateUnit } from '../../enum'; +import { DictionaryBatch, RecordBatch, FieldNode, BufferRegion } from './message.js'; +import { TimeUnit, Precision, IntervalUnit, UnionMode, DateUnit } from '../../enum.js'; /** @ignore */ export function schemaFromJSON(_schema: any, dictionaries: Map = new Map()) { @@ -146,12 +146,12 @@ function typeFromJSON(f: any, children?: Field[]): DataType { const typeId = f['type']['name']; switch (typeId) { - case 'NONE': return new Null(); - case 'null': return new Null(); + case 'NONE': return new Null(); + case 'null': return new Null(); case 'binary': return new Binary(); - case 'utf8': return new Utf8(); - case 'bool': return new Bool(); - case 'list': return new List((children || [])[0]); + case 'utf8': return new Utf8(); + case 'bool': return new Bool(); + case 'list': return new 
List((children || [])[0]); case 'struct': return new Struct(children || []); case 'struct_': return new Struct(children || []); } @@ -167,7 +167,7 @@ function typeFromJSON(f: any, children?: Field[]): DataType { } case 'decimal': { const t = f['type']; - return new Decimal(t['scale'], t['precision']); + return new Decimal(t['scale'], t['precision'], t['bitWidth']); } case 'date': { const t = f['type']; diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts index 2ebb73e4c0fe2..b351519944e98 100644 --- a/js/src/ipc/metadata/message.ts +++ b/js/src/ipc/metadata/message.ts @@ -17,42 +17,37 @@ /* eslint-disable brace-style */ -import { flatbuffers } from 'flatbuffers'; - -import { - Type, - Int as _Int, - Field as _Field, - Schema as _Schema, - Buffer as _Buffer, - KeyValue as _KeyValue, - Endianness as _Endianness, - DictionaryEncoding as _DictionaryEncoding, - FloatingPoint as _FloatingPoint, - Decimal as _Decimal, - Date as _Date, - Time as _Time, - Timestamp as _Timestamp, - Interval as _Interval, - Union as _Union, - FixedSizeBinary as _FixedSizeBinary, - FixedSizeList as _FixedSizeList, - Map as _Map, -} from '../../fb/Schema'; - -import { - Message as _Message, - FieldNode as _FieldNode, - RecordBatch as _RecordBatch, - DictionaryBatch as _DictionaryBatch, -} from '../../fb/Message'; - -import { Schema, Field } from '../../schema'; -import { toUint8Array } from '../../util/buffer'; -import { ArrayBufferViewInput } from '../../util/buffer'; -import { MessageHeader, MetadataVersion } from '../../enum'; -import { instance as typeAssembler } from '../../visitor/typeassembler'; -import { fieldFromJSON, schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from './json'; +import * as flatbuffers from 'flatbuffers'; + +import { Schema as _Schema } from '../../fb/schema.js'; +import { Int as _Int } from '../../fb/int.js'; +import { RecordBatch as _RecordBatch } from '../../fb/record-batch.js'; +import { DictionaryBatch as _DictionaryBatch 
} from '../../fb/dictionary-batch.js'; +import { Buffer as _Buffer } from '../../fb/buffer.js'; +import { Field as _Field } from '../../fb/field.js'; +import { FieldNode as _FieldNode } from '../../fb/field-node.js'; +import { DictionaryEncoding as _DictionaryEncoding } from '../../fb/dictionary-encoding.js'; +import { Type } from '../../fb/type.js'; +import { KeyValue as _KeyValue } from '../../fb/key-value.js'; +import { Endianness as _Endianness } from '../../fb/endianness.js'; +import { FloatingPoint as _FloatingPoint } from '../../fb/floating-point.js'; +import { Decimal as _Decimal } from '../../fb/decimal.js'; +import { Date as _Date } from '../../fb/date.js'; +import { Time as _Time } from '../../fb/time.js'; +import { Timestamp as _Timestamp } from '../../fb/timestamp.js'; +import { Interval as _Interval } from '../../fb/interval.js'; +import { Union as _Union } from '../../fb/union.js'; +import { FixedSizeBinary as _FixedSizeBinary } from '../../fb/fixed-size-binary.js'; +import { FixedSizeList as _FixedSizeList } from '../../fb/fixed-size-list.js'; +import { Map as _Map } from '../../fb/map.js'; +import { Message as _Message } from '../../fb/message.js'; + +import { Schema, Field } from '../../schema.js'; +import { toUint8Array } from '../../util/buffer.js'; +import { ArrayBufferViewInput } from '../../util/buffer.js'; +import { MessageHeader, MetadataVersion } from '../../enum.js'; +import { instance as typeAssembler } from '../../visitor/typeassembler.js'; +import { fieldFromJSON, schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from './json.js'; import Long = flatbuffers.Long; import Builder = flatbuffers.Builder; @@ -63,7 +58,7 @@ import { Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, -} from '../../type'; +} from '../../type.js'; /** * @ignore @@ -132,7 +127,7 @@ export class Message { public get version() { return 
this._version; } public get headerType() { return this._headerType; } public get bodyLength() { return this._bodyLength; } - protected _createHeader!: MessageHeaderDecoder; + declare protected _createHeader: MessageHeaderDecoder; public header() { return this._createHeader(); } public isSchema(): this is Message { return this.headerType === MessageHeader.Schema; } public isRecordBatch(): this is Message { return this.headerType === MessageHeader.RecordBatch; } @@ -336,10 +331,10 @@ function decodeBuffers(batch: _RecordBatch, version: MetadataVersion) { const bufferRegions = [] as BufferRegion[]; for (let b, i = -1, j = -1, n = batch.buffersLength(); ++i < n;) { if (b = batch.buffers(i)) { - // If this Arrow buffer was written before version 4, - // advance the buffer's bb_pos 8 bytes to skip past - // the now-removed page_id field - if (version < MetadataVersion.V4) { + // If this Arrow buffer was written before version 4, + // advance the buffer's bb_pos 8 bytes to skip past + // the now-removed page_id field + if (version < MetadataVersion.V4) { b.bb_pos += (8 * (i + 1)); } bufferRegions[++j] = BufferRegion.decode(b); @@ -410,7 +405,7 @@ function decodeField(f: _Field, dictionaries?: Map) { function decodeCustomMetadata(parent?: _Schema | _Field | null) { const data = new Map(); if (parent) { - for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) { + for (let entry, key, i = -1, n = Math.trunc(parent.customMetadataLength()); ++i < n;) { if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) { data.set(key, entry.value()!); } @@ -430,12 +425,12 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType { const typeId = f.typeType(); switch (typeId) { - case Type['NONE']: return new Null(); - case Type['Null']: return new Null(); - case Type['Binary']: return new Binary(); - case Type['Utf8']: return new Utf8(); - case Type['Bool']: return new Bool(); - case Type['List']: return new List((children || [])[0]); + 
case Type['NONE']: return new Null(); + case Type['Null']: return new Null(); + case Type['Binary']: return new Binary(); + case Type['Utf8']: return new Utf8(); + case Type['Bool']: return new Bool(); + case Type['List']: return new List((children || [])[0]); case Type['Struct_']: return new Struct(children || []); } @@ -450,7 +445,7 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType { } case Type['Decimal']: { const t = f.type(new _Decimal())!; - return new Decimal(t.scale(), t.precision()); + return new Decimal(t.scale(), t.precision(), t.bitWidth()); } case Type['Date']: { const t = f.type(new _Date())!; @@ -524,7 +519,7 @@ function encodeField(b: Builder, field: Field) { let dictionaryOffset = -1; const type = field.type; - let typeId: Type = field.typeId; + let typeId: Type = field.typeId; if (!DataType.isDictionary(type)) { typeOffset = typeAssembler.visit(type, b)!; @@ -571,12 +566,12 @@ function encodeRecordBatch(b: Builder, recordBatch: RecordBatch) { const buffers = recordBatch.buffers || []; _RecordBatch.startNodesVector(b, nodes.length); - nodes.slice().reverse().forEach((n) => FieldNode.encode(b, n)); + for (const n of nodes.slice().reverse()) FieldNode.encode(b, n); const nodesVectorOffset = b.endVector(); _RecordBatch.startBuffersVector(b, buffers.length); - buffers.slice().reverse().forEach((b_) => BufferRegion.encode(b, b_)); + for (const b_ of buffers.slice().reverse()) BufferRegion.encode(b, b_); const buffersVectorOffset = b.endVector(); @@ -608,7 +603,7 @@ function encodeBufferRegion(b: Builder, node: BufferRegion) { } /** @ignore */ -const platformIsLittleEndian = (function() { +const platformIsLittleEndian = (() => { const buffer = new ArrayBuffer(2); new DataView(buffer).setInt16(0, 256, true /* littleEndian */); // Int16Array uses the platform's endianness. @@ -617,5 +612,5 @@ const platformIsLittleEndian = (function() { /** @ignore */ type MessageHeaderDecoder = () => T extends MessageHeader.Schema ? 
Schema - : T extends MessageHeader.RecordBatch ? RecordBatch - : T extends MessageHeader.DictionaryBatch ? DictionaryBatch : never; + : T extends MessageHeader.RecordBatch ? RecordBatch + : T extends MessageHeader.DictionaryBatch ? DictionaryBatch : never; diff --git a/js/src/ipc/reader.ts b/js/src/ipc/reader.ts index a150ac1bb3c05..5b949322d92c8 100644 --- a/js/src/ipc/reader.ts +++ b/js/src/ipc/reader.ts @@ -15,36 +15,37 @@ // specific language governing permissions and limitations // under the License. -import { Vector } from '../vector'; -import { DataType } from '../type'; -import { MessageHeader } from '../enum'; -import { Footer } from './metadata/file'; -import { Schema, Field } from '../schema'; -import streamAdapters from '../io/adapters'; -import { Message } from './metadata/message'; -import * as metadata from './metadata/message'; -import { ArrayBufferViewInput } from '../util/buffer'; -import { ByteStream, AsyncByteStream } from '../io/stream'; -import { RandomAccessFile, AsyncRandomAccessFile } from '../io/file'; -import { VectorLoader, JSONVectorLoader } from '../visitor/vectorloader'; -import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from '../recordbatch'; +import { makeData } from '../data.js'; +import { Vector } from '../vector.js'; +import { DataType, Struct, TypeMap } from '../type.js'; +import { MessageHeader } from '../enum.js'; +import { Footer } from './metadata/file.js'; +import { Schema, Field } from '../schema.js'; +import streamAdapters from '../io/adapters.js'; +import { Message } from './metadata/message.js'; +import * as metadata from './metadata/message.js'; +import { ArrayBufferViewInput } from '../util/buffer.js'; +import { ByteStream, AsyncByteStream } from '../io/stream.js'; +import { RandomAccessFile, AsyncRandomAccessFile } from '../io/file.js'; +import { VectorLoader, JSONVectorLoader } from '../visitor/vectorloader.js'; +import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from '../recordbatch.js'; import 
{ FileHandle, ArrowJSONLike, ITERATOR_DONE, ReadableInterop, -} from '../io/interfaces'; +} from '../io/interfaces.js'; import { MessageReader, AsyncMessageReader, JSONMessageReader, checkForMagicArrowString, magicLength, magicAndPadding, magicX2AndPadding -} from './message'; +} from './message.js'; import { isPromise, isIterable, isAsyncIterable, isIteratorResult, isArrowJSON, isFileHandle, isFetchResponse, isReadableDOMStream, isReadableNodeStream -} from '../util/compat'; +} from '../util/compat.js'; /** @ignore */ export type FromArg0 = ArrowJSONLike; /** @ignore */ export type FromArg1 = PromiseLike; @@ -55,12 +56,12 @@ import { /** @ignore */ export type FromArgs = FromArg0 | FromArg1 | FromArg2 | FromArg3 | FromArg4 | FromArg5; /** @ignore */ type OpenOptions = { autoDestroy?: boolean }; -/** @ignore */ type RecordBatchReaders = RecordBatchFileReader | RecordBatchStreamReader; -/** @ignore */ type AsyncRecordBatchReaders = AsyncRecordBatchFileReader | AsyncRecordBatchStreamReader; -/** @ignore */ type RecordBatchFileReaders = RecordBatchFileReader | AsyncRecordBatchFileReader; -/** @ignore */ type RecordBatchStreamReaders = RecordBatchStreamReader | AsyncRecordBatchStreamReader; +/** @ignore */ type RecordBatchReaders = RecordBatchFileReader | RecordBatchStreamReader; +/** @ignore */ type AsyncRecordBatchReaders = AsyncRecordBatchFileReader | AsyncRecordBatchStreamReader; +/** @ignore */ type RecordBatchFileReaders = RecordBatchFileReader | AsyncRecordBatchFileReader; +/** @ignore */ type RecordBatchStreamReaders = RecordBatchStreamReader | AsyncRecordBatchStreamReader; -export class RecordBatchReader extends ReadableInterop> { +export class RecordBatchReader extends ReadableInterop> { protected _impl: RecordBatchReaderImpls; protected constructor(impl: RecordBatchReaderImpls) { @@ -107,10 +108,10 @@ export class RecordBatchReader exte return this._impl.isFile() ? 
this._impl.readRecordBatch(index) : null; } public [Symbol.iterator](): IterableIterator> { - return (>> this._impl)[Symbol.iterator](); + return (>>this._impl)[Symbol.iterator](); } public [Symbol.asyncIterator](): AsyncIterableIterator> { - return (>> this._impl)[Symbol.asyncIterator](); + return (>>this._impl)[Symbol.asyncIterator](); } public toDOMStream() { return streamAdapters.toDOMStream>( @@ -132,7 +133,7 @@ export class RecordBatchReader exte throw new Error(`"throughNode" not available in this environment`); } /** @nocollapse */ - public static throughDOM( + public static throughDOM( // @ts-ignore writableStrategy?: ByteLengthQueuingStrategy, // @ts-ignore @@ -142,14 +143,14 @@ export class RecordBatchReader exte } public static from(source: T): T; - public static from(source: FromArg0): RecordBatchStreamReader; - public static from(source: FromArg1): Promise>; - public static from(source: FromArg2): RecordBatchFileReader | RecordBatchStreamReader; - public static from(source: FromArg3): Promise | RecordBatchStreamReader>; - public static from(source: FromArg4): Promise | AsyncRecordBatchReaders>; - public static from(source: FromArg5): Promise | AsyncRecordBatchStreamReader>; + public static from(source: FromArg0): RecordBatchStreamReader; + public static from(source: FromArg1): Promise>; + public static from(source: FromArg2): RecordBatchFileReader | RecordBatchStreamReader; + public static from(source: FromArg3): Promise | RecordBatchStreamReader>; + public static from(source: FromArg4): Promise | AsyncRecordBatchReaders>; + public static from(source: FromArg5): Promise | AsyncRecordBatchStreamReader>; /** @nocollapse */ - public static from(source: any) { + public static from(source: any) { if (source instanceof RecordBatchReader) { return source; } else if (isArrowJSON(source)) { @@ -165,14 +166,14 @@ export class RecordBatchReader exte } public static readAll(source: T): T extends RecordBatchReaders ? 
IterableIterator : AsyncIterableIterator; - public static readAll(source: FromArg0): IterableIterator>; - public static readAll(source: FromArg1): AsyncIterableIterator>; - public static readAll(source: FromArg2): IterableIterator | RecordBatchStreamReader>; - public static readAll(source: FromArg3): AsyncIterableIterator | RecordBatchStreamReader>; - public static readAll(source: FromArg4): AsyncIterableIterator | AsyncRecordBatchReaders>; - public static readAll(source: FromArg5): AsyncIterableIterator | AsyncRecordBatchStreamReader>; + public static readAll(source: FromArg0): IterableIterator>; + public static readAll(source: FromArg1): AsyncIterableIterator>; + public static readAll(source: FromArg2): IterableIterator | RecordBatchStreamReader>; + public static readAll(source: FromArg3): AsyncIterableIterator | RecordBatchStreamReader>; + public static readAll(source: FromArg4): AsyncIterableIterator>; + public static readAll(source: FromArg5): AsyncIterableIterator>; /** @nocollapse */ - public static readAll(source: any) { + public static readAll(source: any) { if (source instanceof RecordBatchReader) { return source.isSync() ? 
readAllSync(source) : readAllAsync(source as AsyncRecordBatchReaders); } else if (isArrowJSON(source) || ArrayBuffer.isView(source) || isIterable(source) || isIteratorResult(source)) { @@ -203,24 +204,30 @@ export class RecordBatchReader exte // /** @ignore */ -export class RecordBatchStreamReader extends RecordBatchReader { - constructor(protected _impl: RecordBatchStreamReaderImpl) { super (_impl); } +export class RecordBatchStreamReader extends RecordBatchReader { + constructor(protected _impl: RecordBatchStreamReaderImpl) { super(_impl); } + public readAll() { return [...this]; } public [Symbol.iterator]() { return (this._impl as IterableIterator>)[Symbol.iterator](); } public async *[Symbol.asyncIterator](): AsyncIterableIterator> { yield* this[Symbol.iterator](); } } /** @ignore */ -export class AsyncRecordBatchStreamReader extends RecordBatchReader { - constructor(protected _impl: AsyncRecordBatchStreamReaderImpl) { super (_impl); } +export class AsyncRecordBatchStreamReader extends RecordBatchReader { + constructor(protected _impl: AsyncRecordBatchStreamReaderImpl) { super(_impl); } + public async readAll() { + const batches = new Array>(); + for await (const batch of this) { batches.push(batch); } + return batches; + } public [Symbol.iterator](): IterableIterator> { throw new Error(`AsyncRecordBatchStreamReader is not Iterable`); } public [Symbol.asyncIterator]() { return (this._impl as AsyncIterableIterator>)[Symbol.asyncIterator](); } } /** @ignore */ -export class RecordBatchFileReader extends RecordBatchStreamReader { - constructor(protected _impl: RecordBatchFileReaderImpl) { super (_impl); } +export class RecordBatchFileReader extends RecordBatchStreamReader { + constructor(protected _impl: RecordBatchFileReaderImpl) { super(_impl); } } /** @ignore */ -export class AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { - constructor(protected _impl: AsyncRecordBatchFileReaderImpl) { super (_impl); } +export class AsyncRecordBatchFileReader 
extends AsyncRecordBatchStreamReader { + constructor(protected _impl: AsyncRecordBatchFileReaderImpl) { super(_impl); } } // @@ -228,7 +235,7 @@ export class AsyncRecordBatchFileReader extends RecordBatchReader { +export interface RecordBatchStreamReader extends RecordBatchReader { open(options?: OpenOptions | undefined): this; cancel(): void; throw(value?: any): IteratorResult; @@ -237,7 +244,7 @@ export interface RecordBatchStreamReader extends RecordBatchReader { +export interface AsyncRecordBatchStreamReader extends RecordBatchReader { open(options?: OpenOptions | undefined): Promise; cancel(): Promise; throw(value?: any): Promise>; @@ -246,25 +253,25 @@ export interface AsyncRecordBatchStreamReader extends RecordBatchStreamReader { +export interface RecordBatchFileReader extends RecordBatchStreamReader { readRecordBatch(index: number): RecordBatch | null; } /** @ignore */ -export interface AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { +export interface AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { readRecordBatch(index: number): Promise | null>; } /** @ignore */ -type RecordBatchReaderImpls = - RecordBatchJSONReaderImpl | - RecordBatchFileReaderImpl | - RecordBatchStreamReaderImpl | - AsyncRecordBatchFileReaderImpl | - AsyncRecordBatchStreamReaderImpl; +type RecordBatchReaderImpls = + RecordBatchJSONReaderImpl | + RecordBatchFileReaderImpl | + RecordBatchStreamReaderImpl | + AsyncRecordBatchFileReaderImpl | + AsyncRecordBatchStreamReaderImpl; /** @ignore */ -interface RecordBatchReaderImpl { +interface RecordBatchReaderImpl { closed: boolean; schema: Schema; @@ -280,7 +287,7 @@ interface RecordBatchReaderImpl { } /** @ignore */ -interface RecordBatchStreamReaderImpl extends RecordBatchReaderImpl { +interface RecordBatchStreamReaderImpl extends RecordBatchReaderImpl { open(options?: OpenOptions): this; cancel(): void; @@ -293,7 +300,7 @@ interface RecordBatchStreamReaderImpl extends RecordBatchReaderImpl { +interface 
AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl { open(options?: OpenOptions): Promise; cancel(): Promise; @@ -306,19 +313,19 @@ interface AsyncRecordBatchStreamReaderImpl extends RecordBatchStreamReaderImpl { +interface RecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { readRecordBatch(index: number): RecordBatch | null; } /** @ignore */ -interface AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl { +interface AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl { readRecordBatch(index: number): Promise | null>; } /** @ignore */ -abstract class RecordBatchReaderImpl implements RecordBatchReaderImpl { +abstract class RecordBatchReaderImpl implements RecordBatchReaderImpl { - public schema!: Schema; + declare public schema: Schema; public closed = false; public autoDestroy = true; public dictionaries: Map; @@ -340,25 +347,28 @@ abstract class RecordBatchReaderImpl | null) { this._dictionaryIndex = 0; this._recordBatchIndex = 0; - this.schema = schema; + this.schema = schema; this.dictionaries = new Map(); return this; } protected _loadRecordBatch(header: metadata.RecordBatch, body: any) { - return new RecordBatch(this.schema, header.length, this._loadVectors(header, body, this.schema.fields)); + const children = this._loadVectors(header, body, this.schema.fields); + const data = makeData({ type: new Struct(this.schema.fields), length: header.length, children }); + return new RecordBatch(this.schema, data); } protected _loadDictionaryBatch(header: metadata.DictionaryBatch, body: any) { - const { id, isDelta, data } = header; + const { id, isDelta } = header; const { dictionaries, schema } = this; const dictionary = dictionaries.get(id); if (isDelta || !dictionary) { const type = schema.dictionaries.get(id)!; + const data = this._loadVectors(header.data, body, [type]); return (dictionary && isDelta ? 
dictionary.concat( - Vector.new(this._loadVectors(data, body, [type])[0])) : - Vector.new(this._loadVectors(data, body, [type])[0])) as Vector; + new Vector(data)) : + new Vector(data)).memoize() as Vector; } - return dictionary; + return dictionary.memoize(); } protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { return new VectorLoader(body, header.nodes, header.buffers, this.dictionaries).visitMany(types); @@ -366,7 +376,7 @@ abstract class RecordBatchReaderImpl extends RecordBatchReaderImpl implements IterableIterator> { +class RecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements IterableIterator> { protected _reader: MessageReader; protected _handle: ByteStream | ArrowJSONLike; @@ -386,8 +396,8 @@ class RecordBatchStreamReaderImpl e public cancel() { if (!this.closed && (this.closed = true)) { this.reset()._reader.return(); - this._reader = null; - this.dictionaries = null; + this._reader = null; + this.dictionaries = null; } } public open(options?: OpenOptions) { @@ -444,7 +454,7 @@ class RecordBatchStreamReaderImpl e } /** @ignore */ -class AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements AsyncIterableIterator> { +class AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements AsyncIterableIterator> { protected _handle: AsyncByteStream; protected _reader: AsyncMessageReader; @@ -461,8 +471,8 @@ class AsyncRecordBatchStreamReaderImpl null; - this.dictionaries = null; + this._reader = null; + this.dictionaries = null; } } public async open(options?: OpenOptions) { @@ -519,10 +529,10 @@ class AsyncRecordBatchStreamReaderImpl extends RecordBatchStreamReaderImpl { +class RecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { protected _footer?: Footer; - protected _handle!: RandomAccessFile; + declare protected _handle: RandomAccessFile; public get footer() { return this._footer!; } public get numDictionaries() { return this._footer ? 
this._footer.numDictionaries : 0; } public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; } @@ -544,7 +554,7 @@ class RecordBatchFileReaderImpl ext public readRecordBatch(index: number) { if (this.closed) { return null; } if (!this._footer) { this.open(); } - const block = this._footer && this._footer.getRecordBatch(index); + const block = this._footer?.getRecordBatch(index); if (block && this._handle.seek(block.offset)) { const message = this._reader.readMessage(MessageHeader.RecordBatch); if (message?.isRecordBatch()) { @@ -557,7 +567,7 @@ class RecordBatchFileReaderImpl ext return null; } protected _readDictionaryBatch(index: number) { - const block = this._footer && this._footer.getDictionaryBatch(index); + const block = this._footer?.getDictionaryBatch(index); if (block && this._handle.seek(block.offset)) { const message = this._reader.readMessage(MessageHeader.DictionaryBatch); if (message?.isDictionaryBatch()) { @@ -578,7 +588,7 @@ class RecordBatchFileReaderImpl ext protected _readNextMessageAndValidate(type?: T | null): Message | null { if (!this._footer) { this.open(); } if (this._footer && this._recordBatchIndex < this.numRecordBatches) { - const block = this._footer && this._footer.getRecordBatch(this._recordBatchIndex); + const block = this._footer?.getRecordBatch(this._recordBatchIndex); if (block && this._handle.seek(block.offset)) { return this._reader.readMessage(type); } @@ -588,11 +598,11 @@ class RecordBatchFileReaderImpl ext } /** @ignore */ -class AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl +class AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl implements AsyncRecordBatchFileReaderImpl { protected _footer?: Footer; - protected _handle!: AsyncRandomAccessFile; + declare protected _handle: AsyncRandomAccessFile; public get footer() { return this._footer!; } public get numDictionaries() { return this._footer ? 
this._footer.numDictionaries : 0; } public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; } @@ -600,8 +610,8 @@ class AsyncRecordBatchFileReaderImpl); constructor(source: FileHandle | AsyncRandomAccessFile, dictionaries?: Map); constructor(source: FileHandle | AsyncRandomAccessFile, ...rest: any[]) { - const byteLength = typeof rest[0] !== 'number' ? rest.shift() : undefined; - const dictionaries = rest[0] instanceof Map ? > rest.shift() : undefined; + const byteLength = typeof rest[0] !== 'number' ? rest.shift() : undefined; + const dictionaries = rest[0] instanceof Map ? >rest.shift() : undefined; super(source instanceof AsyncRandomAccessFile ? source : new AsyncRandomAccessFile(source, byteLength), dictionaries); } public isFile(): this is RecordBatchFileReaders { return true; } @@ -618,7 +628,7 @@ class AsyncRecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { +class RecordBatchJSONReaderImpl extends RecordBatchStreamReaderImpl { constructor(source: ArrowJSONLike, dictionaries?: Map) { super(source, dictionaries); } @@ -684,8 +694,8 @@ function shouldAutoDestroy(self: { autoDestroy: boolean }, options?: OpenOptions } /** @ignore */ -function* readAllSync(source: RecordBatchReaders | FromArg0 | FromArg2) { - const reader = RecordBatchReader.from( source) as RecordBatchReaders; +function* readAllSync(source: RecordBatchReaders | FromArg0 | FromArg2) { + const reader = RecordBatchReader.from(source) as RecordBatchReaders; try { if (!reader.open({ autoDestroy: false }).closed) { do { yield reader; } while (!(reader.reset().open()).closed); @@ -694,8 +704,8 @@ function* readAllSync(source: Recor } /** @ignore */ -async function* readAllAsync(source: AsyncRecordBatchReaders | FromArg1 | FromArg3 | FromArg4 | FromArg5) { - const reader = await RecordBatchReader.from( source) as RecordBatchReader; +async function* readAllAsync(source: AsyncRecordBatchReaders | FromArg1 | FromArg3 | FromArg4 | FromArg5) { + const reader = 
await RecordBatchReader.from(source) as RecordBatchReader; try { if (!(await reader.open({ autoDestroy: false })).closed) { do { yield reader; } while (!(await reader.reset().open()).closed); @@ -704,36 +714,34 @@ async function* readAllAsync(source } /** @ignore */ -function fromArrowJSON(source: ArrowJSONLike) { +function fromArrowJSON(source: ArrowJSONLike) { return new RecordBatchStreamReader(new RecordBatchJSONReaderImpl(source)); } /** @ignore */ -function fromByteStream(source: ByteStream) { +function fromByteStream(source: ByteStream) { const bytes = source.peek((magicLength + 7) & ~7); return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes) ? new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(source)) : new RecordBatchFileReader(new RecordBatchFileReaderImpl(source.read())) - : new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(function*(): any {}())); + : new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(function* (): any { }())); } /** @ignore */ -async function fromAsyncByteStream(source: AsyncByteStream) { +async function fromAsyncByteStream(source: AsyncByteStream) { const bytes = await source.peek((magicLength + 7) & ~7); return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes) ? 
new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(source)) : new RecordBatchFileReader(new RecordBatchFileReaderImpl(await source.read())) - : new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(async function*(): any {}())); + : new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(async function* (): any { }())); } /** @ignore */ -async function fromFileHandle(source: FileHandle) { +async function fromFileHandle(source: FileHandle) { const { size } = await source.stat(); const file = new AsyncRandomAccessFile(source, size); - if (size >= magicX2AndPadding) { - if (checkForMagicArrowString(await file.readAt(0, (magicLength + 7) & ~7))) { - return new AsyncRecordBatchFileReader(new AsyncRecordBatchFileReaderImpl(file)); - } + if (size >= magicX2AndPadding && checkForMagicArrowString(await file.readAt(0, (magicLength + 7) & ~7))) { + return new AsyncRecordBatchFileReader(new AsyncRecordBatchFileReaderImpl(file)); } return new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(file)); } diff --git a/js/src/ipc/serialization.ts b/js/src/ipc/serialization.ts new file mode 100644 index 0000000000000..680babd7b250c --- /dev/null +++ b/js/src/ipc/serialization.ts @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Table } from '../table.js'; +import { TypeMap } from '../type.js'; +import { isPromise } from '../util/compat.js'; +import { FromArg0, FromArg1, FromArg2, FromArg3, FromArg4, FromArg5, RecordBatchReader } from './reader.js'; +import { RecordBatchFileWriter, RecordBatchStreamWriter } from './writer.js'; + +/** + * Deserialize the IPC format into a {@link Table}. This function is a + * convenience wrapper for {@link RecordBatchReader}. Opposite of {@link tableToIPC}. + */ +export function tableFromIPC(source: FromArg0 | FromArg2): Table; +export function tableFromIPC(source: FromArg1): Promise>; +export function tableFromIPC(source: FromArg3 | FromArg4 | FromArg5): Promise> | Table; +export function tableFromIPC(input: any): Table | Promise> { + const reader = RecordBatchReader.from(input); + if (isPromise(reader)) { + return (async () => new Table(await (await reader).readAll()))(); + } + return new Table(reader.readAll()); +} + +/** + * Serialize a {@link Table} to the IPC format. This function is a convenience + * wrapper for {@link RecordBatchStreamWriter} and {@link RecordBatchFileWriter}. + * Opposite of {@link tableFromIPC}. + * + * @param table The Table to serialize. + * @param type Whether to serialize the Table as a file or a stream. + */ +export function tableToIPC(table: Table, type: 'file' | 'stream' = 'stream'): Uint8Array { + return (type === 'stream' ? RecordBatchStreamWriter : RecordBatchFileWriter) + .writeAll(table) + .toUint8Array(true); +} diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts index 12aa83355f0fd..5f5cc37562bb0 100644 --- a/js/src/ipc/writer.ts +++ b/js/src/ipc/writer.ts @@ -15,25 +15,25 @@ // specific language governing permissions and limitations // under the License. 
-import { Table } from '../table'; -import { MAGIC } from './message'; -import { Vector } from '../vector'; -import { Column } from '../column'; -import { DataType } from '../type'; -import { Schema, Field } from '../schema'; -import { Message } from './metadata/message'; -import * as metadata from './metadata/message'; -import { FileBlock, Footer } from './metadata/file'; -import { MessageHeader, MetadataVersion } from '../enum'; -import { compareSchemas } from '../visitor/typecomparator'; -import { WritableSink, AsyncByteQueue } from '../io/stream'; -import { VectorAssembler } from '../visitor/vectorassembler'; -import { JSONTypeAssembler } from '../visitor/jsontypeassembler'; -import { JSONVectorAssembler } from '../visitor/jsonvectorassembler'; -import { ArrayBufferViewInput, toUint8Array } from '../util/buffer'; -import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from '../recordbatch'; -import { Writable, ReadableInterop, ReadableDOMStreamOptions } from '../io/interfaces'; -import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream, isIterable, isObject } from '../util/compat'; +import { Data } from '../data.js'; +import { Table } from '../table.js'; +import { MAGIC } from './message.js'; +import { Vector } from '../vector.js'; +import { DataType, TypeMap } from '../type.js'; +import { Schema, Field } from '../schema.js'; +import { Message } from './metadata/message.js'; +import * as metadata from './metadata/message.js'; +import { FileBlock, Footer } from './metadata/file.js'; +import { MessageHeader, MetadataVersion } from '../enum.js'; +import { compareSchemas } from '../visitor/typecomparator.js'; +import { WritableSink, AsyncByteQueue } from '../io/stream.js'; +import { VectorAssembler } from '../visitor/vectorassembler.js'; +import { JSONTypeAssembler } from '../visitor/jsontypeassembler.js'; +import { JSONVectorAssembler } from '../visitor/jsonvectorassembler.js'; +import { ArrayBufferViewInput, toUint8Array } from 
'../util/buffer.js'; +import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from '../recordbatch.js'; +import { Writable, ReadableInterop, ReadableDOMStreamOptions } from '../io/interfaces.js'; +import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream, isIterable, isObject } from '../util/compat.js'; export interface RecordBatchStreamWriterOptions { /** @@ -49,7 +49,7 @@ export interface RecordBatchStreamWriterOptions { writeLegacyIpcFormat?: boolean; } -export class RecordBatchWriter extends ReadableInterop implements Writable> { +export class RecordBatchWriter extends ReadableInterop implements Writable> { /** @nocollapse */ // @ts-ignore @@ -57,7 +57,7 @@ export class RecordBatchWriter exte throw new Error(`"throughNode" not available in this environment`); } /** @nocollapse */ - public static throughDOM( + public static throughDOM( // @ts-ignore writableStrategy?: QueuingStrategy> & { autoDestroy: boolean }, // @ts-ignore @@ -105,7 +105,7 @@ export class RecordBatchWriter exte } else if (isAsyncIterable>(input)) { return writeAllAsync(this, input); } - return writeAll(this, input); + return writeAll(this, input); } public get closed() { return this._sink.closed; } @@ -145,7 +145,7 @@ export class RecordBatchWriter exte this._dictionaryDeltaOffsets = new Map(); if (!schema || !(compareSchemas(schema, this._schema))) { - if (schema === null) { + if (schema == null) { this._position = 0; this._schema = null; } else { @@ -183,7 +183,7 @@ export class RecordBatchWriter exte this._writeRecordBatch(payload); } } else if (payload instanceof Table) { - this.writeAll(payload.chunks); + this.writeAll(payload.batches); } else if (isIterable(payload)) { this.writeAll(payload); } @@ -248,7 +248,7 @@ export class RecordBatchWriter exte protected _writeRecordBatch(batch: RecordBatch) { const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(batch); - const recordBatch = new metadata.RecordBatch(batch.length, nodes, 
bufferRegions); + const recordBatch = new metadata.RecordBatch(batch.numRows, nodes, bufferRegions); const message = Message.from(recordBatch, byteLength); return this ._writeDictionaries(batch) @@ -256,9 +256,9 @@ export class RecordBatchWriter exte ._writeBodyBuffers(buffers); } - protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) { + protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0)); - const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(dictionary); + const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(new Vector([dictionary])); const recordBatch = new metadata.RecordBatch(dictionary.length, nodes, bufferRegions); const dictionaryBatch = new metadata.DictionaryBatch(recordBatch, id, isDelta); const message = Message.from(dictionaryBatch, byteLength); @@ -284,11 +284,10 @@ export class RecordBatchWriter exte protected _writeDictionaries(batch: RecordBatch) { for (let [id, dictionary] of batch.dictionaries) { let offset = this._dictionaryDeltaOffsets.get(id) || 0; - if (offset === 0 || (dictionary = dictionary.slice(offset)).length > 0) { - const chunks = 'chunks' in dictionary ? 
(dictionary as any).chunks : [dictionary]; - for (const chunk of chunks) { - this._writeDictionaryBatch(chunk, id, offset > 0); - offset += chunk.length; + if (offset === 0 || (dictionary = dictionary?.slice(offset)).length > 0) { + for (const data of dictionary.data) { + this._writeDictionaryBatch(data, id, offset > 0); + offset += data.length; } } } @@ -297,13 +296,13 @@ export class RecordBatchWriter exte } /** @ignore */ -export class RecordBatchStreamWriter extends RecordBatchWriter { - public static writeAll(input: Table | Iterable>, options?: RecordBatchStreamWriterOptions): RecordBatchStreamWriter; - public static writeAll(input: AsyncIterable>, options?: RecordBatchStreamWriterOptions): Promise>; - public static writeAll(input: PromiseLike>>, options?: RecordBatchStreamWriterOptions): Promise>; - public static writeAll(input: PromiseLike | Iterable>>, options?: RecordBatchStreamWriterOptions): Promise>; +export class RecordBatchStreamWriter extends RecordBatchWriter { + public static writeAll(input: Table | Iterable>, options?: RecordBatchStreamWriterOptions): RecordBatchStreamWriter; + public static writeAll(input: AsyncIterable>, options?: RecordBatchStreamWriterOptions): Promise>; + public static writeAll(input: PromiseLike>>, options?: RecordBatchStreamWriterOptions): Promise>; + public static writeAll(input: PromiseLike | Iterable>>, options?: RecordBatchStreamWriterOptions): Promise>; /** @nocollapse */ - public static writeAll(input: any, options?: RecordBatchStreamWriterOptions) { + public static writeAll(input: any, options?: RecordBatchStreamWriterOptions) { const writer = new RecordBatchStreamWriter(options); if (isPromise(input)) { return input.then((x) => writer.writeAll(x)); @@ -315,13 +314,13 @@ export class RecordBatchStreamWriter extends RecordBatchWriter { - public static writeAll(input: Table | Iterable>): RecordBatchFileWriter; - public static writeAll(input: AsyncIterable>): Promise>; - public static writeAll(input: PromiseLike>>): 
Promise>; - public static writeAll(input: PromiseLike | Iterable>>): Promise>; +export class RecordBatchFileWriter extends RecordBatchWriter { + public static writeAll(input: Table | Iterable>): RecordBatchFileWriter; + public static writeAll(input: AsyncIterable>): Promise>; + public static writeAll(input: PromiseLike>>): Promise>; + public static writeAll(input: PromiseLike | Iterable>>): Promise>; /** @nocollapse */ - public static writeAll(input: any) { + public static writeAll(input: any) { const writer = new RecordBatchFileWriter(); if (isPromise(input)) { return input.then((x) => writer.writeAll(x)); @@ -355,15 +354,15 @@ export class RecordBatchFileWriter } /** @ignore */ -export class RecordBatchJSONWriter extends RecordBatchWriter { +export class RecordBatchJSONWriter extends RecordBatchWriter { - public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>): RecordBatchJSONWriter; + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>): RecordBatchJSONWriter; // @ts-ignore - public static writeAll(this: typeof RecordBatchWriter, input: AsyncIterable>): Promise>; - public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>): Promise>; - public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: AsyncIterable>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>): Promise>; /** @nocollapse */ - public static writeAll(this: typeof RecordBatchWriter, input: any) { + public static writeAll(this: typeof RecordBatchWriter, input: any) { return new RecordBatchJSONWriter().writeAll(input as any); } @@ -381,9 +380,7 @@ export class RecordBatchJSONWriter // @ts-ignore protected _writeFooter(schema: Schema) { return this; } protected _writeSchema(schema: Schema) { - 
return this._write(`{\n "schema": ${ - JSON.stringify({ fields: schema.fields.map(fieldToJSON) }, null, 2) - }`); + return this._write(`{\n "schema": ${JSON.stringify({ fields: schema.fields.map(field => fieldToJSON(field)) }, null, 2)}`); } protected _writeDictionaries(batch: RecordBatch) { if (batch.dictionaries.size > 0) { @@ -391,7 +388,7 @@ export class RecordBatchJSONWriter } return this; } - protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) { + protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0)); this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `); this._write(`${dictionaryBatchToJSON(dictionary, id, isDelta)}`); @@ -434,10 +431,10 @@ export class RecordBatchJSONWriter } /** @ignore */ -function writeAll(writer: RecordBatchWriter, input: Table | Iterable>) { +function writeAll(writer: RecordBatchWriter, input: Table | Iterable>) { let chunks = input as Iterable>; if (input instanceof Table) { - chunks = input.chunks; + chunks = input.batches; writer.reset(undefined, input.schema); } for (const batch of chunks) { @@ -447,7 +444,7 @@ function writeAll(writer: RecordBat } /** @ignore */ -async function writeAllAsync(writer: RecordBatchWriter, batches: AsyncIterable>) { +async function writeAllAsync(writer: RecordBatchWriter, batches: AsyncIterable>) { for await (const batch of batches) { writer.write(batch); } @@ -460,7 +457,7 @@ function fieldToJSON({ name, type, nullable }: Field): Record { return { 'name': name, 'nullable': nullable, 'type': assembler.visit(type), - 'children': (type.children || []).map(fieldToJSON), + 'children': (type.children || []).map((field: any) => fieldToJSON(field)), 'dictionary': !DataType.isDictionary(type) ? 
undefined : { 'id': type.id, 'isOrdered': type.isOrdered, @@ -470,9 +467,8 @@ function fieldToJSON({ name, type, nullable }: Field): Record { } /** @ignore */ -function dictionaryBatchToJSON(dictionary: Vector, id: number, isDelta = false) { - const field = new Field(`${id}`, dictionary.type, dictionary.nullCount > 0); - const columns = JSONVectorAssembler.assemble(new Column(field, [dictionary])); +function dictionaryBatchToJSON(dictionary: Data, id: number, isDelta = false) { + const [columns] = JSONVectorAssembler.assemble(new RecordBatch({ [id]: dictionary })); return JSON.stringify({ 'id': id, 'isDelta': isDelta, @@ -485,8 +481,9 @@ function dictionaryBatchToJSON(dictionary: Vector, id: number, isDelta = false) /** @ignore */ function recordBatchToJSON(records: RecordBatch) { + const [columns] = JSONVectorAssembler.assemble(records); return JSON.stringify({ - 'count': records.length, - 'columns': JSONVectorAssembler.assemble(records) + 'count': records.numRows, + 'columns': columns }, null, 2); } diff --git a/js/src/recordbatch.ts b/js/src/recordbatch.ts index 5463a387fae2e..a7284816f4112 100644 --- a/js/src/recordbatch.ts +++ b/js/src/recordbatch.ts @@ -15,98 +15,325 @@ // specific language governing permissions and limitations // under the License. 
-import { Data } from './data'; -import { Table } from './table'; -import { Vector } from './vector'; -import { Visitor } from './visitor'; -import { Schema, Field } from './schema'; -import { isIterable } from './util/compat'; -import { Chunked } from './vector/chunked'; -import { selectFieldArgs } from './util/args'; -import { DataType, Struct, Dictionary } from './type'; -import { ensureSameLengthData } from './util/recordbatch'; -import { Clonable, Sliceable, Applicative } from './vector'; -import { StructVector, VectorBuilderOptions, VectorBuilderOptionsAsync } from './vector/index'; - -type VectorMap = { [key: string]: Vector }; -type Fields = (keyof T)[] | Field[]; -type ChildData = (Data | Vector)[]; - -export interface RecordBatch { - concat(...others: Vector>[]): Table; - slice(begin?: number, end?: number): RecordBatch; - clone(data: Data>, children?: Vector[]): RecordBatch; +import { Data, makeData } from './data.js'; +import { Table } from './table.js'; +import { Vector } from './vector.js'; +import { Schema, Field } from './schema.js'; +import { DataType, Struct, Null, TypeMap } from './type.js'; + +import { instance as getVisitor } from './visitor/get.js'; +import { instance as setVisitor } from './visitor/set.js'; +import { instance as indexOfVisitor } from './visitor/indexof.js'; +import { instance as iteratorVisitor } from './visitor/iterator.js'; +import { instance as byteLengthVisitor } from './visitor/bytelength.js'; + +/** @ignore */ +export interface RecordBatch { + /// + // Virtual properties for the TypeScript compiler. + // These do not exist at runtime. 
+ /// + readonly TType: Struct; + readonly TArray: Struct['TArray']; + readonly TValue: Struct['TValue']; + + /** + * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/isConcatSpreadable + */ + [Symbol.isConcatSpreadable]: true; } -export class RecordBatch - extends StructVector - implements Clonable>, - Sliceable>, - Applicative, Table> { - - public static from(options: VectorBuilderOptions, TNull>): Table; - public static from(options: VectorBuilderOptionsAsync, TNull>): Promise>; - /** @nocollapse */ - public static from(options: VectorBuilderOptions, TNull> | VectorBuilderOptionsAsync, TNull>) { - if (isIterable<(Struct)['TValue'] | TNull>(options['values'])) { - return Table.from(options as VectorBuilderOptions, TNull>); +/** @ignore */ +export class RecordBatch { + + constructor(columns: { [P in keyof T]: Data }); + constructor(schema: Schema, data?: Data>); + constructor(...args: any[]) { + switch (args.length) { + case 2: { + [this.schema] = args; + if (!(this.schema instanceof Schema)) { + throw new TypeError('RecordBatch constructor expects a [Schema, Data] pair.'); + } + [, + this.data = makeData({ + nullCount: 0, + type: new Struct(this.schema.fields), + children: this.schema.fields.map((f) => makeData({ type: f.type, nullCount: 0 })) + }) + ] = args; + if (!(this.data instanceof Data)) { + throw new TypeError('RecordBatch constructor expects a [Schema, Data] pair.'); + } + [this.schema, this.data] = ensureSameLengthData(this.schema, this.data.children as Data[]); + break; + } + case 1: { + const [obj] = args; + const { fields, children, length } = Object.keys(obj).reduce((memo, name, i) => { + memo.children[i] = obj[name]; + memo.length = Math.max(memo.length, obj[name].length); + memo.fields[i] = Field.new({ name, type: obj[name].type, nullable: true }); + return memo; + }, { + length: 0, + fields: new Array>(), + children: new Array>(), + }); + + const schema = new Schema(fields); + const data = makeData({ 
type: new Struct(fields), length, children, nullCount: 0 }); + [this.schema, this.data] = ensureSameLengthData(schema, data.children as Data[], length); + break; + } + default: throw new TypeError('RecordBatch constructor expects an Object mapping names to child Data, or a [Schema, Data] pair.'); } - return Table.from(options as VectorBuilderOptionsAsync, TNull>); } - public static new(children: T): RecordBatch<{ [P in keyof T]: T[P]['type'] }>; - public static new(children: ChildData, fields?: Fields): RecordBatch; - /** @nocollapse */ - public static new(...args: any[]) { - const [fs, xs] = selectFieldArgs(args); - const vs = xs.filter((x): x is Vector => x instanceof Vector); - return new RecordBatch(...ensureSameLengthData(new Schema(fs), vs.map((x) => x.data))); + protected _dictionaries?: Map; + + public readonly schema: Schema; + public readonly data: Data>; + + public get dictionaries() { + return this._dictionaries || (this._dictionaries = collectDictionaries(this.schema.fields, this.data.children)); } - protected _schema: Schema; - protected _dictionaries?: Map; + /** + * The number of columns in this RecordBatch. + */ + public get numCols() { return this.schema.fields.length; } - constructor(schema: Schema, length: number, children: (Data | Vector)[]); - constructor(schema: Schema, data: Data>, children?: Vector[]); - constructor(...args: any[]) { - let data: Data>; - const schema = args[0] as Schema; - let children: Vector[] | undefined; - if (args[1] instanceof Data) { - [, data, children] = (args as [any, Data>, Vector[]?]); - } else { - const fields = schema.fields as Field[]; - const [, length, childData] = args as [any, number, Data[]]; - data = Data.Struct(new Struct(fields), 0, length, 0, null, childData); + /** + * The number of rows in this RecordBatch. + */ + public get numRows() { return this.data.length; } + + /** + * The number of null rows in this RecordBatch. 
+ */ + public get nullCount() { + return this.data.nullCount; + } + + /** + * Check whether an element is null. + * @param index The index at which to read the validity bitmap. + */ + public isValid(index: number) { + return this.data.getValid(index); + } + + /** + * Get a row by position. + * @param index The index of the element to read. + */ + public get(index: number) { + return getVisitor.visit(this.data, index); + } + + /** + * Set a row by position. + * @param index The index of the element to write. + * @param value The value to set. + */ + public set(index: number, value: Struct['TValue']) { + return setVisitor.visit(this.data, index, value); + } + + /** + * Retrieve the index of the first occurrence of a row in an RecordBatch. + * @param element The row to locate in the RecordBatch. + * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0. + */ + public indexOf(element: Struct['TValue'], offset?: number): number { + return indexOfVisitor.visit(this.data, element, offset); + } + + /** + * Get the size (in bytes) of a row by index. + * @param index The row index for which to compute the byteLength. + */ + public getByteLength(index: number): number { + return byteLengthVisitor.visit(this.data, index); + } + + /** + * Iterator for rows in this RecordBatch. + */ + public [Symbol.iterator]() { + return iteratorVisitor.visit(new Vector([this.data])); + } + + /** + * Return a JavaScript Array of the RecordBatch rows. + * @returns An Array of RecordBatch rows. + */ + public toArray() { + return [...this]; + } + + /** + * Combines two or more RecordBatch of the same schema. + * @param others Additional RecordBatch to add to the end of this RecordBatch. + */ + public concat(...others: RecordBatch[]) { + return new Table(this.schema, [this, ...others]); + } + + /** + * Return a zero-copy sub-section of this RecordBatch. + * @param start The beginning of the specified portion of the RecordBatch. 
+ * @param end The end of the specified portion of the RecordBatch. This is exclusive of the element at the index 'end'. + */ + public slice(begin?: number, end?: number): RecordBatch { + const [slice] = new Vector([this.data]).slice(begin, end).data; + return new RecordBatch(this.schema, slice); + } + + /** + * Returns a child Vector by name, or null if this Vector has no child with the given name. + * @param name The name of the child to retrieve. + */ + public getChild

(name: P) { + return this.getChildAt(this.schema.fields?.findIndex((f) => f.name === name)); + } + + /** + * Returns a child Vector by index, or null if this Vector has no child at the supplied index. + * @param index The index of the child to retrieve. + */ + public getChildAt(index: number): Vector | null { + if (index > -1 && index < this.schema.fields.length) { + return new Vector([this.data.children[index]]) as Vector; } - super(data, children); - this._schema = schema; + return null; } - public clone(data: Data>, children = this._children) { - return new RecordBatch(this._schema, data, children); + /** + * Sets a child Vector by name. + * @param name The name of the child to overwrite. + * @returns A new RecordBatch with the new child for the specified name. + */ + public setChild

(name: P, child: Vector) { + return this.setChildAt(this.schema.fields?.findIndex((f) => f.name === name), child) as RecordBatch; } - public concat(...others: Vector>[]): Table { - const schema = this._schema, chunks = Chunked.flatten(this, ...others); - return new Table(schema, chunks.map(({ data }) => new RecordBatch(schema, data))); + /** + * Sets a child Vector by index. + * @param index The index of the child to overwrite. + * @returns A new RecordBatch with the new child at the specified index. + */ + public setChildAt(index: number, child?: null): RecordBatch; + public setChildAt(index: number, child: Vector): RecordBatch; + public setChildAt(index: number, child: any) { + let schema: Schema = this.schema; + let data: Data = this.data; + if (index > -1 && index < this.numCols) { + if (!child) { + child = new Vector([makeData({ type: new Null, length: this.numRows })]); + } + const fields = schema.fields.slice() as Field[]; + const children = data.children.slice() as Data[]; + const field = fields[index].clone({ type: child.type }); + [fields[index], children[index]] = [field, child.data[0]]; + schema = new Schema(fields, new Map(this.schema.metadata)); + data = makeData({ type: new Struct(fields), children }); + } + return new RecordBatch(schema, data); } - public get schema() { return this._schema; } - public get numCols() { return this._schema.fields.length; } - public get dictionaries() { - return this._dictionaries || (this._dictionaries = DictionaryCollector.collect(this)); + /** + * Construct a new RecordBatch containing only specified columns. + * + * @param columnNames Names of columns to keep. + * @returns A new RecordBatch of columns matching the specified names. 
+ */ + public select(columnNames: K[]) { + const schema = this.schema.select(columnNames); + const type = new Struct(schema.fields); + const children = [] as Data[]; + for (const name of columnNames) { + const index = this.schema.fields.findIndex((f) => f.name === name); + if (~index) { + children[index] = this.data.children[index] as Data; + } + } + return new RecordBatch(schema, makeData({ type, length: this.numRows, children })); } - public select(...columnNames: K[]) { - const nameToIndex = this._schema.fields.reduce((m, f, i) => m.set(f.name as K, i), new Map()); - return this.selectAt(...columnNames.map((columnName) => nameToIndex.get(columnName)!).filter((x) => x > -1)); + /** + * Construct a new RecordBatch containing only columns at the specified indices. + * + * @param columnIndices Indices of columns to keep. + * @returns A new RecordBatch of columns matching at the specified indices. + */ + public selectAt(columnIndices: number[]) { + const schema = this.schema.selectAt(columnIndices); + const children = columnIndices.map((i) => this.data.children[i]).filter(Boolean); + const subset = makeData({ type: new Struct(schema.fields), length: this.numRows, children }); + return new RecordBatch<{ [P in keyof K]: K[P] }>(schema, subset); } - public selectAt(...columnIndices: number[]) { - const schema = this._schema.selectAt(...columnIndices); - const childData = columnIndices.map((i) => this.data.childData[i]).filter(Boolean); - return new RecordBatch<{ [key: string]: K }>(schema, this.length, childData); + + // Initialize this static property via an IIFE so bundlers don't tree-shake + // out this logic, but also so we're still compliant with `"sideEffects": false` + protected static [Symbol.toStringTag] = ((proto: RecordBatch) => { + (proto as any)._nullCount = -1; + (proto as any)[Symbol.isConcatSpreadable] = true; + return 'RecordBatch'; + })(RecordBatch.prototype); +} + + +/** @ignore */ +function ensureSameLengthData( + schema: Schema, + chunks: Data[], + 
maxLength = chunks.reduce((max, col) => Math.max(max, col.length), 0) +) { + const fields = [...schema.fields]; + const children = [...chunks] as Data[]; + const nullBitmapSize = ((maxLength + 63) & ~63) >> 3; + + for (const [idx, field] of schema.fields.entries()) { + const chunk = chunks[idx]; + if (!chunk || chunk.length !== maxLength) { + fields[idx] = field.clone({ nullable: true }); + children[idx] = chunk?._changeLengthAndBackfillNullBitmap(maxLength) ?? makeData({ + type: field.type, + length: maxLength, + nullCount: maxLength, + nullBitmap: new Uint8Array(nullBitmapSize) + }); + } } + + return [ + schema.assign(fields), + makeData({ type: new Struct(fields), length: maxLength, children }) + ] as [Schema, Data>]; +} + +/** @ignore */ +function collectDictionaries(fields: Field[], children: Data[], dictionaries = new Map()): Map { + for (let i = -1, n = fields.length; ++i < n;) { + const field = fields[i]; + const type = field.type; + const data = children[i]; + if (DataType.isDictionary(type)) { + if (!dictionaries.has(type.id)) { + if (data.dictionary) { + dictionaries.set(type.id, data.dictionary); + } + } else if (dictionaries.get(type.id) !== data.dictionary) { + throw new Error(`Cannot create Schema containing two different dictionaries with the same Id`); + } + } + if (type.children && type.children.length > 0) { + collectDictionaries(type.children, data.children, dictionaries); + } + } + return dictionaries; } /** @@ -117,35 +344,10 @@ export class RecordBatch * @ignore * @private */ -/* eslint-disable @typescript-eslint/naming-convention */ -export class _InternalEmptyPlaceholderRecordBatch extends RecordBatch { +export class _InternalEmptyPlaceholderRecordBatch extends RecordBatch { constructor(schema: Schema) { - super(schema, 0, schema.fields.map((f) => Data.new(f.type, 0, 0, 0))); - } -} - -/** @ignore */ -class DictionaryCollector extends Visitor { - public dictionaries = new Map(); - public static collect(batch: T) { - return new 
DictionaryCollector().visit( - batch.data, new Struct(batch.schema.fields) - ).dictionaries; - } - public visit(data: Data, type: DataType) { - if (DataType.isDictionary(type)) { - return this.visitDictionary(data, type); - } else { - data.childData.forEach((child, i) => - this.visit(child, type.children[i].type)); - } - return this; - } - public visitDictionary(data: Data, type: Dictionary) { - const dictionary = data.dictionary; - if (dictionary && dictionary.length > 0) { - this.dictionaries.set(type.id, dictionary); - } - return this; + const children = schema.fields.map((f) => makeData({ type: f.type })); + const data = makeData({ type: new Struct(schema.fields), nullCount: 0, children }); + super(schema, data); } } diff --git a/js/src/row/map.ts b/js/src/row/map.ts new file mode 100644 index 0000000000000..d0ccb872ec33c --- /dev/null +++ b/js/src/row/map.ts @@ -0,0 +1,152 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { Data } from '../data.js'; +import { Vector } from '../vector.js'; +import { DataType, Struct } from '../type.js'; +import { valueToString } from '../util/pretty.js'; +import { instance as getVisitor } from '../visitor/get.js'; +import { instance as setVisitor } from '../visitor/set.js'; + +/** @ignore */ export const kKeys = Symbol.for('keys'); +/** @ignore */ export const kVals = Symbol.for('vals'); + +export class MapRow { + + [key: string]: V['TValue']; + + declare private [kKeys]: Vector; + declare private [kVals]: Data; + + constructor(slice: Data>) { + this[kKeys] = new Vector([slice.children[0]]).memoize() as Vector; + this[kVals] = slice.children[1] as Data; + return new Proxy(this, new MapRowProxyHandler()); + } + + [Symbol.iterator]() { + return new MapRowIterator(this[kKeys], this[kVals]); + } + + public get size() { return this[kKeys].length; } + + public toArray() { return Object.values(this.toJSON()); } + + public toJSON() { + const keys = this[kKeys]; + const vals = this[kVals]; + const json = {} as { [P in K['TValue']]: V['TValue'] }; + for (let i = -1, n = keys.length; ++i < n;) { + json[keys.get(i)] = getVisitor.visit(vals, i); + } + return json; + } + + public toString() { + return `{${[...this].map(([key, val]) => + `${valueToString(key)}: ${valueToString(val)}` + ).join(', ') + }}`; + } + + public [Symbol.for('nodejs.util.inspect.custom')]() { + return this.toString(); + } +} + +class MapRowIterator + implements IterableIterator<[K['TValue'], V['TValue'] | null]> { + + private keys: Vector; + private vals: Data; + private numKeys: number; + private keyIndex: number; + + constructor(keys: Vector, vals: Data) { + this.keys = keys; + this.vals = vals; + this.keyIndex = 0; + this.numKeys = keys.length; + } + + [Symbol.iterator]() { return this; } + + next() { + const i = this.keyIndex; + if (i === this.numKeys) { + return { done: true, value: null } as IteratorReturnResult; + } + this.keyIndex++; + return { + done: false, + value: [ + 
this.keys.get(i), + getVisitor.visit(this.vals, i), + ] as [K['TValue'], V['TValue'] | null] + }; + } +} + +/** @ignore */ +class MapRowProxyHandler implements ProxyHandler> { + isExtensible() { return false; } + deleteProperty() { return false; } + preventExtensions() { return true; } + ownKeys(row: MapRow) { + return row[kKeys].toArray().map(String); + } + has(row: MapRow, key: string | symbol) { + return row[kKeys].includes(key); + } + getOwnPropertyDescriptor(row: MapRow, key: string | symbol) { + const idx = row[kKeys].indexOf(key); + if (idx !== -1) { + return { writable: true, enumerable: true, configurable: true }; + } + return; + } + get(row: MapRow, key: string | symbol) { + // Look up key in row first + if (Reflect.has(row, key)) { + return (row as any)[key]; + } + const idx = row[kKeys].indexOf(key); + if (idx !== -1) { + const val = getVisitor.visit(Reflect.get(row, kVals), idx); + // Cache key/val lookups + Reflect.set(row, key, val); + return val; + } + } + set(row: MapRow, key: string | symbol, val: V) { + const idx = row[kKeys].indexOf(key); + if (idx !== -1) { + setVisitor.visit(Reflect.get(row, kVals), idx, val); + // Cache key/val lookups + return Reflect.set(row, key, val); + } else if (Reflect.has(row, key)) { + return Reflect.set(row, key, val); + } + return false; + } +} + +Object.defineProperties(MapRow.prototype, { + [Symbol.toStringTag]: { enumerable: false, configurable: false, value: 'Row' }, + [kKeys]: { writable: true, enumerable: false, configurable: false, value: null }, + [kVals]: { writable: true, enumerable: false, configurable: false, value: null }, +}); diff --git a/js/src/row/struct.ts b/js/src/row/struct.ts new file mode 100644 index 0000000000000..483a435670cfd --- /dev/null +++ b/js/src/row/struct.ts @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Data } from '../data.js'; +import { Field } from '../schema.js'; +import { Struct, TypeMap } from '../type.js'; +import { valueToString } from '../util/pretty.js'; +import { instance as getVisitor } from '../visitor/get.js'; +import { instance as setVisitor } from '../visitor/set.js'; + +/** @ignore */ const kParent = Symbol.for('parent'); +/** @ignore */ const kRowIndex = Symbol.for('rowIndex'); + +export type StructRowProxy = StructRow & { + [P in keyof T]: T[P]['TValue']; +}; + +export class StructRow { + + declare private [kRowIndex]: number; + declare private [kParent]: Data>; + + constructor(parent: Data>, rowIndex: number) { + this[kParent] = parent; + this[kRowIndex] = rowIndex; + return new Proxy(this, new StructRowProxyHandler()); + } + + public toArray() { return Object.values(this.toJSON()); } + + public toJSON() { + const i = this[kRowIndex]; + const parent = this[kParent]; + const keys = parent.type.children; + const json = {} as { [P in string & keyof T]: T[P]['TValue'] }; + for (let j = -1, n = keys.length; ++j < n;) { + json[keys[j].name as string & keyof T] = getVisitor.visit(parent.children[j], i); + } + return json; + } + + public toString() { + return `{${[...this].map(([key, val]) => + `${valueToString(key)}: ${valueToString(val)}` + 
).join(', ') + }}`; + } + + public [Symbol.for('nodejs.util.inspect.custom')]() { + return this.toString(); + } + + [Symbol.iterator](): IterableIterator<[ + keyof T, { [P in keyof T]: T[P]['TValue'] | null }[keyof T] + ]> { + return new StructRowIterator(this[kParent], this[kRowIndex]); + } +} + +class StructRowIterator + implements IterableIterator<[ + keyof T, { [P in keyof T]: T[P]['TValue'] | null }[keyof T] + ]> { + + declare private rowIndex: number; + declare private childIndex: number; + declare private numChildren: number; + declare private children: Data[]; + declare private childFields: Field[]; + + constructor(data: Data>, rowIndex: number) { + this.childIndex = 0; + this.children = data.children; + this.rowIndex = rowIndex; + this.childFields = data.type.children; + this.numChildren = this.childFields.length; + } + + [Symbol.iterator]() { return this; } + + next() { + const i = this.childIndex; + if (i < this.numChildren) { + this.childIndex = i + 1; + return { + done: false, + value: [ + this.childFields[i].name, + getVisitor.visit(this.children[i], this.rowIndex) + ] + } as IteratorYieldResult<[any, any]>; + } + return { done: true, value: null } as IteratorReturnResult; + } +} + +Object.defineProperties(StructRow.prototype, { + [Symbol.toStringTag]: { enumerable: false, configurable: false, value: 'Row' }, + [kParent]: { writable: true, enumerable: false, configurable: false, value: null }, + [kRowIndex]: { writable: true, enumerable: false, configurable: false, value: -1 }, +}); + +class StructRowProxyHandler implements ProxyHandler> { + isExtensible() { return false; } + deleteProperty() { return false; } + preventExtensions() { return true; } + ownKeys(row: StructRow) { + return row[kParent].type.children.map((f) => f.name); + } + has(row: StructRow, key: string) { + return row[kParent].type.children.findIndex((f) => f.name === key) !== -1; + } + getOwnPropertyDescriptor(row: StructRow, key: string) { + if 
(row[kParent].type.children.findIndex((f) => f.name === key) !== -1) { + return { writable: true, enumerable: true, configurable: true }; + } + return; + } + get(row: StructRow, key: string) { + // Look up key in row first + if (Reflect.has(row, key)) { + return (row as any)[key]; + } + const idx = row[kParent].type.children.findIndex((f) => f.name === key); + if (idx !== -1) { + const val = getVisitor.visit(row[kParent].children[idx], row[kRowIndex]); + // Cache key/val lookups + Reflect.set(row, key, val); + return val; + } + } + set(row: StructRow, key: string, val: any) { + const idx = row[kParent].type.children.findIndex((f) => f.name === key); + if (idx !== -1) { + setVisitor.visit(row[kParent].children[idx], row[kRowIndex], val); + // Cache key/val lookups + return Reflect.set(row, key, val); + } else if (Reflect.has(row, key)) { + return Reflect.set(row, key, val); + } + return false; + } +} diff --git a/js/src/schema.ts b/js/src/schema.ts index 437ffa228ec64..858bab915b010 100644 --- a/js/src/schema.ts +++ b/js/src/schema.ts @@ -15,17 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-import { DataType } from './type'; +import { DataType, TypeMap } from './type.js'; -export class Schema { +export class Schema { public readonly fields: Field[]; public readonly metadata: Map; public readonly dictionaries: Map; - constructor(fields: Field[] = [], - metadata?: Map | null, - dictionaries?: Map | null) { + constructor( + fields: Field[] = [], + metadata?: Map | null, + dictionaries?: Map | null) { this.fields = (fields || []) as Field[]; this.metadata = metadata || new Map(); if (!dictionaries) { @@ -34,27 +35,45 @@ export class Schema { this.dictionaries = dictionaries; } public get [Symbol.toStringTag]() { return 'Schema'; } + + public get names(): (keyof T)[] { return this.fields.map((f) => f.name); } + public toString() { return `Schema<{ ${this.fields.map((f, i) => `${i}: ${f}`).join(', ')} }>`; } - public select(...columnNames: K[]) { - const names = columnNames.reduce((xs, x) => (xs[x] = true) && xs, Object.create(null)); - return new Schema<{ [P in K]: T[P] }>(this.fields.filter((f) => names[f.name]), this.metadata); + /** + * Construct a new Schema containing only specified fields. + * + * @param fieldNames Names of fields to keep. + * @returns A new Schema of fields matching the specified names. + */ + public select(fieldNames: K[]) { + const names = new Set(fieldNames); + const fields = this.fields.filter((f) => names.has(f.name)) as Field[]; + return new Schema<{ [P in K]: T[P] }>(fields, this.metadata); } - public selectAt(...columnIndices: number[]) { - return new Schema<{ [key: string]: K }>(columnIndices.map((i) => this.fields[i]).filter(Boolean), this.metadata); + + /** + * Construct a new Schema containing only fields at the specified indices. + * + * @param fieldIndices Indices of fields to keep. + * @returns A new Schema of fields at the specified indices. 
+ */ + public selectAt(fieldIndices: number[]) { + const fields = fieldIndices.map((i) => this.fields[i]).filter(Boolean) as Field[]; + return new Schema(fields, this.metadata); } - public assign(schema: Schema): Schema; - public assign(...fields: (Field | Field[])[]): Schema; - public assign(...args: (Schema | Field | Field[])[]) { + public assign(schema: Schema): Schema; + public assign(...fields: (Field | Field[])[]): Schema; + public assign(...args: (Schema | Field | Field[])[]) { const other = (args[0] instanceof Schema ? args[0] as Schema : Array.isArray(args[0]) - ? new Schema([]> args[0]) - : new Schema([]> args)); + ? new Schema([]>args[0]) + : new Schema([]>args)); const curFields = [...this.fields] as Field[]; const metadata = mergeMaps(mergeMaps(new Map(), this.metadata), other.metadata); @@ -74,6 +93,12 @@ export class Schema { } } +// Add these here so they're picked up by the externs creator +// in the build, and closure-compiler doesn't minify them away +(Schema.prototype as any).fields = null; +(Schema.prototype as any).metadata = null; +(Schema.prototype as any).dictionaries = null; + export class Field { public static new(props: { name: string | number; type: T; nullable?: boolean; metadata?: Map | null }): Field; @@ -111,11 +136,18 @@ export class Field { let [name, type, nullable, metadata] = args; (!args[0] || typeof args[0] !== 'object') ? 
([name = this.name, type = this.type, nullable = this.nullable, metadata = this.metadata] = args) - : ({name = this.name, type = this.type, nullable = this.nullable, metadata = this.metadata} = args[0]); + : ({ name = this.name, type = this.type, nullable = this.nullable, metadata = this.metadata } = args[0]); return Field.new(name, type, nullable, metadata); } } +// Add these here so they're picked up by the externs creator +// in the build, and closure-compiler doesn't minify them away +(Field.prototype as any).type = null; +(Field.prototype as any).name = null; +(Field.prototype as any).nullable = null; +(Field.prototype as any).metadata = null; + /** @ignore */ function mergeMaps(m1?: Map | null, m2?: Map | null): Map { return new Map([...(m1 || new Map()), ...(m2 || new Map())]); @@ -141,14 +173,3 @@ function generateDictionaryMap(fields: Field[], dictionaries = new Map }; -type Fields = (keyof T)[] | Field[]; -type ChildData = Data[] | Vector[]; -type Columns = Column[] | Column[][]; - -export interface Table { - - get(index: number): Struct['TValue']; - [Symbol.iterator](): IterableIterator>; - - slice(begin?: number, end?: number): Table; - concat(...others: Vector>[]): Table; - clone(chunks?: RecordBatch[], offsets?: Uint32Array): Table; +import { Type } from './enum.js'; +import { Data, makeData } from './data.js'; +import { vectorFromArray } from './factories.js'; +import { makeVector, Vector } from './vector.js'; +import { Field, Schema } from './schema.js'; +import { DataType, Null, Struct, TypeMap } from './type.js'; +import { compareSchemas } from './visitor/typecomparator.js'; +import { distributeVectorsIntoRecordBatches } from './util/recordbatch.js'; + +import { + isChunkedValid, + computeChunkOffsets, + computeChunkNullCounts, + wrapChunkedCall1, + wrapChunkedCall2, + wrapChunkedIndexOf, + sliceChunks, +} from './util/chunk.js'; + +import { instance as getVisitor } from './visitor/get.js'; +import { instance as setVisitor } from 
'./visitor/set.js'; +import { instance as indexOfVisitor } from './visitor/indexof.js'; +import { instance as iteratorVisitor } from './visitor/iterator.js'; +import { instance as byteLengthVisitor } from './visitor/bytelength.js'; + +import { DataProps } from './data.js'; +import { clampRange } from './util/vector.js'; +import { ArrayDataType, BigIntArray, TypedArray, TypedArrayDataType } from './interfaces.js'; +import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from './recordbatch.js'; + +/** @ignore */ +export interface Table { + /// + // Virtual properties for the TypeScript compiler. + // These do not exist at runtime. + /// + readonly TType: Struct; + readonly TArray: Struct['TArray']; + readonly TValue: Struct['TValue']; + + /** + * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/isConcatSpreadable + */ + [Symbol.isConcatSpreadable]: true; } -export class Table - extends Chunked> - implements Clonable>, - Sliceable>, - Applicative, Table> { - - /** @nocollapse */ - public static empty>(schema = new Schema([])) { return new Table(schema, []); } - - public static from(): Table>; - public static from(source: RecordBatchReader): Table; - public static from(source: import('./ipc/reader').FromArg0): Table; - public static from(source: import('./ipc/reader').FromArg2): Table; - public static from(source: import('./ipc/reader').FromArg1): Promise>; - public static from(source: import('./ipc/reader').FromArg3): Promise>; - public static from(source: import('./ipc/reader').FromArg4): Promise>; - public static from(source: import('./ipc/reader').FromArg5): Promise>; - public static from(source: PromiseLike>): Promise>; - public static from(options: VectorBuilderOptions, TNull>): Table; - public static from(options: VectorBuilderOptionsAsync, TNull>): Promise>; - /** @nocollapse */ - public static from(input?: any) { - - if (!input) { return Table.empty(); } - - if (typeof input === 'object') { - const table = 
isIterable(input['values']) ? tableFromIterable(input) - : isAsyncIterable(input['values']) ? tableFromAsyncIterable(input) - : null; - if (table !== null) { return table; } +/** + * Tables are collections of {@link Vector}s and have a {@link Schema}. Use the convenience methods {@link makeTable} + * or {@link tableFromArrays} to create a table in JavaScript. To create a table from the IPC format, use + * {@link tableFromIPC}. + */ +export class Table { + + constructor(); + constructor(batches: Iterable>); + constructor(...batches: readonly RecordBatch[]); + constructor(...columns: { [P in keyof T]: Vector }[]); + constructor(...columns: { [P in keyof T]: Data | DataProps }[]); + constructor(schema: Schema, data?: RecordBatch | RecordBatch[]); + constructor(schema: Schema, data?: RecordBatch | RecordBatch[], offsets?: Uint32Array); + constructor(...args: any[]) { + + if (args.length === 0) { + this.batches = []; + this.schema = new Schema([]); + this._offsets = [0]; + return this; } - let reader = RecordBatchReader.from(input) as RecordBatchReader | Promise>; + let schema: Schema | undefined; + let offsets: Uint32Array | number[] | undefined; - if (isPromise>(reader)) { - return (async () => await Table.from(await reader))(); + if (args[0] instanceof Schema) { + schema = args.shift() as Schema; } - if (reader.isSync() && (reader = reader.open())) { - return !reader.schema ? 
Table.empty() : new Table(reader.schema, [...reader]); + + if (args[args.length - 1] instanceof Uint32Array) { + offsets = args.pop(); } - return (async (opening) => { - const reader = await opening; - const schema = reader.schema; - const batches: RecordBatch[] = []; - if (schema) { - for await (const batch of reader) { - batches.push(batch); + + const unwrap = (x: any): RecordBatch[] => { + if (x) { + if (x instanceof RecordBatch) { + return [x]; + } else if (x instanceof Table) { + return x.batches; + } else if (x instanceof Data) { + if (x.type instanceof Struct) { + return [new RecordBatch(new Schema(x.type.children), x)]; + } + } else if (Array.isArray(x)) { + return x.flatMap(v => unwrap(v)); + } else if (typeof x[Symbol.iterator] === 'function') { + return [...x].flatMap(v => unwrap(v)); + } else if (typeof x === 'object') { + const keys = Object.keys(x) as (keyof T)[]; + const vecs = keys.map((k) => new Vector([x[k]])); + const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type))); + const [, batches] = distributeVectorsIntoRecordBatches(schema, vecs); + return batches.length === 0 ? [new RecordBatch(x)] : batches; } - return new Table(schema, batches); } - return Table.empty(); - })(reader.open()); - } + return []; + }; - /** @nocollapse */ - public static async fromAsync(source: import('./ipc/reader').FromArgs): Promise> { - return await Table.from(source as any); - } + const batches = args.flatMap(v => unwrap(v)); - /** @nocollapse */ - public static fromStruct(vector: Vector>) { - return Table.new(vector.data.childData as Data[], vector.type.children); + schema = schema ?? batches[0]?.schema ?? 
new Schema([]); + + if (!(schema instanceof Schema)) { + throw new TypeError('Table constructor expects a [Schema, RecordBatch[]] pair.'); + } + + for (const batch of batches) { + if (!(batch instanceof RecordBatch)) { + throw new TypeError('Table constructor expects a [Schema, RecordBatch[]] pair.'); + } + if (!compareSchemas(schema, batch.schema)) { + throw new TypeError('Table and inner RecordBatch schemas must be equivalent.'); + } + } + + this.schema = schema; + this.batches = batches; + this._offsets = offsets ?? computeChunkOffsets(this.data); } + declare protected _offsets: Uint32Array | number[]; + declare protected _nullCount: number; + + declare public readonly schema: Schema; + /** - * @summary Create a new Table from a collection of Columns or Vectors, - * with an optional list of names or Fields. - * - * - * `Table.new` accepts an Object of - * Columns or Vectors, where the keys will be used as the field names - * for the Schema: - * ```ts - * const i32s = Int32Vector.from([1, 2, 3]); - * const f32s = Float32Vector.from([.1, .2, .3]); - * const table = Table.new({ i32: i32s, f32: f32s }); - * assert(table.schema.fields[0].name === 'i32'); - * ``` - * - * It also accepts a a list of Vectors with an optional list of names or - * Fields for the resulting Schema. 
If the list is omitted or a name is - * missing, the numeric index of each Vector will be used as the name: - * ```ts - * const i32s = Int32Vector.from([1, 2, 3]); - * const f32s = Float32Vector.from([.1, .2, .3]); - * const table = Table.new([i32s, f32s], ['i32']); - * assert(table.schema.fields[0].name === 'i32'); - * assert(table.schema.fields[1].name === '1'); - * ``` - * - * If the supplied arguments are Columns, `Table.new` will infer the Schema - * from the Columns: - * ```ts - * const i32s = Column.new('i32', Int32Vector.from([1, 2, 3])); - * const f32s = Column.new('f32', Float32Vector.from([.1, .2, .3])); - * const table = Table.new(i32s, f32s); - * assert(table.schema.fields[0].name === 'i32'); - * assert(table.schema.fields[1].name === 'f32'); - * ``` - * - * If the supplied Vector or Column lengths are unequal, `Table.new` will - * extend the lengths of the shorter Columns, allocating additional bytes - * to represent the additional null slots. The memory required to allocate - * these additional bitmaps can be computed as: - * ```ts - * let additionalBytes = 0; - * for (let vec in shorter_vectors) { - * additionalBytes += (((longestLength - vec.length) + 63) & ~63) >> 3; - * } - * ``` - * - * For example, an additional null bitmap for one million null values would require - * 125,000 bytes (`((1e6 + 63) & ~63) >> 3`), or approx. `0.11MiB` + * The contiguous {@link RecordBatch `RecordBatch`} chunks of the Table rows. */ - public static new(...columns: Columns): Table; - public static new(children: T): Table<{ [P in keyof T]: T[P] extends Vector ? T[P]['type'] : T[P] extends Exclude ? 
TypedArrayDataType : never}>; - public static new(children: ChildData, fields?: Fields): Table; - /** @nocollapse */ - public static new(...cols: any[]) { - return new Table(...distributeColumnsIntoRecordBatches(selectColumnArgs(cols))); - } - - constructor(table: Table); - constructor(batches: RecordBatch[]); - constructor(...batches: RecordBatch[]); - constructor(schema: Schema, batches: RecordBatch[]); - constructor(schema: Schema, ...batches: RecordBatch[]); - constructor(...args: any[]) { + declare public readonly batches: RecordBatch[]; - let schema: Schema = null!; + /** + * The contiguous {@link RecordBatch `RecordBatch`} chunks of the Table rows. + */ + public get data() { return this.batches.map(({ data }) => data); } - if (args[0] instanceof Schema) { schema = args[0]; } + /** + * The number of columns in this Table. + */ + public get numCols() { return this.schema.fields.length; } - const chunks = args[0] instanceof Table ? (args[0] as Table).chunks : selectArgs>(RecordBatch, args); + /** + * The number of rows in this Table. + */ + public get numRows() { + return this.data.reduce((numRows, data) => numRows + data.length, 0); + } - if (!schema && !(schema = chunks[0]?.schema)) { - throw new TypeError('Table must be initialized with a Schema or at least one RecordBatch'); + /** + * The number of null rows in this Table. + */ + public get nullCount() { + if (this._nullCount === -1) { + this._nullCount = computeChunkNullCounts(this.data); } + return this._nullCount; + } - chunks[0] || (chunks[0] = new _InternalEmptyPlaceholderRecordBatch(schema)); + /** + * Check whether an element is null. + * + * @param index The index at which to read the validity bitmap. + */ + // @ts-ignore + public isValid(index: number): boolean { return false; } - super(new Struct(schema.fields), chunks); + /** + * Get an element value by position. + * + * @param index The index of the element to read. 
+ */ + // @ts-ignore + public get(index: number): Struct['TValue'] | null { return null; } - this._schema = schema; - this._chunks = chunks; - } + /** + * Set an element value by position. + * + * @param index The index of the element to write. + * @param value The value to set. + */ + // @ts-ignore + public set(index: number, value: Struct['TValue'] | null): void { return; } - protected _schema: Schema; - // List of inner RecordBatches - protected _chunks: RecordBatch[]; - protected _children?: Column[]; + /** + * Retrieve the index of the first occurrence of a value in an Vector. + * + * @param element The value to locate in the Vector. + * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0. + */ + // @ts-ignore + public indexOf(element: Struct['TValue'], offset?: number): number { return -1; } - public get schema() { return this._schema; } - public get length() { return this._length; } - public get chunks() { return this._chunks; } - public get numCols() { return this._numChildren; } + /** + * Get the size in bytes of an element by index. + * @param index The index at which to get the byteLength. + */ + // @ts-ignore + public getByteLength(index: number): number { return 0; } + + /** + * Iterator for rows in this Table. + */ + public [Symbol.iterator]() { + return iteratorVisitor.visit(new Vector(this.data)); + } + + /** + * Return a JavaScript Array of the Table rows. + * + * @returns An Array of Table rows. + */ + public toArray() { + return [...this]; + } - public clone(chunks = this._chunks) { - return new Table(this._schema, chunks); + /** + * Returns a string representation of the Table rows. + * + * @returns A string representation of the Table rows. + */ + public toString() { + return `[\n ${this.toArray().join(',\n ')}\n]`; } - public getColumn(name: R): Column { - return this.getColumnAt(this.getColumnIndex(name)) as Column; + /** + * Combines two or more Tables of the same schema. 
+ * + * @param others Additional Tables to add to the end of this Tables. + */ + public concat(...others: Table[]) { + const schema = this.schema; + const data = this.data.concat(others.flatMap(({ data }) => data)); + return new Table(schema, data.map((data) => new RecordBatch(schema, data))); } - public getColumnAt(index: number): Column | null { - return this.getChildAt(index); + + /** + * Return a zero-copy sub-section of this Table. + * + * @param start The beginning of the specified portion of the Table. + * @param end The end of the specified portion of the Table. This is exclusive of the element at the index 'end'. + */ + public slice(begin?: number, end?: number): Table { + const schema = this.schema; + [begin, end] = clampRange({ length: this.numRows }, begin, end); + const data = sliceChunks(this.data, this._offsets, begin, end); + return new Table(schema, data.map((chunk) => new RecordBatch(schema, chunk))); } - public getColumnIndex(name: R) { - return this._schema.fields.findIndex((f) => f.name === name); + + /** + * Returns a child Vector by name, or null if this Vector has no child with the given name. + * + * @param name The name of the child to retrieve. + */ + public getChild

(name: P) { + return this.getChildAt(this.schema.fields.findIndex((f) => f.name === name)); } - public getChildAt(index: number): Column | null { - if (index < 0 || index >= this.numChildren) { return null; } - let field: Field, child: Column; - const fields = (this._schema as Schema).fields; - const columns = this._children || (this._children = []) as Column[]; - if (child = columns[index]) { return child as Column; } - if (field = fields[index]) { - const chunks = this._chunks - .map((chunk) => chunk.getChildAt(index)) - .filter((vec): vec is Vector => vec != null); - if (chunks.length > 0) { - return (columns[index] = new Column(field, chunks)); + + /** + * Returns a child Vector by index, or null if this Vector has no child at the supplied index. + * + * @param index The index of the child to retrieve. + */ + public getChildAt(index: number): Vector | null { + if (index > -1 && index < this.schema.fields.length) { + const data = this.data.map((data) => data.children[index] as Data); + if (data.length === 0) { + const { type } = this.schema.fields[index] as Field; + const empty = makeData({ type, length: 0, nullCount: 0 }); + data.push(empty._changeLengthAndBackfillNullBitmap(this.numRows)); } + return new Vector(data); } return null; } - // @ts-ignore - public serialize(encoding = 'binary', stream = true) { - const Writer = !stream - ? RecordBatchFileWriter - : RecordBatchStreamWriter; - return Writer.writeAll(this).toUint8Array(true); + /** + * Sets a child Vector by name. + * + * @param name The name of the child to overwrite. + * @returns A new Table with the supplied child for the specified name. + */ + public setChild

(name: P, child: Vector) { + return this.setChildAt(this.schema.fields?.findIndex((f) => f.name === name), child) as Table; } - public count(): number { - return this._length; + + /** + * Sets a child Vector by index. + * + * @param index The index of the child to overwrite. + * @returns A new Table with the supplied child at the specified index. + */ + public setChildAt(index: number, child?: null): Table; + public setChildAt(index: number, child: Vector): Table; + public setChildAt(index: number, child: any) { + let schema: Schema = this.schema; + let batches: RecordBatch[] = [...this.batches]; + if (index > -1 && index < this.numCols) { + if (!child) { + child = new Vector([makeData({ type: new Null, length: this.numRows })]); + } + const fields = schema.fields.slice() as Field[]; + const field = fields[index].clone({ type: child.type }); + const children = this.schema.fields.map((_, i) => this.getChildAt(i)!); + [fields[index], children[index]] = [field, child]; + [schema, batches] = distributeVectorsIntoRecordBatches(schema, children); + } + return new Table(schema, batches); } - public select(...columnNames: K[]) { - const nameToIndex = this._schema.fields.reduce((m, f, i) => m.set(f.name as K, i), new Map()); - return this.selectAt(...columnNames.map((columnName) => nameToIndex.get(columnName)!).filter((x) => x > -1)); + + /** + * Construct a new Table containing only specified columns. + * + * @param columnNames Names of columns to keep. + * @returns A new Table of columns matching the specified names. 
+ */ + public select(columnNames: K[]) { + const nameToIndex = this.schema.fields.reduce((m, f, i) => m.set(f.name as K, i), new Map()); + return this.selectAt(columnNames.map((columnName) => nameToIndex.get(columnName)!).filter((x) => x > -1)); } - public selectAt(...columnIndices: number[]) { - const schema = this._schema.selectAt(...columnIndices); - return new Table(schema, this._chunks.map(({ length, data: { childData } }) => { - return new RecordBatch(schema, length, columnIndices.map((i) => childData[i]).filter(Boolean)); - })); + + /** + * Construct a new Table containing only columns at the specified indices. + * + * @param columnIndices Indices of columns to keep. + * @returns A new Table of columns at the specified indices. + */ + public selectAt(columnIndices: number[]) { + const schema = this.schema.selectAt(columnIndices); + const data = this.batches.map((batch) => batch.selectAt(columnIndices)); + return new Table<{ [key: string]: K }>(schema, data); } - public assign(other: Table) { - const fields = this._schema.fields; + public assign(other: Table) { + + const fields = this.schema.fields; const [indices, oldToNew] = other.schema.fields.reduce((memo, f2, newIdx) => { const [indices, oldToNew] = memo; const i = fields.findIndex((f) => f.name === f2.name); @@ -261,29 +363,79 @@ export class Table return memo; }, [[], []] as number[][]); - const schema = this._schema.assign(other.schema); + const schema = this.schema.assign(other.schema); const columns = [ - ...fields.map((_f, i, _fs, j = oldToNew[i]) => - (j === undefined ? this.getColumnAt(i) : other.getColumnAt(j))!), - ...indices.map((i) => other.getColumnAt(i)!) - ].filter(Boolean) as Column<(T & R)[keyof T | keyof R]>[]; + ...fields.map((_, i) => [i, oldToNew[i]]).map(([i, j]) => + (j === undefined ? this.getChildAt(i) : other.getChildAt(j))!), + ...indices.map((i) => other.getChildAt(i)!) 
+ ].filter(Boolean) as Vector<(T & R)[keyof T | keyof R]>[]; return new Table(...distributeVectorsIntoRecordBatches(schema, columns)); } + + // Initialize this static property via an IIFE so bundlers don't tree-shake + // out this logic, but also so we're still compliant with `"sideEffects": false` + protected static [Symbol.toStringTag] = ((proto: Table) => { + (proto as any).schema = null; + (proto as any).batches = []; + (proto as any)._offsets = new Uint32Array([0]); + (proto as any)._nullCount = -1; + (proto as any)[Symbol.isConcatSpreadable] = true; + (proto as any)['isValid'] = wrapChunkedCall1(isChunkedValid); + (proto as any)['get'] = wrapChunkedCall1(getVisitor.getVisitFn(Type.Struct)); + (proto as any)['set'] = wrapChunkedCall2(setVisitor.getVisitFn(Type.Struct)); + (proto as any)['indexOf'] = wrapChunkedIndexOf(indexOfVisitor.getVisitFn(Type.Struct)); + (proto as any)['getByteLength'] = wrapChunkedCall1(byteLengthVisitor.getVisitFn(Type.Struct)); + return 'Table'; + })(Table.prototype); } -function tableFromIterable(input: VectorBuilderOptions, TNull>) { - const { type } = input; - if (type instanceof Struct) { - return Table.fromStruct(StructVector.from(input as VectorBuilderOptions, TNull>)); + +type VectorsMap = { [P in keyof T]: Vector }; + +/** + * Creates a new Table from an object of typed arrays. + * +* @example + * ```ts + * const table = makeTable({ + * a: new Int8Array([1, 2, 3]), + * }) + * ``` + * + * @param input Input an object of typed arrays. + * @returns A new Table. 
+ */ +export function makeTable>(input: I): Table<{ [P in keyof I]: TypedArrayDataType }> { + type T = { [P in keyof I]: TypedArrayDataType }; + const vecs = {} as VectorsMap; + const inputs = Object.entries(input) as [keyof I, I[keyof I]][]; + for (const [key, col] of inputs) { + vecs[key] = makeVector(col); } - return null; + return new Table(vecs); } -function tableFromAsyncIterable(input: VectorBuilderOptionsAsync, TNull>) { - const { type } = input; - if (type instanceof Struct) { - return StructVector.from(input as VectorBuilderOptionsAsync, TNull>).then((vector) => Table.fromStruct(vector)); +/** + * Creates a new Table from an object of typed arrays or JavaScript arrays. + * + * @example + * ```ts + * const table = tableFromArrays({ + * a: [1, 2, 3], + * b: new Int8Array([1, 2, 3]), + * }) + * ``` + * + * @param Input an object of typed arrays or JavaScript arrays. + * @returns A new Table. + */ +export function tableFromArrays>(input: I): Table<{ [P in keyof I]: ArrayDataType }> { + type T = { [P in keyof I]: ArrayDataType }; + const vecs = {} as VectorsMap; + const inputs = Object.entries(input) as [keyof I, I[keyof I]][]; + for (const [key, col] of inputs) { + vecs[key] = vectorFromArray(col); } - return null; + return new Table(vecs); } diff --git a/js/src/type.ts b/js/src/type.ts index 7d5c051ad0e89..4d0fbbfc6098e 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -15,19 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-/* eslint-disable @typescript-eslint/naming-convention */ +import { Field } from './schema.js'; +import { Vector } from './vector.js'; +import { MapRow } from './row/map.js'; +import { StructRow, StructRowProxy } from './row/struct.js'; +import { Long } from 'flatbuffers'; +import { TypedArrayConstructor } from './interfaces.js'; -import { Field } from './schema'; -import { flatbuffers } from 'flatbuffers'; -import { VectorType as V } from './interfaces'; -import { TypedArrayConstructor } from './interfaces'; - -import Long = flatbuffers.Long; import { Type, Precision, UnionMode, DateUnit, TimeUnit, IntervalUnit -} from './enum'; +} from './enum.js'; /** @ignore */ export type TimeBitWidth = 32 | 64; @@ -35,24 +34,12 @@ export type TimeBitWidth = 32 | 64; export type IntBitWidth = 8 | 16 | 32 | 64; /** @ignore */ export type IsSigned = { 'true': true; 'false': false }; -/** @ignore */ -export type RowLike = - ( Iterable<[string, T[keyof T]['TValue'] | null]> ) - & { [P in keyof T]: T[P]['TValue'] | null } - & { get(key: K): T[K]['TValue'] | null } - & { set(key: K, val: T[K]['TValue'] | null): void } - ; -/** @ignore */ -export type MapLike = - { [P in K['TValue']]: V['TValue'] | null } - & ( Map ) - ; - -export interface DataType { +export interface DataType { readonly TType: TType; readonly TArray: any; readonly TValue: any; + readonly TChildren: TChildren; readonly ArrayType: any; readonly children: Field[]; } @@ -61,34 +48,37 @@ export interface DataType { - - public [Symbol.toStringTag]: string; - - /** @nocollapse */ static isNull (x: any): x is Null { return x?.typeId === Type.Null; } - /** @nocollapse */ static isInt (x: any): x is Int_ { return x?.typeId === Type.Int; } - /** @nocollapse */ static isFloat (x: any): x is Float { return x?.typeId === Type.Float; } - /** @nocollapse */ static isBinary (x: any): x is Binary { return x?.typeId === Type.Binary; } - /** @nocollapse */ static isUtf8 (x: any): x is Utf8 { return x?.typeId === Type.Utf8; } - /** 
@nocollapse */ static isBool (x: any): x is Bool { return x?.typeId === Type.Bool; } - /** @nocollapse */ static isDecimal (x: any): x is Decimal { return x?.typeId === Type.Decimal; } - /** @nocollapse */ static isDate (x: any): x is Date_ { return x?.typeId === Type.Date; } - /** @nocollapse */ static isTime (x: any): x is Time_ { return x?.typeId === Type.Time; } - /** @nocollapse */ static isTimestamp (x: any): x is Timestamp_ { return x?.typeId === Type.Timestamp; } - /** @nocollapse */ static isInterval (x: any): x is Interval_ { return x?.typeId === Type.Interval; } - /** @nocollapse */ static isList (x: any): x is List { return x?.typeId === Type.List; } - /** @nocollapse */ static isStruct (x: any): x is Struct { return x?.typeId === Type.Struct; } - /** @nocollapse */ static isUnion (x: any): x is Union_ { return x?.typeId === Type.Union; } - /** @nocollapse */ static isFixedSizeBinary (x: any): x is FixedSizeBinary { return x?.typeId === Type.FixedSizeBinary; } - /** @nocollapse */ static isFixedSizeList (x: any): x is FixedSizeList { return x?.typeId === Type.FixedSizeList; } - /** @nocollapse */ static isMap (x: any): x is Map_ { return x?.typeId === Type.Map; } - /** @nocollapse */ static isDictionary (x: any): x is Dictionary { return x?.typeId === Type.Dictionary; } - - public get typeId(): TType { return Type.NONE; } +export abstract class DataType { + + declare public [Symbol.toStringTag]: string; + + /** @nocollapse */ static isNull(x: any): x is Null { return x?.typeId === Type.Null; } + /** @nocollapse */ static isInt(x: any): x is Int_ { return x?.typeId === Type.Int; } + /** @nocollapse */ static isFloat(x: any): x is Float { return x?.typeId === Type.Float; } + /** @nocollapse */ static isBinary(x: any): x is Binary { return x?.typeId === Type.Binary; } + /** @nocollapse */ static isUtf8(x: any): x is Utf8 { return x?.typeId === Type.Utf8; } + /** @nocollapse */ static isBool(x: any): x is Bool { return x?.typeId === Type.Bool; } + /** 
@nocollapse */ static isDecimal(x: any): x is Decimal { return x?.typeId === Type.Decimal; } + /** @nocollapse */ static isDate(x: any): x is Date_ { return x?.typeId === Type.Date; } + /** @nocollapse */ static isTime(x: any): x is Time_ { return x?.typeId === Type.Time; } + /** @nocollapse */ static isTimestamp(x: any): x is Timestamp_ { return x?.typeId === Type.Timestamp; } + /** @nocollapse */ static isInterval(x: any): x is Interval_ { return x?.typeId === Type.Interval; } + /** @nocollapse */ static isList(x: any): x is List { return x?.typeId === Type.List; } + /** @nocollapse */ static isStruct(x: any): x is Struct { return x?.typeId === Type.Struct; } + /** @nocollapse */ static isUnion(x: any): x is Union_ { return x?.typeId === Type.Union; } + /** @nocollapse */ static isFixedSizeBinary(x: any): x is FixedSizeBinary { return x?.typeId === Type.FixedSizeBinary; } + /** @nocollapse */ static isFixedSizeList(x: any): x is FixedSizeList { return x?.typeId === Type.FixedSizeList; } + /** @nocollapse */ static isMap(x: any): x is Map_ { return x?.typeId === Type.Map; } + /** @nocollapse */ static isDictionary(x: any): x is Dictionary { return x?.typeId === Type.Dictionary; } + + /** @nocollapse */ static isDenseUnion(x: any): x is DenseUnion { return DataType.isUnion(x) && x.mode === UnionMode.Dense; } + /** @nocollapse */ static isSparseUnion(x: any): x is SparseUnion { return DataType.isUnion(x) && x.mode === UnionMode.Sparse; } + + public get typeId(): TType { return Type.NONE; } protected static [Symbol.toStringTag] = ((proto: DataType) => { - ( proto).children = null; - ( proto).ArrayType = Array; + (proto).children = null; + (proto).ArrayType = Array; return proto[Symbol.toStringTag] = 'DataType'; })(DataType.prototype); } @@ -99,24 +89,22 @@ export interface Null extends DataType { TArray: void; TValue: null } export class Null extends DataType { public toString() { return `Null`; } public get typeId() { return Type.Null as Type.Null; } - protected 
static [Symbol.toStringTag] = ((proto: Null) => { - return proto[Symbol.toStringTag] = 'Null'; - })(Null.prototype); + protected static [Symbol.toStringTag] = ((proto: Null) => proto[Symbol.toStringTag] = 'Null')(Null.prototype); } /** @ignore */ type Ints = Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64; /** @ignore */ type IType = { - [Type.Int ]: { bitWidth: IntBitWidth; isSigned: true | false; TArray: IntArray; TValue: number | bigint | Int32Array | Uint32Array }; - [Type.Int8 ]: { bitWidth: 8; isSigned: true; TArray: Int8Array; TValue: number }; - [Type.Int16 ]: { bitWidth: 16; isSigned: true; TArray: Int16Array; TValue: number }; - [Type.Int32 ]: { bitWidth: 32; isSigned: true; TArray: Int32Array; TValue: number }; - [Type.Int64 ]: { bitWidth: 64; isSigned: true; TArray: Int32Array; TValue: bigint | Int32Array | Uint32Array }; - [Type.Uint8 ]: { bitWidth: 8; isSigned: false; TArray: Uint8Array; TValue: number }; - [Type.Uint16]: { bitWidth: 16; isSigned: false; TArray: Uint16Array; TValue: number }; - [Type.Uint32]: { bitWidth: 32; isSigned: false; TArray: Uint32Array; TValue: number }; - [Type.Uint64]: { bitWidth: 64; isSigned: false; TArray: Uint32Array; TValue: bigint | Int32Array | Uint32Array }; + [Type.Int]: { bitWidth: IntBitWidth; isSigned: true | false; TArray: IntArray; TValue: number | bigint }; + [Type.Int8]: { bitWidth: 8; isSigned: true; TArray: Int8Array; TValue: number }; + [Type.Int16]: { bitWidth: 16; isSigned: true; TArray: Int16Array; TValue: number }; + [Type.Int32]: { bitWidth: 32; isSigned: true; TArray: Int32Array; TValue: number }; + [Type.Int64]: { bitWidth: 64; isSigned: true; TArray: BigInt64Array; TValue: bigint }; + [Type.Uint8]: { bitWidth: 8; isSigned: false; TArray: Uint8Array; TValue: number }; + [Type.Uint16]: { bitWidth: 16; isSigned: false; TArray: Uint16Array; TValue: number }; + [Type.Uint32]: { bitWidth: 32; isSigned: false; TArray: Uint32Array; TValue: 
number }; + [Type.Uint64]: { bitWidth: 64; isSigned: false; TArray: BigUint64Array; TValue: bigint }; }; /** @ignore */ @@ -124,23 +112,23 @@ interface Int_ extends DataType { TArray: IType[T]['TA /** @ignore */ class Int_ extends DataType { constructor(public readonly isSigned: IType[T]['isSigned'], - public readonly bitWidth: IType[T]['bitWidth']) { + public readonly bitWidth: IType[T]['bitWidth']) { super(); } public get typeId() { return Type.Int as T; } - public get ArrayType(): TypedArrayConstructor { + public get ArrayType() { switch (this.bitWidth) { - case 8: return this.isSigned ? Int8Array : Uint8Array; + case 8: return this.isSigned ? Int8Array : Uint8Array; case 16: return this.isSigned ? Int16Array : Uint16Array; case 32: return this.isSigned ? Int32Array : Uint32Array; - case 64: return this.isSigned ? Int32Array : Uint32Array; + case 64: return this.isSigned ? BigInt64Array : BigUint64Array; } throw new Error(`Unrecognized ${this[Symbol.toStringTag]} type`); } public toString() { return `${this.isSigned ? 
`I` : `Ui`}nt${this.bitWidth}`; } protected static [Symbol.toStringTag] = ((proto: Int_) => { - ( proto).isSigned = null; - ( proto).bitWidth = null; + (proto).isSigned = null; + (proto).bitWidth = null; return proto[Symbol.toStringTag] = 'Int'; })(Int_.prototype); } @@ -148,39 +136,63 @@ class Int_ extends DataType { export { Int_ as Int }; /** @ignore */ -export class Int8 extends Int_ { constructor() { super(true, 8); } } +export class Int8 extends Int_ { + constructor() { super(true, 8); } + public get ArrayType() { return Int8Array; } +} /** @ignore */ -export class Int16 extends Int_ { constructor() { super(true, 16); } } +export class Int16 extends Int_ { + constructor() { super(true, 16); } + public get ArrayType() { return Int16Array; } +} /** @ignore */ -export class Int32 extends Int_ { constructor() { super(true, 32); } } +export class Int32 extends Int_ { + constructor() { super(true, 32); } + public get ArrayType() { return Int32Array; } +} /** @ignore */ -export class Int64 extends Int_ { constructor() { super(true, 64); } } +export class Int64 extends Int_ { + constructor() { super(true, 64); } + public get ArrayType() { return BigInt64Array; } +} /** @ignore */ -export class Uint8 extends Int_ { constructor() { super(false, 8); } } +export class Uint8 extends Int_ { + constructor() { super(false, 8); } + public get ArrayType() { return Uint8Array; } +} /** @ignore */ -export class Uint16 extends Int_ { constructor() { super(false, 16); } } +export class Uint16 extends Int_ { + constructor() { super(false, 16); } + public get ArrayType() { return Uint16Array; } +} /** @ignore */ -export class Uint32 extends Int_ { constructor() { super(false, 32); } } +export class Uint32 extends Int_ { + constructor() { super(false, 32); } + public get ArrayType() { return Uint32Array; } +} /** @ignore */ -export class Uint64 extends Int_ { constructor() { super(false, 64); } } +export class Uint64 extends Int_ { + constructor() { super(false, 64); } + public get 
ArrayType() { return BigUint64Array; } +} Object.defineProperty(Int8.prototype, 'ArrayType', { value: Int8Array }); Object.defineProperty(Int16.prototype, 'ArrayType', { value: Int16Array }); Object.defineProperty(Int32.prototype, 'ArrayType', { value: Int32Array }); -Object.defineProperty(Int64.prototype, 'ArrayType', { value: Int32Array }); +Object.defineProperty(Int64.prototype, 'ArrayType', { value: BigInt64Array }); Object.defineProperty(Uint8.prototype, 'ArrayType', { value: Uint8Array }); Object.defineProperty(Uint16.prototype, 'ArrayType', { value: Uint16Array }); Object.defineProperty(Uint32.prototype, 'ArrayType', { value: Uint32Array }); -Object.defineProperty(Uint64.prototype, 'ArrayType', { value: Uint32Array }); +Object.defineProperty(Uint64.prototype, 'ArrayType', { value: BigUint64Array }); /** @ignore */ type Floats = Type.Float | Type.Float16 | Type.Float32 | Type.Float64; /** @ignore */ type FType = { - [Type.Float ]: { precision: Precision; TArray: FloatArray; TValue: number }; - [Type.Float16]: { precision: Precision.HALF; TArray: Uint16Array; TValue: number }; - [Type.Float32]: { precision: Precision.SINGLE; TArray: Float32Array; TValue: number }; - [Type.Float64]: { precision: Precision.DOUBLE; TArray: Float64Array; TValue: number }; + [Type.Float]: { precision: Precision; TArray: FloatArray; TValue: number }; + [Type.Float16]: { precision: Precision.HALF; TArray: Uint16Array; TValue: number }; + [Type.Float32]: { precision: Precision.SINGLE; TArray: Float32Array; TValue: number }; + [Type.Float64]: { precision: Precision.DOUBLE; TArray: Float64Array; TValue: number }; }; /** @ignore */ @@ -202,7 +214,7 @@ export class Float extends DataType { } public toString() { return `Float${(this.precision << 5) || 16}`; } protected static [Symbol.toStringTag] = ((proto: Float) => { - ( proto).precision = null; + (proto).precision = null; return proto[Symbol.toStringTag] = 'Float'; })(Float.prototype); } @@ -228,7 +240,7 @@ export class Binary extends 
DataType { public get typeId() { return Type.Binary as Type.Binary; } public toString() { return `Binary`; } protected static [Symbol.toStringTag] = ((proto: Binary) => { - ( proto).ArrayType = Uint8Array; + (proto).ArrayType = Uint8Array; return proto[Symbol.toStringTag] = 'Binary'; })(Binary.prototype); } @@ -243,7 +255,7 @@ export class Utf8 extends DataType { public get typeId() { return Type.Utf8 as Type.Utf8; } public toString() { return `Utf8`; } protected static [Symbol.toStringTag] = ((proto: Utf8) => { - ( proto).ArrayType = Uint8Array; + (proto).ArrayType = Uint8Array; return proto[Symbol.toStringTag] = 'Utf8'; })(Utf8.prototype); } @@ -258,7 +270,7 @@ export class Bool extends DataType { public get typeId() { return Type.Bool as Type.Bool; } public toString() { return `Bool`; } protected static [Symbol.toStringTag] = ((proto: Bool) => { - ( proto).ArrayType = Uint8Array; + (proto).ArrayType = Uint8Array; return proto[Symbol.toStringTag] = 'Bool'; })(Bool.prototype); } @@ -268,15 +280,16 @@ export interface Decimal extends DataType { TArray: Uint32Array; T /** @ignore */ export class Decimal extends DataType { constructor(public readonly scale: number, - public readonly precision: number) { + public readonly precision: number, + public readonly bitWidth: number = 128) { super(); } public get typeId() { return Type.Decimal as Type.Decimal; } public toString() { return `Decimal[${this.precision}e${this.scale > 0 ? 
`+` : ``}${this.scale}]`; } protected static [Symbol.toStringTag] = ((proto: Decimal) => { - ( proto).scale = null; - ( proto).precision = null; - ( proto).ArrayType = Uint32Array; + (proto).scale = null; + (proto).precision = null; + (proto).ArrayType = Uint32Array; return proto[Symbol.toStringTag] = 'Decimal'; })(Decimal.prototype); } @@ -293,8 +306,8 @@ export class Date_ extends DataType { public get typeId() { return Type.Date as T; } public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Date_) => { - ( proto).unit = null; - ( proto).ArrayType = Int32Array; + (proto).unit = null; + (proto).ArrayType = Int32Array; return proto[Symbol.toStringTag] = 'Date'; })(Date_.prototype); } @@ -308,27 +321,37 @@ export class DateMillisecond extends Date_ { constructor() type Times = Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicrosecond | Type.TimeNanosecond; /** @ignore */ type TimesType = { - [Type.Time ]: { unit: TimeUnit; TValue: number | Int32Array }; - [Type.TimeSecond ]: { unit: TimeUnit.SECOND; TValue: number }; - [Type.TimeMillisecond]: { unit: TimeUnit.MILLISECOND; TValue: number }; - [Type.TimeMicrosecond]: { unit: TimeUnit.MICROSECOND; TValue: Int32Array }; - [Type.TimeNanosecond ]: { unit: TimeUnit.NANOSECOND; TValue: Int32Array }; + [Type.Time]: { unit: TimeUnit; TValue: number | bigint; TArray: Int32Array | BigInt64Array }; + [Type.TimeSecond]: { unit: TimeUnit.SECOND; TValue: number; TArray: Int32Array }; + [Type.TimeMillisecond]: { unit: TimeUnit.MILLISECOND; TValue: number; TArray: Int32Array }; + [Type.TimeMicrosecond]: { unit: TimeUnit.MICROSECOND; TValue: bigint; TArray: BigInt64Array }; + [Type.TimeNanosecond]: { unit: TimeUnit.NANOSECOND; TValue: bigint; TArray: BigInt64Array }; }; /** @ignore */ -interface Time_ extends DataType { TArray: Int32Array; TValue: TimesType[T]['TValue']; ArrayType: TypedArrayConstructor } +interface Time_ extends DataType { + 
TArray: TimesType[T]['TArray']; + TValue: TimesType[T]['TValue']; +} /** @ignore */ class Time_ extends DataType { constructor(public readonly unit: TimesType[T]['unit'], - public readonly bitWidth: TimeBitWidth) { + public readonly bitWidth: TimeBitWidth) { super(); } public get typeId() { return Type.Time as T; } public toString() { return `Time${this.bitWidth}<${TimeUnit[this.unit]}>`; } + public get ArrayType() { + switch (this.bitWidth) { + case 32: return Int32Array; + case 64: return BigInt64Array; + } + // @ts-ignore + throw new Error(`Unrecognized ${this[Symbol.toStringTag]} type`); + } protected static [Symbol.toStringTag] = ((proto: Time_) => { - ( proto).unit = null; - ( proto).bitWidth = null; - ( proto).ArrayType = Int32Array; + (proto).unit = null; + (proto).bitWidth = null; return proto[Symbol.toStringTag] = 'Time'; })(Time_.prototype); } @@ -347,19 +370,24 @@ export class TimeNanosecond extends Time_ { constructor() { /** @ignore */ type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond; /** @ignore */ -interface Timestamp_ extends DataType { TArray: Int32Array; TValue: number; ArrayType: TypedArrayConstructor } +interface Timestamp_ extends DataType { + TArray: Int32Array; + TValue: number; + ArrayType: TypedArrayConstructor; +} + /** @ignore */ class Timestamp_ extends DataType { constructor(public readonly unit: TimeUnit, - public readonly timezone?: string | null) { + public readonly timezone?: string | null) { super(); } public get typeId() { return Type.Timestamp as T; } public toString() { return `Timestamp<${TimeUnit[this.unit]}${this.timezone ? 
`, ${this.timezone}` : ``}>`; } protected static [Symbol.toStringTag] = ((proto: Timestamp_) => { - ( proto).unit = null; - ( proto).timezone = null; - ( proto).ArrayType = Int32Array; + (proto).unit = null; + (proto).timezone = null; + (proto).ArrayType = Int32Array; return proto[Symbol.toStringTag] = 'Timestamp'; })(Timestamp_.prototype); } @@ -378,7 +406,12 @@ export class TimestampNanosecond extends Timestamp_ { /** @ignore */ type Intervals = Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth; /** @ignore */ -interface Interval_ extends DataType { TArray: Int32Array; TValue: Int32Array; ArrayType: TypedArrayConstructor } +interface Interval_ extends DataType { + TArray: Int32Array; + TValue: Int32Array; + ArrayType: TypedArrayConstructor; +} + /** @ignore */ class Interval_ extends DataType { constructor(public readonly unit: IntervalUnit) { @@ -387,8 +420,8 @@ class Interval_ extends DataType { public get typeId() { return Type.Interval as T; } public toString() { return `Interval<${IntervalUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Interval_) => { - ( proto).unit = null; - ( proto).ArrayType = Int32Array; + (proto).unit = null; + (proto).ArrayType = Int32Array; return proto[Symbol.toStringTag] = 'Interval'; })(Interval_.prototype); } @@ -401,30 +434,40 @@ export class IntervalDayTime extends Interval_ { construct export class IntervalYearMonth extends Interval_ { constructor() { super(IntervalUnit.YEAR_MONTH); } } /** @ignore */ -export interface List extends DataType { TArray: IterableArrayLike; TValue: V } +export interface List extends DataType { + TArray: Array; + TValue: Vector; +} + /** @ignore */ export class List extends DataType { constructor(child: Field) { super(); this.children = [child]; } - public readonly children: Field[]; + public declare readonly children: Field[]; public get typeId() { return Type.List as Type.List; } public toString() { return `List<${this.valueType}>`; } public get valueType(): T { 
return this.children[0].type as T; } public get valueField(): Field { return this.children[0] as Field; } public get ArrayType(): T['ArrayType'] { return this.valueType.ArrayType; } protected static [Symbol.toStringTag] = ((proto: List) => { - ( proto).children = null; + (proto).children = null; return proto[Symbol.toStringTag] = 'List'; })(List.prototype); } /** @ignore */ -export interface Struct extends DataType { TArray: IterableArrayLike>; TValue: RowLike; dataTypes: T } +export interface Struct extends DataType { + TArray: Array>; + TValue: StructRowProxy; + dataTypes: T; +} + /** @ignore */ -export class Struct extends DataType { - public readonly children: Field[]; +export class Struct extends DataType { + public declare _row: StructRow; + public declare readonly children: Field[]; constructor(children: Field[]) { super(); this.children = children; @@ -432,7 +475,7 @@ export class Struct extends DataTyp public get typeId() { return Type.Struct as Type.Struct; } public toString() { return `Struct<{${this.children.map((f) => `${f.name}:${f.type}`).join(`, `)}}>`; } protected static [Symbol.toStringTag] = ((proto: Struct) => { - ( proto).children = null; + (proto).children = null; return proto[Symbol.toStringTag] = 'Struct'; })(Struct.prototype); } @@ -443,33 +486,30 @@ type Unions = Type.Union | Type.DenseUnion | Type.SparseUnion; interface Union_ extends DataType { TArray: Int8Array; TValue: any; ArrayType: TypedArrayConstructor } /** @ignore */ class Union_ extends DataType { - public readonly mode: UnionMode; - public readonly typeIds: Int32Array; - public readonly children: Field[]; - public readonly typeIdToChildIndex: { [key: number]: number }; + public declare readonly mode: UnionMode; + public declare readonly typeIds: Int32Array; + public declare readonly children: Field[]; + public declare readonly typeIdToChildIndex: { [key: number]: number }; constructor(mode: UnionMode, - typeIds: number[] | Int32Array, - children: Field[]) { + typeIds: number[] 
| Int32Array, + children: Field[]) { super(); this.mode = mode; this.children = children; this.typeIds = typeIds = Int32Array.from(typeIds); - this.typeIdToChildIndex = typeIds.reduce((typeIdToChildIndex, typeId, idx) => { - return (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex; - }, Object.create(null) as { [key: number]: number }); + this.typeIdToChildIndex = typeIds.reduce((typeIdToChildIndex, typeId, idx) => (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex, Object.create(null) as { [key: number]: number }); } public get typeId() { return Type.Union as T; } public toString() { - return `${this[Symbol.toStringTag]}<${ - this.children.map((x) => `${x.type}`).join(` | `) - }>`; -} + return `${this[Symbol.toStringTag]}<${this.children.map((x) => `${x.type}`).join(` | `) + }>`; + } protected static [Symbol.toStringTag] = ((proto: Union_) => { - ( proto).mode = null; - ( proto).typeIds = null; - ( proto).children = null; - ( proto).typeIdToChildIndex = null; - ( proto).ArrayType = Int8Array; + (proto).mode = null; + (proto).typeIds = null; + (proto).children = null; + (proto).typeIdToChildIndex = null; + (proto).ArrayType = Int8Array; return proto[Symbol.toStringTag] = 'Union'; })(Union_.prototype); } @@ -491,7 +531,12 @@ export class SparseUnion extends Union_ { } /** @ignore */ -export interface FixedSizeBinary extends DataType { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor } +export interface FixedSizeBinary extends DataType { + TArray: Uint8Array; + TValue: Uint8Array; + ArrayType: TypedArrayConstructor; +} + /** @ignore */ export class FixedSizeBinary extends DataType { constructor(public readonly byteWidth: number) { @@ -500,17 +545,21 @@ export class FixedSizeBinary extends DataType { public get typeId() { return Type.FixedSizeBinary as Type.FixedSizeBinary; } public toString() { return `FixedSizeBinary[${this.byteWidth}]`; } protected static [Symbol.toStringTag] = 
((proto: FixedSizeBinary) => { - ( proto).byteWidth = null; - ( proto).ArrayType = Uint8Array; + (proto).byteWidth = null; + (proto).ArrayType = Uint8Array; return proto[Symbol.toStringTag] = 'FixedSizeBinary'; })(FixedSizeBinary.prototype); } /** @ignore */ -export interface FixedSizeList extends DataType { TArray: IterableArrayLike; TValue: V } +export interface FixedSizeList extends DataType { + TArray: Array; + TValue: Vector; +} + /** @ignore */ export class FixedSizeList extends DataType { - public readonly children: Field[]; + public declare readonly children: Field[]; constructor(public readonly listSize: number, child: Field) { super(); this.children = [child]; @@ -521,35 +570,36 @@ export class FixedSizeList extends DataType`; } protected static [Symbol.toStringTag] = ((proto: FixedSizeList) => { - ( proto).children = null; - ( proto).listSize = null; + (proto).children = null; + (proto).listSize = null; return proto[Symbol.toStringTag] = 'FixedSizeList'; })(FixedSizeList.prototype); } /** @ignore */ -export interface Map_ extends DataType { - TArray: IterableArrayLike>; +export interface Map_ extends DataType }> { + TArray: Array>; TChild: Struct<{ key: TKey; value: TValue }>; - TValue: MapLike; + TValue: MapRow; } /** @ignore */ -export class Map_ extends DataType { +export class Map_ extends DataType }> { constructor(child: Field>, keysSorted = false) { super(); this.children = [child]; this.keysSorted = keysSorted; } - public readonly keysSorted: boolean; - public readonly children: Field>[]; + public declare readonly keysSorted: boolean; + public declare readonly children: Field>[]; public get typeId() { return Type.Map as Type.Map; } public get keyType(): TKey { return this.children[0].type.children[0].type as TKey; } public get valueType(): TValue { return this.children[0].type.children[1].type as TValue; } + public get childType() { return this.children[0].type as Struct<{ key: TKey; value: TValue }>; } public toString() { return 
`Map<{${this.children[0].type.children.map((f) => `${f.name}:${f.type}`).join(`, `)}}>`; } protected static [Symbol.toStringTag] = ((proto: Map_) => { - ( proto).children = null; - ( proto).keysSorted = null; + (proto).children = null; + (proto).keysSorted = null; return proto[Symbol.toStringTag] = 'Map_'; })(Map_.prototype); } @@ -561,19 +611,23 @@ const getId = ((atomicDictionaryId) => () => ++atomicDictionaryId)(-1); export type TKeys = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32; /** @ignore */ -export interface Dictionary extends DataType { TArray: TKey['TArray']; TValue: T['TValue'] } +export interface Dictionary extends DataType { + TArray: TKey['TArray']; + TValue: T['TValue']; +} + /** @ignore */ export class Dictionary extends DataType { - public readonly id: number; - public readonly indices: TKey; - public readonly dictionary: T; - public readonly isOrdered: boolean; + public declare readonly id: number; + public declare readonly indices: TKey; + public declare readonly dictionary: T; + public declare readonly isOrdered: boolean; constructor(dictionary: T, indices: TKey, id?: Long | number | null, isOrdered?: boolean | null) { super(); this.indices = indices; this.dictionary = dictionary; this.isOrdered = isOrdered || false; - this.id = id == null ? getId() : typeof id === 'number' ? id : id.low; + this.id = id == null ? getId() : (typeof id === 'number' ? 
id : id.low); } public get typeId() { return Type.Dictionary as Type.Dictionary; } public get children() { return this.dictionary.children; } @@ -581,16 +635,14 @@ export class Dictionary ex public get ArrayType(): T['ArrayType'] { return this.dictionary.ArrayType; } public toString() { return `Dictionary<${this.indices}, ${this.dictionary}>`; } protected static [Symbol.toStringTag] = ((proto: Dictionary) => { - ( proto).id = null; - ( proto).indices = null; - ( proto).isOrdered = null; - ( proto).dictionary = null; + (proto).id = null; + (proto).indices = null; + (proto).isOrdered = null; + (proto).dictionary = null; return proto[Symbol.toStringTag] = 'Dictionary'; })(Dictionary.prototype); } -/** @ignore */ -export interface IterableArrayLike extends ArrayLike, Iterable {} /** @ignore */ export type FloatArray = Uint16Array | Float32Array | Float64Array; /** @ignore */ @@ -600,14 +652,17 @@ export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16 export function strideForType(type: DataType) { const t: any = type; switch (type.typeId) { - case Type.Decimal: return 4; + case Type.Decimal: return (type as Decimal).bitWidth / 32; case Type.Timestamp: return 2; case Type.Date: return 1 + (t as Date_).unit; case Type.Interval: return 1 + (t as Interval_).unit; - case Type.Int: return 1 + +((t as Int_).bitWidth > 32); - case Type.Time: return 1 + +((t as Time_).bitWidth > 32); + // case Type.Int: return 1 + +((t as Int_).bitWidth > 32); + // case Type.Time: return 1 + +((t as Time_).bitWidth > 32); case Type.FixedSizeList: return (t as FixedSizeList).listSize; case Type.FixedSizeBinary: return (t as FixedSizeBinary).byteWidth; default: return 1; } } + +/** @ignore */ +export type TypeMap = Record; diff --git a/js/src/util/args.ts b/js/src/util/args.ts deleted file mode 100644 index 25f571999ff62..0000000000000 --- a/js/src/util/args.ts +++ /dev/null @@ -1,196 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { Data } from '../data'; -import { Field } from '../schema'; -import { Column } from '../column'; -import { Vector } from '../vector'; -import { DataType, Float32, Float64, FloatArray, IntArray, Int16, Int32, Int64, Int8, Uint16, Uint32, Uint64, Uint8 } from '../type'; -import { Chunked } from '../vector/chunked'; -import { BigIntArray, TypedArray as TypedArray_ } from '../interfaces'; -import { FloatArrayCtor } from '../vector/float'; -import { IntArrayCtor } from '../vector/int'; - -type RecordBatchCtor = typeof import('../recordbatch').RecordBatch; - -const isArray = Array.isArray; - -type TypedArray = Exclude; - -/** @ignore */ -export function isTypedArray(arr: any): arr is TypedArray { - return ArrayBuffer.isView(arr) && 'BYTES_PER_ELEMENT' in arr; -} - - -/** @ignore */ -type ArrayCtor = FloatArrayCtor | IntArrayCtor; - -/** @ignore */ -export function arrayTypeToDataType(ctor: ArrayCtor) { - switch (ctor) { - case Int8Array: return Int8; - case Int16Array: return Int16; - case Int32Array: return Int32; - case BigInt64Array: return Int64; - case Uint8Array: return Uint8; - case Uint16Array: return Uint16; - case Uint32Array: return Uint32; - case BigUint64Array: return Uint64; - case Float32Array: return Float32; - case 
Float64Array: return Float64; - default: return null; - } -} - -/** @ignore */ -function vectorFromTypedArray(array: TypedArray): Vector { - const ArrowType = arrayTypeToDataType(array.constructor as ArrayCtor); - if (!ArrowType) { - throw new TypeError('Unrecognized Array input'); - } - const type = new ArrowType(); - const data = Data.new(type, 0, array.length, 0, [undefined, array as IntArray | FloatArray]); - return Vector.new(data); -} - -/** @ignore */ -export const selectArgs = (Ctor: any, vals: any[]) => _selectArgs(Ctor, vals, [], 0) as T[]; -/** @ignore */ -export const selectColumnArgs = (args: any[]) => { - const [fields, values] = _selectFieldArgs(args, [[], []]); - return values.map((x, i) => - x instanceof Column ? Column.new(x.field.clone(fields[i]), x) : - x instanceof Vector ? Column.new(fields[i], x) as Column : - isTypedArray(x) ? Column.new(fields[i], vectorFromTypedArray(x)) as Column : - Column.new(fields[i], [] as Vector[])); -}; - -/** @ignore */ -export const selectFieldArgs = (args: any[]) => _selectFieldArgs(args, [[], []]); -/** @ignore */ -export const selectChunkArgs = (Ctor: any, vals: any[]) => _selectChunkArgs(Ctor, vals, [], 0) as T[]; -/** @ignore */ -export const selectVectorChildrenArgs = (Ctor: RecordBatchCtor, vals: any[]) => _selectVectorChildrenArgs(Ctor, vals, [], 0) as T[]; -/** @ignore */ -export const selectColumnChildrenArgs = (Ctor: RecordBatchCtor, vals: any[]) => _selectColumnChildrenArgs(Ctor, vals, [], 0) as T[]; - -/** @ignore */ -function _selectArgs(Ctor: any, vals: any[], res: T[], idx: number) { - let value: any, j = idx; - let i = -1; - const n = vals.length; - while (++i < n) { - if (isArray(value = vals[i])) { - j = _selectArgs(Ctor, value, res, j).length; - } else if (value instanceof Ctor) { res[j++] = value; } - } - return res; -} - -/** @ignore */ -function _selectChunkArgs(Ctor: any, vals: any[], res: T[], idx: number) { - let value: any, j = idx; - let i = -1; - const n = vals.length; - while (++i < 
n) { - if (isArray(value = vals[i])) { - j = _selectChunkArgs(Ctor, value, res, j).length; - } else if (value instanceof Chunked) { - j = _selectChunkArgs(Ctor, value.chunks, res, j).length; - } else if (value instanceof Ctor) { res[j++] = value; } - } - return res; -} - -/** @ignore */ -function _selectVectorChildrenArgs(Ctor: RecordBatchCtor, vals: any[], res: T[], idx: number) { - let value: any, j = idx; - let i = -1; - const n = vals.length; - while (++i < n) { - if (isArray(value = vals[i])) { - j = _selectVectorChildrenArgs(Ctor, value, res, j).length; - } else if (value instanceof Ctor) { - j = _selectArgs(Vector, value.schema.fields.map((_, i) => value.getChildAt(i)!), res, j).length; - } else if (value instanceof Vector) { res[j++] = value as T; } - } - return res; -} - -/** @ignore */ -function _selectColumnChildrenArgs(Ctor: RecordBatchCtor, vals: any[], res: T[], idx: number) { - let value: any, j = idx; - let i = -1; - const n = vals.length; - while (++i < n) { - if (isArray(value = vals[i])) { - j = _selectColumnChildrenArgs(Ctor, value, res, j).length; - } else if (value instanceof Ctor) { - j = _selectArgs(Column, value.schema.fields.map((f, i) => Column.new(f, value.getChildAt(i)!)), res, j).length; - } else if (value instanceof Column) { res[j++] = value as T; } - } - return res; -} - -/** @ignore */ -const toKeysAndValues = (xs: [any[], any[]], [k, v]: [any, any], i: number) => (xs[0][i] = k, xs[1][i] = v, xs); - -/** @ignore */ -function _selectFieldArgs(vals: any[], ret: [Field[], (Vector | TypedArray)[]]): [Field[], (T[keyof T] | Vector | TypedArray)[]] { - let keys: any[]; - let n: number; - switch (n = vals.length) { - case 0: return ret; - case 1: - keys = ret[0]; - if (!(vals[0])) { return ret; } - if (isArray(vals[0])) { return _selectFieldArgs(vals[0], ret); } - if (!(vals[0] instanceof Data || vals[0] instanceof Vector || isTypedArray(vals[0]) || vals[0] instanceof DataType)) { - [keys, vals] = 
Object.entries(vals[0]).reduce(toKeysAndValues, ret); - } - break; - default: - !isArray(keys = vals[n - 1]) - ? (vals = isArray(vals[0]) ? vals[0] : vals, keys = []) - : (vals = isArray(vals[0]) ? vals[0] : vals.slice(0, n - 1)); - } - - let fieldIndex = -1; - let valueIndex = -1; - let idx = -1; - const len = vals.length; - let field: number | string | Field; - let val: Vector | Data; - const [fields, values] = ret as [Field[], any[]]; - - while (++idx < len) { - val = vals[idx]; - if (val instanceof Column && (values[++valueIndex] = val)) { - fields[++fieldIndex] = val.field.clone(keys[idx], val.type, true); - } else { - ({ [idx]: field = idx } = keys); - if (val instanceof DataType && (values[++valueIndex] = val)) { - fields[++fieldIndex] = Field.new(field, val as DataType, true) as Field; - } else if (val?.type && (values[++valueIndex] = val)) { - val instanceof Data && (values[valueIndex] = val = Vector.new(val) as Vector); - fields[++fieldIndex] = Field.new(field, val.type, true) as Field; - } - } - } - return ret; -} diff --git a/js/src/util/bit.ts b/js/src/util/bit.ts index e4c3d267ecf33..ddda9cee58093 100644 --- a/js/src/util/bit.ts +++ b/js/src/util/bit.ts @@ -28,8 +28,8 @@ export function getBit(_data: any, _index: number, byte: number, bit: number): 0 /** @ignore */ export function setBool(bytes: Uint8Array, index: number, value: any) { return value ? - !!(bytes[index >> 3] |= (1 << (index % 8))) || true : - !(bytes[index >> 3] &= ~(1 << (index % 8))) && false ; + !!(bytes[index >> 3] |= (1 << (index % 8))) || true : + !(bytes[index >> 3] &= ~(1 << (index % 8))) && false; } /** @ignore */ @@ -134,9 +134,9 @@ export function popcnt_bit_range(data: Uint8Array, lhs: number, rhs: number): nu /** @ignore */ export function popcnt_array(arr: ArrayBufferView, byteOffset?: number, byteLength?: number) { - let cnt = 0, pos = byteOffset! 
| 0; + let cnt = 0, pos = Math.trunc(byteOffset!); const view = new DataView(arr.buffer, arr.byteOffset, arr.byteLength); - const len = byteLength === void 0 ? arr.byteLength : pos + byteLength; + const len = byteLength === void 0 ? arr.byteLength : pos + byteLength; while (len - pos >= 4) { cnt += popcnt_uint32(view.getUint32(pos)); pos += 4; @@ -154,7 +154,7 @@ export function popcnt_array(arr: ArrayBufferView, byteOffset?: number, byteLeng /** @ignore */ export function popcnt_uint32(uint32: number): number { - let i = uint32 | 0; + let i = Math.trunc(uint32); i = i - ((i >>> 1) & 0x55555555); i = (i & 0x33333333) + ((i >>> 2) & 0x33333333); return (((i + (i >>> 4)) & 0x0F0F0F0F) * 0x01010101) >>> 24; diff --git a/js/src/util/bn.ts b/js/src/util/bn.ts index 7c71969a41995..925a850840b9d 100644 --- a/js/src/util/bn.ts +++ b/js/src/util/bn.ts @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -import { ArrayBufferViewInput, toArrayBufferView } from './buffer'; -import { TypedArray, TypedArrayConstructor } from '../interfaces'; -import { BigIntArray, BigIntArrayConstructor } from '../interfaces'; -import { BigIntAvailable, BigInt64Array, BigUint64Array } from './compat'; +import { ArrayBufferViewInput, toArrayBufferView } from './buffer.js'; +import { TypedArray, TypedArrayConstructor } from '../interfaces.js'; +import { BigIntArray, BigIntArrayConstructor } from '../interfaces.js'; +import { BigIntAvailable, BigInt64Array, BigUint64Array } from './compat.js'; /** @ignore */ export const isArrowBigNumSymbol = Symbol.for('isArrowBigNum'); @@ -36,10 +36,10 @@ function BigNum(this: any, x: any, ...xs: any) { } BigNum.prototype[isArrowBigNumSymbol] = true; -BigNum.prototype.toJSON = function>(this: T) { return `"${bignumToString(this)}"`; }; -BigNum.prototype.valueOf = function>(this: T) { return bignumToNumber(this); }; -BigNum.prototype.toString = function>(this: T) { return bignumToString(this); }; 
-BigNum.prototype[Symbol.toPrimitive] = function>(this: T, hint: 'string' | 'number' | 'default' = 'default') { +BigNum.prototype.toJSON = function >(this: T) { return `"${bignumToString(this)}"`; }; +BigNum.prototype.valueOf = function >(this: T) { return bignumToNumber(this); }; +BigNum.prototype.toString = function >(this: T) { return bignumToString(this); }; +BigNum.prototype[Symbol.toPrimitive] = function >(this: T, hint: 'string' | 'number' | 'default' = 'default') { switch (hint) { case 'number': return bignumToNumber(this); case 'string': return bignumToString(this); @@ -53,7 +53,7 @@ BigNum.prototype[Symbol.toPrimitive] = function>(this: type TypedArrayConstructorArgs = [number | void] | [Iterable | Iterable] | - [ArrayBufferLike, number | void, number | void] ; + [ArrayBufferLike, number | void, number | void]; /** @ignore */ function SignedBigNum(this: any, ...args: TypedArrayConstructorArgs) { return BigNum.apply(this, args); } @@ -62,12 +62,12 @@ function UnsignedBigNum(this: any, ...args: TypedArrayConstructorArgs) { return /** @ignore */ function DecimalBigNum(this: any, ...args: TypedArrayConstructorArgs) { return BigNum.apply(this, args); } -Object.setPrototypeOf(SignedBigNum.prototype, Object.create(Int32Array.prototype)); +Object.setPrototypeOf(SignedBigNum.prototype, Object.create(Int32Array.prototype)); Object.setPrototypeOf(UnsignedBigNum.prototype, Object.create(Uint32Array.prototype)); -Object.setPrototypeOf(DecimalBigNum.prototype, Object.create(Uint32Array.prototype)); -Object.assign(SignedBigNum.prototype, BigNum.prototype, { 'constructor': SignedBigNum, 'signed': true, 'TypedArray': Int32Array, 'BigIntArray': BigInt64Array }); +Object.setPrototypeOf(DecimalBigNum.prototype, Object.create(Uint32Array.prototype)); +Object.assign(SignedBigNum.prototype, BigNum.prototype, { 'constructor': SignedBigNum, 'signed': true, 'TypedArray': Int32Array, 'BigIntArray': BigInt64Array }); Object.assign(UnsignedBigNum.prototype, BigNum.prototype, { 
'constructor': UnsignedBigNum, 'signed': false, 'TypedArray': Uint32Array, 'BigIntArray': BigUint64Array }); -Object.assign(DecimalBigNum.prototype, BigNum.prototype, { 'constructor': DecimalBigNum, 'signed': true, 'TypedArray': Uint32Array, 'BigIntArray': BigUint64Array }); +Object.assign(DecimalBigNum.prototype, BigNum.prototype, { 'constructor': DecimalBigNum, 'signed': true, 'TypedArray': Uint32Array, 'BigIntArray': BigUint64Array }); /** @ignore */ function bignumToNumber>(bn: T) { @@ -92,7 +92,7 @@ export let bignumToBigInt: { >(a: T): bigint }; if (!BigIntAvailable) { bignumToString = decimalToString; - bignumToBigInt = bignumToString; + bignumToBigInt = bignumToString; } else { bignumToBigInt = (>(a: T) => a.byteLength === 8 ? new a['BigIntArray'](a.buffer, a.byteOffset, 1)[0] : decimalToString(a)); bignumToString = (>(a: T) => a.byteLength === 8 ? `${new a['BigIntArray'](a.buffer, a.byteOffset, 1)[0]}` : decimalToString(a)); @@ -123,32 +123,32 @@ export class BN { /** @nocollapse */ public static new(num: T, isSigned?: boolean): (T & BN) { switch (isSigned) { - case true: return new ( SignedBigNum)(num) as (T & BN); - case false: return new ( UnsignedBigNum)(num) as (T & BN); + case true: return new (SignedBigNum)(num) as (T & BN); + case false: return new (UnsignedBigNum)(num) as (T & BN); } switch (num.constructor) { case Int8Array: case Int16Array: case Int32Array: case BigInt64Array: - return new ( SignedBigNum)(num) as (T & BN); + return new (SignedBigNum)(num) as (T & BN); } if (num.byteLength === 16) { - return new ( DecimalBigNum)(num) as (T & BN); + return new (DecimalBigNum)(num) as (T & BN); } - return new ( UnsignedBigNum)(num) as (T & BN); + return new (UnsignedBigNum)(num) as (T & BN); } /** @nocollapse */ public static signed(num: T): (T & BN) { - return new ( SignedBigNum)(num) as (T & BN); + return new (SignedBigNum)(num) as (T & BN); } /** @nocollapse */ public static unsigned(num: T): (T & BN) { - return new ( UnsignedBigNum)(num) as (T 
& BN); + return new (UnsignedBigNum)(num) as (T & BN); } /** @nocollapse */ public static decimal(num: T): (T & BN) { - return new ( DecimalBigNum)(num) as (T & BN); + return new (DecimalBigNum)(num) as (T & BN); } constructor(num: T, isSigned?: boolean) { return BN.new(num, isSigned) as any; @@ -158,20 +158,20 @@ export class BN { /** @ignore */ export interface BN extends TypedArrayLike { - new(buffer: T, signed?: boolean): T; + new (buffer: T, signed?: boolean): T; readonly signed: boolean; readonly TypedArray: TypedArrayConstructor; readonly BigIntArray: BigIntArrayConstructor; [Symbol.toStringTag]: - 'Int8Array' | - 'Int16Array' | - 'Int32Array' | - 'Uint8Array' | - 'Uint16Array' | - 'Uint32Array' | - 'Uint8ClampedArray'; + 'Int8Array' | + 'Int16Array' | + 'Int32Array' | + 'Uint8Array' | + 'Uint16Array' | + 'Uint32Array' | + 'Uint8ClampedArray'; /** * Convert the bytes to their (positive) decimal representation for printing diff --git a/js/src/util/buffer.ts b/js/src/util/buffer.ts index 86dae86c6b33f..8822d794cfe98 100644 --- a/js/src/util/buffer.ts +++ b/js/src/util/buffer.ts @@ -15,12 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-import { flatbuffers } from 'flatbuffers'; -import { encodeUtf8 } from '../util/utf8'; -import ByteBuffer = flatbuffers.ByteBuffer; -import { TypedArray, TypedArrayConstructor } from '../interfaces'; -import { BigIntArray, BigIntArrayConstructor } from '../interfaces'; -import { isPromise, isIterable, isAsyncIterable, isIteratorResult, BigInt64Array, BigUint64Array } from './compat'; +import { encodeUtf8 } from '../util/utf8.js'; +import { TypedArray, TypedArrayConstructor, BigIntArrayConstructor } from '../interfaces.js'; +import { isPromise, isIterable, isAsyncIterable, isIteratorResult, isFlatbuffersByteBuffer, BigInt64Array, BigUint64Array } from './compat.js'; +import { ByteBuffer } from 'flatbuffers'; /** @ignore */ const SharedArrayBuf = (typeof SharedArrayBuffer !== 'undefined' ? SharedArrayBuffer : ArrayBuffer); @@ -67,8 +65,8 @@ export function joinUint8Arrays(chunks: Uint8Array[], size?: number | null): [Ui const byteLength = result.reduce((x, b) => x + b.byteLength, 0); let source: Uint8Array, sliced: Uint8Array, buffer: Uint8Array | void; let offset = 0, index = -1; - const length = Math.min(size || Infinity, byteLength); - for (let n = result.length; ++index < n;) { + const length = Math.min(size || Number.POSITIVE_INFINITY, byteLength); + for (const n = result.length; ++index < n;) { source = result[index]; sliced = source.subarray(0, Math.min(source.length, length - offset)); if (length <= (offset + sliced.length)) { @@ -85,14 +83,14 @@ export function joinUint8Arrays(chunks: Uint8Array[], size?: number | null): [Ui } /** @ignore */ -export type ArrayBufferViewInput = ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable | ArrayLike | ByteBuffer | string | null | undefined | - IteratorResult | ArrayLike | ByteBuffer | string | null | undefined> | - ReadableStreamReadResult | ArrayLike | ByteBuffer | string | null | undefined> ; +export type ArrayBufferViewInput = ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable | ArrayLike | 
ByteBuffer | string | null | undefined | + IteratorResult | ArrayLike | ByteBuffer | string | null | undefined> | + ReadableStreamDefaultReadResult | ArrayLike | ByteBuffer | string | null | undefined>; /** @ignore */ -export function toArrayBufferView(ArrayBufferViewCtor: TypedArrayConstructor, input: ArrayBufferViewInput): T; -export function toArrayBufferView(ArrayBufferViewCtor: BigIntArrayConstructor, input: ArrayBufferViewInput): T; -export function toArrayBufferView(ArrayBufferViewCtor: any, input: ArrayBufferViewInput) { +export function toArrayBufferView< + T extends TypedArrayConstructor | BigIntArrayConstructor +>(ArrayBufferViewCtor: any, input: ArrayBufferViewInput): InstanceType { let value: any = isIteratorResult(input) ? input.value : input; @@ -108,9 +106,9 @@ export function toArrayBufferView(ArrayBufferViewCtor: any, input: ArrayBufferVi if (typeof value === 'string') { value = encodeUtf8(value); } if (value instanceof ArrayBuffer) { return new ArrayBufferViewCtor(value); } if (value instanceof SharedArrayBuf) { return new ArrayBufferViewCtor(value); } - if (value instanceof ByteBuffer) { return toArrayBufferView(ArrayBufferViewCtor, value.bytes()); } - return !ArrayBuffer.isView(value) ? ArrayBufferViewCtor.from(value) : value.byteLength <= 0 ? new ArrayBufferViewCtor(0) - : new ArrayBufferViewCtor(value.buffer, value.byteOffset, value.byteLength / ArrayBufferViewCtor.BYTES_PER_ELEMENT); + if (isFlatbuffersByteBuffer(value)) { return toArrayBufferView(ArrayBufferViewCtor, value.bytes()); } + return !ArrayBuffer.isView(value) ? ArrayBufferViewCtor.from(value) : (value.byteLength <= 0 ? 
new ArrayBufferViewCtor(0) + : new ArrayBufferViewCtor(value.buffer, value.byteOffset, value.byteLength / ArrayBufferViewCtor.BYTES_PER_ELEMENT)); } /** @ignore */ export const toInt8Array = (input: ArrayBufferViewInput) => toArrayBufferView(Int8Array, input); @@ -133,17 +131,16 @@ const pump = | AsyncIterator>(iterator: T) => { it /** @ignore */ export function* toArrayBufferViewIterator(ArrayCtor: TypedArrayConstructor, source: ArrayBufferViewIteratorInput) { - const wrap = function*(x: T) { yield x; }; const buffers: Iterable = - (typeof source === 'string') ? wrap(source) - : (ArrayBuffer.isView(source)) ? wrap(source) - : (source instanceof ArrayBuffer) ? wrap(source) - : (source instanceof SharedArrayBuf) ? wrap(source) - : !isIterable(source) ? wrap(source) : source; + (typeof source === 'string') ? wrap(source) + : (ArrayBuffer.isView(source)) ? wrap(source) + : (source instanceof ArrayBuffer) ? wrap(source) + : (source instanceof SharedArrayBuf) ? wrap(source) + : !isIterable(source) ? wrap(source) : source; yield* pump((function* (it: Iterator): Generator { - let r: IteratorResult = null; + let r: IteratorResult = null; do { r = it.next(yield toArrayBufferView(ArrayCtor, r)); } while (!r.done); @@ -174,8 +171,8 @@ export async function* toArrayBufferViewAsyncIterator(Arra const wrap = async function*(x: T) { yield await x; }; const emit = async function* >(source: T) { - yield* pump((function*(it: Iterator) { - let r: IteratorResult = null; + yield* pump((function* (it: Iterator) { + let r: IteratorResult = null; do { r = it.next(yield r?.value); } while (!r.done); @@ -183,16 +180,16 @@ export async function* toArrayBufferViewAsyncIterator(Arra }; const buffers: AsyncIterable = - (typeof source === 'string') ? wrap(source) // if string, wrap in an AsyncIterableIterator - : (ArrayBuffer.isView(source)) ? wrap(source) // if TypedArray, wrap in an AsyncIterableIterator - : (source instanceof ArrayBuffer) ? 
wrap(source) // if ArrayBuffer, wrap in an AsyncIterableIterator - : (source instanceof SharedArrayBuf) ? wrap(source) // if SharedArrayBuffer, wrap in an AsyncIterableIterator - : isIterable(source) ? emit(source) // If Iterable, wrap in an AsyncIterableIterator and compose the `next` values - : !isAsyncIterable(source) ? wrap(source) // If not an AsyncIterable, treat as a sentinel and wrap in an AsyncIterableIterator - : source; // otherwise if AsyncIterable, use it + (typeof source === 'string') ? wrap(source) // if string, wrap in an AsyncIterableIterator + : (ArrayBuffer.isView(source)) ? wrap(source) // if TypedArray, wrap in an AsyncIterableIterator + : (source instanceof ArrayBuffer) ? wrap(source) // if ArrayBuffer, wrap in an AsyncIterableIterator + : (source instanceof SharedArrayBuf) ? wrap(source) // if SharedArrayBuffer, wrap in an AsyncIterableIterator + : isIterable(source) ? emit(source) // If Iterable, wrap in an AsyncIterableIterator and compose the `next` values + : !isAsyncIterable(source) ? wrap(source) // If not an AsyncIterable, treat as a sentinel and wrap in an AsyncIterableIterator + : source; // otherwise if AsyncIterable, use it yield* pump((async function* (it: AsyncIterator): AsyncGenerator { - let r: IteratorResult = null; + let r: IteratorResult = null; do { r = await it.next(yield toArrayBufferView(ArrayCtor, r)); } while (!r.done); diff --git a/js/src/util/chunk.ts b/js/src/util/chunk.ts new file mode 100644 index 0000000000000..6098b04243422 --- /dev/null +++ b/js/src/util/chunk.ts @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Data } from '../data.js'; +import { DataType } from '../type.js'; + +/** @ignore */ +export class ChunkedIterator implements IterableIterator { + private chunkIndex = 0; + private chunkIterator: IterableIterator; + + constructor( + private numChunks: number = 0, + private getChunkIterator: (chunkIndex: number) => IterableIterator + ) { + this.chunkIterator = this.getChunkIterator(0); + } + + next(): IteratorResult { + while (this.chunkIndex < this.numChunks) { + const next = this.chunkIterator.next(); + + if (!next.done) { + return next; + } + + if (++this.chunkIndex < this.numChunks) { + this.chunkIterator = this.getChunkIterator(this.chunkIndex); + } + } + + return { done: true, value: null }; + } + + [Symbol.iterator]() { + return this; + } +} + +/** @ignore */ +export function computeChunkNullCounts(chunks: ReadonlyArray>) { + return chunks.reduce((nullCount, chunk) => nullCount + chunk.nullCount, 0); +} + +/** @ignore */ +export function computeChunkOffsets(chunks: ReadonlyArray>) { + return chunks.reduce((offsets, chunk, index) => { + offsets[index + 1] = offsets[index] + chunk.length; + return offsets; + }, new Uint32Array(chunks.length + 1)); +} + +/** @ignore */ +export function sliceChunks(chunks: ReadonlyArray>, offsets: Uint32Array | Array, begin: number, end: number) { + const slices: Data[] = []; + for (let i = -1, n = chunks.length; ++i < n;) { + const chunk = chunks[i]; + const offset = offsets[i]; + const { length } = chunk; + // Stop if the child is to the right of the slice boundary + if (offset >= end) { break; } + 
// Exclude children to the left of of the slice boundary + if (begin >= offset + length) { continue; } + // Include entire child if between both left and right boundaries + if (offset >= begin && (offset + length) <= end) { + slices.push(chunk); + continue; + } + // Include the child slice that overlaps one of the slice boundaries + const from = Math.max(0, begin - offset); + const to = Math.min(end - offset, length); + slices.push(chunk.slice(from, to - from)); + } + if (slices.length === 0) { + slices.push(chunks[0].slice(0, 0)); + } + return slices; +} + +/** @ignore */ +export function binarySearch< + T extends DataType, + F extends (chunks: ReadonlyArray>, _1: number, _2: number) => any +>(chunks: ReadonlyArray>, offsets: Uint32Array | number[], idx: number, fn: F) { + let lhs = 0, mid = 0, rhs = offsets.length - 1; + do { + if (lhs >= rhs - 1) { + return (idx < offsets[rhs]) ? fn(chunks, lhs, idx - offsets[lhs]) : null; + } + mid = lhs + (Math.trunc((rhs - lhs) * .5)); + idx < offsets[mid] ? 
(rhs = mid) : (lhs = mid); + } while (lhs < rhs); +} + +/** @ignore */ +export function isChunkedValid(data: Data, index: number): boolean { + return data.getValid(index); +} + +/** @ignore */ +export function wrapChunkedCall1(fn: (c: Data, _1: number) => any) { + function chunkedFn(chunks: ReadonlyArray>, i: number, j: number) { return fn(chunks[i], j); } + return function (this: any, index: number) { + const data = this.data as ReadonlyArray>; + return binarySearch(data, this._offsets, index, chunkedFn); + }; +} + +/** @ignore */ +export function wrapChunkedCall2(fn: (c: Data, _1: number, _2: any) => any) { + let _2: any; + function chunkedFn(chunks: ReadonlyArray>, i: number, j: number) { return fn(chunks[i], j, _2); } + return function (this: any, index: number, value: any) { + const data = this.data as ReadonlyArray>; + _2 = value; + const result = binarySearch(data, this._offsets, index, chunkedFn); + _2 = undefined; + return result; + }; +} + +/** @ignore */ +export function wrapChunkedIndexOf(indexOf: (c: Data, e: T['TValue'], o?: number) => any) { + let _1: any; + function chunkedIndexOf(data: ReadonlyArray>, chunkIndex: number, fromIndex: number) { + let begin = fromIndex, index = 0, total = 0; + for (let i = chunkIndex - 1, n = data.length; ++i < n;) { + const chunk = data[i]; + if (~(index = indexOf(chunk, _1, begin))) { + return total + index; + } + begin = 0; + total += chunk.length; + } + return -1; + } + return function (this: any, element: T['TValue'], offset?: number) { + _1 = element; + const data = this.data as ReadonlyArray>; + const result = typeof offset !== 'number' + ? 
chunkedIndexOf(data, 0, 0) + : binarySearch(data, this._offsets, offset, chunkedIndexOf); + _1 = undefined; + return result; + }; +} diff --git a/js/src/util/compat.ts b/js/src/util/compat.ts index 62fcb772e4390..dec75e02e3306 100644 --- a/js/src/util/compat.ts +++ b/js/src/util/compat.ts @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -import { ReadableInterop, ArrowJSONLike } from '../io/interfaces'; +import { ReadableInterop, ArrowJSONLike } from '../io/interfaces.js'; + +/* eslint-disable unicorn/throw-new-error */ /** @ignore */ type FSReadStream = import('fs').ReadStream; @@ -46,7 +48,7 @@ const [BigIntCtor, BigIntAvailable] = (() => { function BigIntUnavailable() { throw BigIntUnavailableError(); } BigIntUnavailable.asIntN = () => { throw BigIntUnavailableError(); }; BigIntUnavailable.asUintN = () => { throw BigIntUnavailableError(); }; - return typeof BigInt !== 'undefined' ? [BigInt, true] : [ BigIntUnavailable, false]; + return typeof BigInt !== 'undefined' ? [BigInt, true] : [BigIntUnavailable, false]; })() as [BigIntConstructor, boolean]; /** @ignore */ @@ -58,7 +60,7 @@ const [BigInt64ArrayCtor, BigInt64ArrayAvailable] = (() => { static from() { throw BigInt64ArrayUnavailableError(); } constructor() { throw BigInt64ArrayUnavailableError(); } } - return typeof BigInt64Array !== 'undefined' ? [BigInt64Array, true] : [ BigInt64ArrayUnavailable, false]; + return typeof BigInt64Array !== 'undefined' ? [BigInt64Array, true] : [BigInt64ArrayUnavailable, false]; })() as [BigInt64ArrayConstructor, boolean]; /** @ignore */ @@ -70,7 +72,7 @@ const [BigUint64ArrayCtor, BigUint64ArrayAvailable] = (() => { static from() { throw BigUint64ArrayUnavailableError(); } constructor() { throw BigUint64ArrayUnavailableError(); } } - return typeof BigUint64Array !== 'undefined' ? [BigUint64Array, true] : [ BigUint64ArrayUnavailable, false]; + return typeof BigUint64Array !== 'undefined' ? 
[BigUint64Array, true] : [BigUint64ArrayUnavailable, false]; })() as [BigUint64ArrayConstructor, boolean]; export { BigIntCtor as BigInt, BigIntAvailable }; @@ -105,7 +107,7 @@ export const isAsyncIterable = (x: any): x is AsyncIterable => { }; /** @ignore */ -export const isArrowJSON = (x: any): x is ArrowJSONLike => { +export const isArrowJSON = (x: any): x is ArrowJSONLike => { return isObject(x) && isObject(x['schema']); }; @@ -135,7 +137,7 @@ export const isFileHandle = (x: any): x is FileHandle => { /** @ignore */ export const isFSReadStream = (x: any): x is FSReadStream => { - return isReadableNodeStream(x) && isNumber(( x)['bytesRead']); + return isReadableNodeStream(x) && isNumber((x)['bytesRead']); }; /** @ignore */ @@ -143,12 +145,14 @@ export const isFetchResponse = (x: any): x is Response => { return isObject(x) && isReadableDOMStream(x['body']); }; +const isReadableInterop = (x: any): x is ReadableInterop => ('_getDOMStream' in x && '_getNodeStream' in x); + /** @ignore */ export const isWritableDOMStream = (x: any): x is WritableStream => { return isObject(x) && isFunction(x['abort']) && isFunction(x['getWriter']) && - !(x instanceof ReadableInterop); + !isReadableInterop(x); }; /** @ignore */ @@ -156,7 +160,7 @@ export const isReadableDOMStream = (x: any): x is ReadableStream => return isObject(x) && isFunction(x['cancel']) && isFunction(x['getReader']) && - !(x instanceof ReadableInterop); + !isReadableInterop(x); }; /** @ignore */ @@ -165,7 +169,7 @@ export const isWritableNodeStream = (x: any): x is NodeJS.WritableStream => { isFunction(x['end']) && isFunction(x['write']) && isBoolean(x['writable']) && - !(x instanceof ReadableInterop); + !isReadableInterop(x); }; /** @ignore */ @@ -174,5 +178,17 @@ export const isReadableNodeStream = (x: any): x is NodeJS.ReadableStream => { isFunction(x['read']) && isFunction(x['pipe']) && isBoolean(x['readable']) && - !(x instanceof ReadableInterop); + !isReadableInterop(x); +}; + +/** @ignore */ +export const 
isFlatbuffersByteBuffer = (x: any): x is import('flatbuffers').ByteBuffer => { + return isObject(x) && + isFunction(x['clear']) && + isFunction(x['bytes']) && + isFunction(x['position']) && + isFunction(x['setPosition']) && + isFunction(x['capacity']) && + isFunction(x['getBufferIdentifier']) && + isFunction(x['createLong']); }; diff --git a/js/src/util/fn.ts b/js/src/util/fn.ts deleted file mode 100644 index a58f9d3373185..0000000000000 --- a/js/src/util/fn.ts +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -/** @ignore */ -export function partial0(visit: (node: T) => any) { - return function(this: T) { return visit(this); }; -} - -/** @ignore */ -export function partial1(visit: (node: T, a: any) => any) { - return function(this: T, a: any) { return visit(this, a); }; -} - -/** @ignore */ -export function partial2(visit: (node: T, a: any, b: any) => any) { - return function(this: T, a: any, b: any) { return visit(this, a, b); }; -} diff --git a/js/src/util/int.ts b/js/src/util/int.ts index 147106dbb30d6..53ee7e9a9558b 100644 --- a/js/src/util/int.ts +++ b/js/src/util/int.ts @@ -29,22 +29,24 @@ function intAsHex(value: number): string { /** @ignore */ const kInt32DecimalDigits = 8; /** @ignore */ -const kPowersOfTen = [1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000]; +const kPowersOfTen = [ + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000 +]; /** @ignore */ export class BaseInt64 { - constructor (protected buffer: Uint32Array) {} + constructor(protected buffer: Uint32Array) { } public high(): number { return this.buffer[1]; } - public low (): number { return this.buffer[0]; } + public low(): number { return this.buffer[0]; } protected _times(other: BaseInt64) { // Break the left and right numbers into 16 bit chunks @@ -83,13 +85,13 @@ export class BaseInt64 { this.buffer[1] += (L[0] * R[3] + L[1] * R[2] + L[2] * R[1] + L[3] * R[0]) << 16; return this; - } + } protected _plus(other: BaseInt64) { const sum = (this.buffer[0] + other.buffer[0]) >>> 0; this.buffer[1] += other.buffer[1]; if (sum < (this.buffer[0] >>> 0)) { - ++this.buffer[1]; + ++this.buffer[1]; } this.buffer[0] = sum; } @@ -127,7 +129,7 @@ export class Uint64 extends BaseInt64 { /** @nocollapse */ public static from(val: any, out_buffer = new Uint32Array(2)): Uint64 { return Uint64.fromString( - typeof(val) === 'string' ? val : val.toString(), + typeof (val) === 'string' ? 
val : val.toString(), out_buffer ); } @@ -150,8 +152,8 @@ export class Uint64 extends BaseInt64 { const out = new Uint64(out_buffer); for (let posn = 0; posn < length;) { const group = kInt32DecimalDigits < length - posn ? - kInt32DecimalDigits : length - posn; - const chunk = new Uint64(new Uint32Array([parseInt(str.substr(posn, group), 10), 0])); + kInt32DecimalDigits : length - posn; + const chunk = new Uint64(new Uint32Array([Number.parseInt(str.slice(posn, posn + group), 10), 0])); const multiple = new Uint64(new Uint32Array([kPowersOfTen[group], 0])); out.times(multiple); @@ -164,7 +166,7 @@ export class Uint64 extends BaseInt64 { } /** @nocollapse */ - public static convertArray(values: (string|number)[]): Uint32Array { + public static convertArray(values: (string | number)[]): Uint32Array { const data = new Uint32Array(values.length * 2); for (let i = -1, n = values.length; ++i < n;) { Uint64.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2)); @@ -207,7 +209,9 @@ export class Int64 extends BaseInt64 { public lessThan(other: Int64): boolean { // force high bytes to be signed + // eslint-disable-next-line unicorn/prefer-math-trunc const this_high = this.buffer[1] << 0; + // eslint-disable-next-line unicorn/prefer-math-trunc const other_high = other.buffer[1] << 0; return this_high < other_high || (this_high === other_high && this.buffer[0] < other.buffer[0]); @@ -216,7 +220,7 @@ export class Int64 extends BaseInt64 { /** @nocollapse */ public static from(val: any, out_buffer = new Uint32Array(2)): Int64 { return Int64.fromString( - typeof(val) === 'string' ? val : val.toString(), + typeof (val) === 'string' ? val : val.toString(), out_buffer ); } @@ -241,8 +245,8 @@ export class Int64 extends BaseInt64 { const out = new Int64(out_buffer); for (let posn = negate ? 1 : 0; posn < length;) { const group = kInt32DecimalDigits < length - posn ? 
- kInt32DecimalDigits : length - posn; - const chunk = new Int64(new Uint32Array([parseInt(str.substr(posn, group), 10), 0])); + kInt32DecimalDigits : length - posn; + const chunk = new Int64(new Uint32Array([Number.parseInt(str.slice(posn, posn + group), 10), 0])); const multiple = new Int64(new Uint32Array([kPowersOfTen[group], 0])); out.times(multiple); @@ -254,7 +258,7 @@ export class Int64 extends BaseInt64 { } /** @nocollapse */ - public static convertArray(values: (string|number)[]): Uint32Array { + public static convertArray(values: (string | number)[]): Uint32Array { const data = new Uint32Array(values.length * 2); for (let i = -1, n = values.length; ++i < n;) { Int64.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2)); @@ -277,7 +281,7 @@ export class Int64 extends BaseInt64 { /** @ignore */ export class Int128 { - constructor (private buffer: Uint32Array) { + constructor(private buffer: Uint32Array) { // buffer[3] MSB (high) // buffer[2] // buffer[1] @@ -307,10 +311,10 @@ export class Int128 { public times(other: Int128): Int128 { // Break the left and right numbers into 32 bit chunks // so that we can multiply them without overflow. 
- const L0 = new Uint64(new Uint32Array([this.buffer[3], 0])); - const L1 = new Uint64(new Uint32Array([this.buffer[2], 0])); - const L2 = new Uint64(new Uint32Array([this.buffer[1], 0])); - const L3 = new Uint64(new Uint32Array([this.buffer[0], 0])); + const L0 = new Uint64(new Uint32Array([this.buffer[3], 0])); + const L1 = new Uint64(new Uint32Array([this.buffer[2], 0])); + const L2 = new Uint64(new Uint32Array([this.buffer[1], 0])); + const L3 = new Uint64(new Uint32Array([this.buffer[0], 0])); const R0 = new Uint64(new Uint32Array([other.buffer[3], 0])); const R1 = new Uint64(new Uint32Array([other.buffer[2], 0])); @@ -339,9 +343,9 @@ export class Int128 { .plus(Uint64.multiply(L2, R2)) .plus(Uint64.multiply(L3, R1)); this.buffer[3] += Uint64.multiply(L0, R3) - .plus(Uint64.multiply(L1, R2)) - .plus(Uint64.multiply(L2, R1)) - .plus(Uint64.multiply(L3, R0)).low(); + .plus(Uint64.multiply(L1, R2)) + .plus(Uint64.multiply(L2, R1)) + .plus(Uint64.multiply(L3, R0)).low(); return this; } @@ -390,7 +394,7 @@ export class Int128 { /** @nocollapse */ public static from(val: any, out_buffer = new Uint32Array(4)): Int128 { return Int128.fromString( - typeof(val) === 'string' ? val : val.toString(), + typeof (val) === 'string' ? val : val.toString(), out_buffer ); } @@ -415,8 +419,8 @@ export class Int128 { const out = new Int128(out_buffer); for (let posn = negate ? 1 : 0; posn < length;) { const group = kInt32DecimalDigits < length - posn ? 
- kInt32DecimalDigits : length - posn; - const chunk = new Int128(new Uint32Array([parseInt(str.substr(posn, group), 10), 0, 0, 0])); + kInt32DecimalDigits : length - posn; + const chunk = new Int128(new Uint32Array([Number.parseInt(str.slice(posn, posn + group), 10), 0, 0, 0])); const multiple = new Int128(new Uint32Array([kPowersOfTen[group], 0, 0, 0])); out.times(multiple); @@ -429,7 +433,7 @@ export class Int128 { } /** @nocollapse */ - public static convertArray(values: (string|number)[]): Uint32Array { + public static convertArray(values: (string | number)[]): Uint32Array { // TODO: Distinguish between string and number at compile-time const data = new Uint32Array(values.length * 4); for (let i = -1, n = values.length; ++i < n;) { diff --git a/js/src/util/math.ts b/js/src/util/math.ts index 47678e1a961ac..f6972a8883636 100644 --- a/js/src/util/math.ts +++ b/js/src/util/math.ts @@ -30,7 +30,7 @@ export function uint16ToFloat64(h: number) { const sigf = (h & 0x03FF) / 1024; const sign = (-1) ** ((h & 0x8000) >> 15); switch (expo) { - case 0x1F: return sign * (sigf ? NaN : 1 / 0); + case 0x1F: return sign * (sigf ? Number.NaN : 1 / 0); case 0x00: return sign * (sigf ? 6.103515625e-5 * sigf : 0); } return sign * (2 ** (expo - 15)) * (1 + sigf); @@ -55,9 +55,9 @@ export function float64ToUint16(d: number) { // 0x000fffff = 00000000 00001111 11111111 11111111 -- masks the 1st-20th bit const sign = (u32[1] & 0x80000000) >> 16 & 0xFFFF; - let expo = (u32[1] & 0x7ff00000), sigf = 0x0000; + let expo = (u32[1] & 0x7FF00000), sigf = 0x0000; - if (expo >= 0x40f00000) { + if (expo >= 0x40F00000) { // // If exponent overflowed, the float16 is either NaN or Infinity. // Rules to propagate the sign bit: mantissa > 0 ? 
NaN : +/-Infinity @@ -77,16 +77,16 @@ export function float64ToUint16(d: number) { expo = 0x7C00; } else { expo = (expo & 0x7C000000) >> 16; - sigf = (u32[1] & 0x000fffff) >> 10; + sigf = (u32[1] & 0x000FFFFF) >> 10; } - } else if (expo <= 0x3f000000) { + } else if (expo <= 0x3F000000) { // // If exponent underflowed, the float is either signed zero or subnormal. // // Magic numbers: // 0x3F000000 = 00111111 00000000 00000000 00000000 -- 6-bit exponent underflow // - sigf = 0x100000 + (u32[1] & 0x000fffff); + sigf = 0x100000 + (u32[1] & 0x000FFFFF); sigf = 0x100000 + (sigf << ((expo >> 20) - 998)) >> 21; expo = 0; } else { @@ -97,8 +97,8 @@ export function float64ToUint16(d: number) { // // Ensure the first mantissa bit (the 10th one) is 1 and round - expo = (expo - 0x3f000000) >> 10; - sigf = ((u32[1] & 0x000fffff) + 0x200) >> 10; + expo = (expo - 0x3F000000) >> 10; + sigf = ((u32[1] & 0x000FFFFF) + 0x200) >> 10; } return sign | expo | sigf & 0xFFFF; diff --git a/js/src/util/pretty.ts b/js/src/util/pretty.ts index a189fc490c3dd..de8fec3f5cb10 100644 --- a/js/src/util/pretty.ts +++ b/js/src/util/pretty.ts @@ -18,7 +18,7 @@ /** @ignore */ const undf = void (0); /** @ignore */ -export function valueToString(x: any) { +export function valueToString(x: any): string { if (x === null) { return 'null'; } if (x === undf) { return 'undefined'; } switch (typeof x) { @@ -33,5 +33,11 @@ export function valueToString(x: any) { if (typeof x[Symbol.toPrimitive] === 'function') { return x[Symbol.toPrimitive]('string'); } - return ArrayBuffer.isView(x) ? `[${x}]` : JSON.stringify(x); + if (ArrayBuffer.isView(x)) { + if (x instanceof BigInt64Array || x instanceof BigUint64Array) { + return `[${[...x].map(x => valueToString(x))}]`; + } + return `[${x}]`; + } + return ArrayBuffer.isView(x) ? `[${x}]` : JSON.stringify(x, (_, y) => typeof y === 'bigint' ? 
`${y}` : y); } diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts index 37a630858d942..47108c2fa982c 100644 --- a/js/src/util/recordbatch.ts +++ b/js/src/util/recordbatch.ts @@ -15,107 +15,86 @@ // specific language governing permissions and limitations // under the License. -import { Column } from '../column'; -import { Vector } from '../vector'; -import { DataType } from '../type'; -import { Data, Buffers } from '../data'; -import { Schema, Field } from '../schema'; -import { Chunked } from '../vector/chunked'; -import { RecordBatch } from '../recordbatch'; - -const noopBuf = new Uint8Array(0); -const nullBufs = (bitmapLength: number) => [ - noopBuf, noopBuf, new Uint8Array(bitmapLength), noopBuf -] as Buffers; +import { Vector } from '../vector.js'; +import { Data, makeData } from '../data.js'; +import { Struct, TypeMap } from '../type.js'; +import { Schema, Field } from '../schema.js'; +import { RecordBatch } from '../recordbatch.js'; /** @ignore */ -export function ensureSameLengthData( - schema: Schema, - chunks: Data[], - batchLength = chunks.reduce((l, c) => Math.max(l, c.length), 0) -) { - let data: Data; - let field: Field; - let i = -1; - const n = chunks.length; - const fields = [...schema.fields]; - const batchData = [] as Data[]; - const bitmapLength = ((batchLength + 63) & ~63) >> 3; - while (++i < n) { - if ((data = chunks[i]) && data.length === batchLength) { - batchData[i] = data; - } else { - (field = fields[i]).nullable || (fields[i] = fields[i].clone({ nullable: true }) as Field); - batchData[i] = data ? 
data._changeLengthAndBackfillNullBitmap(batchLength) - : Data.new(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data; - } - } - return [new Schema(fields), batchLength, batchData] as [Schema, number, Data[]]; +export function distributeVectorsIntoRecordBatches(schema: Schema, vecs: Vector[]): [Schema, RecordBatch[]] { + return uniformlyDistributeChunksAcrossRecordBatches(schema, vecs.map((v) => v.data.concat())); } /** @ignore */ -export function distributeColumnsIntoRecordBatches(columns: Column[]): [Schema, RecordBatch[]] { - return distributeVectorsIntoRecordBatches(new Schema(columns.map(({ field }) => field)), columns); -} - -/** @ignore */ -export function distributeVectorsIntoRecordBatches(schema: Schema, vecs: (Vector | Chunked)[]): [Schema, RecordBatch[]] { - return uniformlyDistributeChunksAcrossRecordBatches(schema, vecs.map((v) => v instanceof Chunked ? v.chunks.map((c) => c.data) : [v.data])); -} - -/** @ignore */ -function uniformlyDistributeChunksAcrossRecordBatches(schema: Schema, columns: Data[][]): [Schema, RecordBatch[]] { +function uniformlyDistributeChunksAcrossRecordBatches(schema: Schema, cols: Data[][]): [Schema, RecordBatch[]] { const fields = [...schema.fields]; - const batchArgs = [] as [number, Data[]][]; - const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) }; + const batches = [] as Data>[]; + const memo = { numBatches: cols.reduce((n, c) => Math.max(n, c.length), 0) }; let numBatches = 0, batchLength = 0; let i = -1; - const numColumns = columns.length; - let child: Data, childData: Data[] = []; + const numColumns = cols.length; + let child: Data, children: Data[] = []; while (memo.numBatches-- > 0) { for (batchLength = Number.POSITIVE_INFINITY, i = -1; ++i < numColumns;) { - childData[i] = child = columns[i].shift()!; + children[i] = child = cols[i].shift()!; batchLength = Math.min(batchLength, child ? 
child.length : batchLength); } - if (isFinite(batchLength)) { - childData = distributeChildData(fields, batchLength, childData, columns, memo); + if (Number.isFinite(batchLength)) { + children = distributeChildren(fields, batchLength, children, cols, memo); if (batchLength > 0) { - batchArgs[numBatches++] = [batchLength, childData.slice()]; + batches[numBatches++] = makeData({ + type: new Struct(fields), + length: batchLength, + nullCount: 0, + children: children.slice() + }); } } } + return [ - schema = new Schema(fields, schema.metadata), - batchArgs.map((xs) => new RecordBatch(schema, ...xs)) + schema = schema.assign(fields), + batches.map((data) => new RecordBatch(schema, data)) ]; } /** @ignore */ -function distributeChildData(fields: Field[], batchLength: number, childData: Data[], columns: Data[][], memo: { numBatches: number }) { - let data: Data; - let field: Field; - let length = 0, i = -1; - const n = columns.length; - const bitmapLength = ((batchLength + 63) & ~63) >> 3; - while (++i < n) { - if ((data = childData[i]) && ((length = data.length) >= batchLength)) { +function distributeChildren( + fields: Field[], + batchLength: number, + children: Data[], + columns: Data[][], + memo: { numBatches: number } +) { + const nullBitmapSize = ((batchLength + 63) & ~63) >> 3; + for (let i = -1, n = columns.length; ++i < n;) { + const child = children[i]; + const length = child?.length; + if (length >= batchLength) { if (length === batchLength) { - childData[i] = data; + children[i] = child; } else { - childData[i] = data.slice(0, batchLength); - data = data.slice(batchLength, length - batchLength); - memo.numBatches = Math.max(memo.numBatches, columns[i].unshift(data)); + children[i] = child.slice(0, batchLength); + memo.numBatches = Math.max(memo.numBatches, columns[i].unshift( + child.slice(batchLength, length - batchLength) + )); } } else { - (field = fields[i]).nullable || (fields[i] = field.clone({ nullable: true }) as Field); - childData[i] = data ? 
data._changeLengthAndBackfillNullBitmap(batchLength) - : Data.new(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data; + const field = fields[i]; + fields[i] = field.clone({ nullable: true }); + children[i] = child?._changeLengthAndBackfillNullBitmap(batchLength) ?? makeData({ + type: field.type, + length: batchLength, + nullCount: batchLength, + nullBitmap: new Uint8Array(nullBitmapSize) + }) as Data; } } - return childData; + return children; } diff --git a/js/src/util/vector.ts b/js/src/util/vector.ts index a6cfd0373f17a..179b17a39f3f3 100644 --- a/js/src/util/vector.ts +++ b/js/src/util/vector.ts @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -import { Vector } from '../vector'; -import { MapRow, StructRow } from '../vector/row'; -import { compareArrayLike } from '../util/buffer'; -import { BigInt, BigIntAvailable } from './compat'; +import { Vector } from '../vector.js'; +import { MapRow } from '../row/map.js'; +import { StructRow } from '../row/struct.js'; +import { compareArrayLike } from '../util/buffer.js'; /** @ignore */ type RangeLike = { length: number; stride?: number }; @@ -54,13 +54,12 @@ export function clampRange = Cl (rhs < 0) && (rhs = ((rhs % len) + len) % len); // ensure lhs <= rhs (rhs < lhs) && (tmp = lhs, lhs = rhs, rhs = tmp); - // ensure rhs <= length + // ensure rhs <= length (rhs > len) && (rhs = len); return then ? then(source, lhs, rhs) : [lhs, rhs]; } -const big0 = BigIntAvailable ? BigInt(0) : 0; const isNaNFast = (value: any) => value !== value; /** @ignore */ @@ -72,9 +71,7 @@ export function createElementComparator(search: any) { if (isNaNFast(search)) { return isNaNFast; } - return typeofSearch !== 'bigint' - ? 
(value: any) => value === search - : (value: any) => (big0 + value) === search; + return (value: any) => value === search; } // Compare Dates if (search instanceof Date) { @@ -86,13 +83,14 @@ export function createElementComparator(search: any) { return (value: any) => value ? compareArrayLike(search, value) : false; } // Compare Maps and Rows - if (search instanceof Map) { return creatMapComparator(search); } + if (search instanceof Map) { return createMapComparator(search); } // Compare Array-likes if (Array.isArray(search)) { return createArrayLikeComparator(search); } // Compare Vectors if (search instanceof Vector) { return createVectorComparator(search); } + return createObjectComparator(search, true); // Compare non-empty Objects - return createObjectComparator(search); + // return createObjectComparator(search, search instanceof Proxy); } /** @ignore */ @@ -105,10 +103,10 @@ function createArrayLikeComparator(lhs: ArrayLike) { } /** @ignore */ -function creatMapComparator(lhs: Map) { +function createMapComparator(lhs: Map) { let i = -1; const comparators = [] as ((x: any) => boolean)[]; - lhs.forEach((v) => comparators[++i] = createElementComparator(v)); + for (const v of lhs.values()) comparators[++i] = createElementComparator(v); return createSubElementsComparator(comparators); } @@ -122,10 +120,10 @@ function createVectorComparator(lhs: Vector) { } /** @ignore */ -function createObjectComparator(lhs: any) { +function createObjectComparator(lhs: any, allowEmpty = false) { const keys = Object.keys(lhs); // Only compare non-empty Objects - if (keys.length === 0) { return () => false; } + if (!allowEmpty && keys.length === 0) { return () => false; } const comparators = [] as ((x: any) => boolean)[]; for (let i = -1, n = keys.length; ++i < n;) { comparators[i] = createElementComparator(lhs[keys[i]]); @@ -141,9 +139,9 @@ function createSubElementsComparator(comparators: ((x: any) => boolean)[], keys? 
switch (rhs.constructor) { case Array: return compareArray(comparators, rhs); case Map: + return compareObject(comparators, rhs, rhs.keys()); case MapRow: case StructRow: - return compareObject(comparators, rhs, rhs.keys()); case Object: case undefined: // support `Object.create(null)` objects return compareObject(comparators, rhs, keys || Object.keys(rhs)); @@ -183,7 +181,7 @@ function compareObject(comparators: ((x: any) => boolean)[], obj: Map, let rKey = rKeyItr.next(); for (; i < n && !lKey.done && !rKey.done && !rVal.done; - ++i, lKey = lKeyItr.next(), rKey = rKeyItr.next(), rVal = rValItr.next()) { + ++i, lKey = lKeyItr.next(), rKey = rKeyItr.next(), rVal = rValItr.next()) { if (lKey.value !== rKey.value || !comparators[i](rVal.value)) { break; } diff --git a/js/src/vector.ts b/js/src/vector.ts index bd7838cdf9634..5ec00a6d7a055 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -15,59 +15,460 @@ // specific language governing permissions and limitations // under the License. 
-import { Data } from './data'; -import { DataType } from './type'; -import { Chunked } from './vector/chunked'; +import { Type } from './enum.js'; +import { clampRange } from './util/vector.js'; +import { DataType, strideForType } from './type.js'; +import { Data, makeData, DataProps } from './data.js'; +import { BigIntArray, TypedArray, TypedArrayDataType } from './interfaces.js'; -/** @ignore */ -export interface Clonable { - clone(...args: any[]): R; -} - -/** @ignore */ -export interface Sliceable { - slice(begin?: number, end?: number): R; -} +import { + isChunkedValid, + computeChunkOffsets, + computeChunkNullCounts, + sliceChunks, + wrapChunkedCall1, + wrapChunkedCall2, + wrapChunkedIndexOf, +} from './util/chunk.js'; -/** @ignore */ -export interface Applicative { - concat(...others: Vector[]): R; - readonly [Symbol.isConcatSpreadable]: boolean; -} - -export interface AbstractVector - extends Clonable>, - Sliceable>, - Applicative> { +import { instance as getVisitor } from './visitor/get.js'; +import { instance as setVisitor } from './visitor/set.js'; +import { instance as indexOfVisitor } from './visitor/indexof.js'; +import { instance as iteratorVisitor } from './visitor/iterator.js'; +import { instance as byteLengthVisitor } from './visitor/bytelength.js'; +export interface Vector { + /// + // Virtual properties for the TypeScript compiler. + // These do not exist at runtime. + /// readonly TType: T['TType']; readonly TArray: T['TArray']; readonly TValue: T['TValue']; + + /** + * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/isConcatSpreadable + */ + [Symbol.isConcatSpreadable]: true; } -export abstract class AbstractVector implements Iterable { +const visitorsByTypeId = {} as { [typeId: number]: { get: any; set: any; indexOf: any; byteLength: any } }; +const vectorPrototypesByTypeId = {} as { [typeId: number]: any }; + +/** + * Array-like data structure. 
Use the convenience method {@link makeVector} and {@link vectorFromArray} to create vectors. + */ +export class Vector { + + constructor(input: readonly (Data | Vector)[]) { + const data: Data[] = input[0] instanceof Vector + ? (input as Vector[]).flatMap(x => x.data) + : input as Data[]; + if (data.some((x) => !(x instanceof Data))) { + throw new TypeError('Vector constructor expects an Array of Data instances.'); + } + const type = data[0]?.type; + switch (data.length) { + case 0: this._offsets = [0]; break; + case 1: { + // special case for unchunked vectors + const { get, set, indexOf, byteLength } = visitorsByTypeId[type.typeId]; + const unchunkedData = data[0]; + + this.isValid = (index: number) => isChunkedValid(unchunkedData, index); + this.get = (index: number) => get(unchunkedData, index); + this.set = (index: number, value: T) => set(unchunkedData, index, value); + this.indexOf = (index: number) => indexOf(unchunkedData, index); + this.getByteLength = (index: number) => byteLength(unchunkedData, index); + this._offsets = [0, unchunkedData.length]; + break; + } + default: + Object.setPrototypeOf(this, vectorPrototypesByTypeId[type.typeId]); + this._offsets = computeChunkOffsets(data); + break; + } + this.data = data; + this.type = type; + this.stride = strideForType(type); + this.numChildren = type.children?.length ?? 0; + this.length = this._offsets[this._offsets.length - 1]; + } + + declare protected _offsets: number[] | Uint32Array; + declare protected _nullCount: number; + declare protected _byteLength: number; + + /** + * The {@link DataType `DataType`} of this Vector. + */ + public declare readonly type: T; + + /** + * The primitive {@link Data `Data`} instances for this Vector's elements. + */ + public declare readonly data: ReadonlyArray>; + + /** + * The number of elements in this Vector. + */ + public declare readonly length: number; + + /** + * The number of primitive values per Vector element. 
+ */ + public declare readonly stride: number; + + /** + * The number of child Vectors if this Vector is a nested dtype. + */ + public declare readonly numChildren: number; + + /** + * The aggregate size (in bytes) of this Vector's buffers and/or child Vectors. + */ + public get byteLength() { + if (this._byteLength === -1) { + this._byteLength = this.data.reduce((byteLength, data) => byteLength + data.byteLength, 0); + } + return this._byteLength; + } + + /** + * The number of null elements in this Vector. + */ + public get nullCount() { + if (this._nullCount === -1) { + this._nullCount = computeChunkNullCounts(this.data); + } + return this._nullCount; + } + + /** + * The Array or TypedAray constructor used for the JS representation + * of the element's values in {@link Vector.prototype.toArray `toArray()`}. + */ + public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; } + + /** + * The name that should be printed when the Vector is logged in a message. + */ + public get [Symbol.toStringTag]() { + return `${this.VectorName}<${this.type[Symbol.toStringTag]}>`; + } + + /** + * The name of this Vector. + */ + public get VectorName() { return `${Type[this.type.typeId]}Vector`; } + + /** + * Check whether an element is null. + * @param index The index at which to read the validity bitmap. + */ + // @ts-ignore + public isValid(index: number): boolean { return false; } + + /** + * Get an element value by position. + * @param index The index of the element to read. + */ + // @ts-ignore + public get(index: number): T['TValue'] | null { return null; } + + /** + * Set an element value by position. + * @param index The index of the element to write. + * @param value The value to set. + */ + // @ts-ignore + public set(index: number, value: T['TValue'] | null): void { return; } + + /** + * Retrieve the index of the first occurrence of a value in an Vector. + * @param element The value to locate in the Vector. 
+ * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0. + */ + // @ts-ignore + public indexOf(element: T['TValue'], offset?: number): number { return -1; } + + public includes(element: T['TValue'], offset?: number): boolean { return this.indexOf(element, offset) > 0; } + + /** + * Get the size in bytes of an element by index. + * @param index The index at which to get the byteLength. + */ + // @ts-ignore + public getByteLength(index: number): number { return 0; } + + /** + * Iterator for the Vector's elements. + */ + public [Symbol.iterator](): IterableIterator { + return iteratorVisitor.visit(this); + } - public abstract readonly data: Data; - public abstract readonly type: T; - public abstract readonly typeId: T['TType']; - public abstract readonly length: number; - public abstract readonly stride: number; - public abstract readonly nullCount: number; - public abstract readonly byteLength: number; - public abstract readonly numChildren: number; + /** + * Combines two or more Vectors of the same type. + * @param others Additional Vectors to add to the end of this Vector. + */ + public concat(...others: Vector[]): Vector { + return new Vector(this.data.concat(others.flatMap((x) => x.data).flat(Number.POSITIVE_INFINITY))); + } - public abstract readonly ArrayType: T['ArrayType']; + /** + * Return a zero-copy sub-section of this Vector. + * @param start The beginning of the specified portion of the Vector. + * @param end The end of the specified portion of the Vector. This is exclusive of the element at the index 'end'. 
+ */ + public slice(begin?: number, end?: number): Vector { + return new Vector(clampRange(this, begin, end, ({ data, _offsets }, begin, end) => + sliceChunks(data, _offsets, begin, end) + )); + } - public abstract isValid(index: number): boolean; - public abstract get(index: number): T['TValue'] | null; - public abstract set(index: number, value: T['TValue'] | null): void; - public abstract indexOf(value: T['TValue'] | null, fromIndex?: number): number; - public abstract [Symbol.iterator](): IterableIterator; + public toJSON() { return [...this]; } - public abstract toArray(): T['TArray']; - public abstract getChildAt(index: number): Vector | null; + /** + * Return a JavaScript Array or TypedArray of the Vector's elements. + * + * @note If this Vector contains a single Data chunk and the Vector's type is a + * primitive numeric type corresponding to one of the JavaScript TypedArrays, this + * method returns a zero-copy slice of the underlying TypedArray values. If there's + * more than one chunk, the resulting TypedArray will be a copy of the data from each + * chunk's underlying TypedArray values. + * + * @returns An Array or TypedArray of the Vector's elements, based on the Vector's DataType. + */ + public toArray(): T['TArray'] { + const { type, data, length, stride, ArrayType } = this; + // Fast case, return subarray if possible + switch (type.typeId) { + case Type.Int: + case Type.Float: + case Type.Decimal: + case Type.Time: + case Type.Timestamp: + switch (data.length) { + case 0: return new ArrayType(); + case 1: return data[0].values.subarray(0, length * stride); + default: return data.reduce((memo, { values }) => { + memo.array.set(values, memo.offset); + memo.offset += values.length; + return memo; + }, { array: new ArrayType(length * stride), offset: 0 }).array; + } + } + // Otherwise if not primitive, slow copy + return [...this] as T['TArray']; + } + + /** + * Returns a string representation of the Vector. 
+ * + * @returns A string representation of the Vector. + */ + public toString() { + return `[${[...this].join(',')}]`; + } + + /** + * Returns a child Vector by name, or null if this Vector has no child with the given name. + * @param name The name of the child to retrieve. + */ + public getChild(name: R) { + return this.getChildAt(this.type.children?.findIndex((f) => f.name === name)); + } + + /** + * Returns a child Vector by index, or null if this Vector has no child at the supplied index. + * @param index The index of the child to retrieve. + */ + public getChildAt(index: number): Vector | null { + if (index > -1 && index < this.numChildren) { + return new Vector(this.data.map(({ children }) => children[index] as Data)); + } + return null; + } + + public get isMemoized(): boolean { + if (DataType.isDictionary(this.type)) { + return this.data[0].dictionary!.isMemoized; + } + return false; + } + + /** + * Adds memoization to the Vector's {@link get} method. For dictionary + * vectors, this method return a vector that memoizes only the dictionary + * values. + * + * Memoization is very useful when decoding a value is expensive such as + * Uft8. The memoization creates a cache of the size of the Vector and + * therfore increases memory usage. + * + * @returns A new vector that memoizes calls to {@link get}. + */ + public memoize(): MemoizedVector { + if (DataType.isDictionary(this.type)) { + const dictionary = new MemoizedVector(this.data[0].dictionary!); + const newData = this.data.map((data) => { + const cloned = data.clone(); + cloned.dictionary = dictionary; + return cloned; + }); + return new Vector(newData); + } + return new MemoizedVector(this); + } + + /** + * Returns a vector without memoization of the {@link get} method. If this + * vector is not memoized, this method returns this vector. + * + * @returns A a vector without memoization. 
+ */ + public unmemoize(): Vector { + if (DataType.isDictionary(this.type) && this.isMemoized) { + const dictionary = this.data[0].dictionary!.unmemoize(); + const newData = this.data.map((data) => { + const newData = data.clone(); + newData.dictionary = dictionary; + return newData; + }); + return new Vector(newData); + } + return this; + } + + // Initialize this static property via an IIFE so bundlers don't tree-shake + // out this logic, but also so we're still compliant with `"sideEffects": false` + protected static [Symbol.toStringTag] = ((proto: Vector) => { + (proto as any).type = DataType.prototype; + (proto as any).data = []; + (proto as any).length = 0; + (proto as any).stride = 1; + (proto as any).numChildren = 0; + (proto as any)._nullCount = -1; + (proto as any)._byteLength = -1; + (proto as any)._offsets = new Uint32Array([0]); + (proto as any)[Symbol.isConcatSpreadable] = true; + + const typeIds: Type[] = Object.keys(Type) + .map((T: any) => Type[T] as any) + .filter((T: any) => typeof T === 'number' && T !== Type.NONE); + + for (const typeId of typeIds) { + const get = getVisitor.getVisitFnByTypeId(typeId); + const set = setVisitor.getVisitFnByTypeId(typeId); + const indexOf = indexOfVisitor.getVisitFnByTypeId(typeId); + const byteLength = byteLengthVisitor.getVisitFnByTypeId(typeId); + + visitorsByTypeId[typeId] = { get, set, indexOf, byteLength }; + vectorPrototypesByTypeId[typeId] = Object.create(proto, { + ['isValid']: { value: wrapChunkedCall1(isChunkedValid) }, + ['get']: { value: wrapChunkedCall1(getVisitor.getVisitFnByTypeId(typeId)) }, + ['set']: { value: wrapChunkedCall2(setVisitor.getVisitFnByTypeId(typeId)) }, + ['indexOf']: { value: wrapChunkedIndexOf(indexOfVisitor.getVisitFnByTypeId(typeId)) }, + ['getByteLength']: { value: wrapChunkedCall1(byteLengthVisitor.getVisitFnByTypeId(typeId)) }, + }); + } + + return 'Vector'; + })(Vector.prototype); +} + +class MemoizedVector extends Vector { + + public constructor(vector: Vector) { + 
super(vector.data); + + const get = this.get; + const set = this.set; + const slice = this.slice; + + const cache = new Array(this.length); + + Object.defineProperty(this, 'get', { + value(index: number) { + const cachedValue = cache[index]; + if (cachedValue !== undefined) { + return cachedValue; + } + const value = get.call(this, index); + cache[index] = value; + return value; + } + }); + + Object.defineProperty(this, 'set', { + value(index: number, value: T['TValue'] | null) { + set.call(this, index, value); + cache[index] = value; + } + }); + + Object.defineProperty(this, 'slice', { + value: (begin?: number, end?: number) => new MemoizedVector(slice.call(this, begin, end)) + }); + + Object.defineProperty(this, 'isMemoized', { value: true }); + + Object.defineProperty(this, 'unmemoize', { + value: () => new Vector(this.data) + }); + + Object.defineProperty(this, 'memoize', { + value: () => this + }); + } } -(AbstractVector.prototype as any).data = null; +import * as dtypes from './type.js'; + +/** + * Creates a Vector without data copies. 
+ * + * @example + * ```ts + * const vector = makeVector(new Int32Array([1, 2, 3])); + * ``` + */ +export function makeVector(data: T | readonly T[]): Vector>; +export function makeVector(data: T | readonly T[]): Vector; +export function makeVector(data: Data | readonly Data[]): Vector; +export function makeVector(data: Vector | readonly Vector[]): Vector; +export function makeVector(data: DataProps | readonly DataProps[]): Vector; -export { AbstractVector as Vector }; +export function makeVector(init: any) { + if (init) { + if (init instanceof Data) { return new Vector([init]); } + if (init instanceof Vector) { return new Vector(init.data); } + if (init.type instanceof DataType) { return new Vector([makeData(init)]); } + if (Array.isArray(init)) { + return new Vector(init.flatMap(v => unwrapInputs(v))); + } + if (ArrayBuffer.isView(init)) { + if (init instanceof DataView) { + init = new Uint8Array(init.buffer); + } + const props = { offset: 0, length: init.length, nullCount: 0, data: init }; + if (init instanceof Int8Array) { return new Vector([makeData({ ...props, type: new dtypes.Int8 })]); } + if (init instanceof Int16Array) { return new Vector([makeData({ ...props, type: new dtypes.Int16 })]); } + if (init instanceof Int32Array) { return new Vector([makeData({ ...props, type: new dtypes.Int32 })]); } + if (init instanceof BigInt64Array) { return new Vector([makeData({ ...props, type: new dtypes.Int64 })]); } + if (init instanceof Uint8Array || init instanceof Uint8ClampedArray) { return new Vector([makeData({ ...props, type: new dtypes.Uint8 })]); } + if (init instanceof Uint16Array) { return new Vector([makeData({ ...props, type: new dtypes.Uint16 })]); } + if (init instanceof Uint32Array) { return new Vector([makeData({ ...props, type: new dtypes.Uint32 })]); } + if (init instanceof BigUint64Array) { return new Vector([makeData({ ...props, type: new dtypes.Uint64 })]); } + if (init instanceof Float32Array) { return new Vector([makeData({ ...props, type: new 
dtypes.Float32 })]); } + if (init instanceof Float64Array) { return new Vector([makeData({ ...props, type: new dtypes.Float64 })]); } + throw new Error('Unrecognized input'); + } + } + throw new Error('Unrecognized input'); +} + +function unwrapInputs(x: any) { + return x instanceof Data ? [x] : (x instanceof Vector ? x.data : makeVector(x).data); +} diff --git a/js/src/vector/base.ts b/js/src/vector/base.ts deleted file mode 100644 index 2ceecdda4a065..0000000000000 --- a/js/src/vector/base.ts +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Data } from '../data'; -import { Type } from '../enum'; -import { DataType } from '../type'; -import { Chunked } from './chunked'; -import { clampRange } from '../util/vector'; -import { VectorType as V } from '../interfaces'; -import { AbstractVector, Vector, Clonable, Sliceable, Applicative } from '../vector'; - -/** @ignore */ -export interface BaseVector extends Clonable>, Sliceable>, Applicative> { - slice(begin?: number, end?: number): V; - concat(...others: Vector[]): Chunked; - clone(data: Data, children?: Vector[]): V; -} - -/** @ignore */ -export abstract class BaseVector extends AbstractVector - implements Clonable>, Sliceable>, Applicative> { - - protected _children?: Vector[]; - - constructor(data: Data, children?: Vector[]) { - super(); - this._children = children; - this.numChildren = data.childData.length; - this._bindDataAccessors(this.data = data); - } - - public readonly data: Data; - public readonly numChildren: number; - - public get type() { return this.data.type; } - public get typeId() { return this.data.typeId; } - public get length() { return this.data.length; } - public get offset() { return this.data.offset; } - public get stride() { return this.data.stride; } - public get nullCount() { return this.data.nullCount; } - public get byteLength() { return this.data.byteLength; } - public get VectorName() { return `${Type[this.typeId]}Vector`; } - - public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; } - - public get values() { return this.data.values; } - public get typeIds() { return this.data.typeIds; } - public get nullBitmap() { return this.data.nullBitmap; } - public get valueOffsets() { return this.data.valueOffsets; } - - public get [Symbol.toStringTag]() { return `${this.VectorName}<${this.type[Symbol.toStringTag]}>`; } - - public clone(data: Data, children = this._children) { - return Vector.new(data, children) as any; - } - - public concat(...others: Vector[]) { - return Chunked.concat(this, ...others); 
- } - - public slice(begin?: number, end?: number) { - // Adjust args similar to Array.prototype.slice. Normalize begin/end to - // clamp between 0 and length, and wrap around on negative indices, e.g. - // slice(-1, 5) or slice(5, -1) - return clampRange(this, begin, end, this._sliceInternal); - } - - public isValid(index: number): boolean { - if (this.nullCount > 0) { - const idx = this.offset + index; - const val = this.nullBitmap[idx >> 3]; - const mask = (val & (1 << (idx % 8))); - return mask !== 0; - } - return true; - } - - public getChildAt(index: number): Vector | null { - return index < 0 || index >= this.numChildren ? null : ( - (this._children || (this._children = []))[index] || - (this._children[index] = Vector.new(this.data.childData[index] as Data)) - ) as Vector; - } - - public toJSON() { return [...this]; } - - protected _sliceInternal(self: this, begin: number, end: number) { - return self.clone(self.data.slice(begin, end - begin), null!); - } - - // @ts-ignore - protected _bindDataAccessors(data: Data) { - // Implementation in src/vectors/index.ts due to circular dependency/packaging shenanigans - } -} - -(BaseVector.prototype as any)[Symbol.isConcatSpreadable] = true; diff --git a/js/src/vector/binary.ts b/js/src/vector/binary.ts deleted file mode 100644 index 603187a7822b3..0000000000000 --- a/js/src/vector/binary.ts +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { Vector } from '../vector'; -import { BaseVector } from './base'; -import { Binary, Utf8 } from '../type'; - -/** @ignore */ -export class BinaryVector extends BaseVector { - public asUtf8() { - return Vector.new(this.data.clone(new Utf8())); - } -} diff --git a/js/src/vector/bool.ts b/js/src/vector/bool.ts deleted file mode 100644 index b555f469271d3..0000000000000 --- a/js/src/vector/bool.ts +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Bool } from '../type'; -import { Chunked } from './chunked'; -import { BaseVector } from './base'; -import { VectorBuilderOptions } from './index'; -import { vectorFromValuesWithType } from './index'; -import { VectorBuilderOptionsAsync } from './index'; - -/** @ignore */ -export class BoolVector extends BaseVector { - public static from(input: Iterable): BoolVector; - public static from(input: AsyncIterable): Promise; - public static from(input: VectorBuilderOptions): Chunked; - public static from(input: VectorBuilderOptionsAsync): Promise>; - /** @nocollapse */ - public static from(input: Iterable | AsyncIterable | VectorBuilderOptions | VectorBuilderOptionsAsync) { - return vectorFromValuesWithType(() => new Bool(), input); - } -} diff --git a/js/src/vector/chunked.ts b/js/src/vector/chunked.ts deleted file mode 100644 index 656c4a1b6c716..0000000000000 --- a/js/src/vector/chunked.ts +++ /dev/null @@ -1,320 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Data } from '../data'; -import { Field } from '../schema'; -import { clampRange } from '../util/vector'; -import { DataType, Dictionary } from '../type'; -import { selectChunkArgs } from '../util/args'; -import { DictionaryVector } from './dictionary'; -import { AbstractVector, Vector } from '../vector'; -import { Clonable, Sliceable, Applicative } from '../vector'; - -/** @ignore */ -type ChunkedDict = T extends Dictionary ? Vector : null | never; -/** @ignore */ -type ChunkedKeys = T extends Dictionary ? Vector | Chunked : null | never; - -/** @ignore */ -export type SearchContinuation = (column: T, chunkIndex: number, valueIndex: number) => any; - -/** @ignore */ -class ChunkedIterator implements IterableIterator { - private chunkIndex = 0; - private chunkIterator: IterableIterator; - - constructor( - private chunks: Vector[], - ) { - this.chunkIterator = this.getChunkIterator(); - } - - next(): IteratorResult { - while (this.chunkIndex < this.chunks.length) { - const next = this.chunkIterator.next(); - - if (!next.done) { - return next; - } - - if (++this.chunkIndex < this.chunks.length) { - this.chunkIterator = this.getChunkIterator(); - } - } - - return {done: true, value: null}; - } - - getChunkIterator() { - return this.chunks[this.chunkIndex][Symbol.iterator](); - } - - [Symbol.iterator]() { - return this; - } -} - -/** @ignore */ -export class Chunked - extends AbstractVector - implements Clonable>, - Sliceable>, - Applicative> { - - /** @nocollapse */ - public static flatten(...vectors: (Vector | Vector[])[]) { - return selectChunkArgs>(Vector, vectors); - } - - /** @nocollapse */ - public static concat(...vectors: (Vector | Vector[])[]) { - const chunks = Chunked.flatten(...vectors); - return new Chunked(chunks[0].type, chunks); - } - - protected _type: T; - protected _length: number; - protected _chunks: Vector[]; - protected _numChildren: number; - protected _children?: Chunked[]; - protected _nullCount = -1; - protected _chunkOffsets: 
Uint32Array; - - constructor(type: T, chunks: Vector[] = [], offsets = calculateOffsets(chunks)) { - super(); - this._type = type; - this._chunks = chunks; - this._chunkOffsets = offsets; - this._length = offsets[offsets.length - 1]; - this._numChildren = (this._type.children || []).length; - } - - public get type() { return this._type; } - public get length() { return this._length; } - public get chunks() { return this._chunks; } - public get typeId(): T['TType'] { return this._type.typeId; } - public get VectorName() { return `Chunked<${this._type}>`; } - public get data(): Data { - return this._chunks[0] ? this._chunks[0].data : null; - } - - public get ArrayType() { return this._type.ArrayType; } - public get numChildren() { return this._numChildren; } - public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; } - public get byteLength(): number { - return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0); - } - public get nullCount() { - let nullCount = this._nullCount; - if (nullCount < 0) { - this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0); - } - return nullCount; - } - - protected _indices?: ChunkedKeys; - public get indices(): ChunkedKeys | null { - if (DataType.isDictionary(this._type)) { - if (!this._indices) { - const chunks = ( this._chunks) as DictionaryVector[]; - this._indices = (chunks.length === 1 - ? 
chunks[0].indices - : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys; - } - return this._indices; - } - return null; - } - public get dictionary(): ChunkedDict | null { - if (DataType.isDictionary(this._type)) { - return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict; - } - return null; - } - - public [Symbol.iterator](): IterableIterator { - return new ChunkedIterator(this._chunks); - } - - public clone(chunks = this._chunks): Chunked { - return new Chunked(this._type, chunks); - } - - public concat(...others: Vector[]): Chunked { - return this.clone(Chunked.flatten(this, ...others)); - } - - public slice(begin?: number, end?: number): Chunked { - return clampRange(this, begin, end, this._sliceInternal); - } - - public getChildAt(index: number): Chunked | null { - - if (index < 0 || index >= this._numChildren) { return null; } - - const columns = this._children || (this._children = []); - let child: Chunked, field: Field, chunks: Vector[]; - - if (child = columns[index]) { return child; } - if (field = ((this._type.children || [])[index] as Field)) { - chunks = this._chunks - .map((vector) => vector.getChildAt(index)) - .filter((vec): vec is Vector => vec != null); - if (chunks.length > 0) { - return (columns[index] = new Chunked(field.type, chunks)); - } - } - - return null; - } - - public search(index: number): [number, number] | null; - public search>>(index: number, then?: N): ReturnType; - public search>>(index: number, then?: N) { - const idx = index; - // binary search to find the child vector and value indices - const offsets = this._chunkOffsets; - let rhs = offsets.length - 1; - // return early if out of bounds, or if there's just one child - if (idx < 0 ) { return null; } - if (idx >= offsets[rhs]) { return null; } - if (rhs <= 1 ) { return then ? then(this, 0, idx) : [0, idx]; } - let lhs = 0, pos = 0, mid = 0; - do { - if (lhs + 1 === rhs) { - return then ? 
then(this, lhs, idx - pos) : [lhs, idx - pos]; - } - mid = lhs + ((rhs - lhs) / 2) | 0; - idx >= offsets[mid] ? (lhs = mid) : (rhs = mid); - } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs])); - return null; - } - - public isValid(index: number): boolean { - return !!this.search(index, this.isValidInternal); - } - - public get(index: number): T['TValue'] | null { - return this.search(index, this.getInternal); - } - - public set(index: number, value: T['TValue'] | null): void { - this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value)); - } - - public indexOf(element: T['TValue'], offset?: number): number { - if (offset && typeof offset === 'number') { - return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!; - } - return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element); - } - - public toArray(): T['TArray'] { - const { chunks } = this; - const n = chunks.length; - let ArrayType: any = this._type.ArrayType; - if (n <= 0) { return new ArrayType(0); } - if (n <= 1) { return chunks[0].toArray(); } - let len = 0; - const src = new Array(n); - for (let i = -1; ++i < n;) { - len += (src[i] = chunks[i].toArray()).length; - } - if (ArrayType !== src[0].constructor) { - ArrayType = src[0].constructor; - } - const dst = new ArrayType(len); - const set: any = ArrayType === Array ? 
arraySet : typedSet; - for (let i = -1, idx = 0; ++i < n;) { - idx = set(src[i], dst, idx); - } - return dst; - } - - protected getInternal({ _chunks }: Chunked, i: number, j: number) { return _chunks[i].get(j); } - protected isValidInternal({ _chunks }: Chunked, i: number, j: number) { return _chunks[i].isValid(j); } - protected indexOfInternal({ _chunks }: Chunked, chunkIndex: number, fromIndex: number, element: T['TValue']) { - let i = chunkIndex - 1; - const n = _chunks.length; - let start = fromIndex, offset = 0, found = -1; - while (++i < n) { - if (~(found = _chunks[i].indexOf(element, start))) { - return offset + found; - } - start = 0; - offset += _chunks[i].length; - } - return -1; - } - - protected _sliceInternal(self: Chunked, begin: number, end: number) { - const slices: Vector[] = []; - const { chunks, _chunkOffsets: chunkOffsets } = self; - for (let i = -1, n = chunks.length; ++i < n;) { - const chunk = chunks[i]; - const chunkLength = chunk.length; - const chunkOffset = chunkOffsets[i]; - // If the child is to the right of the slice boundary, we can stop - if (chunkOffset >= end) { break; } - // If the child is to the left of of the slice boundary, exclude - if (begin >= chunkOffset + chunkLength) { continue; } - // If the child is between both left and right boundaries, include w/o slicing - if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) { - slices.push(chunk); - continue; - } - // If the child overlaps one of the slice boundaries, include that slice - const from = Math.max(0, begin - chunkOffset); - const to = Math.min(end - chunkOffset, chunkLength); - slices.push(chunk.slice(from, to) as Vector); - } - return self.clone(slices); - } -} - -/** @ignore */ -function calculateOffsets(vectors: Vector[]) { - const offsets = new Uint32Array((vectors || []).length + 1); - let offset = offsets[0] = 0; - const length = offsets.length; - for (let index = 0; ++index < length;) { - offsets[index] = (offset += vectors[index - 1].length); - } 
- return offsets; -} - -/** @ignore */ -const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => { - dst.set(src, offset); - return (offset + src.length); -}; - -/** @ignore */ -const arraySet = (src: any[], dst: any[], offset: number) => { - let idx = offset; - for (let i = -1, n = src.length; ++i < n;) { - dst[idx++] = src[i]; - } - return idx; -}; - -/** @ignore */ -interface TypedArray extends ArrayBufferView { - readonly length: number; - readonly [n: number]: number; - set(array: ArrayLike, offset?: number): void; -} diff --git a/js/src/vector/date.ts b/js/src/vector/date.ts deleted file mode 100644 index 8c2b7a563568f..0000000000000 --- a/js/src/vector/date.ts +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { DateUnit } from '../enum'; -import { Chunked } from './chunked'; -import { BaseVector } from './base'; -import { VectorType as V } from '../interfaces'; -import { VectorBuilderOptions } from './index'; -import { vectorFromValuesWithType } from './index'; -import { VectorBuilderOptionsAsync } from './index'; -import { Date_, DateDay, DateMillisecond } from '../type'; - -/** @ignore */ -type FromArgs = [Iterable, T['unit']]; - -/** @ignore */ -export class DateVector extends BaseVector { - public static from(...args: FromArgs): V; - public static from(...args: FromArgs): V; - public static from(input: Iterable): V; - public static from(input: AsyncIterable): Promise>; - public static from(input: VectorBuilderOptions): Chunked; - public static from(input: VectorBuilderOptionsAsync): Promise>; - /** @nocollapse */ - public static from(...args: FromArgs | [Iterable | AsyncIterable | VectorBuilderOptions | VectorBuilderOptionsAsync]) { - if (args.length === 2) { - return vectorFromValuesWithType(() => args[1] === DateUnit.DAY ? new DateDay() : new DateMillisecond() as T, args[0]); - } - return vectorFromValuesWithType(() => new DateMillisecond() as T, args[0]); - } -} - -/** @ignore */ -export class DateDayVector extends DateVector {} - -/** @ignore */ -export class DateMillisecondVector extends DateVector {} diff --git a/js/src/vector/dictionary.ts b/js/src/vector/dictionary.ts deleted file mode 100644 index 4b39dbe978534..0000000000000 --- a/js/src/vector/dictionary.ts +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { Data } from '../data'; -import { Vector } from '../vector'; -import { BaseVector } from './base'; -import { VectorType as V } from '../interfaces'; -import { VectorBuilderOptions } from './index'; -import { vectorFromValuesWithType } from './index'; -import { VectorBuilderOptionsAsync } from './index'; -import { DataType, Dictionary, TKeys } from '../type'; - -/** @ignore */ -type FromArgs = [Vector, TKey, ArrayLike | TKey['TArray']]; - -/** @ignore */ -export class DictionaryVector extends BaseVector> { - public static from(...args: FromArgs): V>; - public static from(input: VectorBuilderOptions>): Vector>; - public static from(input: VectorBuilderOptionsAsync>): Promise>>; - /** @nocollapse */ - public static from(...args: any[]) { - if (args.length === 3) { - const [values, indices, keys] = args as FromArgs; - const type = new Dictionary(values.type, indices, null, null); - return Vector.new(Data.Dictionary(type, 0, keys.length, 0, null, keys, values)); - } - return vectorFromValuesWithType(() => args[0].type, args[0]); - } - - constructor(data: Data>) { - super(data); - this.indices = Vector.new(data.clone(this.type.indices)); - } - - public readonly indices: V; - - public get dictionary() { return > this.data.dictionary; } - public reverseLookup(value: T) { return this.dictionary.indexOf(value); } - public getKey(idx: number): TKey['TValue'] | null { return this.indices.get(idx); } - public getValue(key: number): T['TValue'] | null { return this.dictionary.get(key); } - public setKey(idx: number, key: TKey['TValue'] | null) { 
return this.indices.set(idx, key); } - public setValue(key: number, value: T['TValue'] | null) { return this.dictionary.set(key, value); } -} - -(DictionaryVector.prototype as any).indices = null; diff --git a/js/src/vector/fixedsizebinary.ts b/js/src/vector/fixedsizebinary.ts deleted file mode 100644 index 779be19ff1278..0000000000000 --- a/js/src/vector/fixedsizebinary.ts +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { BaseVector } from './base'; -import { FixedSizeBinary } from '../type'; - -/** @ignore */ -export class FixedSizeBinaryVector extends BaseVector {} diff --git a/js/src/vector/fixedsizelist.ts b/js/src/vector/fixedsizelist.ts deleted file mode 100644 index 13637021f805c..0000000000000 --- a/js/src/vector/fixedsizelist.ts +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { BaseVector } from './base'; -import { DataType, FixedSizeList } from '../type'; - -/** @ignore */ -export class FixedSizeListVector extends BaseVector> {} diff --git a/js/src/vector/float.ts b/js/src/vector/float.ts deleted file mode 100644 index 8260d2b27dbbc..0000000000000 --- a/js/src/vector/float.ts +++ /dev/null @@ -1,144 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Data } from '../data'; -import { Vector } from '../vector'; -import { Chunked } from './chunked'; -import { BaseVector } from './base'; -import { VectorBuilderOptions } from './index'; -import { vectorFromValuesWithType } from './index'; -import { VectorBuilderOptionsAsync } from './index'; -import { Float, Float16, Float32, Float64, FloatArray } from '../type'; -import { VectorType as V, TypedArrayConstructor } from '../interfaces'; - -/** @ignore */ -type FloatVectorConstructors = - typeof FloatVector | - typeof Float16Vector | - typeof Float32Vector | - typeof Float64Vector ; - -/** @ignore */ -type FromInput = - FloatArray | - Iterable | - AsyncIterable | - VectorBuilderOptions | - VectorBuilderOptionsAsync ; - -/** @ignore */ -export type FloatArrayCtor = TypedArrayConstructor; - -/** @ignore */ -export class FloatVector extends BaseVector { - - // Guaranteed zero-copy variants - public static from(this: typeof FloatVector, input: Uint16Array): Float16Vector; - public static from(this: typeof FloatVector, input: Float32Array): Float32Vector; - public static from(this: typeof FloatVector, input: Float64Array): Float64Vector; - - // Zero-copy if input is a TypedArray of the same type as the - // Vector that from is called on, otherwise uses the Builders - public static from(this: typeof Float16Vector, input: FromInput): Float16Vector; - public static from(this: typeof Float32Vector, input: FromInput): Float32Vector; - public static from(this: typeof Float64Vector, input: FromInput): Float64Vector; - - // Not zero-copy - public static from(this: typeof FloatVector, input: Iterable): V; - public static from(this: typeof FloatVector, input: AsyncIterable): Promise>; - public static from(this: typeof FloatVector, input: VectorBuilderOptions): Chunked; - public static from(this: typeof FloatVector, input: VectorBuilderOptionsAsync): Promise>; - /** @nocollapse */ - public static from(this: FloatVectorConstructors, input: FromInput) { - - let ArrowType = 
vectorTypeToDataType(this); - - if ((input instanceof ArrayBuffer) || ArrayBuffer.isView(input)) { - const InputType = arrayTypeToDataType(input.constructor as FloatArrayCtor) || ArrowType; - // Special case, infer the Arrow DataType from the input if calling the base - // FloatVector.from with a TypedArray, e.g. `FloatVector.from(new Float32Array())` - if (ArrowType === null) { - ArrowType = InputType; - } - // If the DataType inferred from the Vector constructor matches the - // DataType inferred from the input arguments, return zero-copy view - if (ArrowType && ArrowType === InputType) { - const type = new ArrowType(); - const length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT; - // If the ArrowType is Float16 but the input type isn't a Uint16Array, - // let the Float16Builder handle casting the input values to Uint16s. - if (!convertTo16Bit(ArrowType, input.constructor)) { - return Vector.new(Data.Float(type, 0, length, 0, null, input as FloatArray)); - } - } - } - - if (ArrowType) { - // If the DataType inferred from the Vector constructor is different than - // the DataType inferred from the input TypedArray, or if input isn't a - // TypedArray, use the Builders to construct the result Vector - return vectorFromValuesWithType(() => new ArrowType!() as T, input); - } - - if ((input instanceof DataView) || (input instanceof ArrayBuffer)) { - throw new TypeError(`Cannot infer float type from instance of ${input.constructor.name}`); - } - - throw new TypeError('Unrecognized FloatVector input'); - } -} - -/** @ignore */ -export class Float16Vector extends FloatVector { - // Since JS doesn't have half floats, `toArray()` returns a zero-copy slice - // of the underlying Uint16Array data. This behavior ensures we don't incur - // extra compute or copies if you're calling `toArray()` in order to create - // a buffer for something like WebGL. 
Buf if you're using JS and want typed - // arrays of 4-to-8-byte precision, these methods will enumerate the values - // and clamp to the desired byte lengths. - public toFloat32Array() { return new Float32Array(this as Iterable); } - public toFloat64Array() { return new Float64Array(this as Iterable); } -} - -/** @ignore */ -export class Float32Vector extends FloatVector {} -/** @ignore */ -export class Float64Vector extends FloatVector {} - -const convertTo16Bit = (typeCtor: any, dataCtor: any) => { - return (typeCtor === Float16) && (dataCtor !== Uint16Array); -}; - -/** @ignore */ -const arrayTypeToDataType = (ctor: FloatArrayCtor) => { - switch (ctor) { - case Uint16Array: return Float16; - case Float32Array: return Float32; - case Float64Array: return Float64; - default: return null; - } -}; - -/** @ignore */ -const vectorTypeToDataType = (ctor: FloatVectorConstructors) => { - switch (ctor) { - case Float16Vector: return Float16; - case Float32Vector: return Float32; - case Float64Vector: return Float64; - default: return null; - } -}; diff --git a/js/src/vector/index.ts b/js/src/vector/index.ts deleted file mode 100644 index 30f5e3cfa8a65..0000000000000 --- a/js/src/vector/index.ts +++ /dev/null @@ -1,207 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -export { Vector } from '../vector'; -export { BaseVector } from './base'; -export { BinaryVector } from './binary'; -export { BoolVector } from './bool'; -export { Chunked } from './chunked'; -export { DateVector, DateDayVector, DateMillisecondVector } from './date'; -export { DecimalVector } from './decimal'; -export { DictionaryVector } from './dictionary'; -export { FixedSizeBinaryVector } from './fixedsizebinary'; -export { FixedSizeListVector } from './fixedsizelist'; -export { FloatVector, Float16Vector, Float32Vector, Float64Vector } from './float'; -export { IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector } from './interval'; -export { IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector } from './int'; -export { ListVector } from './list'; -export { MapVector } from './map'; -export { NullVector } from './null'; -export { StructVector } from './struct'; -export { TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector } from './timestamp'; -export { TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector } from './time'; -export { UnionVector, DenseUnionVector, SparseUnionVector } from './union'; -export { Utf8Vector } from './utf8'; -export { MapRow, StructRow } from './row'; - -import * as fn from '../util/fn'; -import { Data } from '../data'; -import { Type } from '../enum'; -import { Vector } from '../vector'; -import { DataType } from '../type'; -import { Chunked } from './chunked'; -import { BaseVector } from './base'; -import { setBool } from '../util/bit'; -import { isIterable, isAsyncIterable } from '../util/compat'; -import { Builder, IterableBuilderOptions } from '../builder'; -import { VectorType as V, VectorCtorArgs } from '../interfaces'; -import { 
instance as getVisitor } from '../visitor/get'; -import { instance as setVisitor } from '../visitor/set'; -import { instance as indexOfVisitor } from '../visitor/indexof'; -import { instance as toArrayVisitor } from '../visitor/toarray'; -import { instance as iteratorVisitor } from '../visitor/iterator'; -import { instance as byteWidthVisitor } from '../visitor/bytewidth'; -import { instance as getVectorConstructor } from '../visitor/vectorctor'; - -declare module '../vector' { - namespace Vector { - export { newVector as new }; - export { vectorFrom as from }; - } -} - -declare module './base' { - namespace BaseVector { - export { vectorFrom as from }; - } - interface BaseVector { - get(index: number): T['TValue'] | null; - set(index: number, value: T['TValue'] | null): void; - indexOf(value: T['TValue'] | null, fromIndex?: number): number; - toArray(): T['TArray']; - getByteWidth(): number; - [Symbol.iterator](): IterableIterator; - } -} - -/** @nocollapse */ -Vector.new = newVector; - -/** @nocollapse */ -Vector.from = vectorFrom; - -/** @ignore */ -function newVector(data: Data, ...args: VectorCtorArgs>): V { - return new (getVectorConstructor.getVisitFn(data)())(data, ...args) as V; -} - -/** @ignore */ -export interface VectorBuilderOptions extends IterableBuilderOptions { values: Iterable } -/** @ignore */ -export interface VectorBuilderOptionsAsync extends IterableBuilderOptions { values: AsyncIterable } - -/** @ignore */ -export function vectorFromValuesWithType(newDataType: () => T, input: Iterable | AsyncIterable | VectorBuilderOptions | VectorBuilderOptionsAsync) { - if (isIterable(input)) { - return Vector.from({ 'nullValues': [null, undefined], type: newDataType(), 'values': input }) as V; - } else if (isAsyncIterable(input)) { - return Vector.from({ 'nullValues': [null, undefined], type: newDataType(), 'values': input }) as Promise>; - } - const { - 'values': values = [], - 'type': type = newDataType(), - 'nullValues': nullValues = [null, undefined], 
- } = { ...input }; - return isIterable(values) - ? Vector.from({ nullValues, ...input, type } as VectorBuilderOptions) - : Vector.from({ nullValues, ...input, type } as VectorBuilderOptionsAsync); -} - -/** @ignore */ -function vectorFrom(input: VectorBuilderOptions): Vector; -function vectorFrom(input: VectorBuilderOptionsAsync): Promise>; -function vectorFrom(input: VectorBuilderOptions | VectorBuilderOptionsAsync) { - const { 'values': values = [], ...options } = { 'nullValues': [null, undefined], ...input } as VectorBuilderOptions | VectorBuilderOptionsAsync; - if (isIterable(values)) { - const chunks = [...Builder.throughIterable(options)(values)]; - return (chunks.length === 1 ? chunks[0] : Chunked.concat(chunks)) as Vector; - } - return (async (chunks: V[]) => { - const transform = Builder.throughAsyncIterable(options); - for await (const chunk of transform(values)) { - chunks.push(chunk); - } - return (chunks.length === 1 ? chunks[0] : Chunked.concat(chunks)) as Vector; - })([]); -} - -// -// We provide the following method implementations for code navigability purposes only. -// They're overridden at runtime below with the specific Visitor implementation for each type, -// short-circuiting the usual Visitor traversal and reducing intermediate lookups and calls. -// This comment is here to remind you to not set breakpoints in these function bodies, or to inform -// you why the breakpoints you have already set are not being triggered. Have a great day! 
-// - -BaseVector.prototype.get = function baseVectorGet(this: BaseVector, index: number): T['TValue'] | null { - return getVisitor.visit(this, index); -}; - -BaseVector.prototype.set = function baseVectorSet(this: BaseVector, index: number, value: T['TValue'] | null): void { - return setVisitor.visit(this, index, value); -}; - -BaseVector.prototype.indexOf = function baseVectorIndexOf(this: BaseVector, value: T['TValue'] | null, fromIndex?: number): number { - return indexOfVisitor.visit(this, value, fromIndex); -}; - -BaseVector.prototype.toArray = function baseVectorToArray(this: BaseVector): T['TArray'] { - return toArrayVisitor.visit(this); -}; - -BaseVector.prototype.getByteWidth = function baseVectorGetByteWidth(this: BaseVector): number { - return byteWidthVisitor.visit(this.type); -}; - -BaseVector.prototype[Symbol.iterator] = function baseVectorSymbolIterator(this: BaseVector): IterableIterator { - return iteratorVisitor.visit(this); -}; - -(BaseVector.prototype as any)._bindDataAccessors = bindBaseVectorDataAccessors; - -// Perf: bind and assign the operator Visitor methods to each of the Vector subclasses for each Type -(Object.keys(Type) as any[]) - .map((T: any) => Type[T] as any) - .filter((T: any): T is Type => typeof T === 'number') - .filter((typeId) => typeId !== Type.NONE) - .forEach((typeId) => { - const VectorCtor = getVectorConstructor.visit(typeId); - VectorCtor.prototype['get'] = fn.partial1(getVisitor.getVisitFn(typeId)); - VectorCtor.prototype['set'] = fn.partial2(setVisitor.getVisitFn(typeId)); - VectorCtor.prototype['indexOf'] = fn.partial2(indexOfVisitor.getVisitFn(typeId)); - VectorCtor.prototype['toArray'] = fn.partial0(toArrayVisitor.getVisitFn(typeId)); - VectorCtor.prototype['getByteWidth'] = partialType0(byteWidthVisitor.getVisitFn(typeId)); - VectorCtor.prototype[Symbol.iterator] = fn.partial0(iteratorVisitor.getVisitFn(typeId)); - }); - -/** @ignore */ -function partialType0(visit: (node: T['type']) => any) { - return 
function(this: T) { return visit(this.type); }; -} - -/** @ignore */ -function wrapNullableGet, F extends (i: number) => any>(fn: F): (...args: Parameters) => ReturnType { - return function(this: V, i: number) { return this.isValid(i) ? fn.call(this, i) : null; }; -} - -/** @ignore */ -function wrapNullableSet, F extends (i: number, a: any) => void>(fn: F): (...args: Parameters) => void { - return function(this: V, i: number, a: any) { - if (setBool(this.nullBitmap, this.offset + i, !((a == null)))) { - fn.call(this, i, a); - } - }; -} - -/** @ignore */ -function bindBaseVectorDataAccessors(this: BaseVector) { - const nullBitmap = this.nullBitmap; - if (nullBitmap && nullBitmap.byteLength > 0) { - this.get = wrapNullableGet(this.get); - this.set = wrapNullableSet(this.set); - } -} diff --git a/js/src/vector/int.ts b/js/src/vector/int.ts deleted file mode 100644 index dbfba58c9d91a..0000000000000 --- a/js/src/vector/int.ts +++ /dev/null @@ -1,195 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Data } from '../data'; -import { Vector } from '../vector'; -import { Chunked } from './chunked'; -import { BaseVector } from './base'; -import { VectorBuilderOptions } from './index'; -import { vectorFromValuesWithType } from './index'; -import { VectorBuilderOptionsAsync } from './index'; -import { BigInt64Array, BigUint64Array } from '../util/compat'; -import { toBigInt64Array, toBigUint64Array } from '../util/buffer'; -import { Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, IntArray } from '../type'; -import { VectorType as V, TypedArrayConstructor, BigIntArrayConstructor, BigIntArray } from '../interfaces'; - -/** @ignore */ -type IntVectorConstructors = - typeof IntVector | - typeof Int8Vector | - typeof Int16Vector | - typeof Int32Vector | - typeof Uint8Vector | - typeof Uint16Vector | - typeof Uint32Vector | - typeof Int64Vector | - typeof Uint64Vector ; - -/** @ignore */ -type FromInput = - IntArray | BigIntArray | - Iterable | - AsyncIterable | - VectorBuilderOptions | - VectorBuilderOptionsAsync ; - -/** @ignore */ -type FromArgs = [FromInput, boolean?]; - -/** @ignore */ -export type IntArrayCtor = TypedArrayConstructor | BigIntArrayConstructor; - -/** @ignore */ -export class IntVector extends BaseVector { - - // Guaranteed zero-copy variants - public static from(this: typeof IntVector, input: Int8Array): Int8Vector; - public static from(this: typeof IntVector, input: Int16Array): Int16Vector; - public static from(this: typeof IntVector, input: Int32Array): Int32Vector; - public static from(this: typeof IntVector, input: BigInt64Array): Int64Vector; - public static from(this: typeof IntVector, input: Int32Array, is64bit: true): Int64Vector; - public static from(this: typeof IntVector, input: Uint8Array): Uint8Vector; - public static from(this: typeof IntVector, input: Uint16Array): Uint16Vector; - public static from(this: typeof IntVector, input: Uint32Array): Uint32Vector; - public static from(this: typeof IntVector, input: 
BigUint64Array): Uint64Vector; - public static from(this: typeof IntVector, input: Uint32Array, is64bit: true): Uint64Vector; - - // Zero-copy if input is a TypedArray of the same type as the - // Vector that from is called on, otherwise uses the Builders - public static from(this: typeof Int8Vector, input: FromInput): Int8Vector; - public static from(this: typeof Int16Vector, input: FromInput): Int16Vector; - public static from(this: typeof Int32Vector, input: FromInput): Int32Vector; - public static from(this: typeof Int64Vector, input: FromInput): Int64Vector; - public static from(this: typeof Uint8Vector, input: FromInput): Uint8Vector; - public static from(this: typeof Uint16Vector, input: FromInput): Uint16Vector; - public static from(this: typeof Uint32Vector, input: FromInput): Uint32Vector; - public static from(this: typeof Uint64Vector, input: FromInput): Uint64Vector; - - // Not zero-copy - public static from(this: typeof IntVector, input: Iterable): V; - public static from(this: typeof IntVector, input: AsyncIterable): Promise>; - public static from(this: typeof IntVector, input: VectorBuilderOptions): Chunked; - public static from(this: typeof IntVector, input: VectorBuilderOptionsAsync): Promise>; - /** @nocollapse */ - public static from(this: IntVectorConstructors, ...args: FromArgs) { - - const [input, is64bit = false] = args; - let ArrowType = vectorTypeToDataType(this, is64bit); - - if ((input instanceof ArrayBuffer) || ArrayBuffer.isView(input)) { - const InputType = arrayTypeToDataType(input.constructor as IntArrayCtor, is64bit) || ArrowType; - // Special case, infer the Arrow DataType from the input if calling the base - // IntVector.from with a TypedArray, e.g. 
`IntVector.from(new Int32Array())` - if (ArrowType === null) { - ArrowType = InputType; - } - // If the DataType inferred from the Vector constructor matches the - // DataType inferred from the input arguments, return zero-copy view - if (ArrowType && ArrowType === InputType) { - const type = new ArrowType(); - let length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT; - // If the ArrowType is 64bit but the input type is 32bit pairs, update the logical length - if (convert32To64Bit(ArrowType, input.constructor)) { - length *= 0.5; - } - return Vector.new(Data.Int(type, 0, length, 0, null, input as IntArray)); - } - } - - if (ArrowType) { - // If the DataType inferred from the Vector constructor is different than - // the DataType inferred from the input TypedArray, or if input isn't a - // TypedArray, use the Builders to construct the result Vector - return vectorFromValuesWithType(() => new ArrowType!() as T, input); - } - - if ((input instanceof DataView) || (input instanceof ArrayBuffer)) { - throw new TypeError(`Cannot infer integer type from instance of ${input.constructor.name}`); - } - - throw new TypeError('Unrecognized IntVector input'); - } -} - -/** @ignore */ -export class Int8Vector extends IntVector {} -/** @ignore */ -export class Int16Vector extends IntVector {} -/** @ignore */ -export class Int32Vector extends IntVector {} -/** @ignore */ -export class Int64Vector extends IntVector { - public toBigInt64Array() { - return toBigInt64Array(this.values); - } - private _values64!: BigInt64Array; - public get values64(): BigInt64Array { - return this._values64 || (this._values64 = this.toBigInt64Array()); - } -} - -/** @ignore */ -export class Uint8Vector extends IntVector {} -/** @ignore */ -export class Uint16Vector extends IntVector {} -/** @ignore */ -export class Uint32Vector extends IntVector {} -/** @ignore */ -export class Uint64Vector extends IntVector { - public toBigUint64Array() { - return toBigUint64Array(this.values); - } - private 
_values64!: BigUint64Array; - public get values64(): BigUint64Array { - return this._values64 || (this._values64 = this.toBigUint64Array()); - } -} - -const convert32To64Bit = (typeCtor: any, dataCtor: any) => { - return (typeCtor === Int64 || typeCtor === Uint64) && - (dataCtor === Int32Array || dataCtor === Uint32Array); -}; - -/** @ignore */ -const arrayTypeToDataType = (ctor: IntArrayCtor, is64bit: boolean) => { - switch (ctor) { - case Int8Array: return Int8; - case Int16Array: return Int16; - case Int32Array: return is64bit ? Int64 : Int32; - case BigInt64Array: return Int64; - case Uint8Array: return Uint8; - case Uint16Array: return Uint16; - case Uint32Array: return is64bit ? Uint64 : Uint32; - case BigUint64Array: return Uint64; - default: return null; - } -}; - -/** @ignore */ -const vectorTypeToDataType = (ctor: IntVectorConstructors, is64bit: boolean) => { - switch (ctor) { - case Int8Vector: return Int8; - case Int16Vector: return Int16; - case Int32Vector: return is64bit ? Int64 : Int32; - case Int64Vector: return Int64; - case Uint8Vector: return Uint8; - case Uint16Vector: return Uint16; - case Uint32Vector: return is64bit ? Uint64 : Uint32; - case Uint64Vector: return Uint64; - default: return null; - } -}; diff --git a/js/src/vector/interval.ts b/js/src/vector/interval.ts deleted file mode 100644 index 70384ab97cc71..0000000000000 --- a/js/src/vector/interval.ts +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { BaseVector } from './base'; -import { Interval, IntervalDayTime, IntervalYearMonth } from '../type'; - -/** @ignore */ -export class IntervalVector extends BaseVector {} -/** @ignore */ -export class IntervalDayTimeVector extends IntervalVector {} -/** @ignore */ -export class IntervalYearMonthVector extends IntervalVector {} diff --git a/js/src/vector/map.ts b/js/src/vector/map.ts deleted file mode 100644 index 9975919f7c657..0000000000000 --- a/js/src/vector/map.ts +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { MapRow } from './row'; -import { Field } from '../schema'; -import { Vector } from '../vector'; -import { BaseVector } from './base'; -import { DataType, Map_, Struct, List } from '../type'; - -/** @ignore */ -export class MapVector extends BaseVector> { - public asList() { - const child = this.type.children[0] as Field>; - return Vector.new(this.data.clone(new List>(child))); - } - public bind(index: number): Map_['TValue'] { - const child = this.getChildAt>(0)!; - const { [index]: begin, [index + 1]: end } = this.valueOffsets; - return new MapRow(child.slice(begin, end)); - } -} diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts deleted file mode 100644 index 23d1b5440f899..0000000000000 --- a/js/src/vector/row.ts +++ /dev/null @@ -1,296 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Vector } from '../vector'; -import { StructVector } from './struct'; -import { valueToString } from '../util/pretty'; -import { DataType, Struct, RowLike } from '../type'; - -/** @ignore */ const kParent = Symbol.for('parent'); -/** @ignore */ const kRowIndex = Symbol.for('rowIndex'); -/** @ignore */ const kKeyToIdx = Symbol.for('keyToIdx'); -/** @ignore */ const kIdxToVal = Symbol.for('idxToVal'); -/** @ignore */ const kCustomInspect = Symbol.for('nodejs.util.inspect.custom'); - -abstract class Row implements Map { - - public readonly size: number; - public readonly [Symbol.toStringTag]: string; - - protected [kRowIndex]: number; - protected [kParent]: Vector; - protected [kKeyToIdx]: Map; - protected [kIdxToVal]: V[]; - - constructor(parent: Vector, numKeys: number) { - this[kParent] = parent; - this.size = numKeys; - } - - public abstract keys(): IterableIterator; - public abstract values(): IterableIterator; - public abstract getKey(idx: number): K; - public abstract getIndex(key: K): number; - public abstract getValue(idx: number): V; - public abstract setValue(idx: number, val: V): void; - - public entries() { return this[Symbol.iterator](); } - - public has(key: K) { return this.get(key) !== undefined; } - - public get(key: K) { - let val = undefined; - if (key != null) { - const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map()); - let idx = ktoi.get(key); - if (idx !== undefined) { - const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size)); - ((val = itov[idx]) !== undefined) || (itov[idx] = val = this.getValue(idx)); - } else if ((idx = this.getIndex(key)) > -1) { - ktoi.set(key, idx); - const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size)); - ((val = itov[idx]) !== undefined) || (itov[idx] = val = this.getValue(idx)); - } - } - return val; - } - - public set(key: K, val: V) { - if (key != null) { - const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map()); - let idx = ktoi.get(key); - if (idx === 
undefined) { - ktoi.set(key, idx = this.getIndex(key)); - } - if (idx > -1) { - const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size)); - itov[idx] = this.setValue(idx, val); - } - } - return this; - } - - public clear(): void { throw new Error(`Clearing ${this[Symbol.toStringTag]} not supported.`); } - - public delete(_: K): boolean { throw new Error(`Deleting ${this[Symbol.toStringTag]} values not supported.`); } - - public *[Symbol.iterator](): IterableIterator<[K, V]> { - - const ki = this.keys(); - const vi = this.values(); - const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map()); - const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size)); - - for (let k: K, v: V, i = 0, kr: IteratorResult, vr: IteratorResult; - !((kr = ki.next()).done || (vr = vi.next()).done); - ++i - ) { - k = kr.value; - v = vr.value; - itov[i] = v; - ktoi.has(k) || ktoi.set(k, i); - yield [k, v]; - } - } - - public forEach(callbackfn: (value: V, key: K, map: Map) => void, thisArg?: any): void { - - const ki = this.keys(); - const vi = this.values(); - const callback = thisArg === undefined ? 
callbackfn : - (v: V, k: K, m: Map) => callbackfn.call(thisArg, v, k, m); - const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map()); - const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size)); - - for (let k: K, v: V, i = 0, kr: IteratorResult, vr: IteratorResult; - !((kr = ki.next()).done || (vr = vi.next()).done); - ++i - ) { - k = kr.value; - v = vr.value; - itov[i] = v; - ktoi.has(k) || ktoi.set(k, i); - callback(v, k, this); - } - } - - public toArray() { return [...this.values()]; } - public toJSON() { - const obj = {} as any; - this.forEach((val, key) => obj[key] = val); - return obj; - } - - public inspect() { return this.toString(); } - public [kCustomInspect]() { return this.toString(); } - public toString() { - const str: string[] = []; - this.forEach((val, key) => { - key = valueToString(key); - val = valueToString(val); - str.push(`${key}: ${val}`); - }); - return `{ ${str.join(', ')} }`; - } - - protected static [Symbol.toStringTag] = ((proto: Row) => { - Object.defineProperties(proto, { - 'size': { writable: true, enumerable: false, configurable: false, value: 0 }, - [kParent]: { writable: true, enumerable: false, configurable: false, value: null }, - [kRowIndex]: { writable: true, enumerable: false, configurable: false, value: -1 }, - }); - return (proto as any)[Symbol.toStringTag] = 'Row'; - })(Row.prototype); -} - -export class MapRow extends Row { - constructor(slice: Vector>) { - super(slice, slice.length); - return createRowProxy(this); - } - public keys() { - return this[kParent].getChildAt(0)![Symbol.iterator](); - } - public values() { - return this[kParent].getChildAt(1)![Symbol.iterator](); - } - public getKey(idx: number): K['TValue'] { - return this[kParent].getChildAt(0)!.get(idx); - } - public getIndex(key: K['TValue']): number { - return this[kParent].getChildAt(0)!.indexOf(key); - } - public getValue(index: number): V['TValue'] | null { - return this[kParent].getChildAt(1)!.get(index); - } - public 
setValue(index: number, value: V['TValue'] | null): void { - this[kParent].getChildAt(1)!.set(index, value); - } -} - -export class StructRow extends Row { - constructor(parent: StructVector) { - super(parent, parent.type.children.length); - return defineRowProxyProperties(this); - } - public *keys() { - for (const field of this[kParent].type.children) { - yield field.name as keyof T; - } - } - public *values() { - for (const field of this[kParent].type.children) { - yield (this as RowLike)[field.name]; - } - } - public getKey(idx: number): keyof T { - return this[kParent].type.children[idx].name as keyof T; - } - public getIndex(key: keyof T): number { - return this[kParent].type.children.findIndex((f) => f.name === key); - } - public getValue(index: number): T[keyof T]['TValue'] | null { - return this[kParent].getChildAt(index)!.get(this[kRowIndex]); - } - public setValue(index: number, value: T[keyof T]['TValue'] | null): void { - return this[kParent].getChildAt(index)!.set(this[kRowIndex], value); - } -} - -Object.setPrototypeOf(Row.prototype, Map.prototype); - -/** @ignore */ -const defineRowProxyProperties = (() => { - const desc = { enumerable: true, configurable: false, get: null as any, set: null as any }; - return (row: T) => { - let idx = -1; - const ktoi = row[kKeyToIdx] || (row[kKeyToIdx] = new Map()); - const getter = (key: any) => function(this: T) { return this.get(key); }; - const setter = (key: any) => function(this: T, val: any) { return this.set(key, val); }; - for (const key of row.keys()) { - ktoi.set(key, ++idx); - desc.get = getter(key); - desc.set = setter(key); - Object.prototype.hasOwnProperty.call(row, key) || (desc.enumerable = true, Object.defineProperty(row, key, desc)); - Object.prototype.hasOwnProperty.call(row, idx) || (desc.enumerable = false, Object.defineProperty(row, idx, desc)); - } - desc.get = desc.set = null; - return row; - }; -})(); - -/** @ignore */ -const createRowProxy = (() => { - if (typeof Proxy === 'undefined') { - 
return defineRowProxyProperties; - } - const has = Row.prototype.has; - const get = Row.prototype.get; - const set = Row.prototype.set; - const getKey = Row.prototype.getKey; - const RowProxyHandler: ProxyHandler = { - isExtensible() { return false; }, - deleteProperty() { return false; }, - preventExtensions() { return true; }, - ownKeys(row: Row) { return [...row.keys()].map((x) => `${x}`); }, - has(row: Row, key: PropertyKey) { - switch (key) { - case 'getKey': case 'getIndex': case 'getValue': case 'setValue': case 'toArray': case 'toJSON': case 'inspect': - case 'constructor': case 'isPrototypeOf': case 'propertyIsEnumerable': case 'toString': case 'toLocaleString': case 'valueOf': - case 'size': case 'has': case 'get': case 'set': case 'clear': case 'delete': case 'keys': case 'values': case 'entries': case 'forEach': - case '__proto__': case '__defineGetter__': case '__defineSetter__': case 'hasOwnProperty': case '__lookupGetter__': case '__lookupSetter__': - case Symbol.iterator: case Symbol.toStringTag: case kParent: case kRowIndex: case kIdxToVal: case kKeyToIdx: case kCustomInspect: - return true; - } - if (typeof key === 'number' && !row.has(key)) { - key = row.getKey(key); - } - return row.has(key); - }, - get(row: Row, key: PropertyKey, receiver: any) { - switch (key) { - case 'getKey': case 'getIndex': case 'getValue': case 'setValue': case 'toArray': case 'toJSON': case 'inspect': - case 'constructor': case 'isPrototypeOf': case 'propertyIsEnumerable': case 'toString': case 'toLocaleString': case 'valueOf': - case 'size': case 'has': case 'get': case 'set': case 'clear': case 'delete': case 'keys': case 'values': case 'entries': case 'forEach': - case '__proto__': case '__defineGetter__': case '__defineSetter__': case 'hasOwnProperty': case '__lookupGetter__': case '__lookupSetter__': - case Symbol.iterator: case Symbol.toStringTag: case kParent: case kRowIndex: case kIdxToVal: case kKeyToIdx: case kCustomInspect: - return Reflect.get(row, key, 
receiver); - } - if (typeof key === 'number' && !has.call(receiver, key)) { - key = getKey.call(receiver, key); - } - return get.call(receiver, key); - }, - set(row: Row, key: PropertyKey, val: any, receiver: any) { - switch (key) { - case kParent: case kRowIndex: case kIdxToVal: case kKeyToIdx: - return Reflect.set(row, key, val, receiver); - case 'getKey': case 'getIndex': case 'getValue': case 'setValue': case 'toArray': case 'toJSON': case 'inspect': - case 'constructor': case 'isPrototypeOf': case 'propertyIsEnumerable': case 'toString': case 'toLocaleString': case 'valueOf': - case 'size': case 'has': case 'get': case 'set': case 'clear': case 'delete': case 'keys': case 'values': case 'entries': case 'forEach': - case '__proto__': case '__defineGetter__': case '__defineSetter__': case 'hasOwnProperty': case '__lookupGetter__': case '__lookupSetter__': - case Symbol.iterator: case Symbol.toStringTag: - return false; - } - if (typeof key === 'number' && !has.call(receiver, key)) { - key = getKey.call(receiver, key); - } - return has.call(receiver, key) ? !!set.call(receiver, key, val) : false; - }, - }; - return (row: T) => new Proxy(row, RowProxyHandler) as T; -})(); diff --git a/js/src/vector/time.ts b/js/src/vector/time.ts deleted file mode 100644 index 0abded9403435..0000000000000 --- a/js/src/vector/time.ts +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { BaseVector } from './base'; -import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from '../type'; - -/** @ignore */ -export class TimeVector extends BaseVector {} -/** @ignore */ -export class TimeSecondVector extends TimeVector {} -/** @ignore */ -export class TimeMillisecondVector extends TimeVector {} -/** @ignore */ -export class TimeMicrosecondVector extends TimeVector {} -/** @ignore */ -export class TimeNanosecondVector extends TimeVector {} diff --git a/js/src/vector/timestamp.ts b/js/src/vector/timestamp.ts deleted file mode 100644 index caff0bd6ffbe1..0000000000000 --- a/js/src/vector/timestamp.ts +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { BaseVector } from './base'; -import { Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond } from '../type'; - -/** @ignore */ -export class TimestampVector extends BaseVector {} -/** @ignore */ -export class TimestampSecondVector extends TimestampVector {} -/** @ignore */ -export class TimestampMillisecondVector extends TimestampVector {} -/** @ignore */ -export class TimestampMicrosecondVector extends TimestampVector {} -/** @ignore */ -export class TimestampNanosecondVector extends TimestampVector {} diff --git a/js/src/vector/union.ts b/js/src/vector/union.ts deleted file mode 100644 index 854519c5781e2..0000000000000 --- a/js/src/vector/union.ts +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { BaseVector } from './base'; -import { Union, DenseUnion, SparseUnion} from '../type'; - -/** @ignore */ -export class UnionVector extends BaseVector { - public get typeIdToChildIndex() { return this.data.type.typeIdToChildIndex; } -} - -/** @ignore */ -export class DenseUnionVector extends UnionVector { - public get valueOffsets() { return this.data.valueOffsets!; } -} - -/** @ignore */ -export class SparseUnionVector extends UnionVector {} diff --git a/js/src/vector/utf8.ts b/js/src/vector/utf8.ts deleted file mode 100644 index a891c0dc5a69a..0000000000000 --- a/js/src/vector/utf8.ts +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Vector } from '../vector'; -import { Chunked } from './chunked'; -import { BaseVector } from './base'; -import { Binary, Utf8 } from '../type'; -import { VectorBuilderOptions } from './index'; -import { vectorFromValuesWithType } from './index'; -import { VectorBuilderOptionsAsync } from './index'; - -/** @ignore */ -export class Utf8Vector extends BaseVector { - public static from(input: Iterable): Utf8Vector; - public static from(input: AsyncIterable): Promise; - public static from(input: VectorBuilderOptions): Chunked; - public static from(input: VectorBuilderOptionsAsync): Promise>; - /** @nocollapse */ - public static from(input: Iterable | AsyncIterable | VectorBuilderOptions | VectorBuilderOptionsAsync) { - return vectorFromValuesWithType(() => new Utf8(), input); - } - public asBinary() { - return Vector.new(this.data.clone(new Binary())); - } -} diff --git a/js/src/visitor.ts b/js/src/visitor.ts index 3a63c93f96380..3be50a6d3eacf 100644 --- a/js/src/visitor.ts +++ b/js/src/visitor.ts @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-import { Data } from './data'; -import { Vector } from './vector'; -import { Type, Precision, DateUnit, TimeUnit, IntervalUnit, UnionMode } from './enum'; -import { DataType, Float, Int, Date_, Interval, Time, Timestamp, Union, } from './type'; +import { Type, Precision, DateUnit, TimeUnit, IntervalUnit, UnionMode } from './enum.js'; +import { DataType, Float, Int, Date_, Interval, Time, Timestamp, Union, } from './type.js'; export abstract class Visitor { public visitMany(nodes: any[], ...args: any[][]) { @@ -30,79 +28,93 @@ export abstract class Visitor { public getVisitFn(node: any, throwIfNotFound = true) { return getVisitFn(this, node, throwIfNotFound); } - public visitNull (_node: any, ..._args: any[]): any { return null; } - public visitBool (_node: any, ..._args: any[]): any { return null; } - public visitInt (_node: any, ..._args: any[]): any { return null; } - public visitFloat (_node: any, ..._args: any[]): any { return null; } - public visitUtf8 (_node: any, ..._args: any[]): any { return null; } - public visitBinary (_node: any, ..._args: any[]): any { return null; } - public visitFixedSizeBinary (_node: any, ..._args: any[]): any { return null; } - public visitDate (_node: any, ..._args: any[]): any { return null; } - public visitTimestamp (_node: any, ..._args: any[]): any { return null; } - public visitTime (_node: any, ..._args: any[]): any { return null; } - public visitDecimal (_node: any, ..._args: any[]): any { return null; } - public visitList (_node: any, ..._args: any[]): any { return null; } - public visitStruct (_node: any, ..._args: any[]): any { return null; } - public visitUnion (_node: any, ..._args: any[]): any { return null; } - public visitDictionary (_node: any, ..._args: any[]): any { return null; } - public visitInterval (_node: any, ..._args: any[]): any { return null; } - public visitFixedSizeList (_node: any, ..._args: any[]): any { return null; } - public visitMap (_node: any, ..._args: any[]): any { return null; } + public 
getVisitFnByTypeId(typeId: Type, throwIfNotFound = true) { + return getVisitFnByTypeId(this, typeId, throwIfNotFound); + } + public visitNull(_node: any, ..._args: any[]): any { return null; } + public visitBool(_node: any, ..._args: any[]): any { return null; } + public visitInt(_node: any, ..._args: any[]): any { return null; } + public visitFloat(_node: any, ..._args: any[]): any { return null; } + public visitUtf8(_node: any, ..._args: any[]): any { return null; } + public visitBinary(_node: any, ..._args: any[]): any { return null; } + public visitFixedSizeBinary(_node: any, ..._args: any[]): any { return null; } + public visitDate(_node: any, ..._args: any[]): any { return null; } + public visitTimestamp(_node: any, ..._args: any[]): any { return null; } + public visitTime(_node: any, ..._args: any[]): any { return null; } + public visitDecimal(_node: any, ..._args: any[]): any { return null; } + public visitList(_node: any, ..._args: any[]): any { return null; } + public visitStruct(_node: any, ..._args: any[]): any { return null; } + public visitUnion(_node: any, ..._args: any[]): any { return null; } + public visitDictionary(_node: any, ..._args: any[]): any { return null; } + public visitInterval(_node: any, ..._args: any[]): any { return null; } + public visitFixedSizeList(_node: any, ..._args: any[]): any { return null; } + public visitMap(_node: any, ..._args: any[]): any { return null; } } /** @ignore */ function getVisitFn(visitor: Visitor, node: any, throwIfNotFound = true) { - let fn: any = null; - let dtype: T['TType'] = Type.NONE; - if (node instanceof Data ) dtype = inferDType(node.type as T); - else if (node instanceof Vector ) dtype = inferDType(node.type as T); - else if (node instanceof DataType) dtype = inferDType(node as T); - else if (typeof (dtype = node) !== 'number') dtype = Type[node] as any as T['TType']; + if (typeof node === 'number') { + return getVisitFnByTypeId(visitor, node, throwIfNotFound); + } + if (typeof node === 'string' 
&& (node in Type)) { + return getVisitFnByTypeId(visitor, Type[node as keyof typeof Type], throwIfNotFound); + } + if (node && (node instanceof DataType)) { + return getVisitFnByTypeId(visitor, inferDType(node as T), throwIfNotFound); + } + if (node?.type && (node.type instanceof DataType)) { + return getVisitFnByTypeId(visitor, inferDType(node.type as T), throwIfNotFound); + } + return getVisitFnByTypeId(visitor, Type.NONE, throwIfNotFound); +} +/** @ignore */ +function getVisitFnByTypeId(visitor: Visitor, dtype: Type, throwIfNotFound = true) { + let fn: any = null; switch (dtype) { - case Type.Null: fn = visitor.visitNull; break; - case Type.Bool: fn = visitor.visitBool; break; - case Type.Int: fn = visitor.visitInt; break; - case Type.Int8: fn = visitor.visitInt8 || visitor.visitInt; break; - case Type.Int16: fn = visitor.visitInt16 || visitor.visitInt; break; - case Type.Int32: fn = visitor.visitInt32 || visitor.visitInt; break; - case Type.Int64: fn = visitor.visitInt64 || visitor.visitInt; break; - case Type.Uint8: fn = visitor.visitUint8 || visitor.visitInt; break; - case Type.Uint16: fn = visitor.visitUint16 || visitor.visitInt; break; - case Type.Uint32: fn = visitor.visitUint32 || visitor.visitInt; break; - case Type.Uint64: fn = visitor.visitUint64 || visitor.visitInt; break; - case Type.Float: fn = visitor.visitFloat; break; - case Type.Float16: fn = visitor.visitFloat16 || visitor.visitFloat; break; - case Type.Float32: fn = visitor.visitFloat32 || visitor.visitFloat; break; - case Type.Float64: fn = visitor.visitFloat64 || visitor.visitFloat; break; - case Type.Utf8: fn = visitor.visitUtf8; break; - case Type.Binary: fn = visitor.visitBinary; break; - case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break; - case Type.Date: fn = visitor.visitDate; break; - case Type.DateDay: fn = visitor.visitDateDay || visitor.visitDate; break; - case Type.DateMillisecond: fn = visitor.visitDateMillisecond || visitor.visitDate; break; - case 
Type.Timestamp: fn = visitor.visitTimestamp; break; - case Type.TimestampSecond: fn = visitor.visitTimestampSecond || visitor.visitTimestamp; break; + case Type.Null: fn = visitor.visitNull; break; + case Type.Bool: fn = visitor.visitBool; break; + case Type.Int: fn = visitor.visitInt; break; + case Type.Int8: fn = visitor.visitInt8 || visitor.visitInt; break; + case Type.Int16: fn = visitor.visitInt16 || visitor.visitInt; break; + case Type.Int32: fn = visitor.visitInt32 || visitor.visitInt; break; + case Type.Int64: fn = visitor.visitInt64 || visitor.visitInt; break; + case Type.Uint8: fn = visitor.visitUint8 || visitor.visitInt; break; + case Type.Uint16: fn = visitor.visitUint16 || visitor.visitInt; break; + case Type.Uint32: fn = visitor.visitUint32 || visitor.visitInt; break; + case Type.Uint64: fn = visitor.visitUint64 || visitor.visitInt; break; + case Type.Float: fn = visitor.visitFloat; break; + case Type.Float16: fn = visitor.visitFloat16 || visitor.visitFloat; break; + case Type.Float32: fn = visitor.visitFloat32 || visitor.visitFloat; break; + case Type.Float64: fn = visitor.visitFloat64 || visitor.visitFloat; break; + case Type.Utf8: fn = visitor.visitUtf8; break; + case Type.Binary: fn = visitor.visitBinary; break; + case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break; + case Type.Date: fn = visitor.visitDate; break; + case Type.DateDay: fn = visitor.visitDateDay || visitor.visitDate; break; + case Type.DateMillisecond: fn = visitor.visitDateMillisecond || visitor.visitDate; break; + case Type.Timestamp: fn = visitor.visitTimestamp; break; + case Type.TimestampSecond: fn = visitor.visitTimestampSecond || visitor.visitTimestamp; break; case Type.TimestampMillisecond: fn = visitor.visitTimestampMillisecond || visitor.visitTimestamp; break; case Type.TimestampMicrosecond: fn = visitor.visitTimestampMicrosecond || visitor.visitTimestamp; break; - case Type.TimestampNanosecond: fn = visitor.visitTimestampNanosecond || 
visitor.visitTimestamp; break; - case Type.Time: fn = visitor.visitTime; break; - case Type.TimeSecond: fn = visitor.visitTimeSecond || visitor.visitTime; break; - case Type.TimeMillisecond: fn = visitor.visitTimeMillisecond || visitor.visitTime; break; - case Type.TimeMicrosecond: fn = visitor.visitTimeMicrosecond || visitor.visitTime; break; - case Type.TimeNanosecond: fn = visitor.visitTimeNanosecond || visitor.visitTime; break; - case Type.Decimal: fn = visitor.visitDecimal; break; - case Type.List: fn = visitor.visitList; break; - case Type.Struct: fn = visitor.visitStruct; break; - case Type.Union: fn = visitor.visitUnion; break; - case Type.DenseUnion: fn = visitor.visitDenseUnion || visitor.visitUnion; break; - case Type.SparseUnion: fn = visitor.visitSparseUnion || visitor.visitUnion; break; - case Type.Dictionary: fn = visitor.visitDictionary; break; - case Type.Interval: fn = visitor.visitInterval; break; - case Type.IntervalDayTime: fn = visitor.visitIntervalDayTime || visitor.visitInterval; break; - case Type.IntervalYearMonth: fn = visitor.visitIntervalYearMonth || visitor.visitInterval; break; - case Type.FixedSizeList: fn = visitor.visitFixedSizeList; break; - case Type.Map: fn = visitor.visitMap; break; + case Type.TimestampNanosecond: fn = visitor.visitTimestampNanosecond || visitor.visitTimestamp; break; + case Type.Time: fn = visitor.visitTime; break; + case Type.TimeSecond: fn = visitor.visitTimeSecond || visitor.visitTime; break; + case Type.TimeMillisecond: fn = visitor.visitTimeMillisecond || visitor.visitTime; break; + case Type.TimeMicrosecond: fn = visitor.visitTimeMicrosecond || visitor.visitTime; break; + case Type.TimeNanosecond: fn = visitor.visitTimeNanosecond || visitor.visitTime; break; + case Type.Decimal: fn = visitor.visitDecimal; break; + case Type.List: fn = visitor.visitList; break; + case Type.Struct: fn = visitor.visitStruct; break; + case Type.Union: fn = visitor.visitUnion; break; + case Type.DenseUnion: fn = 
visitor.visitDenseUnion || visitor.visitUnion; break; + case Type.SparseUnion: fn = visitor.visitSparseUnion || visitor.visitUnion; break; + case Type.Dictionary: fn = visitor.visitDictionary; break; + case Type.Interval: fn = visitor.visitInterval; break; + case Type.IntervalDayTime: fn = visitor.visitIntervalDayTime || visitor.visitInterval; break; + case Type.IntervalYearMonth: fn = visitor.visitIntervalYearMonth || visitor.visitInterval; break; + case Type.FixedSizeList: fn = visitor.visitFixedSizeList; break; + case Type.Map: fn = visitor.visitMap; break; } if (typeof fn === 'function') return fn; if (!throwIfNotFound) return () => null; @@ -116,7 +128,7 @@ function inferDType(type: T): Type { case Type.Int: { const { bitWidth, isSigned } = (type as any as Int); switch (bitWidth) { - case 8: return isSigned ? Type.Int8 : Type.Uint8 ; + case 8: return isSigned ? Type.Int8 : Type.Uint8; case 16: return isSigned ? Type.Int16 : Type.Uint16; case 32: return isSigned ? Type.Int32 : Type.Uint32; case 64: return isSigned ? Type.Int64 : Type.Uint64; @@ -125,7 +137,7 @@ function inferDType(type: T): Type { return Type.Int; } case Type.Float: - switch((type as any as Float).precision) { + switch ((type as any as Float).precision) { case Precision.HALF: return Type.Float16; case Precision.SINGLE: return Type.Float32; case Precision.DOUBLE: return Type.Float64; @@ -186,49 +198,49 @@ function inferDType(type: T): Type { } export interface Visitor { - visitNull (node: any, ...args: any[]): any; - visitBool (node: any, ...args: any[]): any; - visitInt (node: any, ...args: any[]): any; - visitInt8? (node: any, ...args: any[]): any; - visitInt16? (node: any, ...args: any[]): any; - visitInt32? (node: any, ...args: any[]): any; - visitInt64? (node: any, ...args: any[]): any; - visitUint8? (node: any, ...args: any[]): any; - visitUint16? (node: any, ...args: any[]): any; - visitUint32? (node: any, ...args: any[]): any; - visitUint64? 
(node: any, ...args: any[]): any; - visitFloat (node: any, ...args: any[]): any; - visitFloat16? (node: any, ...args: any[]): any; - visitFloat32? (node: any, ...args: any[]): any; - visitFloat64? (node: any, ...args: any[]): any; - visitUtf8 (node: any, ...args: any[]): any; - visitBinary (node: any, ...args: any[]): any; - visitFixedSizeBinary (node: any, ...args: any[]): any; - visitDate (node: any, ...args: any[]): any; - visitDateDay? (node: any, ...args: any[]): any; - visitDateMillisecond? (node: any, ...args: any[]): any; - visitTimestamp (node: any, ...args: any[]): any; - visitTimestampSecond? (node: any, ...args: any[]): any; - visitTimestampMillisecond? (node: any, ...args: any[]): any; - visitTimestampMicrosecond? (node: any, ...args: any[]): any; - visitTimestampNanosecond? (node: any, ...args: any[]): any; - visitTime (node: any, ...args: any[]): any; - visitTimeSecond? (node: any, ...args: any[]): any; - visitTimeMillisecond? (node: any, ...args: any[]): any; - visitTimeMicrosecond? (node: any, ...args: any[]): any; - visitTimeNanosecond? (node: any, ...args: any[]): any; - visitDecimal (node: any, ...args: any[]): any; - visitList (node: any, ...args: any[]): any; - visitStruct (node: any, ...args: any[]): any; - visitUnion (node: any, ...args: any[]): any; - visitDenseUnion? (node: any, ...args: any[]): any; - visitSparseUnion? (node: any, ...args: any[]): any; - visitDictionary (node: any, ...args: any[]): any; - visitInterval (node: any, ...args: any[]): any; - visitIntervalDayTime? (node: any, ...args: any[]): any; - visitIntervalYearMonth? 
(node: any, ...args: any[]): any; - visitFixedSizeList (node: any, ...args: any[]): any; - visitMap (node: any, ...args: any[]): any; + visitNull(node: any, ...args: any[]): any; + visitBool(node: any, ...args: any[]): any; + visitInt(node: any, ...args: any[]): any; + visitInt8?(node: any, ...args: any[]): any; + visitInt16?(node: any, ...args: any[]): any; + visitInt32?(node: any, ...args: any[]): any; + visitInt64?(node: any, ...args: any[]): any; + visitUint8?(node: any, ...args: any[]): any; + visitUint16?(node: any, ...args: any[]): any; + visitUint32?(node: any, ...args: any[]): any; + visitUint64?(node: any, ...args: any[]): any; + visitFloat(node: any, ...args: any[]): any; + visitFloat16?(node: any, ...args: any[]): any; + visitFloat32?(node: any, ...args: any[]): any; + visitFloat64?(node: any, ...args: any[]): any; + visitUtf8(node: any, ...args: any[]): any; + visitBinary(node: any, ...args: any[]): any; + visitFixedSizeBinary(node: any, ...args: any[]): any; + visitDate(node: any, ...args: any[]): any; + visitDateDay?(node: any, ...args: any[]): any; + visitDateMillisecond?(node: any, ...args: any[]): any; + visitTimestamp(node: any, ...args: any[]): any; + visitTimestampSecond?(node: any, ...args: any[]): any; + visitTimestampMillisecond?(node: any, ...args: any[]): any; + visitTimestampMicrosecond?(node: any, ...args: any[]): any; + visitTimestampNanosecond?(node: any, ...args: any[]): any; + visitTime(node: any, ...args: any[]): any; + visitTimeSecond?(node: any, ...args: any[]): any; + visitTimeMillisecond?(node: any, ...args: any[]): any; + visitTimeMicrosecond?(node: any, ...args: any[]): any; + visitTimeNanosecond?(node: any, ...args: any[]): any; + visitDecimal(node: any, ...args: any[]): any; + visitList(node: any, ...args: any[]): any; + visitStruct(node: any, ...args: any[]): any; + visitUnion(node: any, ...args: any[]): any; + visitDenseUnion?(node: any, ...args: any[]): any; + visitSparseUnion?(node: any, ...args: any[]): any; + 
visitDictionary(node: any, ...args: any[]): any; + visitInterval(node: any, ...args: any[]): any; + visitIntervalDayTime?(node: any, ...args: any[]): any; + visitIntervalYearMonth?(node: any, ...args: any[]): any; + visitFixedSizeList(node: any, ...args: any[]): any; + visitMap(node: any, ...args: any[]): any; } // Add these here so they're picked up by the externs creator diff --git a/js/src/visitor/builderctor.ts b/js/src/visitor/builderctor.ts index ac35a9874e96b..9ce9ae4d4a797 100644 --- a/js/src/visitor/builderctor.ts +++ b/js/src/visitor/builderctor.ts @@ -15,83 +15,84 @@ // specific language governing permissions and limitations // under the License. -import { Data } from '../data'; -import { Type } from '../enum'; -import { DataType } from '../type'; -import { Visitor } from '../visitor'; -import { VectorType, BuilderCtor } from '../interfaces'; -import { BinaryBuilder } from '../builder/binary'; -import { BoolBuilder } from '../builder/bool'; -import { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from '../builder/date'; -import { DecimalBuilder } from '../builder/decimal'; -import { DictionaryBuilder } from '../builder/dictionary'; -import { FixedSizeBinaryBuilder } from '../builder/fixedsizebinary'; -import { FixedSizeListBuilder } from '../builder/fixedsizelist'; -import { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from '../builder/float'; -import { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from '../builder/interval'; -import { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from '../builder/int'; -import { ListBuilder } from '../builder/list'; -import { MapBuilder } from '../builder/map'; -import { NullBuilder } from '../builder/null'; -import { StructBuilder } from '../builder/struct'; -import { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder 
} from '../builder/timestamp'; -import { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from '../builder/time'; -import { UnionBuilder, DenseUnionBuilder, SparseUnionBuilder } from '../builder/union'; -import { Utf8Builder } from '../builder/utf8'; +import { Data } from '../data.js'; +import { Type } from '../enum.js'; +import { Vector } from '../vector.js'; +import { DataType } from '../type.js'; +import { Visitor } from '../visitor.js'; +import { BuilderCtor } from '../interfaces.js'; +import { BinaryBuilder } from '../builder/binary.js'; +import { BoolBuilder } from '../builder/bool.js'; +import { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from '../builder/date.js'; +import { DecimalBuilder } from '../builder/decimal.js'; +import { DictionaryBuilder } from '../builder/dictionary.js'; +import { FixedSizeBinaryBuilder } from '../builder/fixedsizebinary.js'; +import { FixedSizeListBuilder } from '../builder/fixedsizelist.js'; +import { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from '../builder/float.js'; +import { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from '../builder/interval.js'; +import { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from '../builder/int.js'; +import { ListBuilder } from '../builder/list.js'; +import { MapBuilder } from '../builder/map.js'; +import { NullBuilder } from '../builder/null.js'; +import { StructBuilder } from '../builder/struct.js'; +import { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from '../builder/timestamp.js'; +import { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from '../builder/time.js'; +import { UnionBuilder, DenseUnionBuilder, SparseUnionBuilder } from '../builder/union.js'; +import 
{ Utf8Builder } from '../builder/utf8.js'; /** @ignore */ export interface GetBuilderCtor extends Visitor { visit(type: T): BuilderCtor; visitMany(types: T[]): BuilderCtor[]; getVisitFn(type: T): () => BuilderCtor; - getVisitFn(node: VectorType | Data | T): () => BuilderCtor; + getVisitFn(node: Vector | Data | T): () => BuilderCtor; } /** @ignore */ export class GetBuilderCtor extends Visitor { - public visitNull () { return NullBuilder; } - public visitBool () { return BoolBuilder; } - public visitInt () { return IntBuilder; } - public visitInt8 () { return Int8Builder; } - public visitInt16 () { return Int16Builder; } - public visitInt32 () { return Int32Builder; } - public visitInt64 () { return Int64Builder; } - public visitUint8 () { return Uint8Builder; } - public visitUint16 () { return Uint16Builder; } - public visitUint32 () { return Uint32Builder; } - public visitUint64 () { return Uint64Builder; } - public visitFloat () { return FloatBuilder; } - public visitFloat16 () { return Float16Builder; } - public visitFloat32 () { return Float32Builder; } - public visitFloat64 () { return Float64Builder; } - public visitUtf8 () { return Utf8Builder; } - public visitBinary () { return BinaryBuilder; } - public visitFixedSizeBinary () { return FixedSizeBinaryBuilder; } - public visitDate () { return DateBuilder; } - public visitDateDay () { return DateDayBuilder; } - public visitDateMillisecond () { return DateMillisecondBuilder; } - public visitTimestamp () { return TimestampBuilder; } - public visitTimestampSecond () { return TimestampSecondBuilder; } - public visitTimestampMillisecond () { return TimestampMillisecondBuilder; } - public visitTimestampMicrosecond () { return TimestampMicrosecondBuilder; } - public visitTimestampNanosecond () { return TimestampNanosecondBuilder; } - public visitTime () { return TimeBuilder; } - public visitTimeSecond () { return TimeSecondBuilder; } - public visitTimeMillisecond () { return TimeMillisecondBuilder; } - public 
visitTimeMicrosecond () { return TimeMicrosecondBuilder; } - public visitTimeNanosecond () { return TimeNanosecondBuilder; } - public visitDecimal () { return DecimalBuilder; } - public visitList () { return ListBuilder; } - public visitStruct () { return StructBuilder; } - public visitUnion () { return UnionBuilder; } - public visitDenseUnion () { return DenseUnionBuilder; } - public visitSparseUnion () { return SparseUnionBuilder; } - public visitDictionary () { return DictionaryBuilder; } - public visitInterval () { return IntervalBuilder; } - public visitIntervalDayTime () { return IntervalDayTimeBuilder; } - public visitIntervalYearMonth () { return IntervalYearMonthBuilder; } - public visitFixedSizeList () { return FixedSizeListBuilder; } - public visitMap () { return MapBuilder; } + public visitNull() { return NullBuilder; } + public visitBool() { return BoolBuilder; } + public visitInt() { return IntBuilder; } + public visitInt8() { return Int8Builder; } + public visitInt16() { return Int16Builder; } + public visitInt32() { return Int32Builder; } + public visitInt64() { return Int64Builder; } + public visitUint8() { return Uint8Builder; } + public visitUint16() { return Uint16Builder; } + public visitUint32() { return Uint32Builder; } + public visitUint64() { return Uint64Builder; } + public visitFloat() { return FloatBuilder; } + public visitFloat16() { return Float16Builder; } + public visitFloat32() { return Float32Builder; } + public visitFloat64() { return Float64Builder; } + public visitUtf8() { return Utf8Builder; } + public visitBinary() { return BinaryBuilder; } + public visitFixedSizeBinary() { return FixedSizeBinaryBuilder; } + public visitDate() { return DateBuilder; } + public visitDateDay() { return DateDayBuilder; } + public visitDateMillisecond() { return DateMillisecondBuilder; } + public visitTimestamp() { return TimestampBuilder; } + public visitTimestampSecond() { return TimestampSecondBuilder; } + public visitTimestampMillisecond() { 
return TimestampMillisecondBuilder; } + public visitTimestampMicrosecond() { return TimestampMicrosecondBuilder; } + public visitTimestampNanosecond() { return TimestampNanosecondBuilder; } + public visitTime() { return TimeBuilder; } + public visitTimeSecond() { return TimeSecondBuilder; } + public visitTimeMillisecond() { return TimeMillisecondBuilder; } + public visitTimeMicrosecond() { return TimeMicrosecondBuilder; } + public visitTimeNanosecond() { return TimeNanosecondBuilder; } + public visitDecimal() { return DecimalBuilder; } + public visitList() { return ListBuilder; } + public visitStruct() { return StructBuilder; } + public visitUnion() { return UnionBuilder; } + public visitDenseUnion() { return DenseUnionBuilder; } + public visitSparseUnion() { return SparseUnionBuilder; } + public visitDictionary() { return DictionaryBuilder; } + public visitInterval() { return IntervalBuilder; } + public visitIntervalDayTime() { return IntervalDayTimeBuilder; } + public visitIntervalYearMonth() { return IntervalYearMonthBuilder; } + public visitFixedSizeList() { return FixedSizeListBuilder; } + public visitMap() { return MapBuilder; } } /** @ignore */ diff --git a/js/src/visitor/bytelength.ts b/js/src/visitor/bytelength.ts new file mode 100644 index 0000000000000..103556e608a2a --- /dev/null +++ b/js/src/visitor/bytelength.ts @@ -0,0 +1,136 @@ +/* istanbul ignore file */ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/* eslint-disable unicorn/no-array-callback-reference */ + +import { Data } from '../data.js'; +import { Visitor } from '../visitor.js'; +import { TypeToDataType } from '../interfaces.js'; +import { Type, TimeUnit, UnionMode } from '../enum.js'; +import { + DataType, Dictionary, + Float, Int, Date_, Interval, Time, Timestamp, + Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, + List, FixedSizeList, Map_, Struct, Union, DenseUnion, SparseUnion, +} from '../type.js'; + +/** @ignore */ const sum = (x: number, y: number) => x + y; + +/** @ignore */ +export interface GetByteLengthVisitor extends Visitor { + visit(node: Data, index: number): number; + visitMany(nodes: Data[], index: number[]): number[]; + getVisitFn(node: Data | T): (data: Data, index: number) => number; + getVisitFn(node: T): (data: Data>, index: number) => number; + visitBinary(data: Data, index: number): number; + visitUtf8(data: Data, index: number): number; + visitList(data: Data, index: number): number; + visitDenseUnion(data: Data, index: number): number; + visitSparseUnion(data: Data, index: number): number; + visitFixedSizeList(data: Data, index: number): number; +} + +/** @ignore */ +export class GetByteLengthVisitor extends Visitor { + public visitNull(____: Data, _: number) { return 0; } + public visitInt(data: Data, _: number) { return data.type.bitWidth / 8; } + public visitFloat(data: Data, _: number) { return data.type.ArrayType.BYTES_PER_ELEMENT; } + public visitBool(____: Data, _: number) { return 1 / 8; } + public visitDecimal(____: Data, _: number) { return 16; 
} + public visitDate(data: Data, _: number) { return (data.type.unit + 1) * 4; } + public visitTime(data: Data