From d9c164bbdf33402693903496ea3a7c21b920059e Mon Sep 17 00:00:00 2001 From: Xun Li Date: Mon, 18 Sep 2023 09:51:34 -0700 Subject: [PATCH] [Feat] Support WKB geometry column in CSV (#2312) --- .../src/side-panel/add-by-dataset-button.tsx | 3 +- src/layers/package.json | 5 +-- src/layers/src/geojson-layer/geojson-utils.ts | 19 ++++++++++-- src/processors/src/data-processor.ts | 2 +- src/utils/src/dataset-utils.ts | 29 ++++++++++++++++- test/node/utils/data-processor-test.js | 31 ++++++++++++++++--- test/node/utils/dataset-utils-test.js | 27 ++++++++++++++++ tsconfig.json | 2 +- yarn.lock | 28 ++++++++++++++++- 9 files changed, 130 insertions(+), 16 deletions(-) diff --git a/src/components/src/side-panel/add-by-dataset-button.tsx b/src/components/src/side-panel/add-by-dataset-button.tsx index 9bc09b1c4a..bf339861d3 100644 --- a/src/components/src/side-panel/add-by-dataset-button.tsx +++ b/src/components/src/side-panel/add-by-dataset-button.tsx @@ -25,8 +25,7 @@ import {Datasets} from '@kepler.gl/table'; import Tippy from '@tippyjs/react'; import {Add} from '../common/icons'; -import {Button} from '../common/styled-components'; -import {DatasetSquare} from '../..'; +import {Button, DatasetSquare} from '../common/styled-components'; import Typeahead from '../common/item-selector/typeahead'; import Accessor from '../common/item-selector/accessor'; import {useIntl} from 'react-intl'; diff --git a/src/layers/package.json b/src/layers/package.json index 48ac2a9630..d399ad5ef6 100644 --- a/src/layers/package.json +++ b/src/layers/package.json @@ -43,7 +43,9 @@ "@kepler.gl/types": "3.0.0-alpha.0", "@kepler.gl/utils": "3.0.0-alpha.0", "@loaders.gl/core": "^3.0.9", + "@loaders.gl/gis": "^3.0.9", "@loaders.gl/gltf": "^3.0.9", + "@loaders.gl/wkt": "^3.0.9", "@luma.gl/constants": "^8.5.10", "@mapbox/geojson-normalize": "0.0.1", "@nebula.gl/layers": "1.0.2-alpha.1", @@ -67,8 +69,7 @@ "s2-geometry": "^1.2.10", "styled-components": "^4.1.3", "type-analyzer": "0.4.0", - "viewport-mercator-project": "^6.0.0", - "wellknown": "^0.5.0" + "viewport-mercator-project": "^6.0.0" }, "nyc": { "sourceMap": false, diff --git a/src/layers/src/geojson-layer/geojson-utils.ts b/src/layers/src/geojson-layer/geojson-utils.ts index fcecb50156..3a4ec3ba32 100644 --- a/src/layers/src/geojson-layer/geojson-utils.ts +++ b/src/layers/src/geojson-layer/geojson-utils.ts @@ -18,9 +18,11 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -import wktParser from 'wellknown'; import normalize from '@mapbox/geojson-normalize'; import bbox from '@turf/bbox'; +import {parseSync} from '@loaders.gl/core'; +import {WKBLoader, WKTLoader} from '@loaders.gl/wkt'; +import {binaryToGeometry} from '@loaders.gl/gis'; import {Feature, BBox} from 'geojson'; import {getSampleData} from '@kepler.gl/utils'; @@ -129,10 +131,21 @@ export function parseGeometryFromString(geoString: string): Feature | null { // keep trying to parse } - // try parse as wkt + // try parse as wkt using loaders.gl WKTLoader if (!parsedGeo) { try { - parsedGeo = wktParser(geoString); + parsedGeo = parseSync(geoString, WKTLoader); + } catch (e) { + return null; + } + } + + // try parse as wkb using loaders.gl WKBLoader + if (!parsedGeo) { + try { + const buffer = Buffer.from(geoString, 'hex'); + const binaryGeo = parseSync(buffer, WKBLoader); + parsedGeo = binaryToGeometry(binaryGeo); } catch (e) { return null; } diff --git a/src/processors/src/data-processor.ts b/src/processors/src/data-processor.ts index 3348834123..e5d7609c33 100644 --- a/src/processors/src/data-processor.ts +++ b/src/processors/src/data-processor.ts @@ -105,7 +105,7 @@ export const PARSE_FIELD_VALUE_FROM_STRING = { * options: {centerMap: true, readOnly: true} * })); */ -export function processCsvData(rawData: unknown[][], header?: string[]): ProcessorResult { +export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult { let rows: unknown[][] | undefined; let headerRow: string[] | undefined; diff --git a/src/utils/src/dataset-utils.ts b/src/utils/src/dataset-utils.ts index 596ac32d7c..53e8d94810 100644 --- a/src/utils/src/dataset-utils.ts +++ b/src/utils/src/dataset-utils.ts @@ -388,6 +388,27 @@ export function getSampleForTypeAnalyze({ return sample; } +/** + * Check if string is a valid Well-known binary (WKB) in HEX format + * https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry + * + * @param str input string + * @returns true if string is a valid WKB in HEX format + */ +export function isHexWkb(str: string | null): boolean { + if (!str) return false; + // check if the length of the string is even and is at least 10 characters long + if (str.length < 10 || str.length % 2 !== 0) { + return false; + } + // check if first two characters are 00 or 01 + if (!str.startsWith('00') && !str.startsWith('01')) { + return false; + } + // check if the rest of the string is a valid hex + return /^[0-9a-fA-F]+$/.test(str.slice(2)); +} + /** * Analyze field types from data in `string` format, e.g. uploaded csv. * Assign `type`, `fieldIdx` and `format` (timestamp only) to each field @@ -446,7 +467,13 @@ export function getFieldsFromData(data: RowData, fieldOrder: string[]): Field[] const name = fieldByIndex[index]; const fieldMeta = metadata.find(m => m.key === field); - const {type, format} = fieldMeta || {}; + let type = fieldMeta.type; + const format = fieldMeta.format; + + // check if string is hex wkb + if (type === AnalyzerDATA_TYPES.STRING) { + type = data.some(d => isHexWkb(d[name])) ? AnalyzerDATA_TYPES.GEOMETRY : type; + } return { name, diff --git a/test/node/utils/data-processor-test.js b/test/node/utils/data-processor-test.js index 0c843910a0..86ac98247d 100644 --- a/test/node/utils/data-processor-test.js +++ b/test/node/utils/data-processor-test.js @@ -72,7 +72,10 @@ test('Processor -> getFieldsFromData', t => { value: '4', surge: '1.2', isTrip: 'true', - zeroOnes: '0' + zeroOnes: '0', + geojson: '{"type":"Point","coordinates":[-122.4194155,37.7749295]}', + wkt: 'POINT (-122.4194155 37.7749295)', + wkb: '0101000020E6100000E17A14AE47D25EC0F6F3F6F2F7F94040' }, { time: '2016-09-17 00:30:08', @@ -81,7 +84,12 @@ test('Processor -> getFieldsFromData', t => { value: '3', surge: null, isTrip: 'false', - zeroOnes: '1' + zeroOnes: '1', + geojson: + '{"type":"Polygon","coordinates":[[[-122.4194155,37.7749295],[-122.4194155,37.7749295],[-122.4194155,37.7749295]]]}', + wkt: 'POLYGON ((-122.4194155 37.7749295, -122.4194155 37.7749295, -122.4194155 37.7749295))', + wkb: + '0103000020E61000000100000005000000E17A14AE47D25EC0F6F3F6F2F7F940400000000E17A14AE47D25EC0F6F3F6F2F7F940400000000E17A14AE47D25EC0F6F3F6F2F7F94040' }, { time: null, @@ -90,7 +98,12 @@ test('Processor -> getFieldsFromData', t => { value: '2', surge: '1.3', isTrip: null, - zeroOnes: '1' + zeroOnes: '1', + geojson: + '{"type":"LineString","coordinates":[[-122.4194155,37.7749295],[-122.4194155,37.7749295]]}', + wkt: 'LINESTRING (-122.4194155 37.7749295, -122.4194155 37.7749295)', + wkb: + '0102000020E610000002000000E17A14AE47D25EC0F6F3F6F2F7F94040E17A14AE47D25EC0F6F3F6F2F7F94040' }, { time: null, @@ -99,7 +112,12 @@ test('Processor -> getFieldsFromData', t => { value: '0', surge: '1.4', isTrip: null, - zeroOnes: '0' + zeroOnes: '0', + geojson: + '{"type":"MultiPoint","coordinates":[[-122.4194155,37.7749295],[-122.4194155,37.7749295]]}', + wkt: 'MULTIPOINT (-122.4194155 37.7749295, -122.4194155 37.7749295)', + wkb: + '0104000020E6100000020000000101000000E17A14AE47D25EC0F6F3F6F2F7F94040101000000E17A14AE47D25EC0F6F3F6F2F7F94040' } ]; @@ -112,7 +130,10 @@ test('Processor -> getFieldsFromData', t => { 'integer', 'real', 'boolean', - 'integer' + 'integer', + 'geojson', + 'geojson', + 'geojson' ]; fields.forEach((f, i) => diff --git a/test/node/utils/dataset-utils-test.js b/test/node/utils/dataset-utils-test.js index d1ba17836d..15278e191c 100644 --- a/test/node/utils/dataset-utils-test.js +++ b/test/node/utils/dataset-utils-test.js @@ -79,3 +79,30 @@ test('datasetUtils.findDefaultColorField', t => { } t.end(); }); + +test('datasetUtils.isHexWkb', t => { + t.notOk(isHexWkb(''), 'empty string is not a valid hex wkb'); + + t.notOk(isHexWkb(null), 'null is not a valid hex wkb'); + + const countyFIPS = '06075'; + t.notOk(isHexWkb(countyFIPS), 'FIPS code should not be a valid hex wkb'); + + const h3Code = '8a2a1072b59ffff'; + t.notOk(isHexWkb(h3Code), 'H3 code should not be a valid hex wkb'); + + const randomHexStr = '8a2a1072b59ffff'; + t.notOk(isHexWkb(randomHexStr), 'A random hex string should not be a valid hex wkb'); + + const validWkt = '0101000000000000000000f03f0000000000000040'; + t.ok(isHexWkb(validWkt), 'A valid hex wkb should be valid'); + + const validEWkt = '0101000020e6100000000000000000f03f0000000000000040'; + t.ok(isHexWkb(validEWkt), 'A valid hex ewkb should be valid'); + + const validWktNDR = '00000000013ff0000000000000400000000000000040'; + t.ok(isHexWkb(validWktNDR), 'A valid hex wkb in NDR should be valid'); + + const validEWktNDR = '0020000001000013ff0000000000400000000000000040'; + t.ok(isHexWkb(validEWktNDR), 'A valid hex ewkb in NDR should be valid'); +}); diff --git a/tsconfig.json b/tsconfig.json index deb467a201..91a7b87c47 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -21,7 +21,7 @@ "strict": true, "resolveJsonModule": true, "isolatedModules": true, - "baseUrl": "./src", + "baseUrl": ".", "paths": { "*": ["*"], // Map all modules to their source diff --git a/yarn.lock b/yarn.lock index 054119565c..be5a53ea47 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1622,6 +1622,17 @@ "@math.gl/polygon" "^3.5.1" pbf "^3.2.1" +"@loaders.gl/gis@^3.0.9": + version "3.4.14" + resolved "https://registry.yarnpkg.com/@loaders.gl/gis/-/gis-3.4.14.tgz#a9b3eed45e2a4465a754e3404061222c51b1334a" + integrity sha512-5cmhIwioPpSkfNzFRM3PbFDecjpYIhtEOFbryu3rE37npKHLTD2tF4ocQxUPB+QVED6GLwWBdzJIs64UWGrqjw== + dependencies: + "@loaders.gl/loader-utils" "3.4.14" + "@loaders.gl/schema" "3.4.14" + "@mapbox/vector-tile" "^1.3.1" + "@math.gl/polygon" "^3.5.1" + pbf "^3.2.1" + "@loaders.gl/gltf@3.2.7", "@loaders.gl/gltf@^3.0.9", "@loaders.gl/gltf@^3.2.5": version "3.0.9" resolved "https://registry.yarnpkg.com/@loaders.gl/gltf/-/gltf-3.0.9.tgz#b9a036080e39bec59f065bf2e3699ef49d75a9cd" @@ -1655,7 +1666,7 @@ "@loaders.gl/loader-utils" "3.0.9" "@loaders.gl/schema" "3.0.9" -"@loaders.gl/loader-utils@3.0.9", "@loaders.gl/loader-utils@3.2.7", "@loaders.gl/loader-utils@^2.1.3", "@loaders.gl/loader-utils@^3.0.9", "@loaders.gl/loader-utils@^3.2.5": +"@loaders.gl/loader-utils@3.0.9", "@loaders.gl/loader-utils@3.2.7", "@loaders.gl/loader-utils@3.4.14", "@loaders.gl/loader-utils@^2.1.3", "@loaders.gl/loader-utils@^3.0.9", "@loaders.gl/loader-utils@^3.2.5": version "3.0.9" resolved "https://registry.yarnpkg.com/@loaders.gl/loader-utils/-/loader-utils-3.0.9.tgz#3a0e574510bf89d77fa5c3d3508d41f9c8778450" integrity sha512-DLQWYklEDcKWb6LGtzNUQqlDaHXUVAB/uA5a7bzyXbSswoC6jqwSOxSdJ42UsSHhvHRj7l6HOtNUICJ7q+IMiQ== @@ -1715,6 +1726,13 @@ "@types/geojson" "^7946.0.7" apache-arrow "^4.0.0" +"@loaders.gl/schema@3.4.14": + version "3.4.14" + resolved "https://registry.yarnpkg.com/@loaders.gl/schema/-/schema-3.4.14.tgz#6f145065a2abaf402aa419cfa25ec7f1fdeed487" + integrity sha512-r6BEDfUvbvzgUnh/MtkR5RzrkIwo1x1jtPFRTSJVsIZO7arXXlu3blffuv5ppEkKpNZ1Xzd9WtHp/JIkuctsmw== + dependencies: + "@types/geojson" "^7946.0.7" + "@loaders.gl/shapefile@^3.0.9": version "3.0.9" resolved "https://registry.yarnpkg.com/@loaders.gl/shapefile/-/shapefile-3.0.9.tgz#7a9ad7040d6d574d2eca019b1c153708c73dc049" @@ -1756,6 +1774,14 @@ "@loaders.gl/loader-utils" "^2.1.3" gifshot "^0.4.5" +"@loaders.gl/wkt@^3.0.9": + version "3.4.14" + resolved "https://registry.yarnpkg.com/@loaders.gl/wkt/-/wkt-3.4.14.tgz#1d3b474cf330e14bdd39e2cd829adf8ee27f11b1" + integrity sha512-2Epq+2P7uRx3BwAhmx7MIeaX5rQv/ooYdVh3q3bs2M/xKQ6yPXhx+He+3f8oWxWmWEjL1DnRrfkiGms2vet+cA== + dependencies: + "@loaders.gl/loader-utils" "3.4.14" + "@loaders.gl/schema" "3.4.14" + "@loaders.gl/worker-utils@3.0.9": version "3.0.9" resolved "https://registry.yarnpkg.com/@loaders.gl/worker-utils/-/worker-utils-3.0.9.tgz#7c8f0d259f1b6ed0ba3d65540a116ec149b053d8"