Skip to content

Commit

Permalink
init impl
Browse files Browse the repository at this point in the history
  • Loading branch information
lixun910 committed Sep 12, 2023
1 parent 13bce42 commit edb3227
Show file tree
Hide file tree
Showing 10 changed files with 348 additions and 33 deletions.
5 changes: 4 additions & 1 deletion src/constants/src/default-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,8 @@ const GREEN2 = '74, 165, 150';
const RED = '237, 88, 106';
const ORANGE = '231, 110, 58';

export const ARROW_GEO_METADATA_KEY = 'geo';

export const FIELD_TYPE_DISPLAY = {
[ALL_FIELD_TYPES.boolean]: {
label: 'bool',
Expand Down Expand Up @@ -1107,7 +1109,8 @@ export const DATASET_FORMATS = keyMirror({
row: null,
geojson: null,
csv: null,
keplergl: null
keplergl: null,
arrow: null
});

export const MAP_CONTROLS = keyMirror({
Expand Down
2 changes: 1 addition & 1 deletion src/layers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@
"@loaders.gl/wkt": "^3.0.9",
"@luma.gl/constants": "^8.5.10",
"@mapbox/geojson-normalize": "0.0.1",
"@nebula.gl/layers": "1.0.2-alpha.1",
"@nebula.gl/edit-modes": "1.0.2-alpha.1",
"@nebula.gl/layers": "1.0.2-alpha.1",
"@turf/bbox": "^6.0.1",
"@turf/helpers": "^6.1.4",
"@types/geojson": "^7946.0.7",
Expand Down
116 changes: 106 additions & 10 deletions src/layers/src/geojson-layer/geojson-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

import {ListVector} from 'apache-arrow';
import wktParser from 'wellknown';
import normalize from '@mapbox/geojson-normalize';
import bbox from '@turf/bbox';
import {parseSync} from '@loaders.gl/core';
import {WKBLoader} from '@loaders.gl/wkt';
import {binaryToGeometry} from '@loaders.gl/gis';

import {Feature, BBox} from 'geojson';
import {Feature, BBox, MultiPolygon, Position, Polygon} from 'geojson';
import {getSampleData} from '@kepler.gl/utils';

export type GetFeature = (d: any) => Feature;
Expand All @@ -48,16 +49,21 @@ type FeatureTypeMap = {
/* eslint-enable */

export function parseGeoJsonRawFeature(rawFeature: unknown): Feature | null {
if (typeof rawFeature === 'object') {
// Support GeoJson feature as object
// probably need to normalize it as well
const normalized = normalize(rawFeature);
if (!normalized || !Array.isArray(normalized.features)) {
// fail to normalize GeoJson
return null;
}
if (rawFeature && typeof rawFeature === 'object') {
if (typeof rawFeature['type'] === 'string') {
// Support GeoJson feature as object
// probably need to normalize it as well
const normalized = normalize(rawFeature);
if (!normalized || !Array.isArray(normalized.features)) {
// fail to normalize GeoJson
return null;
}

return normalized.features[0];
return normalized.features[0];
} else if (rawFeature['encoding'].startsWith('geoarrow')) {
// Support GeoArrow data
return parseGeometryFromArrow(rawFeature);
}
} else if (typeof rawFeature === 'string') {
return parseGeometryFromString(rawFeature);
} else if (Array.isArray(rawFeature)) {
Expand Down Expand Up @@ -215,3 +221,93 @@ export function getGeojsonFeatureTypes(allFeatures: GeojsonDataMaps): FeatureTyp

return featureTypes;
}

/**
 * Convert one GeoArrow polygon (a list of rings, each ring a list of coordinates)
 * into GeoJSON polygon coordinates.
 *
 * A missing polygon yields an empty coordinate list and a missing ring yields an
 * empty ring, so the output keeps one entry per input ring.
 */
function arrowPolygonToCoordinates(arrowPolygon: ListVector | null | undefined): Position[][] {
  const polygon: Position[][] = [];
  for (let i = 0; arrowPolygon && i < arrowPolygon.length; i++) {
    const arrowRing = arrowPolygon.get(i);
    const ring: Position[] = [];
    for (let j = 0; arrowRing && j < arrowRing.length; j++) {
      // each coordinate is an iterable of numbers; copy it into a plain array
      const coord: Position = Array.from(arrowRing.get(j));
      ring.push(coord);
    }
    polygon.push(ring);
  }
  return polygon;
}

/**
 * Parse geometry from arrow data that is returned from processArrowData().
 *
 * Only the 'geoarrow.multipolygon' and 'geoarrow.polygon' encodings are converted.
 * The other recognised GeoArrow encodings (multipoint, point, multilinestring,
 * linestring, wkb, wkt) are not implemented yet and yield null; an unrecognised
 * encoding additionally logs an error.
 *
 * @param rawData the raw geometry data returned from processArrowData, which is an
 *   object with two properties: `encoding` (the GeoArrow encoding name) and `data`
 *   (the nested list value of one geometry cell)
 * @see processArrowData
 * @returns a GeoJSON Feature with empty `properties`, or null when `encoding` or
 *   `data` is missing or the encoding is not converted
 */
export function parseGeometryFromArrow(rawData: object): Feature | null {
  const encoding = rawData['encoding'];
  const data = rawData['data'];
  if (!encoding || !data) return null;

  switch (encoding) {
    case 'geoarrow.multipolygon': {
      // convert to geojson MultiPolygon
      const arrowMultiPolygon: ListVector = data;
      const multiPolygon: Position[][][] = [];
      for (let m = 0; m < arrowMultiPolygon.length; m++) {
        multiPolygon.push(arrowPolygonToCoordinates(arrowMultiPolygon.get(m)));
      }
      const geometry: MultiPolygon = {
        type: 'MultiPolygon',
        coordinates: multiPolygon
      };
      return {
        type: 'Feature',
        geometry,
        properties: {}
      };
    }
    case 'geoarrow.polygon': {
      // convert to geojson Polygon; every ring must end up in the coordinates
      const geometry: Polygon = {
        type: 'Polygon',
        coordinates: arrowPolygonToCoordinates(data)
      };
      return {
        type: 'Feature',
        geometry,
        properties: {}
      };
    }
    case 'geoarrow.multipoint':
    case 'geoarrow.point':
    case 'geoarrow.multilinestring':
    case 'geoarrow.linestring':
    case 'geoarrow.wkb':
    case 'geoarrow.wkt':
      // recognised encodings whose conversion is not implemented yet
      break;
    default:
      // encoding is not supported, skip
      console.error('GeoArrow encoding not supported');
  }
  return null;
}
1 change: 1 addition & 0 deletions src/processors/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"@kepler.gl/schemas": "3.0.0-alpha.0",
"@kepler.gl/types": "3.0.0-alpha.0",
"@kepler.gl/utils": "3.0.0-alpha.0",
"@loaders.gl/arrow": "^3.0.9",
"@loaders.gl/core": "^3.0.9",
"@loaders.gl/csv": "^3.0.9",
"@loaders.gl/json": "^3.0.9",
Expand Down
97 changes: 89 additions & 8 deletions src/processors/src/data-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,30 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

import {Table as ApacheArrowTable, Field as ArrowField, ListVector} from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC, ARROW_GEO_METADATA_KEY} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
arrowDataTypeToAnalyzerDataType,
arrowDataTypeToFieldType,
notNullorUndefined,
hasOwnProperty,
isPlainObject,
analyzerTypeToFieldType,
getSampleForTypeAnalyze,
getFieldsFromData,
toArray
toArray,
DataContainerInterface
} from '@kepler.gl/utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';
import {ArrowLoader} from '@loaders.gl/arrow';
import { load } from '@loaders.gl/core';

import {ProcessFileDataContent} from './file-handler';

// if any of these values occurs in csv, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
Expand Down Expand Up @@ -388,12 +397,84 @@ export function processKeplerglDataset(
return Array.isArray(rawData) ? results : results[0];
}

export const DATASET_HANDLERS: {
row: typeof processRowObject;
geojson: typeof processGeojson;
csv: typeof processCsvData;
keplergl: typeof processKeplerglDataset;
} = {
/**
 * Build an Arrow table from columnar file content and convert it to a dataset.
 *
 * The `progress`, `metadata`, `fileName`, `length` and `data` properties are
 * stripped from `content`; every remaining property is handed to
 * `ApacheArrowTable.new` (presumably one entry per column — confirm against the
 * loader that produces `content`).
 *
 * @param content columnar content produced by the file handler
 * @returns whatever `processArrowTable` returns for the constructed table
 */
export function processArrowColumnarData(content: ProcessFileDataContent): ProcessorResult | null {
  // the named properties are destructured only to keep them out of the rest object
  const {progress, metadata, fileName, length, data, ...columns} = content;
  return processArrowTable(ApacheArrowTable.new(columns));
}

/**
 * Parse an arrow table with (optional) geometry columns and return a dataset.
 *
 * Geometry columns are discovered through the schema metadata entry stored under
 * ARROW_GEO_METADATA_KEY: a JSON string whose `columns` object maps a column name
 * to a descriptor carrying an `encoding`. Cells of geometry columns are wrapped as
 * `{encoding, data}`; all other cells are passed through unchanged.
 *
 * Rows are assembled in schema field order, so the value at position `i` of every
 * row always belongs to the field whose `fieldIdx` is `i`.
 *
 * @param arrowTable the arrow table to parse
 * @returns dataset containing `fields` and `rows`, or null when no table is given
 *   or the geo metadata declares an unsupported schema version
 */
export function processArrowTable(arrowTable: ApacheArrowTable): ProcessorResult | null {
  if (!arrowTable) {
    return null;
  }
  const metadata = arrowTable.schema.metadata;
  // geometry column descriptors keyed by column name (empty when there is no geo metadata)
  let geometryColumns: {[name: string]: {encoding: string}} = {};
  const geoMetaString = metadata.get(ARROW_GEO_METADATA_KEY);
  if (geoMetaString !== undefined) {
    // load geo metadata
    // parse metadata string to JSON object
    const geoMeta = JSON.parse(geoMetaString);
    // check if schema_version in geoMeta equals to '0.1.0'
    const SCHEMA_VERSION = '0.1.0';
    if (geoMeta.schema_version !== SCHEMA_VERSION) {
      console.error('Schema version not supported');
      return null;
    }
    // get all geometry columns; tolerate metadata without a `columns` entry
    geometryColumns = geoMeta.columns || {};
  }

  // Own-property lookup only: a plain truthiness probe would classify columns named
  // like inherited members (e.g. "constructor", "toString") as geometry columns.
  const getGeometryColumn = (name: string): {encoding: string} | undefined =>
    Object.prototype.hasOwnProperty.call(geometryColumns, name)
      ? geometryColumns[name] || undefined
      : undefined;

  const fields: Field[] = [];

  // Column values and geometry descriptors are kept in arrays indexed by schema
  // position. Keying them by column name and iterating Object.keys() would move
  // integer-like names (e.g. "2020") to the front and break the fieldIdx alignment.
  const columnValues: ArrayLike<unknown>[] = [];
  const columnGeometries: ({encoding: string} | undefined)[] = [];
  arrowTable.schema.fields.forEach((field: ArrowField, index: number) => {
    const geometryColumn = getGeometryColumn(field.name);
    columnValues.push(arrowTable.getColumn(field.name).toArray());
    columnGeometries.push(geometryColumn);
    fields.push({
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: geometryColumn ? ALL_FIELD_TYPES.geojson : arrowDataTypeToFieldType(field.type),
      analyzerType: geometryColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      }
    });
  });

  // convert columnar to row format table
  const rowFormatTable: any[][] = [];
  const tableRowsCount = arrowTable.length;
  const columnCount = columnValues.length;
  for (let rowIndex = 0; rowIndex < tableRowsCount; rowIndex++) {
    const tableItem: unknown[] = [];
    for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      const cellValue = columnValues[columnIndex][rowIndex];
      const geometryColumn = columnGeometries[columnIndex];
      tableItem.push(
        geometryColumn
          ? {
              encoding: geometryColumn.encoding,
              data: cellValue
            }
          : cellValue
      );
    }
    rowFormatTable.push(tableItem);
  }

  return {fields, rows: rowFormatTable};
}

export const DATASET_HANDLERS = {
[DATASET_FORMATS.row]: processRowObject,
[DATASET_FORMATS.geojson]: processGeojson,
[DATASET_FORMATS.csv]: processCsvData,
Expand Down
Loading

0 comments on commit edb3227

Please sign in to comment.