Commit: WIP

underbluewaters committed Oct 25, 2024
1 parent c03e888 commit 94f983d
Showing 6 changed files with 7,688 additions and 7,189 deletions.
194 changes: 194 additions & 0 deletions packages/h3-filter-ingest/add-geohashes.ts
@@ -0,0 +1,194 @@
// add-geohashes.ts
import * as DuckDB from "duckdb";
// @ts-ignore
import * as tileCover from "@mapbox/tile-cover";
import * as h3 from "h3-js";
// @ts-ignore
import yargs from "yargs";
import * as cliProgress from "cli-progress";
import { stops, Stop } from "./src/stops";
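
// Usage (assuming ts-node, as used elsewhere in this package), e.g.:
//   npx ts-node add-geohashes.ts --db ./output/crdss.duckdb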

const BATCH_SIZE = 5000; // Define the batch size for processing
const zoomLevel = 7;
const limits = { min_zoom: zoomLevel, max_zoom: zoomLevel };
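
// tile-cover's "indexes" are quadkey strings identifying the XYZ tiles that
// cover a geometry; min_zoom === max_zoom pins the cover to a single zoom
// level. These quadkeys are what get stored in the geohash column below.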

// Define the CLI options
const argv = yargs(process.argv.slice(2))
  .option("db", {
    alias: "d",
    description: "Path to the DuckDB database file",
    type: "string",
    demandOption: true,
  })
  .help()
  .alias("help", "h").argv;

const dbPath: string = argv.db;

// Connect to DuckDB
const db = new DuckDB.Database(dbPath);
const connection = db.connect();

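// Shape of a row in the geohashes table (declared for reference; not
// currently referenced elsewhere in this script).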
interface GeohashEntry {
  geohash: string;
  id: string;
  resolution: number;
  zoom: number;
}

// Process one batch of cells; returns the number of rows processed, or
// false when there are no more rows (or on error).
async function processBatch(
  stop: Stop,
  offset: number
): Promise<number | false> {
  try {
    // Query a batch of distinct H3 ids and their cell polygons as GeoJSON
    const result: { id: string; geojson: string }[] = await all(
      `
      with ids as (
        SELECT
          distinct(h3_h3_to_string(r${stop.h3Resolution}_id)) as id
        FROM
          cells
        order by id LIMIT ${BATCH_SIZE} OFFSET ${offset}
      )
      select
        id,
        st_asgeojson(h3_id_to_simple_polygon(id)) as geojson
      from ids`
    );

    if (result.length === 0) {
      return false; // No more rows to process
    }

    const valuesStatements: string[] = [];
    // Compute the covering quadkeys for each cell polygon in the batch
    for (const row of result) {
      const geohashes: string[] = tileCover.indexes(
        JSON.parse(row.geojson),
        limits
      );

      geohashes.forEach((geohash) => {
        valuesStatements.push(
          `(${stop.h3Resolution}, h3_string_to_h3('${row.id}'), '${geohash}', ${stop.zoomLevel})`
        );
      });
    }

    // Insert the whole batch with a single multi-row statement
    await run(
      `insert into geohashes (resolution, id, geohash, zoom) values ${valuesStatements.join(
        ", "
      )}`,
      []
    );
    return result.length;
  } catch (err) {
    console.error("Error processing batch:", err);
    return false;
  }
}

// Function to incrementally process all rows
async function processAllRows(stop: Stop): Promise<void> {
  try {
    // Get the total count of rows to process
    const countResult: { count: number }[] = await all(
      `SELECT count(distinct(h3_h3_to_string(r${stop.h3Resolution}_id)))::int AS count FROM cells`
    );
    const totalRows = countResult[0].count;

    // Initialize the progress bar
    const progressBar = new cliProgress.SingleBar(
      {},
      cliProgress.Presets.shades_classic
    );
    progressBar.start(totalRows, 0);

    let offset = 0;
    let moreRows = true;

    await run("begin transaction");
    // Process batches until there are no more rows
    while (moreRows) {
      const processedCount = await processBatch(stop, offset);
      if (processedCount === false) {
        moreRows = false;
      } else {
        offset += BATCH_SIZE; // Move to the next batch
        progressBar.increment(processedCount);
      }
    }
    await run("commit");

    progressBar.stop();
    console.log("Processing complete.");
  } catch (err) {
    console.error("Error processing all rows:", err);
  }
}

async function prepare() {
  await run(`load h3`);
  await run(`load spatial`);
  // Recreate the target table from scratch
  await run(`drop table if exists geohashes`);
  await run(`
    CREATE TABLE geohashes (
      geohash varchar not null,
      id uint64 not null,
      resolution int not null,
      zoom int not null
    )
  `);
}

(async () => {
  await prepare();
  // Note: reverse() mutates the imported stops array in place
  for (const stop of stops.reverse()) {
    await processAllRows(stop);
  }
  connection.close();
  process.exit();
})();

// Promise wrapper around DuckDB's callback-based connection.all()
function all<T>(query: string, values: any[] = []): Promise<T[]> {
  return new Promise((resolve, reject) => {
    connection.all(query, ...values, (err: any, data: any) => {
      if (err) {
        reject(err);
      } else if (data) {
        resolve(data as T[]);
      } else {
        reject(new Error("No data returned from query"));
      }
    });
  });
}

// Promise wrapper around DuckDB's callback-based connection.run()
function run(query: string, values: any[] = []): Promise<void> {
  return new Promise((resolve, reject) => {
    connection.run(query, ...values, (err: any, _data: any) => {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
}
17 changes: 7 additions & 10 deletions packages/h3-filter-ingest/build-cell-pmtiles.ts
@@ -9,8 +9,7 @@ import { execSync } from "node:child_process";
 const MIN_ZOOM = 0;
 
 const usage = `
-npx ts-node build-cell-pmtiles.ts <path-to-cells.csv> <path-to-output.pmtiles>
-`;
+npx ts-node build-cell-pmtiles.ts <path-to-cells.csv>`;
 
 const filePath = process.argv[2];
 if (!filePath) {
@@ -19,13 +18,6 @@ if (!filePath) {
   process.exit(1);
 }
 
-const outputPath = process.argv[3];
-if (!outputPath) {
-  console.error("Missing path to output pmtiles");
-  console.error(usage);
-  process.exit(1);
-}
-
 const MIN_RESOLUTION = 6;
 
 (async () => {
@@ -61,6 +53,7 @@ const MIN_RESOLUTION = 6;
       gdal.wkbPolygon
     );
     layer.fields.add(new gdal.FieldDefn("id", gdal.OFTString));
+    layer.fields.add(new gdal.FieldDefn("resolution", gdal.OFTReal));
     layer.fields.add(new gdal.FieldDefn("r0_id", gdal.OFTString));
     layer.fields.add(new gdal.FieldDefn("r1_id", gdal.OFTString));
     layer.fields.add(new gdal.FieldDefn("r2_id", gdal.OFTString));
@@ -72,6 +65,7 @@ const MIN_RESOLUTION = 6;
layer.fields.add(new gdal.FieldDefn("r8_id", gdal.OFTString));
layer.fields.add(new gdal.FieldDefn("r9_id", gdal.OFTString));
layer.fields.add(new gdal.FieldDefn("r10_id", gdal.OFTString));
layer.fields.add(new gdal.FieldDefn("r11_id", gdal.OFTString));
const progressBar = new cliProgress.SingleBar(
{
format: `cells-${stop.h3Resolution}.fgb | {bar} | {percentage}% | {eta}s || {value}/{total} cells processed`,
@@ -92,9 +86,12 @@ const MIN_RESOLUTION = 6;
         })
       );
       feature.fields.set("id", id);
-      for (const r of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) {
+      feature.fields.set("resolution", stop.h3Resolution);
+      for (const r of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) {
         if (r < stop.h3Resolution) {
           feature.fields.set(`r${r}_id`, h3.cellToParent(id, r));
+        } else if (r === stop.h3Resolution) {
+          feature.fields.set(`r${r}_id`, id);
         }
       }
       parents.add(parent_id);
27 changes: 27 additions & 0 deletions packages/h3-filter-ingest/build-db.ts
@@ -49,12 +49,35 @@ if (engine === "duckdb") {
To do so, run the following from the duckdb cli:
$ duckdb ./output/crdss.duckdb
load h3;
load spatial;
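
-- Note: h3_cells_to_multi_polygon_wkt emits rings that do not repeat their
-- first vertex, so close_polygon_wkt re-closes the ring before the WKT is
-- handed to st_geomfromtext.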
CREATE or replace macro close_polygon_wkt(geom) AS (
  -- Extract the coordinates part of the WKT
  CASE
    WHEN geom LIKE 'POLYGON%' THEN
      'POLYGON((' ||
        TRIM(BOTH '()' FROM SPLIT_PART(geom, '((', 2)) || ', ' ||
        TRIM(SPLIT_PART(SPLIT_PART(geom, '((', 2), ',', 1)) || '))'
    ELSE geom
  END
);
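-- e.g. close_polygon_wkt('POLYGON((0 0, 1 0, 1 1))')
--        -> 'POLYGON((0 0, 1 0, 1 1, 0 0))'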
CREATE or REPLACE MACRO polygon_from_multipolygon_wkt(wkt) as (
  st_astext((unnest(st_dump(st_geomfromtext(wkt)))).geom)
);
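-- h3_id_to_simple_polygon unwraps a single cell's MULTIPOLYGON WKT into one
-- POLYGON, closes its ring, and parses it back into a geometry value.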
CREATE or REPLACE MACRO h3_id_to_simple_polygon(id) as (
  st_geomfromtext(close_polygon_wkt(polygon_from_multipolygon_wkt(h3_cells_to_multi_polygon_wkt(array_value(id)))))
);
CREATE TABLE temp1 AS
SELECT * FROM read_csv('${cellsPath}',
  header = true,
  null_padding = true
);
CREATE TABLE cells as
select
  h3_string_to_h3(id) as r11_id,
@@ -64,8 +87,12 @@ CREATE TABLE cells as
  h3_cell_to_parent(h3_string_to_h3(id), 7) as r7_id,
  h3_cell_to_parent(h3_string_to_h3(id), 6) as r6_id,
  h3_cell_to_parent(h3_string_to_h3(id), 5) as r5_id,
  h3_cell_to_parent(h3_string_to_h3(id), 4) as r4_id,
  h3_id_to_simple_polygon(id) as geom,
  *
from temp1;
CREATE INDEX cells_geom ON cells USING RTREE (geom);
DROP TABLE temp1;
`);
