From 58cb59507ef1af8dccfaa30704f506ad02130608 Mon Sep 17 00:00:00 2001
From: spwoodcock
Date: Fri, 30 Aug 2024 18:56:17 +0100
Subject: [PATCH] feat: start new importer and updater services using osmosis / osmium

---
 docker-compose.yml  |  12 ++++++
 importer/Dockerfile |  16 +++++++
 importer/README.md  |   6 ++-
 importer/import.sh  | 103 ++++++++++++++++++++++++++++++++++++++++++++
 updater/README.md   |   6 +++
 5 files changed, 141 insertions(+), 2 deletions(-)
 create mode 100644 importer/Dockerfile
 create mode 100644 importer/import.sh
 create mode 100644 updater/README.md

diff --git a/docker-compose.yml b/docker-compose.yml
index 9bb6970..6cf14fa 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -152,6 +152,18 @@ services:
       --map-nodes 10000000 --map-area 10 && \
       tail --pid=\$(cat /tmp/cgimap.pid) -f /dev/null"
 
+  importer:
+    image: "ghcr.io/hotosm/osm-sandbox/importer:latest"
+    build:
+      context: importer
+    depends_on:
+      osm:
+        condition: service_healthy
+    networks:
+      - osm-net
+    restart: unless-stopped
+    entrypoint: sleep infinity
+
   osm-db:
     image: docker.io/postgres:14
     environment:
diff --git a/importer/Dockerfile b/importer/Dockerfile
new file mode 100644
index 0000000..2fed009
--- /dev/null
+++ b/importer/Dockerfile
@@ -0,0 +1,16 @@
+# Tooling image for the importer service (osmium-tool, osmosis and helper CLIs).
+FROM docker.io/debian:bookworm-slim
+# import.sh shells out to psql and pg_isready (postgresql-client) and to bzip2,
+# so they are installed alongside osmosis and osmium-tool.
+RUN set -ex \
+    && apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install \
+    -y --no-install-recommends \
+        "bzip2" \
+        "ca-certificates" \
+        "curl" \
+        "osmium-tool" \
+        "osmosis" \
+        "postgresql-client" \
+    && rm -rf /var/lib/apt/lists/* \
+    && update-ca-certificates
diff --git a/importer/README.md b/importer/README.md
index 8c405a6..033536b 100644
--- a/importer/README.md
+++ b/importer/README.md
@@ -1,3 +1,5 @@
-# OSM Sandbox Importer
+# OSM Importer Service
 
-For importing data into the OSM Sandbox environment.
+Import data into a fresh OSM Sandbox instance.
+
+Uses Geofabrik country data, then filters down to the user's required BBOX.
diff --git a/importer/import.sh b/importer/import.sh
new file mode 100644
index 0000000..0fcb6dd
--- /dev/null
+++ b/importer/import.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+# NOTE that while osmosis is a deprecated tool, it is the only one available to
+# import OSM data into the apidb format (for replication of the OSM API).
+#
+# Tools like osmium are much better, but are for importing into a different
+# database format (as PostGIS geometries) for data analysis purposes.
+#
+# Environment read below: POSTGRES_HOST, POSTGRES_DB, POSTGRES_USER,
+# POSTGRES_PASSWORD, URL_FILE_TO_IMPORT; optional BBOX, MEMORY_JAVACMD_OPTIONS.
+
+# Area of interest as "xmin,ymin,xmax,ymax"; override through the BBOX env var.
+BBOX="${BBOX:-32.189941,15.159625,32.961731,15.950766}"
+
+# Get centroid from BBOX (values go in via -v so they are never parsed as awk code)
+IFS=',' read -r xmin ymin xmax ymax <<< "$BBOX"
+cx=$(awk -v a="$xmin" -v b="$xmax" 'BEGIN {print (a + b) / 2}')
+cy=$(awk -v a="$ymin" -v b="$ymax" 'BEGIN {print (a + b) / 2}')
+echo "Centroid: ($cx, $cy)"
+
+# TODO Reverse geocode centroid to get country
+
+# TODO Download country data from GeoFabrik
+
+# TODO Filter .osm.pbf using osmium --bbox
+# NOTE also possible to filter by polygon (future upgrade)
+
+# TODO Import filtered data using osmosis into apidb
+
+# Next create a separate container / service for the updater
+# When user trigger update, download daily .osc diffs from OSM since last update
+# Process using osmium, then import into db?
+# Alternatively just use osmosis again
+
+### TODO edit me - copied from osm-seed below
+
+# Abort on the first failing command from here on
+set -e
+export VOLUME_DIR=/mnt/data
+export PGPASSWORD=$POSTGRES_PASSWORD
+
+# OSMOSIS tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html
+if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then
+    echo JAVACMD_OPTIONS=\"-server\" > ~/.osmosis
+else
+    # Drop every "i" from the value (e.g. "2Gi" -> "2G") before handing it to -Xmx
+    memory="${MEMORY_JAVACMD_OPTIONS//i}"
+    echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" > ~/.osmosis
+fi
+
+# Get the data: download into VOLUME_DIR (where osmFile points) unless already present
+file=$(basename "$URL_FILE_TO_IMPORT")
+osmFile="$VOLUME_DIR/$file"
+[ -f "$osmFile" ] || curl --fail --location --silent --show-error --output "$osmFile" "$URL_FILE_TO_IMPORT"
+
+# Load $osmFile into the apidb database with osmosis, then reset the ID sequences.
+# NOTE(review): the DB password is passed to osmosis as a plain command-line argument.
+function importData () {
+    # This is using a osmosis 0.47. TODO: test with osmosis 0.48, and remove the following line
+    # IF NOT EXISTS keeps a re-run from failing once the column has been added
+    psql -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" -c "ALTER TABLE users ADD COLUMN IF NOT EXISTS nearby VARCHAR;"
+    # In case the import file is a PBF
+    if [[ "$osmFile" == *.pbf ]]; then
+        pbfFile=$osmFile
+        echo "Importing $pbfFile ..."
+        osmosis --read-pbf \
+            file="$pbfFile" \
+            --write-apidb \
+            host="$POSTGRES_HOST" \
+            database="$POSTGRES_DB" \
+            user="$POSTGRES_USER" \
+            password="$POSTGRES_PASSWORD" \
+            allowIncorrectSchemaVersion=yes \
+            validateSchemaVersion=no
+    else
+        # In case the file is .osm; extract it first when it is bzip2-compressed
+        if [[ "$osmFile" == *.bz2 ]]; then
+            bzip2 -d "$osmFile"
+            osmFile=${osmFile%.*}
+        fi
+        echo "Importing $osmFile ..."
+        osmosis --read-xml \
+            file="$osmFile" \
+            --write-apidb \
+            host="$POSTGRES_HOST" \
+            database="$POSTGRES_DB" \
+            user="$POSTGRES_USER" \
+            password="$POSTGRES_PASSWORD" \
+            validateSchemaVersion=no
+    fi
+    # Run required fixes in DB: set each current_* sequence to the highest imported ID
+    psql -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" -c "select setval('current_nodes_id_seq', (select max(node_id) from nodes));"
+    psql -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" -c "select setval('current_ways_id_seq', (select max(way_id) from ways));"
+    psql -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" -c "select setval('current_relations_id_seq', (select max(relation_id) from relations));"
+    # psql -U $POSTGRES_USER -h $POSTGRES_HOST -d $POSTGRES_DB -c "select setval('users_id_seq', (select max(id) from users));"
+    # psql -U $POSTGRES_USER -h $POSTGRES_HOST -d $POSTGRES_DB -c "select setval('changesets_id_seq', (select max(id) from changesets));"
+}
+
+# Wait until the DB accepts connections (pausing between probes), then import once
+until pg_isready -h "$POSTGRES_HOST" -p 5432 -U "$POSTGRES_USER" >/dev/null 2>&1; do
+    sleep 2
+done
+importData
diff --git a/updater/README.md b/updater/README.md
new file mode 100644
index 0000000..92b1ded
--- /dev/null
+++ b/updater/README.md
@@ -0,0 +1,6 @@
+# OSM Updater Service
+
+Update the data in an existing OSM Sandbox instance with the latest OSM data.
+
+Uses the `.osc` daily diff files provided by OSM, and filters down to the
+user's required BBOX.