Skip to content
This repository has been archived by the owner on Nov 20, 2024. It is now read-only.

Commit

Permalink
feat: start new importer and updater services using osmosis / osmium
Browse files Browse the repository at this point in the history
  • Loading branch information
spwoodcock committed Aug 30, 2024
1 parent 8562c39 commit 58cb595
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 2 deletions.
12 changes: 12 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,18 @@ services:
--map-nodes 10000000 --map-area 10 && \
tail --pid=\$(cat /tmp/cgimap.pid) -f /dev/null"
importer:
  # Image published for the importer service; built locally from ./importer.
  image: "ghcr.io/hotosm/osm-sandbox/importer:latest"
  build:
    context: importer
  # Only start once the osm service reports healthy.
  depends_on:
    osm:
      condition: service_healthy
  networks: [osm-net]
  restart: unless-stopped
  # Keep the container idle so import commands can be run inside it.
  entrypoint: ["sleep", "infinity"]

osm-db:
image: docker.io/postgres:14
environment:
Expand Down
11 changes: 11 additions & 0 deletions importer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM docker.io/debian:bookworm-slim

# Tooling for the importer service.
#   - osmosis / osmium-tool: OSM data processing and apidb import
#   - postgresql-client: provides psql and pg_isready, both invoked by import.sh
#   - bzip2: import.sh decompresses .osm.bz2 extracts with it
#   - curl / ca-certificates: HTTPS downloads
# Packages are listed alphabetically to keep diffs small.
RUN set -ex \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install \
    -y --no-install-recommends \
        bzip2 \
        ca-certificates \
        curl \
        osmium-tool \
        osmosis \
        postgresql-client \
    && update-ca-certificates \
    && rm -rf /var/lib/apt/lists/*
6 changes: 4 additions & 2 deletions importer/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# OSM Sandbox Importer
# OSM Importer Service

For importing data into the OSM Sandbox environment.
Import data into a fresh OSM Sandbox instance.

Uses Geofabrik country data, then filters it down to the user's required BBOX.
103 changes: 103 additions & 0 deletions importer/import.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/bin/bash

# NOTE that while osmosis is a deprecated tool, it is the only one available to
# import OSM data into the apidb format (for replication of the OSM API).
#
# Tools like osmium are much better, but are for importing into a different
# database format (as PostGIS geometries) for data analysis purposes.

# Bounding box to import, formatted as "xmin,ymin,xmax,ymax".
# Can be overridden by exporting BBOX before running this script; the
# default below is the previously hard-coded value.
BBOX="${BBOX:-32.189941,15.159625,32.961731,15.950766}"

# Get centroid from BBOX
IFS=',' read -r xmin ymin xmax ymax <<< "$BBOX"
if [ -z "$xmin" ] || [ -z "$ymin" ] || [ -z "$xmax" ] || [ -z "$ymax" ]; then
    echo "Invalid BBOX '$BBOX': expected 'xmin,ymin,xmax,ymax'" >&2
    exit 1
fi
# Pass the coordinates with -v so they are treated as data and never parsed
# as part of the awk program text.
cx=$(awk -v lo="$xmin" -v hi="$xmax" 'BEGIN {print (lo + hi) / 2}')
cy=$(awk -v lo="$ymin" -v hi="$ymax" 'BEGIN {print (lo + hi) / 2}')
echo "Centroid: ($cx, $cy)"

# Reverse geocode centroid to get country

# Download country data from GeoFabrik

# Filter .osm.pbf using osmium --bbox
# NOTE also possible to filter by polygon (future upgrade)

# Import filtered data using osmosis into apidb


# Next create a separate container / service for the updater
# When the user triggers an update, download the daily .osc diffs from OSM since the last update
# Process using osmium, then import into db?
# Alternatively just use osmosis again



### TODO edit me - copied from osm-seed below

#!/usr/bin/env bash
# Abort the script as soon as any command fails.
set -e

# Directory where the import file is expected to be stored.
export VOLUME_DIR=/mnt/data
# Exported so the psql calls further down can authenticate without prompting.
export PGPASSWORD=$POSTGRES_PASSWORD

# OSMOSIS tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html
# Write the JVM options for osmosis to ~/.osmosis. When
# MEMORY_JAVACMD_OPTIONS is set, every "i" is stripped from it (e.g. "2Gi"
# becomes "2G") and the result is used as the -Xmx heap limit.
if [ -n "$MEMORY_JAVACMD_OPTIONS" ]; then
    heap_limit="${MEMORY_JAVACMD_OPTIONS//i}"
    printf 'JAVACMD_OPTIONS="-server -Xmx%s"\n' "$heap_limit" > ~/.osmosis
else
    printf '%s\n' 'JAVACMD_OPTIONS="-server"' > ~/.osmosis
fi

# Get the data
# URL_FILE_TO_IMPORT must point at the .osm.pbf / .osm.bz2 file to import.
: "${URL_FILE_TO_IMPORT:?URL_FILE_TO_IMPORT must be set}"
file=$(basename "$URL_FILE_TO_IMPORT")
osmFile="$VOLUME_DIR/$file"
if [ ! -f "$osmFile" ]; then
    mkdir -p "$VOLUME_DIR"
    # Download to a temporary name first so that an interrupted transfer is
    # never mistaken for a complete file on the next run. The file is written
    # to $VOLUME_DIR (where the check above and the import look for it), not
    # to the current working directory.
    curl --fail --location --silent --show-error \
        --output "$osmFile.part" "$URL_FILE_TO_IMPORT"
    mv "$osmFile.part" "$osmFile"
fi

# Run one SQL statement against the target database.
#   $1 - the SQL statement to execute
function _runSql () {
    psql -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" -c "$1"
}

# Import the file at $osmFile into the apidb database using osmosis, then
# reset the id sequences so they continue after the imported ids.
#
# Reads:   osmFile, POSTGRES_HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD
# Updates: osmFile (set to the decompressed path when a .bz2 file was given)
function importData () {
    # This is using a osmosis 0.47. TODO: test with osmosis 0.48, and remove the following line
    # IF NOT EXISTS keeps the statement from failing when the script is re-run.
    _runSql "ALTER TABLE users ADD COLUMN IF NOT EXISTS nearby VARCHAR;"

    case "$osmFile" in
        *.pbf)
            # In case the import file is a PBF
            echo "Importing $osmFile ..."
            osmosis --read-pbf \
                "file=$osmFile" \
                --write-apidb \
                "host=$POSTGRES_HOST" \
                "database=$POSTGRES_DB" \
                "user=$POSTGRES_USER" \
                "password=$POSTGRES_PASSWORD" \
                allowIncorrectSchemaVersion=yes \
                validateSchemaVersion=no
            ;;
        *)
            # In case the file is .osm
            # Extract the osm file, but only when it is actually bzip2-compressed
            if [[ "$osmFile" == *.bz2 ]]; then
                bzip2 -d "$osmFile"
                osmFile="${osmFile%.bz2}"
            fi
            echo "Importing $osmFile ..."
            osmosis --read-xml \
                "file=$osmFile" \
                --write-apidb \
                "host=$POSTGRES_HOST" \
                "database=$POSTGRES_DB" \
                "user=$POSTGRES_USER" \
                "password=$POSTGRES_PASSWORD" \
                validateSchemaVersion=no
            ;;
    esac

    # Run required fixes in DB
    _runSql "select setval('current_nodes_id_seq', (select max(node_id) from nodes));"
    _runSql "select setval('current_ways_id_seq', (select max(way_id) from ways));"
    _runSql "select setval('current_relations_id_seq', (select max(relation_id) from relations));"
    # psql -U $POSTGRES_USER -h $POSTGRES_HOST -d $POSTGRES_DB -c "select setval('users_id_seq', (select max(id) from users));"
    # psql -U $POSTGRES_USER -h $POSTGRES_HOST -d $POSTGRES_DB -c "select setval('changesets_id_seq', (select max(id) from changesets));"
}

# Wait until PostgreSQL accepts connections, then run the import exactly once.
until pg_isready -h "$POSTGRES_HOST" -p 5432 -U "$POSTGRES_USER" >/dev/null 2>&1; do
    # Pause between probes so the loop does not spin while the DB starts up.
    sleep 1
done
importData
6 changes: 6 additions & 0 deletions updater/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# OSM Updater Service

Update the data in an existing OSM Sandbox instance with latest OSM data.

Uses the `.osc` daily diff files provided by OSM, and filters them down to the
user's required BBOX.

0 comments on commit 58cb595

Please sign in to comment.