diff --git a/docs/postgis.ipynb b/docs/postgis.ipynb new file mode 100644 index 000000000..daa57ba83 --- /dev/null +++ b/docs/postgis.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8b748faa-2df2-49a1-9642-c8df3febf7f4", + "metadata": {}, + "source": [ + "# SedonaDB + PostGIS\n", + "\n", + "This page demonstrates how to read PostGIS tables into SedonaDB DataFrames.\n", + "\n", + "You need to install these Python packages to run this notebook:\n", + "\n", + "* `psycopg2-binary`\n", + "* `sqlalchemy`\n", + "\n", + "Let's start by creating a PostGIS table that SedonaDB can read.\n", + "\n", + "Here's how to create the `my_places` table:\n", + "\n", + "```sql\n", + "CREATE TABLE my_places (\n", + " id SERIAL PRIMARY KEY,\n", + " name VARCHAR(100),\n", + " geom GEOMETRY(Point, 4326)\n", + ");\n", + "```\n", + "\n", + "Now add some data to the table:\n", + "\n", + "```sql\n", + "INSERT INTO my_places (name, geom) VALUES\n", + " ('New York', ST_SetSRID(ST_MakePoint(-74.006, 40.7128), 4326)),\n", + " ('Los Angeles', ST_SetSRID(ST_MakePoint(-118.2437, 34.0522), 4326)),\n", + " ('Chicago', ST_SetSRID(ST_MakePoint(-87.6298, 41.8781), 4326));\n", + "```\n", + "\n", + "View the content of the table to make sure it was created correctly:\n", + "\n", + "```\n", + "SELECT id, name, ST_AsText(geom) FROM my_places;\n", + "\n", + " id | name | st_astext \n", + "----+-------------+--------------------------\n", + " 1 | New York | POINT(-74.006 40.7128)\n", + " 2 | Los Angeles | POINT(-118.2437 34.0522)\n", + " 3 | Chicago | POINT(-87.6298 41.8781)\n", + "```\n", + "\n", + "Next, read the PostGIS table into a GeoPandas DataFrame. The GeoPandas DataFrame can then easily be converted to a SedonaDB DataFrame."
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9f70f137-79ff-40fd-9b3a-c17050a36027", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "from sqlalchemy import create_engine\n", + "import sedona.db" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "123e4be5-65ae-4755-9902-5010aad46a4f", + "metadata": {}, + "outputs": [], + "source": [ + "# replace the username and database name in the following string with your own\n", + "engine = create_engine('postgresql://matthewpowers@localhost:5432/matthewpowers')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e15e0b4d-6293-416d-8cd2-5c79720ba4f7", + "metadata": {}, + "outputs": [], + "source": [ + "gdf = gpd.read_postgis('SELECT * FROM my_places', engine, geom_col='geom')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a295ea91-7bea-4af1-bab0-40331e5a988a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id name geom\n", + "0 1 New York POINT (-74.006 40.7128)\n", + "1 2 Los Angeles POINT (-118.2437 34.0522)\n", + "2 3 Chicago POINT (-87.6298 41.8781)\n" + ] + } + ], + "source": [ + "print(gdf)" + ] + }, + { + "cell_type": "markdown", + "id": "efd22ca9-82f5-4cb8-95be-320dd3c60140", + "metadata": {}, + "source": [ + "## Read PostGIS table into SedonaDB DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3b23448b-1871-4b5e-8527-6a0aea8bcf88", + "metadata": {}, + "outputs": [], + "source": [ + "sd = sedona.db.connect()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e9b02a12-e9f1-4e5e-87df-146f428ce693", + "metadata": {}, + "outputs": [], + "source": [ + "# convert the GeoPandas DataFrame to a SedonaDB DataFrame\n", + "\n", + "df = sd.create_data_frame(gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "070a4230-6b15-4139-8f40-9605a39cd6ee", + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "┌───────┬─────────────┬──────────────────────────┐\n", + "│ id ┆ name ┆ geom │\n", + "│ int64 ┆ utf8 ┆ geometry │\n", + "╞═══════╪═════════════╪══════════════════════════╡\n", + "│ 1 ┆ New York ┆ POINT(-74.006 40.7128) │\n", + "├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ 2 ┆ Los Angeles ┆ POINT(-118.2437 34.0522) │\n", + "├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ 3 ┆ Chicago ┆ POINT(-87.6298 41.8781) │\n", + "└───────┴─────────────┴──────────────────────────┘\n" + ] + } + ], + "source": [ + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9b14531c-884c-4dcf-8c2c-48ba5d4fb91d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 3 fields:\n", + " id: int64\n", + " name: utf8\n", + " geom: geometry" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# confirm that the SedonaDB DataFrame retains the CRS defined in the PostGIS table\n", + "\n", + "df.schema" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/postgis.md b/docs/postgis.md new file mode 100644 index 000000000..209be4cc3 --- /dev/null +++ b/docs/postgis.md @@ -0,0 +1,120 @@ +# SedonaDB + PostGIS + +This page demonstrates how to read PostGIS tables into SedonaDB DataFrames. + +You need to install these Python packages to run this notebook: + +* `psycopg2-binary` +* `sqlalchemy` + +Let's start by creating a PostGIS table that SedonaDB can read. 
+ +Here's how to create the `my_places` table: + +```sql +CREATE TABLE my_places ( + id SERIAL PRIMARY KEY, + name VARCHAR(100), + geom GEOMETRY(Point, 4326) +); +``` + +Now add some data to the table: + +```sql +INSERT INTO my_places (name, geom) VALUES + ('New York', ST_SetSRID(ST_MakePoint(-74.006, 40.7128), 4326)), + ('Los Angeles', ST_SetSRID(ST_MakePoint(-118.2437, 34.0522), 4326)), + ('Chicago', ST_SetSRID(ST_MakePoint(-87.6298, 41.8781), 4326)); +``` + +View the content of the table to make sure it was created correctly: + +``` +SELECT id, name, ST_AsText(geom) FROM my_places; + + id | name | st_astext +----+-------------+-------------------------- + 1 | New York | POINT(-74.006 40.7128) + 2 | Los Angeles | POINT(-118.2437 34.0522) + 3 | Chicago | POINT(-87.6298 41.8781) +``` + +Next, read the PostGIS table into a GeoPandas DataFrame. The GeoPandas DataFrame can then easily be converted to a SedonaDB DataFrame. + + +```python +import geopandas as gpd +from sqlalchemy import create_engine +import sedona.db +``` + + +```python +# replace the username and database name in the following string with your own +engine = create_engine('postgresql://matthewpowers@localhost:5432/matthewpowers') +``` + + +```python +gdf = gpd.read_postgis('SELECT * FROM my_places', engine, geom_col='geom') +``` + + +```python +print(gdf) +``` + + id name geom + 0 1 New York POINT (-74.006 40.7128) + 1 2 Los Angeles POINT (-118.2437 34.0522) + 2 3 Chicago POINT (-87.6298 41.8781) + + +## Read PostGIS table into SedonaDB DataFrame + + +```python +sd = sedona.db.connect() +``` + + +```python +# convert the GeoPandas DataFrame to a SedonaDB DataFrame + +df = sd.create_data_frame(gdf) +``` + + +```python +df.show() +``` + + ┌───────┬─────────────┬──────────────────────────┐ + │ id ┆ name ┆ geom │ + │ int64 ┆ utf8 ┆ geometry │ + ╞═══════╪═════════════╪══════════════════════════╡ + │ 1 ┆ New York ┆ POINT(-74.006 40.7128) │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 2 ┆ 
Los Angeles ┆ POINT(-118.2437 34.0522) │ + ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ 3 ┆ Chicago ┆ POINT(-87.6298 41.8781) │ + └───────┴─────────────┴──────────────────────────┘ + + + +```python +# confirm that the SedonaDB DataFrame retains the CRS defined in the PostGIS table + +df.schema +``` + + + + + SedonaSchema with 3 fields: + id: int64 + name: utf8 + geom: geometry + + diff --git a/mkdocs.yml b/mkdocs.yml index f169c8662..7e1fed3f6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,6 +51,7 @@ nav: - CRS Examples: crs-examples.md - Delta Lake: delta-lake.md - Iceberg: iceberg.md + - PostGIS: postgis.md - Working with Parquet Files: working-with-parquet-files.md - Working with SQL in SedonaDB: working-with-sql-sedonadb.md - Contributors Guide: contributors-guide.md