Skip to content

Commit

Permalink
testing jupyter_drives extension
Browse files Browse the repository at this point in the history
  • Loading branch information
angelosnm committed Dec 7, 2024
1 parent 2e69aa8 commit c49e622
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 270 deletions.
2 changes: 1 addition & 1 deletion docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ keplergl==0.3.2
rasterio==1.4.2
pydeck==0.9.1
apache-sedona==1.6.1
jupyterlab-s3-browser==0.12.0
jupyter_drives==0.0.1
Binary file not shown.
147 changes: 26 additions & 121 deletions sedona_examples/sedona_raster.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "5687d3a9-c676-43fc-8a88-70f510f6fd51",
"metadata": {
"scrolled": true,
Expand All @@ -37,13 +37,13 @@
"output_type": "stream",
"text": [
"Connected to Spark Master at: local[*]\n",
"Application ID: local-1732470437880\n",
"Application ID: local-1733609654532\n",
"Spark connection successful.\n"
]
}
],
"source": [
"# spark_master_url = \"spark://192.168.18.110:7077\"\n",
"# spark_master_url = \"spark://spark.tithia.scio.services:7077\"\n",
"spark_master_url = \"local[*]\"\n",
"\n",
"config = (\n",
Expand All @@ -53,12 +53,14 @@
" .config(\"spark.executor.memory\", \"16g\") \\\n",
" .config(\"spark.executor.cores\", \"4\") \\\n",
" .config(\"spark.driver.memory\", \"8g\") \\\n",
" .config(\"spark.hadoop.fs.s3a.access.key\", \"GD5Lg2x7tLaGxShjozFw\") \\\n",
" .config(\"spark.hadoop.fs.s3a.secret.key\", \"H7J3mn7pI6oK35Xpuyrv6Lk4AsgymaSiQ2zwdAlu\") \\\n",
" .config(\"spark.hadoop.fs.s3a.endpoint\", \"http://192.168.18.115:9000\") \\\n",
" .config(\"spark.hadoop.fs.s3a.endpoint\", \"x\") \\\n",
" .config(\"spark.hadoop.fs.s3a.access.key\", \"dsQ\") \\\n",
" .config(\"spark.hadoop.fs.s3a.secret.key\", \"cvxvi\") \\\n",
" .config(\"spark.hadoop.fs.s3a.connection.ssl.enabled\", \"false\") \\\n",
" .config(\"spark.hadoop.fs.s3a.path.style.access\", \"true\") \\\n",
" .config(\n",
" 'spark.jars.packages',\n",
" 'org.apache.sedona:sedona-spark-3.5_2.12:1.6.1,'\n",
" 'org.apache.sedona:sedona-spark-shaded-3.5_2.12:1.6.1,'\n",
" 'org.datasyslab:geotools-wrapper:1.6.1-28.2'\n",
" )\n",
" .getOrCreate()\n",
Expand Down Expand Up @@ -95,126 +97,29 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "b9a6469e-e0fb-46bd-b284-eea46506e524",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Step 1: Load Raster Data Using Rasterio\n",
"def raster_to_point_geometries(raster_path):\n",
" with rasterio.open(raster_path) as src:\n",
" # Read raster data\n",
" band1 = src.read(1)\n",
" affine = src.transform\n",
"\n",
" # Convert raster to points\n",
" points = []\n",
" rows, cols = band1.shape\n",
" for row in range(rows):\n",
" for col in range(cols):\n",
" value = band1[row, col]\n",
" if not np.isnan(value): # Skip no-data values\n",
" x, y = rasterio.transform.xy(affine, row, col)\n",
" points.append((Point(x, y), value))\n",
" return points"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c008d932-4e68-4e4c-b176-01184f17e346",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Convert raster to points\n",
"raster_path = \"BFA_hist_land_cover_subindicator_2001_2010.tif\"\n",
"points_with_values = raster_to_point_geometries(raster_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee695895-5ebe-445c-8e34-1710d58e2255",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Step 2: Convert to Spark DataFrame\n",
"schema = [\"geometry\", \"value\"]\n",
"rdd = spark.sparkContext.parallelize(points_with_values)\n",
"points_df = spark.createDataFrame(rdd, schema=schema)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7fb305b-4dd8-4cd2-8a84-2fa3ae059152",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Register as Spatial Table\n",
"points_df = points_df.withColumn(\"geometry\", col(\"geometry\").cast(GeometryType()))\n",
"points_df.createOrReplaceTempView(\"raster_points\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b3facf0-f6e9-4f9b-9073-b1727aed1bc7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Step 3: Perform Spatial Operations with Sedona\n",
"# Example: Query points within a bounding box\n",
"bounding_box_wkt = \"POLYGON((-180 -90, -180 90, 180 90, 180 -90, -180 -90))\"\n",
"result = spark.sql(f\"\"\"\n",
" SELECT *\n",
" FROM raster_points\n",
" WHERE ST_Contains(ST_GeomFromText('{bounding_box_wkt}'), geometry)\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c15f19a-4caa-4c4f-a94d-fa6f8b1ffc9a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Show the results\n",
"result.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a9ef079-f09c-41c1-b9b5-9d63f3ec9e75",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------------------+-------+--------------------+\n",
"| path| modificationTime| length| content|\n",
"+--------------------+-------------------+-------+--------------------+\n",
"|s3a://data/BFA_hi...|2024-12-07 08:35:02|3258373|[49 49 2A 00 08 0...|\n",
"+--------------------+-------------------+-------+--------------------+\n",
"\n"
]
}
],
"source": [
"# Step 4: Save Results\n",
"result.write.csv(\"output/filtered_raster_points.csv\")"
"geotiff_df = sedona.read.format(\"binaryFile\").load(\"s3a://data/BFA_hist_land_cover_subindicator_2001_2010.tif\")\n",
"geotiff_df.show(2)\n",
"geotiff_df.createOrReplaceTempView(\"binary_raster\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b6b1d68-1f2f-4324-8583-b5e3a66dd262",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit c49e622

Please sign in to comment.