From 88e79f0efcefef498a20484340c3d6e34ea72688 Mon Sep 17 00:00:00 2001 From: Noam Ronen Date: Tue, 18 May 2021 13:42:14 +0300 Subject: [PATCH] Replaced Google with OSM Nominatim. Added polygons to areas. Cleaned up city names to improve reverse geolocation. --- OrefAlerts.ipynb | 310 +++++++++++++---------------------------------- OrefAlerts.py | 100 +++++++++++++++ 2 files changed, 184 insertions(+), 226 deletions(-) create mode 100644 OrefAlerts.py diff --git a/OrefAlerts.ipynb b/OrefAlerts.ipynb index 21c5b0e..3171fd0 100644 --- a/OrefAlerts.ipynb +++ b/OrefAlerts.ipynb @@ -2,249 +2,102 @@ "cells": [ { "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datadatetimedatetimeoutLatoutLonginLatinLong
0נחל עוז14.05.202121:152021-05-14T21:16:0031.47268034.49767531.51334.452
1ניר עוז14.05.202121:102021-05-14T21:11:0031.30969734.40207531.51334.452
2ניצנים14.05.202121:102021-05-14T21:10:0031.71784534.63463631.51334.452
3אשקלון14.05.202121:092021-05-14T21:10:0031.66878934.57425231.51334.452
4אשדוד - ח14.05.202121:092021-05-14T21:10:0031.80438134.65531431.51334.452
...........................
4064נתיב העשרה09.05.202121:112021-05-09T21:11:0031.57240734.53932231.51334.452
4065כרמיה09.05.202121:112021-05-09T21:11:0031.60269534.54041331.51334.452
4066יד מרדכי09.05.202121:112021-05-09T21:11:0031.58696334.55750031.51334.452
4067אזור תעשייה הדרומי אשקלון09.05.202121:112021-05-09T21:11:0031.63608934.55453531.51334.452
4068זיקים09.05.202121:112021-05-09T21:11:0031.60720434.52096831.51334.452
\n", - "

4202 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " data date time datetime \\\n", - "0 נחל עוז 14.05.2021 21:15 2021-05-14T21:16:00 \n", - "1 ניר עוז 14.05.2021 21:10 2021-05-14T21:11:00 \n", - "2 ניצנים 14.05.2021 21:10 2021-05-14T21:10:00 \n", - "3 אשקלון 14.05.2021 21:09 2021-05-14T21:10:00 \n", - "4 אשדוד - ח 14.05.2021 21:09 2021-05-14T21:10:00 \n", - "... ... ... ... ... \n", - "4064 נתיב העשרה 09.05.2021 21:11 2021-05-09T21:11:00 \n", - "4065 כרמיה 09.05.2021 21:11 2021-05-09T21:11:00 \n", - "4066 יד מרדכי 09.05.2021 21:11 2021-05-09T21:11:00 \n", - "4067 אזור תעשייה הדרומי אשקלון 09.05.2021 21:11 2021-05-09T21:11:00 \n", - "4068 זיקים 09.05.2021 21:11 2021-05-09T21:11:00 \n", - "\n", - " outLat outLong inLat inLong \n", - "0 31.472680 34.497675 31.513 34.452 \n", - "1 31.309697 34.402075 31.513 34.452 \n", - "2 31.717845 34.634636 31.513 34.452 \n", - "3 31.668789 34.574252 31.513 34.452 \n", - "4 31.804381 34.655314 31.513 34.452 \n", - "... ... ... ... ... \n", - "4064 31.572407 34.539322 31.513 34.452 \n", - "4065 31.602695 34.540413 31.513 34.452 \n", - "4066 31.586963 34.557500 31.513 34.452 \n", - "4067 31.636089 34.554535 31.513 34.452 \n", - "4068 31.607204 34.520968 31.513 34.452 \n", - "\n", - "[4202 rows x 8 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": { + "tags": [ + "outputPrepend" + ] + }, + "outputs": [], "source": [ "import requests\n", "import pandas as pd\n", + "from progressbar import progressbar\n", + "import json\n", + "\n", + "# define start and end dates - format: Day.Month.Year\n", + "from_date = \"09.05.2021\"\n", + "to_date = \"20.05.2021\"\n", "\n", "# Get data\n", - "alerts_url = 'https://www.oref.org.il//Shared/Ajax/GetAlarmsHistory.aspx?lang=he&fromDate=09.05.2021&toDate=20.05.2021&mode=0'\n", + "alerts_url = f\"https://www.oref.org.il//Shared/Ajax/GetAlarmsHistory.aspx?lang=he&fromDate={from_date}&toDate={to_date}&mode=0\"\n", "alerts_json = requests.get(alerts_url).json()\n", "\n", + "# define gaza coordinates\n", + "gaza_coords = (31.513, 34.452)\n", + "\n", "# Break multi-region alerts into separate records\n", "df = pd.DataFrame.from_records(alerts_json)\n", - "df['data'] = df['data'].str.split(',')\n", - "df = df.explode('data')\n", + "df[\"data\"] = df[\"data\"].str.split(\",\")\n", + "df = df.explode(\"data\")\n", "\n", "# Remove sub-regions such as א, ב, ג, ד\n", - "df = df[df['data'].str.len() > 2]\n", + "df = df[df[\"data\"].str.len() > 2]\n", "\n", "# Change Hatzor to detailed name as the google geocoder fail to detect the correct city\n", - "df['data'] = df['data'].replace('חצור', 'חצור אשדוד')\n", + "df[\"data\"] = df[\"data\"].replace(\"חצור\", \"חצור אשדוד\")\n", + "\n", + "total_cities = len(df[\"data\"].unique())\n", + "global failed_cities\n", + "failed_cities = 0\n", "\n", "# Map city names to coordinates\n", "def get_coordinates(city_name):\n", - " city_name = city_name + ', ישראל'\n", - " api_key = 'YOUR_API_KEY'\n", - " geocoder_url = f'https://maps.googleapis.com/maps/api/geocode/json?address={city_name}&key={api_key}&language=iw'\n", + " city_name = city_name + \", ישראל\"\n", + "\n", + " # areas of cities that make geolocation fail - remove them\n", + " strings_to_remove = [\"והפזורה\", \"מתחם\", \"אזור תעשייה\"]\n", + "\n", + " for i in strings_to_remove:\n", + " city_name = city_name.replace(i, \"\")\n", + "\n", + " # to find cities with areas - תל אביב - מערב becomes תל אביב\n", + " city_name = city_name.split(\" - \")[0]\n", + "\n", + " geocoder_url = f\"https://nominatim.openstreetmap.org/search?q={city_name}&format=json&polygon_geojson=1&addressdetails=1\"\n", " geocoding_result = requests.get(geocoder_url).json()\n", - " lat = geocoding_result['results'][0]['geometry']['location']['lat']\n", - " long = geocoding_result['results'][0]['geometry']['location']['lng']\n", - " return(lat, long)\n", - " \n", + "\n", + " if not geocoding_result:\n", + " global failed_cities\n", + " failed_cities = failed_cities + 1\n", + " return (None, None, None)\n", + "\n", + " lat = geocoding_result[0][\"lat\"]\n", + " long = geocoding_result[0][\"lon\"]\n", + "\n", + " for result in geocoding_result:\n", + " if \"Polygon\" in result[\"geojson\"][\"type\"]: # try to find a polygon\n", + " poly = json.dumps(result[\"geojson\"])\n", + " return (lat, long, poly)\n", + "\n", + " # no polygon found - return the first geojson object\n", + " poly = json.dumps(\n", + " geocoding_result[0][\"geojson\"]\n", + " ) # no polygon found - take the first object\n", + " return (lat, long, poly)\n", + "\n", + "\n", "city_to_coords = {}\n", - "for city in df['data'].unique():\n", - " try:\n", - " city_to_coords[city] = get_coordinates(city)\n", - " print(city, '\\t -', city_to_coords[city])\n", - " except Exception as e:\n", - " city_to_coords[city] = (None, None)\n", - " print(city, '\\t', '- failed finding coordinates')\n", + "# find cities using reverse geolocation\n", + "for city in progressbar(df[\"data\"].unique(), redirect_stdout=True):\n", + " city_to_coords[city] = get_coordinates(city)\n", + " print(city, \"\\t -\", city_to_coords[city][0], city_to_coords[city][1])\n", + "\n", + "print(\n", + " f\"Geocoding complete. Successfuly found {total_cities-failed_cities}/{total_cities}\"\n", + ")\n", "\n", "# Apply mapping on all data\n", - "df['outLat'] = df['data'].apply(lambda x: city_to_coords[x][0])\n", - "df['outLong'] = df['data'].apply(lambda x: city_to_coords[x][1])\n", + "df[\"outLat\"] = df[\"data\"].apply(lambda x: city_to_coords[x][0])\n", + "df[\"outLong\"] = df[\"data\"].apply(lambda x: city_to_coords[x][1])\n", + "df[\"poly\"] = df[\"data\"].apply(lambda x: city_to_coords[x][2])\n", "\n", "# Fixed Gaza coordinates\n", - "df['inLat'] = 31.513\n", - "df['inLong'] = 34.452\n", + "df[\"inLat\"] = gaza_coords[0]\n", + "df[\"inLong\"] = gaza_coords[1]\n", "\n", "# Filter wrong coordinates outside of Israel polygon (only if you use a bad geocoder)\n", - "'''\n", + "\"\"\"\n", "filtered_df = df[(df['outLong'] < 35.8)\n", " & (df['outLong'] > 33.3)\n", " & (df['outLat'] < 34.0)\n", @@ -252,18 +105,18 @@ "\n", "filtered_df.to_csv('RocketLaunchData - Filtered.csv', encoding='utf-8-sig', index=False)\n", "display(filtered_df)\n", - "'''\n", + "\"\"\"\n", + "\n", + "df.to_csv(\"RocketLaunchData.csv\", encoding=\"utf-8-sig\", index=False)\n", "\n", - "df.to_csv('RocketLaunchData.csv', encoding='utf-8-sig', index=False)\n", "display(df)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python383jvsc74a57bd0c617fa10d1ba9d6a36573c3c4f7496e3bb2fbcdd4aeff3a055aadb58d26c8355", + "display_name": "Python 3.8.3 32-bit" }, "language_info": { "codemirror_mode": { @@ -275,9 +128,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.3" + }, + "metadata": { + "interpreter": { + "hash": "c617fa10d1ba9d6a36573c3c4f7496e3bb2fbcdd4aeff3a055aadb58d26c8355" + } } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/OrefAlerts.py b/OrefAlerts.py new file mode 100644 index 0000000..307c89d --- /dev/null +++ b/OrefAlerts.py @@ -0,0 +1,100 @@ +import requests +import pandas as pd +from progressbar import progressbar +import json + +# define start and end dates - format: Day.Month.Year +from_date = "09.05.2021" +to_date = "20.05.2021" + +# Get data +alerts_url = f"https://www.oref.org.il//Shared/Ajax/GetAlarmsHistory.aspx?lang=he&fromDate={from_date}&toDate={to_date}&mode=0" +alerts_json = requests.get(alerts_url).json() + +# define gaza coordinates +gaza_coords = (31.513, 34.452) + +# Break multi-region alerts into separate records +df = pd.DataFrame.from_records(alerts_json) +df["data"] = df["data"].str.split(",") +df = df.explode("data") + +# Remove sub-regions such as א, ב, ג, ד +df = df[df["data"].str.len() > 2] + +# Change Hatzor to detailed name as the google geocoder fail to detect the correct city +df["data"] = df["data"].replace("חצור", "חצור אשדוד") + +total_cities = len(df["data"].unique()) +global failed_cities +failed_cities = 0 + +# Map city names to coordinates +def get_coordinates(city_name): + city_name = city_name + ", ישראל" + + # areas of cities that make geolocation fail - remove them + strings_to_remove = ["והפזורה", "מתחם", "אזור תעשייה"] + + for i in strings_to_remove: + city_name = city_name.replace(i, "") + + # to find cities with areas - תל אביב - מערב becomes תל אביב + city_name = city_name.split(" - ")[0] + + geocoder_url = f"https://nominatim.openstreetmap.org/search?q={city_name}&format=json&polygon_geojson=1&addressdetails=1" + geocoding_result = requests.get(geocoder_url).json() + + if not geocoding_result: + global failed_cities + failed_cities = failed_cities + 1 + return (None, None, None) + + lat = geocoding_result[0]["lat"] + long = geocoding_result[0]["lon"] + + for result in geocoding_result: + if "Polygon" in result["geojson"]["type"]: # try to find a polygon + poly = json.dumps(result["geojson"]) + return (lat, long, poly) + + # no polygon found - return the first geojson object + poly = json.dumps( + geocoding_result[0]["geojson"] + ) # no polygon found - take the first object + return (lat, long, poly) + + +city_to_coords = {} +# find cities using reverse geolocation +for city in progressbar(df["data"].unique(), redirect_stdout=True): + city_to_coords[city] = get_coordinates(city) + print(city, "\t -", city_to_coords[city][0], city_to_coords[city][1]) + +print( + f"Geocoding complete. Successfuly found {total_cities-failed_cities}/{total_cities}" +) + +# Apply mapping on all data +df["outLat"] = df["data"].apply(lambda x: city_to_coords[x][0]) +df["outLong"] = df["data"].apply(lambda x: city_to_coords[x][1]) +df["poly"] = df["data"].apply(lambda x: city_to_coords[x][2]) + +# Fixed Gaza coordinates +df["inLat"] = gaza_coords[0] +df["inLong"] = gaza_coords[1] + +# Filter wrong coordinates outside of Israel polygon (only if you use a bad geocoder) +""" +filtered_df = df[(df['outLong'] < 35.8) + & (df['outLong'] > 33.3) + & (df['outLat'] < 34.0) + & (df['outLat'] > 29.2)] + +filtered_df.to_csv('RocketLaunchData - Filtered.csv', encoding='utf-8-sig', index=False) +display(filtered_df) +""" + +df.to_csv("RocketLaunchData.csv", encoding="utf-8-sig", index=False) + +# display(df) \ No newline at end of file