Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replaced Google with OSM Nominatim. Added polygons to areas. Cleaned up city names to improve reverse geolocation. #1

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
310 changes: 84 additions & 226 deletions OrefAlerts.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,268 +2,121 @@
"cells": [
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>data</th>\n",
" <th>date</th>\n",
" <th>time</th>\n",
" <th>datetime</th>\n",
" <th>outLat</th>\n",
" <th>outLong</th>\n",
" <th>inLat</th>\n",
" <th>inLong</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>נחל עוז</td>\n",
" <td>14.05.2021</td>\n",
" <td>21:15</td>\n",
" <td>2021-05-14T21:16:00</td>\n",
" <td>31.472680</td>\n",
" <td>34.497675</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ניר עוז</td>\n",
" <td>14.05.2021</td>\n",
" <td>21:10</td>\n",
" <td>2021-05-14T21:11:00</td>\n",
" <td>31.309697</td>\n",
" <td>34.402075</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ניצנים</td>\n",
" <td>14.05.2021</td>\n",
" <td>21:10</td>\n",
" <td>2021-05-14T21:10:00</td>\n",
" <td>31.717845</td>\n",
" <td>34.634636</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>אשקלון</td>\n",
" <td>14.05.2021</td>\n",
" <td>21:09</td>\n",
" <td>2021-05-14T21:10:00</td>\n",
" <td>31.668789</td>\n",
" <td>34.574252</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>אשדוד - ח</td>\n",
" <td>14.05.2021</td>\n",
" <td>21:09</td>\n",
" <td>2021-05-14T21:10:00</td>\n",
" <td>31.804381</td>\n",
" <td>34.655314</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4064</th>\n",
" <td>נתיב העשרה</td>\n",
" <td>09.05.2021</td>\n",
" <td>21:11</td>\n",
" <td>2021-05-09T21:11:00</td>\n",
" <td>31.572407</td>\n",
" <td>34.539322</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4065</th>\n",
" <td>כרמיה</td>\n",
" <td>09.05.2021</td>\n",
" <td>21:11</td>\n",
" <td>2021-05-09T21:11:00</td>\n",
" <td>31.602695</td>\n",
" <td>34.540413</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4066</th>\n",
" <td>יד מרדכי</td>\n",
" <td>09.05.2021</td>\n",
" <td>21:11</td>\n",
" <td>2021-05-09T21:11:00</td>\n",
" <td>31.586963</td>\n",
" <td>34.557500</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4067</th>\n",
" <td>אזור תעשייה הדרומי אשקלון</td>\n",
" <td>09.05.2021</td>\n",
" <td>21:11</td>\n",
" <td>2021-05-09T21:11:00</td>\n",
" <td>31.636089</td>\n",
" <td>34.554535</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4068</th>\n",
" <td>זיקים</td>\n",
" <td>09.05.2021</td>\n",
" <td>21:11</td>\n",
" <td>2021-05-09T21:11:00</td>\n",
" <td>31.607204</td>\n",
" <td>34.520968</td>\n",
" <td>31.513</td>\n",
" <td>34.452</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4202 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" data date time datetime \\\n",
"0 נחל עוז 14.05.2021 21:15 2021-05-14T21:16:00 \n",
"1 ניר עוז 14.05.2021 21:10 2021-05-14T21:11:00 \n",
"2 ניצנים 14.05.2021 21:10 2021-05-14T21:10:00 \n",
"3 אשקלון 14.05.2021 21:09 2021-05-14T21:10:00 \n",
"4 אשדוד - ח 14.05.2021 21:09 2021-05-14T21:10:00 \n",
"... ... ... ... ... \n",
"4064 נתיב העשרה 09.05.2021 21:11 2021-05-09T21:11:00 \n",
"4065 כרמיה 09.05.2021 21:11 2021-05-09T21:11:00 \n",
"4066 יד מרדכי 09.05.2021 21:11 2021-05-09T21:11:00 \n",
"4067 אזור תעשייה הדרומי אשקלון 09.05.2021 21:11 2021-05-09T21:11:00 \n",
"4068 זיקים 09.05.2021 21:11 2021-05-09T21:11:00 \n",
"\n",
" outLat outLong inLat inLong \n",
"0 31.472680 34.497675 31.513 34.452 \n",
"1 31.309697 34.402075 31.513 34.452 \n",
"2 31.717845 34.634636 31.513 34.452 \n",
"3 31.668789 34.574252 31.513 34.452 \n",
"4 31.804381 34.655314 31.513 34.452 \n",
"... ... ... ... ... \n",
"4064 31.572407 34.539322 31.513 34.452 \n",
"4065 31.602695 34.540413 31.513 34.452 \n",
"4066 31.586963 34.557500 31.513 34.452 \n",
"4067 31.636089 34.554535 31.513 34.452 \n",
"4068 31.607204 34.520968 31.513 34.452 \n",
"\n",
"[4202 rows x 8 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": null,
"metadata": {
"tags": [
"outputPrepend"
]
},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"from progressbar import progressbar\n",
"\n",
"# define start and end dates - format: Day.Month.Year\n",
"from_date = \"09.05.2021\"\n",
"to_date = \"20.05.2021\"\n",
"\n",
"# Download the alert history for the selected date range.\n",
"# The URL is built from from_date / to_date only, so the range is set in one place.\n",
"alerts_url = f\"https://www.oref.org.il//Shared/Ajax/GetAlarmsHistory.aspx?lang=he&fromDate={from_date}&toDate={to_date}&mode=0\"\n",
"alerts_response = requests.get(alerts_url, timeout=30)\n",
"alerts_response.raise_for_status()  # fail loudly instead of trying to parse an error page\n",
"alerts_json = alerts_response.json()\n",
"\n",
"# define gaza coordinates (lat, long) - used as the fixed origin point of every record\n",
"gaza_coords = (31.513, 34.452)\n",
"\n",
"# Break multi-region alerts into separate records:\n",
"# the \"data\" field holds comma separated area names, one output row per name\n",
"df = pd.DataFrame.from_records(alerts_json)\n",
"df[\"data\"] = df[\"data\"].str.split(\",\")\n",
"df = df.explode(\"data\")\n",
"\n",
"# Remove sub-regions such as א, ב, ג, ד (names of at most two characters)\n",
"df = df[df[\"data\"].str.len() > 2]\n",
"\n",
"# Use the detailed name for Hatzor, the bare name is resolved to the wrong place by the geocoder\n",
"df[\"data\"] = df[\"data\"].replace(\"חצור\", \"חצור אשדוד\")\n",
"\n",
"# number of distinct area names that will be geocoded\n",
"total_cities = len(df[\"data\"].unique())\n",
"# module-level counter, incremented by get_coordinates when a lookup returns no result\n",
"failed_cities = 0\n",
"\n",
"# Map city names to coordinates\n",
"def get_coordinates(city_name):\n",
"    \"\"\"Geocode an alert area name with the OSM Nominatim search API.\n",
"\n",
"    Args:\n",
"        city_name: area name as it appears in the alerts data,\n",
"            for example 'תל אביב - מערב'.\n",
"\n",
"    Returns:\n",
"        A (lat, long, poly) tuple. lat and long are floats taken from the first\n",
"        search result. poly is the GeoJSON geometry serialized as a JSON string:\n",
"        the first result whose geometry type contains 'Polygon' (Polygon or\n",
"        MultiPolygon), otherwise the geometry of the first result.\n",
"        (None, None, None) when the search returns no result; in that case the\n",
"        module-level failed_cities counter is incremented.\n",
"\n",
"    Raises:\n",
"        requests.RequestException: on network errors or a non-2xx HTTP status.\n",
"    \"\"\"\n",
"    global failed_cities\n",
"\n",
"    # areas of cities that make geolocation fail - remove them\n",
"    strings_to_remove = [\"והפזורה\", \"מתחם\", \"אזור תעשייה\"]\n",
"    for fragment in strings_to_remove:\n",
"        city_name = city_name.replace(fragment, \"\")\n",
"\n",
"    # to find cities with areas - תל אביב - מערב becomes תל אביב\n",
"    city_name = city_name.split(\" - \")[0]\n",
"\n",
"    # Append the country only AFTER the area suffix was cut off, otherwise the\n",
"    # split above would drop it together with the area for every 'city - area' name.\n",
"    # \" \".join(split()) collapses the gaps left behind by the removed fragments.\n",
"    query = \" \".join(city_name.split()) + \", ישראל\"\n",
"\n",
"    # params= lets requests URL-encode the Hebrew query instead of pasting it into the URL\n",
"    response = requests.get(\n",
"        \"https://nominatim.openstreetmap.org/search\",\n",
"        params={\n",
"            \"q\": query,\n",
"            \"format\": \"json\",\n",
"            \"polygon_geojson\": 1,\n",
"            \"addressdetails\": 1,\n",
"        },\n",
"        # Nominatim's usage policy asks for a User-Agent that identifies the application\n",
"        headers={\"User-Agent\": \"OrefAlerts-notebook/1.0\"},\n",
"        timeout=30,\n",
"    )\n",
"    response.raise_for_status()\n",
"    geocoding_result = response.json()\n",
"\n",
"    if not geocoding_result:\n",
"        failed_cities = failed_cities + 1\n",
"        return (None, None, None)\n",
"\n",
"    # Nominatim returns coordinates as strings - convert so the columns are numeric\n",
"    first_result = geocoding_result[0]\n",
"    lat = float(first_result[\"lat\"])\n",
"    long = float(first_result[\"lon\"])\n",
"\n",
"    # try to find a polygon among the results\n",
"    for result in geocoding_result:\n",
"        geometry = result.get(\"geojson\")\n",
"        if geometry and \"Polygon\" in geometry.get(\"type\", \"\"):\n",
"            return (lat, long, json.dumps(geometry))\n",
"\n",
"    # no polygon found - return the first geojson object\n",
"    return (lat, long, json.dumps(first_result.get(\"geojson\")))\n",
"\n",
"\n",
"# seconds to wait between lookups - Nominatim's usage policy allows at most 1 request per second\n",
"GEOCODER_DELAY_SECONDS = 1\n",
"\n",
"# area name -> (lat, long, poly) as returned by get_coordinates\n",
"city_to_coords = {}\n",
"\n",
"# Geocode every distinct area name exactly once; rows sharing a name reuse the result\n",
"for city in progressbar(df[\"data\"].unique(), redirect_stdout=True):\n",
"    try:\n",
"        city_to_coords[city] = get_coordinates(city)\n",
"    except (requests.RequestException, ValueError) as error:\n",
"        # a single failed request or unparsable response must not abort the whole run\n",
"        city_to_coords[city] = (None, None, None)\n",
"        print(city, \"\\t\", \"- failed finding coordinates\", error)\n",
"    else:\n",
"        print(city, \"\\t -\", city_to_coords[city][0], city_to_coords[city][1])\n",
"    time.sleep(GEOCODER_DELAY_SECONDS)\n",
"\n",
"# Count successes from the results themselves so the summary does not depend on external counters\n",
"found_cities = sum(1 for coords in city_to_coords.values() if coords[0] is not None)\n",
"print(\n",
"    f\"Geocoding complete. Successfuly found {found_cities}/{len(city_to_coords)}\"\n",
")\n",
"\n",
"# Apply mapping on all data\n",
"df[\"outLat\"] = df[\"data\"].apply(lambda x: city_to_coords[x][0])\n",
"df[\"outLong\"] = df[\"data\"].apply(lambda x: city_to_coords[x][1])\n",
"df[\"poly\"] = df[\"data\"].apply(lambda x: city_to_coords[x][2])\n",
"\n",
"# Fixed Gaza coordinates\n",
"df[\"inLat\"] = gaza_coords[0]\n",
"df[\"inLong\"] = gaza_coords[1]\n",
"\n",
"# Filter wrong coordinates outside of Israel polygon (only if you use a bad geocoder)\n",
"# Disabled by default: the block below is a plain string, not executed code.\n",
"\"\"\"\n",
"filtered_df = df[(df['outLong'] < 35.8)\n",
"                 & (df['outLong'] > 33.3)\n",
"                 & (df['outLat'] < 34.0)\n",
"                 & (df['outLat'] > 29.2)]\n",
"\n",
"filtered_df.to_csv('RocketLaunchData - Filtered.csv', encoding='utf-8-sig', index=False)\n",
"display(filtered_df)\n",
"\"\"\"\n",
"\n",
"# utf-8-sig writes a BOM at the start of the file\n",
"df.to_csv(\"RocketLaunchData.csv\", encoding=\"utf-8-sig\", index=False)\n",
"display(df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"name": "python383jvsc74a57bd0c617fa10d1ba9d6a36573c3c4f7496e3bb2fbcdd4aeff3a055aadb58d26c8355",
"display_name": "Python 3.8.3 32-bit"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -275,9 +128,14 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.8.3"
},
"metadata": {
"interpreter": {
"hash": "c617fa10d1ba9d6a36573c3c4f7496e3bb2fbcdd4aeff3a055aadb58d26c8355"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Loading