diff --git a/data/notebooks/MPAtlas_table.ipynb b/data/notebooks/MPAtlas_table.ipynb
deleted file mode 100644
index 40a2613b..00000000
--- a/data/notebooks/MPAtlas_table.ipynb
+++ /dev/null
@@ -1,717 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Set up"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "import geopandas as gpd\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
- "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed/mpatlas\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### MPAtlas"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Read data\n",
- "mpatlas = gpd.read_file(path_in + \"/MPAtlas_largest100.shp\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['OBJECTID', 'WDPAID', 'WDPA_PID', 'NAME', 'English_De', 'PARENT_ISO',\n",
- " 'ISO3', 'MPA_Marine', 'mpa_id', 'Zone_Marin', 'IUCN_Cat', 'Stage_of_E',\n",
- " 'Distant_MP', 'Level_of_P', 'Most_Impac', 'Descrip_Im', 'Vertically',\n",
- " 'SHAPE_Leng', 'SHAPE_Area', 'geometry'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mpatlas.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['Incompatible', 'Highly', 'TBD', 'Fully', 'Lightly', 'Unknown'],\n",
- " dtype=object)"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mpatlas.Level_of_P.unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " OBJECTID | \n",
- " WDPAID | \n",
- " WDPA_PID | \n",
- " NAME | \n",
- " English_De | \n",
- " PARENT_ISO | \n",
- " ISO3 | \n",
- " MPA_Marine | \n",
- " mpa_id | \n",
- " Zone_Marin | \n",
- " ... | \n",
- " Stage_of_E | \n",
- " Distant_MP | \n",
- " Level_of_P | \n",
- " Most_Impac | \n",
- " Descrip_Im | \n",
- " Vertically | \n",
- " SHAPE_Leng | \n",
- " SHAPE_Area | \n",
- " geometry | \n",
- " P_LEVEL | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1.0 | \n",
- " 478053.0 | \n",
- " 478053 | \n",
- " Hikurangi Deep | \n",
- " Benthic Protection Area | \n",
- " NZL | \n",
- " NZL | \n",
- " 54022.1 | \n",
- " 5258 | \n",
- " 54022.1 | \n",
- " ... | \n",
- " Implemented | \n",
- " NaN | \n",
- " Incompatible | \n",
- " Mining, Fishing | \n",
- " Benthic protections only. Deep sea mining allo... | \n",
- " X | \n",
- " 12.332952 | \n",
- " 5.833001 | \n",
- " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
- " Less Protected / Unknown | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2.0 | \n",
- " 555512062.0 | \n",
- " 555512062 | \n",
- " Kermadec | \n",
- " Benthic Protection Area | \n",
- " NZL | \n",
- " NZL | \n",
- " 619146.0 | \n",
- " 5428 | \n",
- " 458540.5 | \n",
- " ... | \n",
- " Implemented | \n",
- " NaN | \n",
- " Incompatible | \n",
- " Mining, Fishing | \n",
- " Benthic protections only. Deep sea mining allo... | \n",
- " NaN | \n",
- " 25.629352 | \n",
- " 42.963159 | \n",
- " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
- " Less Protected / Unknown | \n",
- "
\n",
- " \n",
- "
\n",
- "
2 rows × 21 columns
\n",
- "
"
- ],
- "text/plain": [
- " OBJECTID WDPAID WDPA_PID NAME English_De \\\n",
- "0 1.0 478053.0 478053 Hikurangi Deep Benthic Protection Area \n",
- "1 2.0 555512062.0 555512062 Kermadec Benthic Protection Area \n",
- "\n",
- " PARENT_ISO ISO3 MPA_Marine mpa_id Zone_Marin ... Stage_of_E Distant_MP \\\n",
- "0 NZL NZL 54022.1 5258 54022.1 ... Implemented NaN \n",
- "1 NZL NZL 619146.0 5428 458540.5 ... Implemented NaN \n",
- "\n",
- " Level_of_P Most_Impac \\\n",
- "0 Incompatible Mining, Fishing \n",
- "1 Incompatible Mining, Fishing \n",
- "\n",
- " Descrip_Im Vertically SHAPE_Leng \\\n",
- "0 Benthic protections only. Deep sea mining allo... X 12.332952 \n",
- "1 Benthic protections only. Deep sea mining allo... NaN 25.629352 \n",
- "\n",
- " SHAPE_Area geometry \\\n",
- "0 5.833001 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
- "1 42.963159 POLYGON ((-174.02370 -29.22191, -174.02370 -29... \n",
- "\n",
- " P_LEVEL \n",
- "0 Less Protected / Unknown \n",
- "1 Less Protected / Unknown \n",
- "\n",
- "[2 rows x 21 columns]"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Create new column with protection level reclassified\n",
- "def map_protection_level(value):\n",
- " if value in [\"Fully\", \"Highly\"]:\n",
- " return \"Fully / Highly Protected\"\n",
- " else:\n",
- " return \"Less Protected / Unknown\"\n",
- "\n",
- "# Add the 'P_LEVEL' column by reclassifying the values of 'Level_of_P'\n",
- "mpatlas['P_LEVEL'] = mpatlas['Level_of_P'].apply(map_protection_level)\n",
- "mpatlas.head(2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [],
- "source": [
- "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
- "regions_data = [\n",
- " {\n",
- " 'region_iso': 'AS',\n",
- " 'region_name': 'Asia & Pacific',\n",
- " 'country_iso_3s': [\n",
- " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
- " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
- " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
- " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
- " ]\n",
- " },\n",
- " {\n",
- " 'region_iso': 'AF',\n",
- " 'region_name': 'Africa',\n",
- " 'country_iso_3s': [\n",
- " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
- " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
- " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
- " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
- " ]\n",
- " },\n",
- " {\n",
- " 'region_iso': 'EU',\n",
- " 'region_name': 'Europe',\n",
- " 'country_iso_3s': [\n",
- " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
- " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
- " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
- " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
- " \"UZB\", \"VAT\"\n",
- " ]\n",
- " },\n",
- " {\n",
- " 'region_iso': 'SA',\n",
- " 'region_name': 'Latin America & Caribbean',\n",
- " 'country_iso_3s': [\n",
- " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
- " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
- " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
- " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
- " ]\n",
- " },\n",
- " {\n",
- " 'region_iso': 'PO',\n",
- " 'region_name': 'Polar',\n",
- " 'country_iso_3s': [\n",
- " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
- " ]\n",
- " },\n",
- " {\n",
- " 'region_iso': 'NA',\n",
- " 'region_name': 'North America',\n",
- " 'country_iso_3s': [\n",
- " \"CAN\", \"SPM\", \"USA\"\n",
- " ]\n",
- " },\n",
- " {\n",
- " 'region_iso': 'GL',\n",
- " 'region_name': 'Global',\n",
- " 'country_iso_3s': []\n",
- " },\n",
- " {\n",
- " 'region_iso': 'WA',\n",
- " 'region_name': 'West Asia',\n",
- " 'country_iso_3s': [\n",
- " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
- " ]\n",
- " }\n",
- "]\n",
- "\n",
- "# Convert the region data to a dictionary that maps each country to its region name\n",
- "country_to_region = {}\n",
- "for region in regions_data:\n",
- " for country in region['country_iso_3s']:\n",
- " country_to_region[country] = region['region_name']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " OBJECTID | \n",
- " WDPAID | \n",
- " WDPA_PID | \n",
- " NAME | \n",
- " English_De | \n",
- " PARENT_ISO | \n",
- " ISO3 | \n",
- " MPA_Marine | \n",
- " mpa_id | \n",
- " Zone_Marin | \n",
- " ... | \n",
- " Distant_MP | \n",
- " Level_of_P | \n",
- " Most_Impac | \n",
- " Descrip_Im | \n",
- " Vertically | \n",
- " SHAPE_Leng | \n",
- " SHAPE_Area | \n",
- " geometry | \n",
- " P_LEVEL | \n",
- " REGIONS | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1.0 | \n",
- " 478053.0 | \n",
- " 478053 | \n",
- " Hikurangi Deep | \n",
- " Benthic Protection Area | \n",
- " NZL | \n",
- " NZL | \n",
- " 54022.1 | \n",
- " 5258 | \n",
- " 54022.1 | \n",
- " ... | \n",
- " NaN | \n",
- " Incompatible | \n",
- " Mining, Fishing | \n",
- " Benthic protections only. Deep sea mining allo... | \n",
- " X | \n",
- " 12.332952 | \n",
- " 5.833001 | \n",
- " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
- " Less Protected / Unknown | \n",
- " Asia & Pacific | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2.0 | \n",
- " 555512062.0 | \n",
- " 555512062 | \n",
- " Kermadec | \n",
- " Benthic Protection Area | \n",
- " NZL | \n",
- " NZL | \n",
- " 619146.0 | \n",
- " 5428 | \n",
- " 458540.5 | \n",
- " ... | \n",
- " NaN | \n",
- " Incompatible | \n",
- " Mining, Fishing | \n",
- " Benthic protections only. Deep sea mining allo... | \n",
- " NaN | \n",
- " 25.629352 | \n",
- " 42.963159 | \n",
- " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
- " Less Protected / Unknown | \n",
- " Asia & Pacific | \n",
- "
\n",
- " \n",
- "
\n",
- "
2 rows × 22 columns
\n",
- "
"
- ],
- "text/plain": [
- " OBJECTID WDPAID WDPA_PID NAME English_De \\\n",
- "0 1.0 478053.0 478053 Hikurangi Deep Benthic Protection Area \n",
- "1 2.0 555512062.0 555512062 Kermadec Benthic Protection Area \n",
- "\n",
- " PARENT_ISO ISO3 MPA_Marine mpa_id Zone_Marin ... Distant_MP \\\n",
- "0 NZL NZL 54022.1 5258 54022.1 ... NaN \n",
- "1 NZL NZL 619146.0 5428 458540.5 ... NaN \n",
- "\n",
- " Level_of_P Most_Impac \\\n",
- "0 Incompatible Mining, Fishing \n",
- "1 Incompatible Mining, Fishing \n",
- "\n",
- " Descrip_Im Vertically SHAPE_Leng \\\n",
- "0 Benthic protections only. Deep sea mining allo... X 12.332952 \n",
- "1 Benthic protections only. Deep sea mining allo... NaN 25.629352 \n",
- "\n",
- " SHAPE_Area geometry \\\n",
- "0 5.833001 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
- "1 42.963159 POLYGON ((-174.02370 -29.22191, -174.02370 -29... \n",
- "\n",
- " P_LEVEL REGIONS \n",
- "0 Less Protected / Unknown Asia & Pacific \n",
- "1 Less Protected / Unknown Asia & Pacific \n",
- "\n",
- "[2 rows x 22 columns]"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mpatlas['REGIONS'] = mpatlas['ISO3'].map(country_to_region)\n",
- "mpatlas.head(2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " WDPAID | \n",
- " WDPA_PID | \n",
- " NAME | \n",
- " AREA_MPATLAS | \n",
- " DESIG_ENG | \n",
- " ESTABLISHMENT | \n",
- " IMPACT | \n",
- " P_LEVEL | \n",
- " PARENT_ISO | \n",
- " ISO3 | \n",
- " REGIONS | \n",
- " geometry | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 478053.0 | \n",
- " 478053 | \n",
- " Hikurangi Deep | \n",
- " 54022.1 | \n",
- " Benthic Protection Area | \n",
- " Implemented | \n",
- " Mining, Fishing | \n",
- " Less Protected / Unknown | \n",
- " NZL | \n",
- " NZL | \n",
- " Asia & Pacific | \n",
- " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 555512062.0 | \n",
- " 555512062 | \n",
- " Kermadec | \n",
- " 458540.5 | \n",
- " Benthic Protection Area | \n",
- " Implemented | \n",
- " Mining, Fishing | \n",
- " Less Protected / Unknown | \n",
- " NZL | \n",
- " NZL | \n",
- " Asia & Pacific | \n",
- " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " WDPAID WDPA_PID NAME AREA_MPATLAS \\\n",
- "0 478053.0 478053 Hikurangi Deep 54022.1 \n",
- "1 555512062.0 555512062 Kermadec 458540.5 \n",
- "\n",
- " DESIG_ENG ESTABLISHMENT IMPACT \\\n",
- "0 Benthic Protection Area Implemented Mining, Fishing \n",
- "1 Benthic Protection Area Implemented Mining, Fishing \n",
- "\n",
- " P_LEVEL PARENT_ISO ISO3 REGIONS \\\n",
- "0 Less Protected / Unknown NZL NZL Asia & Pacific \n",
- "1 Less Protected / Unknown NZL NZL Asia & Pacific \n",
- "\n",
- " geometry \n",
- "0 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
- "1 POLYGON ((-174.02370 -29.22191, -174.02370 -29... "
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Rename columns and keep only relevant ones. \n",
- "# Note: We keep \"Zone_Marin\" (area of the geometry) instead of \"MPA_Marine\", as MPAs can be divided into smaller pieces according to their protection levels\n",
- "\n",
- "mpatlas = mpatlas.rename(columns={'English_De': 'DESIG_ENG', 'Zone_Marin': 'AREA_MPATLAS', 'Stage_of_E': 'ESTABLISHMENT', 'Most_Impac': 'IMPACT' }) \n",
- "mpatlas2 = mpatlas[['WDPAID', 'WDPA_PID', 'NAME', 'AREA_MPATLAS', 'DESIG_ENG', 'ESTABLISHMENT', 'IMPACT', 'P_LEVEL', 'PARENT_ISO', 'ISO3','REGIONS', 'geometry']]\n",
- "mpatlas2.head(2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_25742/67511564.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
- " mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")\n"
- ]
- }
- ],
- "source": [
- "mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " WDPAID | \n",
- " WDPA_PID | \n",
- " NAME | \n",
- " AREA_MPATL | \n",
- " DESIG_ENG | \n",
- " ESTABLISHM | \n",
- " IMPACT | \n",
- " P_LEVEL | \n",
- " PARENT_ISO | \n",
- " ISO3 | \n",
- " REGIONS | \n",
- " geometry | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 478053.0 | \n",
- " 478053 | \n",
- " Hikurangi Deep | \n",
- " 54022.1 | \n",
- " Benthic Protection Area | \n",
- " Implemented | \n",
- " Mining, Fishing | \n",
- " Less Protected / Unknown | \n",
- " NZL | \n",
- " NZL | \n",
- " Asia & Pacific | \n",
- " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 555512062.0 | \n",
- " 555512062 | \n",
- " Kermadec | \n",
- " 458540.5 | \n",
- " Benthic Protection Area | \n",
- " Implemented | \n",
- " Mining, Fishing | \n",
- " Less Protected / Unknown | \n",
- " NZL | \n",
- " NZL | \n",
- " Asia & Pacific | \n",
- " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " WDPAID WDPA_PID NAME AREA_MPATL \\\n",
- "0 478053.0 478053 Hikurangi Deep 54022.1 \n",
- "1 555512062.0 555512062 Kermadec 458540.5 \n",
- "\n",
- " DESIG_ENG ESTABLISHM IMPACT \\\n",
- "0 Benthic Protection Area Implemented Mining, Fishing \n",
- "1 Benthic Protection Area Implemented Mining, Fishing \n",
- "\n",
- " P_LEVEL PARENT_ISO ISO3 REGIONS \\\n",
- "0 Less Protected / Unknown NZL NZL Asia & Pacific \n",
- "1 Less Protected / Unknown NZL NZL Asia & Pacific \n",
- "\n",
- " geometry \n",
- "0 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
- "1 POLYGON ((-174.02370 -29.22191, -174.02370 -29... "
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mpatlas = gpd.read_file(path_out + \"/mpatlas_table.shp\")\n",
- "mpatlas.head(2)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "skytruth",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.4"
- },
- "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/data/notebooks/habitats.ipynb b/data/notebooks/habitats.ipynb
new file mode 100644
index 00000000..1185d367
--- /dev/null
+++ b/data/notebooks/habitats.ipynb
@@ -0,0 +1,2003 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting openpyxl\n",
+ " Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)\n",
+ "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.0/250.0 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n",
+ "\u001b[?25hCollecting et-xmlfile (from openpyxl)\n",
+ " Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n",
+ "Installing collected packages: et-xmlfile, openpyxl\n",
+ "Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install openpyxl"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "import openpyxl\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cold = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/coldwatercorals.csv\")\n",
+ "salt = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/saltmarshes.csv\")\n",
+ "sea = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/seagrasses.csv\")\n",
+ "warm = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/warmwatercorals.csv\")\n",
+ "glob = pd.read_excel(path_in + \"Ocean+HabitatsDownload_Global/global-stats.xlsx\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cold2 = cold[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n",
+ "salt2 = salt[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n",
+ "sea2 = sea[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n",
+ "warm2 = warm[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove rows with '-' in 'protected_area' or 'total_area'\n",
+ "cold2 = cold2[~cold2['protected_area'].str.contains('-') & ~cold2['total_area'].str.contains('-')]\n",
+ "salt2 = salt2[~salt2['protected_area'].str.contains('-') & ~salt2['total_area'].str.contains('-')]\n",
+ "sea2 = sea2[~sea2['protected_area'].str.contains('-') & ~sea2['total_area'].str.contains('-')]\n",
+ "warm2 = warm2[~warm2['protected_area'].str.contains('-') & ~warm2['total_area'].str.contains('-')]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the WDPA file to get the ISO3 to PARENT_ISO equivalences\n",
+ "wdpa = gpd.read_file(path_out + \"wdpa/merged_mpa.shp\")\n",
+ "\n",
+ "# Filter out rows with multiple values in either 'ISO3' or 'PARENT_ISO'\n",
+ "wdpa = wdpa[~wdpa['ISO3'].str.contains(';') & ~wdpa['PARENT_ISO'].str.contains(';')]\n",
+ "\n",
+ "# Extract unique ISO3-PARENT_ISO pairs\n",
+ "unique_pairs = wdpa[['ISO3', 'PARENT_ISO']].drop_duplicates()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a mapping dictionary for ISO3-PARENT_ISO pairs and modify the 'location_id' column in the habitats dataframes\n",
+ "mapping_dict = dict(zip(unique_pairs['ISO3'], unique_pairs['PARENT_ISO']))\n",
+ "cold2['location_id'] = cold2['location_id'].map(mapping_dict)\n",
+ "salt2['location_id'] = salt2['location_id'].map(mapping_dict)\n",
+ "sea2['location_id'] = sea2['location_id'].map(mapping_dict)\n",
+ "warm2['location_id'] = warm2['location_id'].map(mapping_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Group by 'location_id' and calculate the sum of 'protected_area' and 'total_area'\n",
+ "cold2_grouped = cold2.groupby('location_id').sum().reset_index()\n",
+ "salt2_grouped = salt2.groupby('location_id').sum().reset_index()\n",
+ "sea2_grouped = sea2.groupby('location_id').sum().reset_index()\n",
+ "warm2_grouped = warm2.groupby('location_id').sum().reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add the 'habitat_name' column\n",
+ "cold2_grouped['habitat_name'] = 'cold-water corals'\n",
+ "salt2_grouped['habitat_name'] = 'saltmarshes'\n",
+ "sea2_grouped['habitat_name'] = 'seagrasses'\n",
+ "warm2_grouped['habitat_name'] = 'warm-water corals'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " habitat_name | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 421.629372679904 | \n",
+ " 1874.98221422617 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 0 | \n",
+ " 3.39567053773998 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id protected_area total_area habitat_name year\n",
+ "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n",
+ "1 AGO 0 3.39567053773998 cold-water corals 2023"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Concatenate the dataframes\n",
+ "habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped])\n",
+ "habitats['year'] = datetime.now().year\n",
+ "habitats.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " location_id | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " saltmarsh | \n",
+ " 111638.252564 | \n",
+ " 224435.075094 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " seagrass | \n",
+ " 74787.449960 | \n",
+ " 314001.940600 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " warmwater-corals | \n",
+ " 63259.499130 | \n",
+ " 149886.974126 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " coldwater-corals | \n",
+ " 4400.140842 | \n",
+ " 15336.975280 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " habitat_name protected_area total_area location_id year\n",
+ "0 saltmarsh 111638.252564 224435.075094 GLOB 2023\n",
+ "1 seagrass 74787.449960 314001.940600 GLOB 2023\n",
+ "2 warmwater-corals 63259.499130 149886.974126 GLOB 2023\n",
+ "4 coldwater-corals 4400.140842 15336.975280 GLOB 2023"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build the global habitat stats from the Ocean+ global-stats table\n",
+ "habitats_global = glob[['habitat','protected_area', 'total_area']].rename(columns={'habitat': 'habitat_name'})\n",
+ "habitats_global['location_id'] = 'GLOB'\n",
+ "habitats_global['year'] = datetime.now().year\n",
+ "habitats_global = habitats_global[habitats_global['habitat_name'] != 'mangroves'] # remove mangroves\n",
+ "habitats_global"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " location_id | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " saltmarshes | \n",
+ " 111638.252564 | \n",
+ " 224435.075094 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " seagrasses | \n",
+ " 74787.449960 | \n",
+ " 314001.940600 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " warm-water corals | \n",
+ " 63259.499130 | \n",
+ " 149886.974126 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " cold-water corals | \n",
+ " 4400.140842 | \n",
+ " 15336.975280 | \n",
+ " GLOB | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " habitat_name protected_area total_area location_id year\n",
+ "0 saltmarshes 111638.252564 224435.075094 GLOB 2023\n",
+ "1 seagrasses 74787.449960 314001.940600 GLOB 2023\n",
+ "2 warm-water corals 63259.499130 149886.974126 GLOB 2023\n",
+ "4 cold-water corals 4400.140842 15336.975280 GLOB 2023"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Change the name of the habitats to match the ones in the habitats dataframe\n",
+ "habitat_name_mapping = {\n",
+ " 'saltmarsh': 'saltmarshes',\n",
+ " 'seagrass': 'seagrasses',\n",
+ " 'warmwater-corals': 'warm-water corals',\n",
+ " 'coldwater-corals': 'cold-water corals'\n",
+ "}\n",
+ "habitats_global['habitat_name'] = habitats_global['habitat_name'].replace(habitat_name_mapping)\n",
+ "habitats_global"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['cold-water corals', 'saltmarshes', 'seagrasses',\n",
+ " 'warm-water corals'], dtype=object)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Concatenate the global stats to the habitats dataframe\n",
+ "habitats = pd.concat([habitats, habitats_global])\n",
+ "habitats['habitat_name'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " \n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+ " 'region_name': 'Antartica',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATA\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Convert the region data to a dictionary that maps each country to its region ISO code\n",
+ "country_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_iso']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AF | \n",
+ " cold-water corals | \n",
+ " 37.761626 | \n",
+ " 381.993234 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AF | \n",
+ " saltmarshes | \n",
+ " 6688.702879 | \n",
+ " 19845.915000 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AF | \n",
+ " seagrasses | \n",
+ " 6319.099491 | \n",
+ " 61939.484904 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " AF | \n",
+ " warm-water corals | \n",
+ " 6591.340083 | \n",
+ " 15216.393947 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " AS | \n",
+ " cold-water corals | \n",
+ " 263.251498 | \n",
+ " 1332.225080 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " AS | \n",
+ " saltmarshes | \n",
+ " 11721.439539 | \n",
+ " 39229.888860 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " AS | \n",
+ " seagrasses | \n",
+ " 28942.705660 | \n",
+ " 72666.482052 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " AS | \n",
+ " warm-water corals | \n",
+ " 13895.870659 | \n",
+ " 67363.486609 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " EU | \n",
+ " cold-water corals | \n",
+ " 2183.050266 | \n",
+ " 6179.526427 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " EU | \n",
+ " saltmarshes | \n",
+ " 7431.043710 | \n",
+ " 13274.326478 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " EU | \n",
+ " seagrasses | \n",
+ " 5840.372925 | \n",
+ " 10391.189911 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " EU | \n",
+ " warm-water corals | \n",
+ " 0.605763 | \n",
+ " 0.793357 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " NA | \n",
+ " cold-water corals | \n",
+ " 22.960099 | \n",
+ " 204.280433 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " NA | \n",
+ " saltmarshes | \n",
+ " 51092.644683 | \n",
+ " 68200.081930 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " NA | \n",
+ " seagrasses | \n",
+ " 70.012791 | \n",
+ " 301.909141 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " NA | \n",
+ " warm-water corals | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " SA | \n",
+ " cold-water corals | \n",
+ " 234.731370 | \n",
+ " 1416.251323 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " SA | \n",
+ " saltmarshes | \n",
+ " 22969.815906 | \n",
+ " 35983.392744 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " SA | \n",
+ " seagrasses | \n",
+ " 16517.097667 | \n",
+ " 45847.459412 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " SA | \n",
+ " warm-water corals | \n",
+ " 5597.366845 | \n",
+ " 12869.801231 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " WA | \n",
+ " cold-water corals | \n",
+ " 0.000000 | \n",
+ " 12.970705 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " WA | \n",
+ " saltmarshes | \n",
+ " 1309.225736 | \n",
+ " 11798.832619 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " WA | \n",
+ " seagrasses | \n",
+ " 1053.448673 | \n",
+ " 25273.727431 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " WA | \n",
+ " warm-water corals | \n",
+ " 547.928957 | \n",
+ " 4903.230395 | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id habitat_name protected_area total_area year\n",
+ "0 AF cold-water corals 37.761626 381.993234 2023\n",
+ "1 AF saltmarshes 6688.702879 19845.915000 2023\n",
+ "2 AF seagrasses 6319.099491 61939.484904 2023\n",
+ "3 AF warm-water corals 6591.340083 15216.393947 2023\n",
+ "4 AS cold-water corals 263.251498 1332.225080 2023\n",
+ "5 AS saltmarshes 11721.439539 39229.888860 2023\n",
+ "6 AS seagrasses 28942.705660 72666.482052 2023\n",
+ "7 AS warm-water corals 13895.870659 67363.486609 2023\n",
+ "8 EU cold-water corals 2183.050266 6179.526427 2023\n",
+ "9 EU saltmarshes 7431.043710 13274.326478 2023\n",
+ "10 EU seagrasses 5840.372925 10391.189911 2023\n",
+ "11 EU warm-water corals 0.605763 0.793357 2023\n",
+ "12 NA cold-water corals 22.960099 204.280433 2023\n",
+ "13 NA saltmarshes 51092.644683 68200.081930 2023\n",
+ "14 NA seagrasses 70.012791 301.909141 2023\n",
+ "15 NA warm-water corals 0.000000 0.000000 2023\n",
+ "16 SA cold-water corals 234.731370 1416.251323 2023\n",
+ "17 SA saltmarshes 22969.815906 35983.392744 2023\n",
+ "18 SA seagrasses 16517.097667 45847.459412 2023\n",
+ "19 SA warm-water corals 5597.366845 12869.801231 2023\n",
+ "20 WA cold-water corals 0.000000 12.970705 2023\n",
+ "21 WA saltmarshes 1309.225736 11798.832619 2023\n",
+ "22 WA seagrasses 1053.448673 25273.727431 2023\n",
+ "23 WA warm-water corals 547.928957 4903.230395 2023"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Add regions field\n",
+ "habitats_regions = habitats.copy()\n",
+ "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n",
+ "\n",
+ "# Convert fields to numeric\n",
+ "habitats_regions['protected_area'] = pd.to_numeric(habitats_regions['protected_area'], errors='coerce')\n",
+ "habitats_regions['total_area'] = pd.to_numeric(habitats_regions['total_area'], errors='coerce')\n",
+ "\n",
+ "# Calculate stats for each region\n",
+ "habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+ "habitats_regions['year'] = datetime.now().year\n",
+ "habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n",
+ "habitats_regions\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Concatenate region statistics to the habitats dataframe\n",
+ "habitats = pd.concat([habitats, habitats_regions])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['ABNJ', 'AGO', 'ALB', 'ARG', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n",
+ " 'BRB', 'CAN', 'CHL', 'CHN', 'CIV', 'COK', 'COL', 'CPV', 'CRI',\n",
+ " 'CUB', 'CYP', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'ERI', 'ESP',\n",
+ " 'FJI', 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC',\n",
+ " 'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL',\n",
+ " 'ISL', 'ITA', 'JAM', 'JPN', 'KEN', 'KIR', 'KNA', 'LBR', 'LCA',\n",
+ " 'LKA', 'MAR', 'MDG', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE', 'MOZ',\n",
+ " 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR', 'NZL',\n",
+ " 'OMN', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRT', 'RUS', 'SAU',\n",
+ " 'SEN', 'SHN', 'SJM', 'STP', 'SUR', 'SWE', 'SYC', 'THA', 'TLS',\n",
+ " 'TON', 'TTO', 'TUN', 'TUV', 'UMI', 'URY', 'USA', 'VCT', 'VEN',\n",
+ " 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ARE', 'AZE', 'BEL', 'BGR',\n",
+ " 'BHR', 'DEU', 'DJI', 'EGY', 'EST', 'FIN', 'GEO', 'GMB', 'IRN',\n",
+ " 'KHM', 'KOR', 'KWT', 'LBY', 'LTU', 'LVA', 'PAK', 'POL', 'QAT',\n",
+ " 'ROU', 'SDN', 'SVN', 'TUR', 'TZA', 'UKR', 'COM', 'ISR', 'JOR',\n",
+ " 'MCO', 'MDV', 'SGP', 'SLB', 'SLE', 'BGD', 'BRN', 'NIU', 'GLOB',\n",
+ " 'AF', 'AS', 'EU', 'NA', 'SA', 'WA'], dtype=object)"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "habitats['location_id'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "habitats.to_csv(path_out + \"habitats/ocean+.csv\", index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Process seamounts from [UNEP-WCMC](https://data.unep-wcmc.org/datasets/41)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read required data\n",
+ "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n",
+ "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n",
+ "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n",
+ "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep relevant fields in eez and hs and merge them into one dataframe\n",
+ "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n",
+ "hs = hs[['geometry']]\n",
+ "hs['SOVEREIGN1'] = 'High Seas'\n",
+ "hs['ISO_SOV1'] = 'ABNJ'\n",
+ "eez_hs = eez.merge(hs, how='outer')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Join eez info to seamounts falling within eez polygons\n",
+ "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n",
+ "# Drop those not associated with an eez or hs\n",
+ "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create new column \"iso\" with the iso_sov codes\n",
+ "def concatenate_iso(row):\n",
+ " iso_list = [row['ISO_SOV1']]\n",
+ " if not pd.isna(row['ISO_SOV2']):\n",
+ " iso_list.append(row['ISO_SOV2'])\n",
+ " if not pd.isna(row['ISO_SOV3']):\n",
+ " iso_list.append(row['ISO_SOV3'])\n",
+ " return ';'.join(iso_list)\n",
+ "\n",
+ "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check which seamounts are protected\n",
+ "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n",
+ "seamounts_wdpa['protection'] = \"no\" \n",
+ "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n",
+ "# Remove rows in which protection is \"no\"\n",
+ "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Global stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " habitat_name | \n",
+ " total_area | \n",
+ " location_id | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " seamounts | \n",
+ " 2.690810e+07 | \n",
+ " GLOB | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " habitat_name total_area location_id year\n",
+ "0 seamounts 2.690810e+07 GLOB 2011"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calculate global area of seamounts\n",
+ "seamounts_eez['habitat_name'] = 'seamounts'\n",
+ "seamounts_global = seamounts_eez.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n",
+ "seamounts_global['location_id'] = 'GLOB'\n",
+ "seamounts_global['year'] = 2011\n",
+ "seamounts_global"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " seamounts | \n",
+ " 3.438552e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " habitat_name protected_area\n",
+ "0 seamounts 3.438552e+06"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calculate global area of seamounts protected\n",
+ "seamounts_wdpa['habitat_name'] = 'seamounts'\n",
+ "seamounts_wdpa_global = seamounts_wdpa.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'protected_area'})\n",
+ "seamounts_wdpa_global"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " habitat_name | \n",
+ " total_area | \n",
+ " location_id | \n",
+ " year | \n",
+ " protected_area | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " seamounts | \n",
+ " 2.690810e+07 | \n",
+ " GLOB | \n",
+ " 2011 | \n",
+ " 3.438552e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " habitat_name total_area location_id year protected_area\n",
+ "0 seamounts 2.690810e+07 GLOB 2011 3.438552e+06"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Bring 'protected_area' field from seamounts_wdpa_global to seamounts_global\n",
+ "seamounts_global = seamounts_global.merge(seamounts_wdpa_global[['habitat_name', 'protected_area']], how='left', on='habitat_name')\n",
+ "seamounts_global"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Country stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Split the 'iso' values and create separate rows only for those with multiple values\n",
+ "mask = seamounts_eez['iso'].str.contains(';', na=False)\n",
+ "split_rows = seamounts_eez[mask].copy()\n",
+ "split_rows['iso'] = split_rows['iso'].str.split(';')\n",
+ "split_rows = split_rows.explode('iso')\n",
+ "\n",
+ "# Keep rows with single values in 'iso'\n",
+ "single_value_rows = seamounts_eez[~mask]\n",
+ "\n",
+ "# Concatenate the exploded rows with the single value rows\n",
+ "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " total_area | \n",
+ " habitat_name | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 1.483098e+07 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 9.556242e+03 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ARG | \n",
+ " 3.110730e+05 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ATA | \n",
+ " 3.551629e+05 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ATG | \n",
+ " 6.215895e+03 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " VNM | \n",
+ " 4.421338e+04 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 89 | \n",
+ " VUT | \n",
+ " 1.199475e+05 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 90 | \n",
+ " WSM | \n",
+ " 4.117997e+04 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " YEM | \n",
+ " 6.294974e+04 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " ZAF | \n",
+ " 9.946306e+04 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
93 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id total_area habitat_name year\n",
+ "0 ABNJ 1.483098e+07 seamounts 2011\n",
+ "1 AGO 9.556242e+03 seamounts 2011\n",
+ "2 ARG 3.110730e+05 seamounts 2011\n",
+ "3 ATA 3.551629e+05 seamounts 2011\n",
+ "4 ATG 6.215895e+03 seamounts 2011\n",
+ ".. ... ... ... ...\n",
+ "88 VNM 4.421338e+04 seamounts 2011\n",
+ "89 VUT 1.199475e+05 seamounts 2011\n",
+ "90 WSM 4.117997e+04 seamounts 2011\n",
+ "91 YEM 6.294974e+04 seamounts 2011\n",
+ "92 ZAF 9.946306e+04 seamounts 2011\n",
+ "\n",
+ "[93 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Get area of seamounts per iso\n",
+ "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n",
+ "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n",
+ "seamounts_iso['habitat_name'] = 'seamounts'\n",
+ "seamounts_iso['year'] = 2011\n",
+ "seamounts_iso "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Split the 'PARENT_ISO' values in seamounts_wdpa and create separate rows only for those with multiple values\n",
+ "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n",
+ "split_rows = seamounts_wdpa[mask].copy()\n",
+ "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n",
+ "split_rows = split_rows.explode('PARENT_ISO')\n",
+ "\n",
+ "# Keep rows with single values in 'PARENT_ISO'\n",
+ "single_value_rows = seamounts_wdpa[~mask]\n",
+ "\n",
+ "# Concatenate the exploded rows with the single value rows\n",
+ "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " protected_area | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 226253.932283 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ARG | \n",
+ " 38773.659962 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id protected_area\n",
+ "0 ABNJ 226253.932283\n",
+ "1 ARG 38773.659962"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calculate area protected per iso\n",
+ "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n",
+ "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n",
+ "seamounts_protected.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " total_area | \n",
+ " habitat_name | \n",
+ " year | \n",
+ " protected_area | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 1.483098e+07 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ " 226253.932283 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 9.556242e+03 | \n",
+ " seamounts | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id total_area habitat_name year protected_area\n",
+ "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n",
+ "1 AGO 9.556242e+03 seamounts 2011 NaN"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Join seamounts_iso and seamounts_protected\n",
+ "seamounts_iso = seamounts_iso.merge(seamounts_protected, how='left', on='location_id')\n",
+ "seamounts_iso.head(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Regions stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AF | \n",
+ " seamounts | \n",
+ " 94385.178958 | \n",
+ " 6.162351e+05 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AS | \n",
+ " seamounts | \n",
+ " 832497.783937 | \n",
+ " 5.433433e+06 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AT | \n",
+ " seamounts | \n",
+ " 0.000000 | \n",
+ " 3.551629e+05 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " EU | \n",
+ " seamounts | \n",
+ " 894514.910255 | \n",
+ " 2.641119e+06 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " NA | \n",
+ " seamounts | \n",
+ " 555588.210725 | \n",
+ " 1.664794e+06 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " SA | \n",
+ " seamounts | \n",
+ " 581172.154389 | \n",
+ " 1.655552e+06 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " WA | \n",
+ " seamounts | \n",
+ " 2487.428050 | \n",
+ " 9.384765e+04 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id habitat_name protected_area total_area year\n",
+ "0 AF seamounts 94385.178958 6.162351e+05 2011\n",
+ "1 AS seamounts 832497.783937 5.433433e+06 2011\n",
+ "2 AT seamounts 0.000000 3.551629e+05 2011\n",
+ "3 EU seamounts 894514.910255 2.641119e+06 2011\n",
+ "4 NA seamounts 555588.210725 1.664794e+06 2011\n",
+ "5 SA seamounts 581172.154389 1.655552e+06 2011\n",
+ "6 WA seamounts 2487.428050 9.384765e+04 2011"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "seamounts_regions = seamounts_iso.copy()\n",
+ "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n",
+ "\n",
+ "# Calculate stats for each region\n",
+ "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+ "seamounts_regions['year'] = 2011\n",
+ "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n",
+ "seamounts_regions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Concatenate region and global stats to seamounts_iso\n",
+ "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seamounts_all.to_csv(path_out + \"habitats/seamounts.csv\", index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Process mangroves from GMW"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mangroves = pd.read_csv(path_out + \"habitats/mangroves.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " location_id | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " mangroves | \n",
+ " 61287.20375 | \n",
+ " 147358.990971 | \n",
+ " GLOB | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " habitat_name protected_area total_area location_id year\n",
+ "0 mangroves 61287.20375 147358.990971 GLOB 2020"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calculate global stats for mangroves\n",
+ "mangroves_global = mangroves.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+ "mangroves_global['location_id'] = 'GLOB'\n",
+ "mangroves_global['year'] = 2020\n",
+ "mangroves_global"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Concatenate the global stats to the mangroves dataframe\n",
+ "mangroves = pd.concat([mangroves, mangroves_global])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " habitat_name | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AF | \n",
+ " mangroves | \n",
+ " 10006.97000 | \n",
+ " 29344.404399 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AS | \n",
+ " mangroves | \n",
+ " 21378.75000 | \n",
+ " 74629.194446 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NA | \n",
+ " mangroves | \n",
+ " 2055.40000 | \n",
+ " 2329.115505 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " PO | \n",
+ " mangroves | \n",
+ " 6.72000 | \n",
+ " 6.723018 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " SA | \n",
+ " mangroves | \n",
+ " 27811.53375 | \n",
+ " 40875.932666 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " WA | \n",
+ " mangroves | \n",
+ " 27.83000 | \n",
+ " 173.620938 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id habitat_name protected_area total_area year\n",
+ "0 AF mangroves 10006.97000 29344.404399 2020\n",
+ "1 AS mangroves 21378.75000 74629.194446 2020\n",
+ "2 NA mangroves 2055.40000 2329.115505 2020\n",
+ "3 PO mangroves 6.72000 6.723018 2020\n",
+ "4 SA mangroves 27811.53375 40875.932666 2020\n",
+ "5 WA mangroves 27.83000 173.620938 2020"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mangroves_regions = mangroves.copy()\n",
+ "mangroves_regions['region'] = mangroves['location_id'].map(country_to_region)\n",
+ "\n",
+ "# Calculate stats for each region\n",
+ "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+ "mangroves_regions['year'] = 2020\n",
+ "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n",
+ "mangroves_regions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Concatenate stats for regions with mangroves\n",
+ "mangroves = pd.concat([mangroves, mangroves_regions])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Concatenate all habitats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " protected_area | \n",
+ " total_area | \n",
+ " habitat_name | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 421.629372679904 | \n",
+ " 1874.98221422617 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 0 | \n",
+ " 3.39567053773998 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ALB | \n",
+ " 0 | \n",
+ " 5.98647948252716 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ARG | \n",
+ " 6.98422602063557 | \n",
+ " 61.8263440651753 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ATG | \n",
+ " 0 | \n",
+ " 0.997746538545076 | \n",
+ " cold-water corals | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AS | \n",
+ " 21378.75 | \n",
+ " 74629.194446 | \n",
+ " mangroves | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NA | \n",
+ " 2055.4 | \n",
+ " 2329.115505 | \n",
+ " mangroves | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " PO | \n",
+ " 6.72 | \n",
+ " 6.723018 | \n",
+ " mangroves | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " SA | \n",
+ " 27811.53375 | \n",
+ " 40875.932666 | \n",
+ " mangroves | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " WA | \n",
+ " 27.83 | \n",
+ " 173.620938 | \n",
+ " mangroves | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
628 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id protected_area total_area habitat_name year\n",
+ "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n",
+ "1 AGO 0 3.39567053773998 cold-water corals 2023\n",
+ "2 ALB 0 5.98647948252716 cold-water corals 2023\n",
+ "3 ARG 6.98422602063557 61.8263440651753 cold-water corals 2023\n",
+ "4 ATG 0 0.997746538545076 cold-water corals 2023\n",
+ ".. ... ... ... ... ...\n",
+ "1 AS 21378.75 74629.194446 mangroves 2020\n",
+ "2 NA 2055.4 2329.115505 mangroves 2020\n",
+ "3 PO 6.72 6.723018 mangroves 2020\n",
+ "4 SA 27811.53375 40875.932666 mangroves 2020\n",
+ "5 WA 27.83 173.620938 mangroves 2020\n",
+ "\n",
+ "[628 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Concatenate the dataframes\n",
+ "habitats_all = pd.concat([habitats, seamounts_all, mangroves])\n",
+ "habitats_all"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "habitats_all.to_csv(path_out + \"tables/habitats2.csv\", index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/layers.ipynb b/data/notebooks/layers.ipynb
index 3d9f2c16..232b762e 100644
--- a/data/notebooks/layers.ipynb
+++ b/data/notebooks/layers.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -12,114 +12,19 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
- "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
- "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\""
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " MRGID | \n",
- " GEONAME | \n",
- " MRGID_TER1 | \n",
- " POL_TYPE | \n",
- " MRGID_SOV1 | \n",
- " TERRITORY1 | \n",
- " ISO_TER1 | \n",
- " SOVEREIGN1 | \n",
- " MRGID_TER2 | \n",
- " MRGID_SOV2 | \n",
- " ... | \n",
- " ISO_SOV1 | \n",
- " ISO_SOV2 | \n",
- " ISO_SOV3 | \n",
- " UN_SOV1 | \n",
- " UN_SOV2 | \n",
- " UN_SOV3 | \n",
- " UN_TER1 | \n",
- " UN_TER2 | \n",
- " UN_TER3 | \n",
- " geometry | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 8444.0 | \n",
- " American Samoa Exclusive Economic Zone | \n",
- " 8670.0 | \n",
- " 200NM | \n",
- " 2204.0 | \n",
- " American Samoa | \n",
- " ASM | \n",
- " United States | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " ... | \n",
- " USA | \n",
- " NaN | \n",
- " NaN | \n",
- " 840 | \n",
- " NaN | \n",
- " NaN | \n",
- " 16.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " POLYGON ((-166.64112 -17.55527, -166.64194 -17... | \n",
- "
\n",
- " \n",
- "
\n",
- "
1 rows × 32 columns
\n",
- "
"
- ],
- "text/plain": [
- " MRGID GEONAME MRGID_TER1 POL_TYPE \\\n",
- "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 200NM \n",
- "\n",
- " MRGID_SOV1 TERRITORY1 ISO_TER1 SOVEREIGN1 MRGID_TER2 MRGID_SOV2 \\\n",
- "0 2204.0 American Samoa ASM United States 0.0 0.0 \n",
- "\n",
- " ... ISO_SOV1 ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 \\\n",
- "0 ... USA NaN NaN 840 NaN NaN 16.0 NaN \n",
- "\n",
- " UN_TER3 geometry \n",
- "0 NaN POLYGON ((-166.64112 -17.55527, -166.64194 -17... \n",
- "\n",
- "[1 rows x 32 columns]"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"eez = gpd.read_file(path_in + \"/World_EEZ_v11_20191118/eez_v11.shp\")\n",
"eez.head(1)"
@@ -127,63 +32,25 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n",
- " 'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n",
- " 'TERRITORY2', 'ISO_TER2', 'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n",
- " 'TERRITORY3', 'ISO_TER3', 'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n",
- " 'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n",
- " 'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"eez.columns"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\n",
- "Name: WGS 84\n",
- "Axis Info [ellipsoidal]:\n",
- "- Lat[north]: Geodetic latitude (degree)\n",
- "- Lon[east]: Geodetic longitude (degree)\n",
- "Area of Use:\n",
- "- name: World.\n",
- "- bounds: (-180.0, -90.0, 180.0, 90.0)\n",
- "Datum: World Geodetic System 1984 ensemble\n",
- "- Ellipsoid: WGS 84\n",
- "- Prime Meridian: Greenwich"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"eez.crs"
]
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -193,223 +60,9 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " MRGID | \n",
- " GEONAME | \n",
- " MRGID_TER1 | \n",
- " POL_TYPE | \n",
- " MRGID_SOV1 | \n",
- " TERRITORY1 | \n",
- " ISO_TER1 | \n",
- " SOVEREIGN1 | \n",
- " MRGID_TER2 | \n",
- " MRGID_SOV2 | \n",
- " ... | \n",
- " ISO_SOV1 | \n",
- " ISO_SOV2 | \n",
- " ISO_SOV3 | \n",
- " UN_SOV1 | \n",
- " UN_SOV2 | \n",
- " UN_SOV3 | \n",
- " UN_TER1 | \n",
- " UN_TER2 | \n",
- " UN_TER3 | \n",
- " geometry | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 8444.0 | \n",
- " American Samoa Exclusive Economic Zone | \n",
- " 8670.0 | \n",
- " 200NM | \n",
- " 2204.0 | \n",
- " American Samoa | \n",
- " ASM | \n",
- " United States | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " ... | \n",
- " USA | \n",
- " NaN | \n",
- " NaN | \n",
- " 840 | \n",
- " NaN | \n",
- " NaN | \n",
- " 16.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " POLYGON ((-16216412.543 -2157569.856, -1621650... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 8379.0 | \n",
- " Ascension Exclusive Economic Zone | \n",
- " 8620.0 | \n",
- " 200NM | \n",
- " 2208.0 | \n",
- " Ascension | \n",
- " SHN | \n",
- " United Kingdom | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " ... | \n",
- " GBR | \n",
- " NaN | \n",
- " NaN | \n",
- " 826 | \n",
- " NaN | \n",
- " NaN | \n",
- " 654.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " POLYGON ((-1089355.142 -974062.004, -1089348.4... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 8446.0 | \n",
- " Cook Islands Exclusive Economic Zone | \n",
- " 8672.0 | \n",
- " 200NM | \n",
- " 2227.0 | \n",
- " Cook Islands | \n",
- " COK | \n",
- " New Zealand | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " ... | \n",
- " NZL | \n",
- " NaN | \n",
- " NaN | \n",
- " 554 | \n",
- " NaN | \n",
- " NaN | \n",
- " 184.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " POLYGON ((-15912583.852 -716733.193, -15813064... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 8389.0 | \n",
- " Overlapping claim Falkland / Malvinas Islands:... | \n",
- " 8623.0 | \n",
- " Overlapping claim | \n",
- " 2208.0 | \n",
- " Falkland / Malvinas Islands | \n",
- " FLK | \n",
- " United Kingdom | \n",
- " 8623.0 | \n",
- " 2149.0 | \n",
- " ... | \n",
- " GBR | \n",
- " ARG | \n",
- " NaN | \n",
- " 826 | \n",
- " 32.0 | \n",
- " NaN | \n",
- " 238.0 | \n",
- " 238.0 | \n",
- " NaN | \n",
- " POLYGON ((-4061728.309 -6509190.466, -4443979.... | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 8440.0 | \n",
- " French Polynesian Exclusive Economic Zone | \n",
- " 8656.0 | \n",
- " 200NM | \n",
- " 17.0 | \n",
- " French Polynesia | \n",
- " PYF | \n",
- " France | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " ... | \n",
- " FRA | \n",
- " NaN | \n",
- " NaN | \n",
- " 250 | \n",
- " NaN | \n",
- " NaN | \n",
- " 258.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " MULTIPOLYGON (((-13543804.433 -974376.651, -13... | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 32 columns
\n",
- "
"
- ],
- "text/plain": [
- " MRGID GEONAME MRGID_TER1 \\\n",
- "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 \n",
- "1 8379.0 Ascension Exclusive Economic Zone 8620.0 \n",
- "2 8446.0 Cook Islands Exclusive Economic Zone 8672.0 \n",
- "3 8389.0 Overlapping claim Falkland / Malvinas Islands:... 8623.0 \n",
- "4 8440.0 French Polynesian Exclusive Economic Zone 8656.0 \n",
- "\n",
- " POL_TYPE MRGID_SOV1 TERRITORY1 ISO_TER1 \\\n",
- "0 200NM 2204.0 American Samoa ASM \n",
- "1 200NM 2208.0 Ascension SHN \n",
- "2 200NM 2227.0 Cook Islands COK \n",
- "3 Overlapping claim 2208.0 Falkland / Malvinas Islands FLK \n",
- "4 200NM 17.0 French Polynesia PYF \n",
- "\n",
- " SOVEREIGN1 MRGID_TER2 MRGID_SOV2 ... ISO_SOV1 ISO_SOV2 ISO_SOV3 \\\n",
- "0 United States 0.0 0.0 ... USA NaN NaN \n",
- "1 United Kingdom 0.0 0.0 ... GBR NaN NaN \n",
- "2 New Zealand 0.0 0.0 ... NZL NaN NaN \n",
- "3 United Kingdom 8623.0 2149.0 ... GBR ARG NaN \n",
- "4 France 0.0 0.0 ... FRA NaN NaN \n",
- "\n",
- " UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n",
- "0 840 NaN NaN 16.0 NaN NaN \n",
- "1 826 NaN NaN 654.0 NaN NaN \n",
- "2 554 NaN NaN 184.0 NaN NaN \n",
- "3 826 32.0 NaN 238.0 238.0 NaN \n",
- "4 250 NaN NaN 258.0 NaN NaN \n",
- "\n",
- " geometry \n",
- "0 POLYGON ((-16216412.543 -2157569.856, -1621650... \n",
- "1 POLYGON ((-1089355.142 -974062.004, -1089348.4... \n",
- "2 POLYGON ((-15912583.852 -716733.193, -15813064... \n",
- "3 POLYGON ((-4061728.309 -6509190.466, -4443979.... \n",
- "4 MULTIPOLYGON (((-13543804.433 -974376.651, -13... \n",
- "\n",
- "[5 rows x 32 columns]"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"eez = eez.to_crs('ESRI:54009')\n",
"eez['AREA_KM2']= eez.geometry.area/ 1000000\n",
@@ -418,7 +71,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -427,29 +80,17 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Allocating 16 GB of heap memory\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shx\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.dbf\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.prj\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "# Reproject to 4626\n",
+ "# Reproject to 4326\n",
"!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -533,7 +174,425 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n",
+ "eez.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez['REGIONS'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dissolve by relevant fields: REGIONS\n",
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
+ "regions.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions.crs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions['REGIONS'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate area of each region\n",
+ "regions['AREA_KM2']= regions.geometry.area/ 1000000"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Reproject to 4326\n",
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions['REGIONS'].unique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Extract marine areas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\", driver=\"ESRI Shapefile\")\n",
+ "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
+ "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\", driver=\"ESRI Shapefile\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a new column \"iso\": ISO_SOV1 on its own by default, ISO_SOV1;ISO_SOV2 where ISO_SOV2 is not null, and ISO_SOV1;ISO_SOV2;ISO_SOV3 where ISO_SOV3 is not null\n",
+ "eez['iso'] = eez['ISO_SOV1']\n",
+ "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n",
+ "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "49"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(eez[eez['iso'].str.contains(';')])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(281, 33)"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(337, 33)"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create a mask for rows with multiple values in 'iso'\n",
+ "mask = eez['iso'].str.contains(';', na=False)\n",
+ "\n",
+ "# Split the 'iso' values and create separate rows only for rows with multiple values\n",
+ "split_rows = eez[mask].copy()\n",
+ "split_rows['iso'] = split_rows['iso'].str.split(';')\n",
+ "split_rows = split_rows.explode('iso')\n",
+ "\n",
+ "# Keep rows with single values in 'iso'\n",
+ "single_value_rows = eez[~mask]\n",
+ "\n",
+ "# Concatenate the exploded rows with the single value rows\n",
+ "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n",
+ "\n",
+ "eez_new.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(eez_new[eez_new['iso'].str.contains(';')])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "iso_country_mapping = {\n",
+ " 'USA': 'United States',\n",
+ " 'GBR': 'United Kingdom',\n",
+ " 'NZL': 'New Zealand',\n",
+ " 'FRA': 'France',\n",
+ " 'WSM': 'Samoa',\n",
+ " 'TON': 'Tonga',\n",
+ " 'CHL': 'Chile',\n",
+ " 'URY': 'Uruguay',\n",
+ " 'PER': 'Peru',\n",
+ " 'BRA': 'Brazil',\n",
+ " 'KIR': 'Kiribati',\n",
+ " 'ARG': 'Argentina',\n",
+ " 'AUS': 'Australia',\n",
+ " 'COM': 'Comoros',\n",
+ " 'MDG': 'Madagascar',\n",
+ " 'ZAF': 'South Africa',\n",
+ " 'MUS': 'Mauritius',\n",
+ " 'VUT': 'Vanuatu',\n",
+ " 'NAM': 'Namibia',\n",
+ " 'TLS': 'Timor-Leste',\n",
+ " 'COG': 'Republic of the Congo',\n",
+ " 'AGO': 'Angola',\n",
+ " 'MOZ': 'Mozambique',\n",
+ " 'KEN': 'Kenya',\n",
+ " 'PNG': 'Papua New Guinea',\n",
+ " 'TZA': 'Tanzania',\n",
+ " 'SLB': 'Solomon Islands',\n",
+ " 'SYC': 'Seychelles',\n",
+ " 'COD': 'Democratic Republic of the Congo',\n",
+ " 'ATG': 'Antigua and Barbuda',\n",
+ " 'NLD': 'Netherlands',\n",
+ " 'PRT': 'Portugal',\n",
+ " 'BHS': 'The Bahamas',\n",
+ " 'BRB': 'Barbados',\n",
+ " 'MEX': 'Mexico',\n",
+ " 'CPV': 'Cape Verde',\n",
+ " 'ESP': 'Spain',\n",
+ " 'PAN': 'Panama',\n",
+ " 'CRI': 'Costa Rica',\n",
+ " 'DMA': 'Dominica',\n",
+ " 'DOM': 'Dominican Republic',\n",
+ " 'GTM': 'Guatemala',\n",
+ " 'DNK': 'Denmark',\n",
+ " 'GMB': 'Gambia',\n",
+ " 'GIB': 'Gibraltar',\n",
+ " 'GRD': 'Grenada',\n",
+ " 'SLE': 'Sierra Leone',\n",
+ " 'ISL': 'Iceland',\n",
+ " 'JAM': 'Jamaica',\n",
+ " 'MRT': 'Mauritania',\n",
+ " 'HTI': 'Haiti',\n",
+ " 'KNA': 'Saint Kitts and Nevis',\n",
+ " 'LCA': 'Saint Lucia',\n",
+ " 'VCT': 'Saint Vincent and the Grenadines',\n",
+ " 'TTO': 'Trinidad and Tobago',\n",
+ " 'SLV': 'El Salvador',\n",
+ " 'BLZ': 'Belize',\n",
+ " 'CUB': 'Cuba',\n",
+ " 'SEN': 'Senegal',\n",
+ " 'VEN': 'Venezuela',\n",
+ " 'CAN': 'Canada',\n",
+ " 'NIC': 'Nicaragua',\n",
+ " 'GUY': 'Guyana',\n",
+ " 'COL': 'Colombia',\n",
+ " 'IRL': 'Ireland',\n",
+ " 'GNB': 'Guinea-Bissau',\n",
+ " 'GIN': 'Guinea',\n",
+ " 'CIV': 'Ivory Coast',\n",
+ " 'LBR': 'Liberia',\n",
+ " 'HND': 'Honduras',\n",
+ " 'ECU': 'Ecuador',\n",
+ " 'ESH': 'Western Sahara',\n",
+ " 'SUR': 'Suriname',\n",
+ " 'MAR': 'Morocco',\n",
+ " 'ARE': 'United Arab Emirates',\n",
+ " 'CYP': 'Cyprus',\n",
+ " 'ERI': 'Eritrea',\n",
+ " 'EGY': 'Egypt',\n",
+ " 'GEO': 'Georgia',\n",
+ " 'IRN': 'Iran',\n",
+ " 'LBN': 'Lebanon',\n",
+ " 'LBY': 'Libya',\n",
+ " 'MLT': 'Malta',\n",
+ " 'OMN': 'Oman',\n",
+ " 'SAU': 'Saudi Arabia',\n",
+ " 'LKA': 'Sri Lanka',\n",
+ " 'SDN': 'Sudan',\n",
+ " 'SYR': 'Syria',\n",
+ " 'TGO': 'Togo',\n",
+ " 'GRC': 'Greece',\n",
+ " 'TUR': 'Turkey',\n",
+ " 'MCO': 'Monaco',\n",
+ " 'TUN': 'Tunisia',\n",
+ " 'MNE': 'Montenegro',\n",
+ " 'ALB': 'Albania',\n",
+ " 'BGR': 'Bulgaria',\n",
+ " 'PSE': 'Palestine',\n",
+ " 'KWT': 'Kuwait',\n",
+ " 'IRQ': 'Iraq',\n",
+ " 'BHR': 'Bahrain',\n",
+ " 'QAT': 'Qatar',\n",
+ " 'YEM': 'Yemen',\n",
+ " 'ISR': 'Israel',\n",
+ " 'JOR': 'Jordan',\n",
+ " 'DJI': 'Djibouti',\n",
+ " 'BGD': 'Bangladesh',\n",
+ " 'NGA': 'Nigeria',\n",
+ " 'CMR': 'Cameroon',\n",
+ " 'STP': 'São Tomé and Príncipe',\n",
+ " 'BIH': 'Bosnia and Herzegovina',\n",
+ " 'MHL': 'Marshall Islands',\n",
+ " 'PLW': 'Palau',\n",
+ " 'PHL': 'Philippines',\n",
+ " 'TWN': 'Taiwan',\n",
+ " 'SGP': 'Singapore',\n",
+ " 'THA': 'Thailand',\n",
+ " 'VNM': 'Vietnam',\n",
+ " 'KOR': 'South Korea',\n",
+ " 'BRN': 'Brunei',\n",
+ " 'PRK': 'North Korea',\n",
+ " 'KHM': 'Cambodia',\n",
+ " 'CHN': 'China',\n",
+ " 'EST': 'Estonia',\n",
+ " 'FIN': 'Finland',\n",
+ " 'SWE': 'Sweden',\n",
+ " 'LTU': 'Lithuania',\n",
+ " 'NOR': 'Norway',\n",
+ " 'BEL': 'Belgium',\n",
+ " 'DEU': 'Germany',\n",
+ " 'LVA': 'Latvia',\n",
+ " 'HRV': 'Croatia',\n",
+ " 'ITA': 'Italy',\n",
+ " 'UKR': 'Ukraine',\n",
+ " 'ROU': 'Romania',\n",
+ " 'JPN': 'Japan',\n",
+ " 'IND': 'India',\n",
+ " 'PAK': 'Pakistan',\n",
+ " 'TKM': 'Turkmenistan',\n",
+ " 'AZE': 'Azerbaijan',\n",
+ " 'KAZ': 'Kazakhstan',\n",
+ " 'MMR': 'Myanmar',\n",
+ " 'POL': 'Poland',\n",
+ " 'BEN': 'Benin',\n",
+ " 'SVN': 'Slovenia',\n",
+ " 'MYS': 'Malaysia',\n",
+ " 'ATA': 'Antarctica',\n",
+ " 'TUV': 'Tuvalu',\n",
+ " 'FJI': 'Fiji',\n",
+ " 'FSM': 'Micronesia',\n",
+ " 'GNQ': 'Equatorial Guinea',\n",
+ " 'MDV': 'Maldives',\n",
+ " 'SOM': 'Somalia',\n",
+ " 'NRU': 'Nauru',\n",
+ " 'GAB': 'Gabon',\n",
+ " 'IDN': 'Indonesia',\n",
+ " 'DZA': 'Algeria',\n",
+ " 'GHA': 'Ghana',\n",
+ " 'RUS': 'Russia'\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_name(country):\n",
+ " return iso_country_mapping.get(country, None)\n",
+ "\n",
+ "# Apply the function to create the 'name_iso' column\n",
+ "eez_new['name_iso'] = eez_new['iso'].apply(get_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
"metadata": {},
"outputs": [
{
@@ -568,7 +627,6 @@
" MRGID_TER2 | \n",
" MRGID_SOV2 | \n",
" ... | \n",
- " ISO_SOV2 | \n",
" ISO_SOV3 | \n",
" UN_SOV1 | \n",
" UN_SOV2 | \n",
@@ -577,7 +635,8 @@
" UN_TER2 | \n",
" UN_TER3 | \n",
" geometry | \n",
- " REGIONS | \n",
+ " iso | \n",
+ " name_iso | \n",
" \n",
" \n",
" \n",
@@ -595,15 +654,15 @@
" 0.0 | \n",
" ... | \n",
" NaN | \n",
- " NaN | \n",
" 840 | \n",
" NaN | \n",
" NaN | \n",
" 16.0 | \n",
" NaN | \n",
" NaN | \n",
- " POLYGON ((-16216412.543 -2157569.856, -1621650... | \n",
- " North America | \n",
+ " POLYGON ((-166.64112 -17.55527, -166.64194 -17... | \n",
+ " USA | \n",
+ " United States | \n",
" \n",
" \n",
" 1 | \n",
@@ -619,19 +678,19 @@
" 0.0 | \n",
" ... | \n",
" NaN | \n",
- " NaN | \n",
" 826 | \n",
" NaN | \n",
" NaN | \n",
" 654.0 | \n",
" NaN | \n",
" NaN | \n",
- " POLYGON ((-1089355.142 -974062.004, -1089348.4... | \n",
- " Europe | \n",
+ " POLYGON ((-10.93328 -7.88745, -10.93324 -7.889... | \n",
+ " GBR | \n",
+ " United Kingdom | \n",
"
\n",
" \n",
"\n",
- "2 rows × 33 columns
\n",
+ "2 rows × 34 columns
\n",
""
],
"text/plain": [
@@ -643,90 +702,161 @@
"0 2204.0 American Samoa ASM United States 0.0 \n",
"1 2208.0 Ascension SHN United Kingdom 0.0 \n",
"\n",
- " MRGID_SOV2 ... ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 \\\n",
- "0 0.0 ... NaN NaN 840 NaN NaN 16.0 \n",
- "1 0.0 ... NaN NaN 826 NaN NaN 654.0 \n",
- "\n",
- " UN_TER2 UN_TER3 geometry \\\n",
- "0 NaN NaN POLYGON ((-16216412.543 -2157569.856, -1621650... \n",
- "1 NaN NaN POLYGON ((-1089355.142 -974062.004, -1089348.4... \n",
+ " MRGID_SOV2 ... ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n",
+ "0 0.0 ... NaN 840 NaN NaN 16.0 NaN NaN \n",
+ "1 0.0 ... NaN 826 NaN NaN 654.0 NaN NaN \n",
"\n",
- " REGIONS \n",
- "0 North America \n",
- "1 Europe \n",
+ " geometry iso name_iso \n",
+ "0 POLYGON ((-166.64112 -17.55527, -166.64194 -17... USA United States \n",
+ "1 POLYGON ((-10.93328 -7.88745, -10.93324 -7.889... GBR United Kingdom \n",
"\n",
- "[2 rows x 33 columns]"
+ "[2 rows x 34 columns]"
]
},
- "execution_count": 16,
+ "execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n",
- "eez.head(2)"
+ "eez_new.head(2)"
]
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
- "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")"
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+ " 'region_name': 'Antartica',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATA\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Build two lookups keyed by country ISO3 code: country_to_region (region code) and name_to_region (region name)\n",
+ "country_to_region = {}\n",
+ "name_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_iso']\n",
+ " name_to_region[country] = region['region_name']"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array(['North America', 'Europe', 'Asia & Pacific',\n",
- " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n",
- " dtype=object)"
+ "array(['NA', 'EU', 'AS', 'SA', 'AF', 'WA', 'AT'], dtype=object)"
]
},
- "execution_count": 18,
+ "execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "eez['REGIONS'].unique()"
+ "eez_new['region'] = eez_new['iso'].map(country_to_region)\n",
+ "eez_new['region'].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 54,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Allocating 16 GB of heap memory\n",
- "[dissolve2] Removed 127,740 / 218,614 slivers using 0.033+ sqkm variable threshold\n",
- "[dissolve2] Dissolved 281 features into 7 features\n",
- "[explode] Exploded 7 features into 83 features\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n"
- ]
+ "data": {
+ "text/plain": [
+ "array(['North America', 'Europe', 'Asia & Pacific',\n",
+ " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antartica'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "# Dissolve by relevant fields: REGIONS\n",
- "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+ "eez_new['region_name'] = eez_new['iso'].map(name_to_region)\n",
+ "eez_new['region_name'].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 55,
"metadata": {},
"outputs": [
{
@@ -750,177 +880,407 @@
" \n",
" \n",
" | \n",
- " REGIONS | \n",
- " geometry | \n",
+ " location_id | \n",
+ " location_name | \n",
+ " total_marine_area | \n",
+ " location_type | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " North America | \n",
- " POLYGON ((-16216412.543 -2157569.856, -1621650... | \n",
+ " AGO | \n",
+ " Angola | \n",
+ " 498908.577009 | \n",
+ " country | \n",
"
\n",
" \n",
" 1 | \n",
- " North America | \n",
- " POLYGON ((-15875617.974 972834.674, -15887321.... | \n",
+ " ALB | \n",
+ " Albania | \n",
+ " 12177.287755 | \n",
+ " country | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " REGIONS geometry\n",
- "0 North America POLYGON ((-16216412.543 -2157569.856, -1621650...\n",
- "1 North America POLYGON ((-15875617.974 972834.674, -15887321...."
+ " location_id location_name total_marine_area location_type\n",
+ "0 AGO Angola 498908.577009 country\n",
+ "1 ALB Albania 12177.287755 country"
]
},
- "execution_count": 21,
+ "execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
- "regions.head(2)"
+ "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+ "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+ "marine_areas['location_type'] = 'country'\n",
+ "marine_areas.head(2)"
]
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " location_name | \n",
+ " total_marine_area | \n",
+ " location_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AF | \n",
+ " Africa | \n",
+ " 1.495538e+07 | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AS | \n",
+ " Asia & Pacific | \n",
+ " 5.269208e+07 | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- "\n",
- "Name: World_Mollweide\n",
- "Axis Info [cartesian]:\n",
- "- [east]: Easting (metre)\n",
- "- [north]: Northing (metre)\n",
- "Area of Use:\n",
- "- undefined\n",
- "Coordinate Operation:\n",
- "- name: unnamed\n",
- "- method: Mollweide\n",
- "Datum: World Geodetic System 1984\n",
- "- Ellipsoid: WGS 84\n",
- "- Prime Meridian: Greenwich"
+ " location_id location_name total_marine_area location_type\n",
+ "0 AF Africa 1.495538e+07 region\n",
+ "1 AS Asia & Pacific 5.269208e+07 region"
]
},
- "execution_count": 22,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "regions.crs"
+ "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+ "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+ "regions_areas['location_type'] = 'region'\n",
+ "regions_areas.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n",
+ "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " location_name | \n",
+ " total_marine_area | \n",
+ " location_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AGO | \n",
+ " Angola | \n",
+ " 4.989086e+05 | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ALB | \n",
+ " Albania | \n",
+ " 1.217729e+04 | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ARE | \n",
+ " United Arab Emirates | \n",
+ " 5.821593e+04 | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ARG | \n",
+ " Argentina | \n",
+ " 2.897629e+06 | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ATA | \n",
+ " Antarctica | \n",
+ " 8.842860e+06 | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " NA | \n",
+ " North America | \n",
+ " 1.791826e+07 | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ " 163 | \n",
+ " SA | \n",
+ " Latin America & Caribbean | \n",
+ " 2.107800e+07 | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ " 164 | \n",
+ " WA | \n",
+ " West Asia | \n",
+ " 1.456969e+06 | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ " 165 | \n",
+ " GLOB | \n",
+ " Worldwide | \n",
+ " 3.610000e+08 | \n",
+ " worldwide | \n",
+ "
\n",
+ " \n",
+ " 166 | \n",
+ " ABNJ | \n",
+ " High Seas | \n",
+ " 2.128814e+08 | \n",
+ " country | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
167 rows × 4 columns
\n",
+ "
"
+ ],
"text/plain": [
- "array(['North America', 'Europe', 'Asia & Pacific',\n",
- " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n",
- " dtype=object)"
+ " location_id location_name total_marine_area location_type\n",
+ "0 AGO Angola 4.989086e+05 country\n",
+ "1 ALB Albania 1.217729e+04 country\n",
+ "2 ARE United Arab Emirates 5.821593e+04 country\n",
+ "3 ARG Argentina 2.897629e+06 country\n",
+ "4 ATA Antarctica 8.842860e+06 country\n",
+ ".. ... ... ... ...\n",
+ "162 NA North America 1.791826e+07 region\n",
+ "163 SA Latin America & Caribbean 2.107800e+07 region\n",
+ "164 WA West Asia 1.456969e+06 region\n",
+ "165 GLOB Worldwide 3.610000e+08 worldwide\n",
+ "166 ABNJ High Seas 2.128814e+08 country\n",
+ "\n",
+ "[167 rows x 4 columns]"
]
},
- "execution_count": 23,
+ "execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "regions['REGIONS'].unique()"
+ "# Concatenate the country, region, global and high-seas area tables into a single locations table\n",
+ "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n",
+ "marine_areas2\n"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 64,
"metadata": {},
"outputs": [
{
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_27590/1686611470.py:1: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartic'\n"
- ]
+ "data": {
+ "text/plain": [
+ "array(['country', 'region', 'worldwide'], dtype=object)"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Calculate area of each region\n",
- "regions['AREA_KM2']= regions.geometry.area/ 1000000"
+ "marine_areas2['location_type'].unique()"
]
},
{
- "cell_type": "code",
- "execution_count": 29,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")"
+ "### Clean WDPA dataset"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Allocating 16 GB of heap memory\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n",
- "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n"
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb Cell 42\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m# Read WDPA data\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m poly1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39;49mread_file(path_in \u001b[39m+\u001b[39;49m \u001b[39m\"\u001b[39;49m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 3\u001b[0m point1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m poly2 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:281\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 279\u001b[0m path_or_bytes \u001b[39m=\u001b[39m filename\n\u001b[0;32m--> 281\u001b[0m \u001b[39mreturn\u001b[39;00m _read_file_fiona(\n\u001b[1;32m 282\u001b[0m path_or_bytes, from_bytes, bbox\u001b[39m=\u001b[39;49mbbox, mask\u001b[39m=\u001b[39;49mmask, rows\u001b[39m=\u001b[39;49mrows, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 283\u001b[0m )\n\u001b[1;32m 285\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 286\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39munknown engine \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mengine\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:379\u001b[0m, in \u001b[0;36m_read_file_fiona\u001b[0;34m(path_or_bytes, from_bytes, bbox, mask, rows, where, **kwargs)\u001b[0m\n\u001b[1;32m 375\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(\n\u001b[1;32m 376\u001b[0m [record[\u001b[39m\"\u001b[39m\u001b[39mproperties\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mfor\u001b[39;00m record \u001b[39min\u001b[39;00m f_filt], columns\u001b[39m=\u001b[39mcolumns\n\u001b[1;32m 377\u001b[0m )\n\u001b[1;32m 378\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 379\u001b[0m df \u001b[39m=\u001b[39m GeoDataFrame\u001b[39m.\u001b[39;49mfrom_features(\n\u001b[1;32m 380\u001b[0m f_filt, crs\u001b[39m=\u001b[39;49mcrs, columns\u001b[39m=\u001b[39;49mcolumns \u001b[39m+\u001b[39;49m [\u001b[39m\"\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n\u001b[1;32m 381\u001b[0m )\n\u001b[1;32m 382\u001b[0m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m datetime_fields:\n\u001b[1;32m 383\u001b[0m as_dt \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mto_datetime(df[k], errors\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/geodataframe.py:635\u001b[0m, in \u001b[0;36mGeoDataFrame.from_features\u001b[0;34m(cls, features, crs, columns)\u001b[0m\n\u001b[1;32m 632\u001b[0m features_lst \u001b[39m=\u001b[39m features\n\u001b[1;32m 634\u001b[0m rows \u001b[39m=\u001b[39m []\n\u001b[0;32m--> 635\u001b[0m \u001b[39mfor\u001b[39;00m feature \u001b[39min\u001b[39;00m features_lst:\n\u001b[1;32m 636\u001b[0m \u001b[39m# load geometry\u001b[39;00m\n\u001b[1;32m 637\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(feature, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[1;32m 638\u001b[0m feature \u001b[39m=\u001b[39m feature\u001b[39m.\u001b[39m__geo_interface__\n",
+ "File \u001b[0;32mfiona/ogrext.pyx:1739\u001b[0m, in \u001b[0;36mfiona.ogrext.Iterator.__next__\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/ogrext.pyx:389\u001b[0m, in \u001b[0;36mfiona.ogrext.FeatureBuilder.build\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:193\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build_from_feature\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:249\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:169\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildMultiPolygon\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:243\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:157\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildPolygon\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mfiona/_geometry.pyx:259\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/fiona/model.py:201\u001b[0m, in \u001b[0;36mGeometry.from_dict\u001b[0;34m(cls, ob, **kwargs)\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_delegate \u001b[39m=\u001b[39m _Geometry(\n\u001b[1;32m 197\u001b[0m coordinates\u001b[39m=\u001b[39mcoordinates, \u001b[39mtype\u001b[39m\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m, geometries\u001b[39m=\u001b[39mgeometries\n\u001b[1;32m 198\u001b[0m )\n\u001b[1;32m 199\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mdata)\n\u001b[0;32m--> 201\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 202\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfrom_dict\u001b[39m(\u001b[39mcls\u001b[39m, ob\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 203\u001b[0m \u001b[39mif\u001b[39;00m ob \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 204\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mdict\u001b[39m(\u001b[39mgetattr\u001b[39m(ob, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m, ob))\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
- "# Reproject to 4626\n",
- "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+ "# Read WDPA data\n",
+ "poly1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+ "point1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+ "poly2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+ "point2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+ "poly3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+ "point3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+ "dataframes = [poly1, point1, poly2, point2, poly3, point3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Convert point records to circular polygons (buffered from the reported area) and merge all WDPA layers into one dataset**"
]
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array(['North America', 'Europe', 'Asia & Pacific',\n",
- " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antarctica'],\n",
- " dtype=object)"
+ "18613"
]
},
- "execution_count": 32,
"metadata": {},
- "output_type": "execute_result"
+ "output_type": "display_data"
}
],
"source": [
- "regions['REGIONS'].unique()"
+ "# Calculate radius based on REP_AREA\n",
+ "def calculate_radius(rep_area):\n",
+ " return (rep_area / 3.14159265358979323846) ** 0.5\n",
+ "\n",
+ "# Process only the point layers (indices 1, 3 and 5 of the dataframes list)\n",
+ "for idx in [1, 3, 5]:\n",
+ " # Get the dataframe at the specified index\n",
+ " gdf = dataframes[idx]\n",
+ "\n",
+ " # Reproject in Mollweide\n",
+ " gdf = gdf.to_crs('ESRI:54009')\n",
+ "\n",
+ " # Transform the reported area from square kilometers to square meters\n",
+ " gdf['REP_AREA_m'] = gdf['REP_AREA'] * 1000000\n",
+ "\n",
+ " # Create the \"radius\" column by applying calculate_radius to the \"REP_AREA_m\" column (area in square meters)\n",
+ " gdf['radius'] = gdf['REP_AREA_m'].apply(calculate_radius)\n",
+ "\n",
+ " # Create buffers around the points using the \"radius\" column\n",
+ " gdf_buffered = gdf.copy()\n",
+ " gdf_buffered['geometry'] = gdf.apply(lambda row: row.geometry.buffer(row['radius']), axis=1)\n",
+ "\n",
+ " # Reproject back to WGS84\n",
+ " gdf_buffered = gdf_buffered.to_crs('EPSG:4326')\n",
+ "\n",
+ " # Remove rows with invalid geometries\n",
+ " gdf_buffered = gdf_buffered[gdf_buffered['geometry'].is_valid]\n",
+ " \n",
+ " # Update the original dataframe with the buffered data\n",
+ " dataframes[idx] = gdf_buffered\n",
+ "\n",
+ "# Merge all dataframes\n",
+ "merged_mpa_all = pd.concat(dataframes)\n",
+ "len(merged_mpa_all)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save the wdpa dataframe as a shapefile\n",
+ "merged_mpa_all.to_file(path_out + \"/wdpa/merged_wdpa_all.shp\")"
]
}
],
diff --git a/data/notebooks/location_areas.ipynb b/data/notebooks/location_areas.ipynb
new file mode 100644
index 00000000..c9c933d7
--- /dev/null
+++ b/data/notebooks/location_areas.ipynb
@@ -0,0 +1,574 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import geopandas as gpd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create locations table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n",
+ "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\")\n",
+ "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create column \"iso\": ISO_SOV1 by default; where ISO_SOV2 (and then ISO_SOV3) is not null, join the sovereign codes with \";\" (ISO_SOV1;ISO_SOV2;ISO_SOV3)\n",
+ "eez['iso'] = eez['ISO_SOV1']\n",
+ "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n",
+ "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(337, 33)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create a mask for rows with multiple ';'-separated values in 'iso'\n",
+ "mask = eez['iso'].str.contains(';', na=False)\n",
+ "\n",
+ "# Split the 'iso' values and create one row per code, only for rows with multiple values\n",
+ "split_rows = eez[mask].copy()\n",
+ "split_rows['iso'] = split_rows['iso'].str.split(';')\n",
+ "split_rows = split_rows.explode('iso')\n",
+ "\n",
+ "# Keep rows with a single value in 'iso'\n",
+ "single_value_rows = eez[~mask]\n",
+ "\n",
+ "# Concatenate the exploded rows with the single value rows\n",
+ "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n",
+ "\n",
+ "eez_new.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "iso_country_mapping = {\n",
+ " 'USA': 'United States',\n",
+ " 'GBR': 'United Kingdom',\n",
+ " 'NZL': 'New Zealand',\n",
+ " 'FRA': 'France',\n",
+ " 'WSM': 'Samoa',\n",
+ " 'TON': 'Tonga',\n",
+ " 'CHL': 'Chile',\n",
+ " 'URY': 'Uruguay',\n",
+ " 'PER': 'Peru',\n",
+ " 'BRA': 'Brazil',\n",
+ " 'KIR': 'Kiribati',\n",
+ " 'ARG': 'Argentina',\n",
+ " 'AUS': 'Australia',\n",
+ " 'COM': 'Comoros',\n",
+ " 'MDG': 'Madagascar',\n",
+ " 'ZAF': 'South Africa',\n",
+ " 'MUS': 'Mauritius',\n",
+ " 'VUT': 'Vanuatu',\n",
+ " 'NAM': 'Namibia',\n",
+ " 'TLS': 'Timor-Leste',\n",
+ " 'COG': 'Republic of the Congo',\n",
+ " 'AGO': 'Angola',\n",
+ " 'MOZ': 'Mozambique',\n",
+ " 'KEN': 'Kenya',\n",
+ " 'PNG': 'Papua New Guinea',\n",
+ " 'TZA': 'Tanzania',\n",
+ " 'SLB': 'Solomon Islands',\n",
+ " 'SYC': 'Seychelles',\n",
+ " 'COD': 'Democratic Republic of the Congo',\n",
+ " 'ATG': 'Antigua and Barbuda',\n",
+ " 'NLD': 'Netherlands',\n",
+ " 'PRT': 'Portugal',\n",
+ " 'BHS': 'The Bahamas',\n",
+ " 'BRB': 'Barbados',\n",
+ " 'MEX': 'Mexico',\n",
+ " 'CPV': 'Cape Verde',\n",
+ " 'ESP': 'Spain',\n",
+ " 'PAN': 'Panama',\n",
+ " 'CRI': 'Costa Rica',\n",
+ " 'DMA': 'Dominica',\n",
+ " 'DOM': 'Dominican Republic',\n",
+ " 'GTM': 'Guatemala',\n",
+ " 'DNK': 'Denmark',\n",
+ " 'GMB': 'Gambia',\n",
+ " 'GIB': 'Gibraltar',\n",
+ " 'GRD': 'Grenada',\n",
+ " 'SLE': 'Sierra Leone',\n",
+ " 'ISL': 'Iceland',\n",
+ " 'JAM': 'Jamaica',\n",
+ " 'MRT': 'Mauritania',\n",
+ " 'HTI': 'Haiti',\n",
+ " 'KNA': 'Saint Kitts and Nevis',\n",
+ " 'LCA': 'Saint Lucia',\n",
+ " 'VCT': 'Saint Vincent and the Grenadines',\n",
+ " 'TTO': 'Trinidad and Tobago',\n",
+ " 'SLV': 'El Salvador',\n",
+ " 'BLZ': 'Belize',\n",
+ " 'CUB': 'Cuba',\n",
+ " 'SEN': 'Senegal',\n",
+ " 'VEN': 'Venezuela',\n",
+ " 'CAN': 'Canada',\n",
+ " 'NIC': 'Nicaragua',\n",
+ " 'GUY': 'Guyana',\n",
+ " 'COL': 'Colombia',\n",
+ " 'IRL': 'Ireland',\n",
+ " 'GNB': 'Guinea-Bissau',\n",
+ " 'GIN': 'Guinea',\n",
+ " 'CIV': 'Ivory Coast',\n",
+ " 'LBR': 'Liberia',\n",
+ " 'HND': 'Honduras',\n",
+ " 'ECU': 'Ecuador',\n",
+ " 'ESH': 'Western Sahara',\n",
+ " 'SUR': 'Suriname',\n",
+ " 'MAR': 'Morocco',\n",
+ " 'ARE': 'United Arab Emirates',\n",
+ " 'CYP': 'Cyprus',\n",
+ " 'ERI': 'Eritrea',\n",
+ " 'EGY': 'Egypt',\n",
+ " 'GEO': 'Georgia',\n",
+ " 'IRN': 'Iran',\n",
+ " 'LBN': 'Lebanon',\n",
+ " 'LBY': 'Libya',\n",
+ " 'MLT': 'Malta',\n",
+ " 'OMN': 'Oman',\n",
+ " 'SAU': 'Saudi Arabia',\n",
+ " 'LKA': 'Sri Lanka',\n",
+ " 'SDN': 'Sudan',\n",
+ " 'SYR': 'Syria',\n",
+ " 'TGO': 'Togo',\n",
+ " 'GRC': 'Greece',\n",
+ " 'TUR': 'Turkey',\n",
+ " 'MCO': 'Monaco',\n",
+ " 'TUN': 'Tunisia',\n",
+ " 'MNE': 'Montenegro',\n",
+ " 'ALB': 'Albania',\n",
+ " 'BGR': 'Bulgaria',\n",
+ " 'PSE': 'Palestine',\n",
+ " 'KWT': 'Kuwait',\n",
+ " 'IRQ': 'Iraq',\n",
+ " 'BHR': 'Bahrain',\n",
+ " 'QAT': 'Qatar',\n",
+ " 'YEM': 'Yemen',\n",
+ " 'ISR': 'Israel',\n",
+ " 'JOR': 'Jordan',\n",
+ " 'DJI': 'Djibouti',\n",
+ " 'BGD': 'Bangladesh',\n",
+ " 'NGA': 'Nigeria',\n",
+ " 'CMR': 'Cameroon',\n",
+ " 'STP': 'São Tomé and Príncipe',\n",
+ " 'BIH': 'Bosnia and Herzegovina',\n",
+ " 'MHL': 'Marshall Islands',\n",
+ " 'PLW': 'Palau',\n",
+ " 'PHL': 'Philippines',\n",
+ " 'TWN': 'Taiwan',\n",
+ " 'SGP': 'Singapore',\n",
+ " 'THA': 'Thailand',\n",
+ " 'VNM': 'Vietnam',\n",
+ " 'KOR': 'South Korea',\n",
+ " 'BRN': 'Brunei',\n",
+ " 'PRK': 'North Korea',\n",
+ " 'KHM': 'Cambodia',\n",
+ " 'CHN': 'China',\n",
+ " 'EST': 'Estonia',\n",
+ " 'FIN': 'Finland',\n",
+ " 'SWE': 'Sweden',\n",
+ " 'LTU': 'Lithuania',\n",
+ " 'NOR': 'Norway',\n",
+ " 'BEL': 'Belgium',\n",
+ " 'DEU': 'Germany',\n",
+ " 'LVA': 'Latvia',\n",
+ " 'HRV': 'Croatia',\n",
+ " 'ITA': 'Italy',\n",
+ " 'UKR': 'Ukraine',\n",
+ " 'ROU': 'Romania',\n",
+ " 'JPN': 'Japan',\n",
+ " 'IND': 'India',\n",
+ " 'PAK': 'Pakistan',\n",
+ " 'TKM': 'Turkmenistan',\n",
+ " 'AZE': 'Azerbaijan',\n",
+ " 'KAZ': 'Kazakhstan',\n",
+ " 'MMR': 'Myanmar',\n",
+ " 'POL': 'Poland',\n",
+ " 'BEN': 'Benin',\n",
+ " 'SVN': 'Slovenia',\n",
+ " 'MYS': 'Malaysia',\n",
+ " 'ATA': 'Antarctica',\n",
+ " 'TUV': 'Tuvalu',\n",
+ " 'FJI': 'Fiji',\n",
+ " 'FSM': 'Micronesia',\n",
+ " 'GNQ': 'Equatorial Guinea',\n",
+ " 'MDV': 'Maldives',\n",
+ " 'SOM': 'Somalia',\n",
+ " 'NRU': 'Nauru',\n",
+ " 'GAB': 'Gabon',\n",
+ " 'IDN': 'Indonesia',\n",
+ " 'DZA': 'Algeria',\n",
+ " 'GHA': 'Ghana',\n",
+ " 'RUS': 'Russia'\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_name(country):\n",
+ " return iso_country_mapping.get(country, None)\n",
+ "\n",
+ "# Apply the function to create the 'name_iso' column (None when the ISO code is not in the mapping)\n",
+ "eez_new['name_iso'] = eez_new['iso'].apply(get_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+ " 'region_name': 'Antarctica',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATA\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Build two lookups keyed by country ISO3 code: country -> region code and country -> region name\n",
+ "country_to_region = {}\n",
+ "name_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_iso']\n",
+ " name_to_region[country] = region['region_name']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez_new['region'] = eez_new['iso'].map(country_to_region)\n",
+ "eez_new['region_name'] = eez_new['iso'].map(name_to_region)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+ "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+ "marine_areas['location_type'] = 'country'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+ "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+ "regions_areas['location_type'] = 'region'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n",
+ "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Concatenate the country, region, global and high-seas area tables into a single locations table\n",
+ "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save the table as csv\n",
+ "marine_areas2.to_csv(path_out + \"/tables/locations.csv\", index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create region_locations table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " region_id | \n",
+ " location_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AS | \n",
+ " AFG | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AS | \n",
+ " ASM | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AS | \n",
+ " AUS | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " AS | \n",
+ " BGD | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " AS | \n",
+ " BRN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 244 | \n",
+ " WA | \n",
+ " QAT | \n",
+ "
\n",
+ " \n",
+ " 245 | \n",
+ " WA | \n",
+ " SAU | \n",
+ "
\n",
+ " \n",
+ " 246 | \n",
+ " WA | \n",
+ " SYR | \n",
+ "
\n",
+ " \n",
+ " 247 | \n",
+ " WA | \n",
+ " YEM | \n",
+ "
\n",
+ " \n",
+ " 248 | \n",
+ " AT | \n",
+ " ATA | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
249 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " region_id location_id\n",
+ "0 AS AFG\n",
+ "1 AS ASM\n",
+ "2 AS AUS\n",
+ "3 AS BGD\n",
+ "4 AS BRN\n",
+ ".. ... ...\n",
+ "244 WA QAT\n",
+ "245 WA SAU\n",
+ "246 WA SYR\n",
+ "247 WA YEM\n",
+ "248 AT ATA\n",
+ "\n",
+ "[249 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regions_df = pd.DataFrame([{'region_id': data['region_iso'], 'location_id': iso} for data in regions_data for iso in data['country_iso_3s']])\n",
+ "regions_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions_df.to_csv(path_out + '/tables/region_locations.csv', index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/mpas_table.ipynb b/data/notebooks/mpas_table.ipynb
new file mode 100644
index 00000000..b846ab4c
--- /dev/null
+++ b/data/notebooks/mpas_table.ipynb
@@ -0,0 +1,143 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Read relevant datasets: MPAtlas, WDPA, and ProtectedSeas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read mpatlas data\n",
+ "mpatlas = gpd.read_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\")\n",
+ "mpatlas = mpatlas.drop_duplicates(subset=['wdpa_id', 'designation','location_id','establishment_stage', 'protection_level','year'], keep='first')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ps = gpd.read_file(path_out + \"/protectedseas/protectedseas.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wdpa = gpd.read_file(path_out + \"/wdpa/merged_wdpa_all.shp\")\n",
+ "wdpa = wdpa[['WDPA_PID', 'NAME','PA_DEF', 'GIS_M_AREA','PARENT_ISO']].rename(columns={'WDPA_PID': 'wdpa_id', 'NAME': 'name', 'PA_DEF':'protection_type', 'GIS_M_AREA': 'area', 'PARENT_ISO': 'location_id'})\n",
+ "wdpa['protection_type'] = wdpa['protection_type'].astype(int).replace({1: 'mpa', 0: 'oecm'})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combine information from different tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add protection level and area info from MPAtlas and ProtectedSeas to the WDPA dataframe\n",
+ "table_prot = wdpa.merge(mpatlas[['wdpa_id','area_km2','protection_level']], on='wdpa_id', how='left').rename(columns={'area_km2':'area_mpatlas','protection_level': 'mpatlas_prot_lvl'})\n",
+ "table_prot = table_prot.merge(ps[['wdpa_id','FPS_cat', 'total_area']], on='wdpa_id', how='left').rename(columns={'FPS_cat': 'fpl', 'total_area': 'area_ps'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table_prot['area'] = table_prot['area_mpatlas'].combine_first(table_prot['area_ps']).combine_first(table_prot['area'])\n",
+ "table_prot = table_prot.drop(columns=['area_mpatlas', 'area_ps'])\n",
+ "table_prot = table_prot.drop(columns={'name', 'protection_type'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add establishment info to wdpa df\n",
+ "table_est = wdpa.merge(mpatlas[['wdpa_id','establishment_stage', 'year']], on='wdpa_id', how='left')\n",
+ "table_est = table_est.drop(columns={'area', 'location_id'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save tables as csv\n",
+ "table_prot.to_csv(path_out + \"/tables/mpas_table.csv\", index=False)\n",
+ "table_est.to_csv(path_out + \"/tables/mpas_table_establishment.csv\", index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/mpatlas_stats.ipynb b/data/notebooks/mpatlas_stats.ipynb
new file mode 100644
index 00000000..afdf6408
--- /dev/null
+++ b/data/notebooks/mpatlas_stats.ipynb
@@ -0,0 +1,349 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Read and prepare data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read data from MPAtlas\n",
+ "mpatlas = gpd.read_file(path_in + \"/mpatlas_assess_zone.geojson\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill missing wdpa_pid with the wdpa_id\n",
+ "mpatlas['wdpa_pid'] = mpatlas['wdpa_pid'].fillna(mpatlas['wdpa_id'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create new column with protection level reclassified\n",
+ "def map_protection_level(value):\n",
+ " if value in [\"full\", \"high\"]:\n",
+ " return \"fully or highly protected\"\n",
+ " else:\n",
+ " return \"less protected or unknown\"\n",
+ "\n",
+ "# Derive 'protection_level' from 'protection_mpaguide_level'\n",
+ "mpatlas['protection_level'] = mpatlas['protection_mpaguide_level'].apply(map_protection_level)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# replace proposed/committed with proposed or committed\n",
+ "mpatlas['establishment_stage'] = mpatlas['establishment_stage'].replace(['proposed/committed'], 'proposed or committed')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Take only year from 'proposed_date', 'designated_date', 'implemented_date'\n",
+ "mpatlas['proposed_date'] = mpatlas['proposed_date'].str[:4].astype('Int64')\n",
+ "mpatlas['designated_date'] = mpatlas['designated_date'].str[:4].astype('Int64')\n",
+ "mpatlas['implemented_date'] = mpatlas['implemented_date'].str[:4].astype('Int64')\n",
+ "\n",
+ "# Create column 'year' with the most recent year from 'proposed_date', 'designated_date', 'implemented_date'\n",
+ "mpatlas['year'] = mpatlas[['proposed_date', 'designated_date', 'implemented_date']].max(axis=1)\n",
+ "\n",
+ "# Convert year to plain int to be able to save it later (Int64 not allowed); 0 is a temporary stand-in for missing years\n",
+ "mpatlas['year'] = mpatlas['year'].fillna(0).astype(int)\n",
+ "# Turn the 0 placeholder back into a missing value (result is assigned back rather than mutating the column in place)\n",
+ "mpatlas['year'] = mpatlas['year'].replace(0, pd.NaT)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate area in km2\n",
+ "mpatlas.to_crs('ESRI:54009', inplace=True)\n",
+ "mpatlas['area_km2'] = mpatlas['geometry'].area / 10**6\n",
+ "mpatlas.to_crs('EPSG:4326', inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep relevant columns \n",
+ "mpatlas2 = mpatlas[['wdpa_pid', 'name', 'designation', 'sovereign', 'area_km2', 'establishment_stage', 'protection_level', 'year', 'geometry']].rename(columns={'sovereign': 'location_id', 'wdpa_pid': 'wdpa_id'})\n",
+ "\n",
+ "# Save as geojson (to keep full names)\n",
+ "mpatlas2.to_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\", driver='GeoJSON')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# For those with multiple countries, split them\n",
+ "mpatlas_iso = mpatlas2.copy()\n",
+ "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(';')\n",
+ "mpatlas_iso = mpatlas_iso.explode('location_id')\n",
+ "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(':')\n",
+ "mpatlas_iso = mpatlas_iso.explode('location_id')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Global stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate global area per protection level\n",
+ "prot_global = mpatlas2.groupby('protection_level').agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+ "prot_global['location_id'] = 'GLOB'\n",
+ "prot_global['last_updated'] = datetime.now().year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate global area per establishment stage\n",
+ "stage_global = mpatlas2.groupby(['establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+ "stage_global['location_id'] = 'GLOB'\n",
+ "stage_global['last_updated'] = datetime.now().year"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Country stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prot_iso = mpatlas_iso.groupby(['location_id', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+ "prot_iso['last_updated'] = datetime.now().year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "stage_iso = mpatlas_iso.groupby(['location_id', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+ "stage_iso['last_updated'] = datetime.now().year"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Region stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Build a lookup that maps each country ISO3 code to its region code ('region_iso')\n",
+ "country_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_iso']\n",
+ "\n",
+ "# Add region column to mpatlas_iso\n",
+ "mpatlas_iso['regions'] = mpatlas_iso['location_id'].map(country_to_region)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate area per protection level per region; an MPA shared by several countries of one region is counted once for that region\n",
+ "prot_region = mpatlas_iso.reset_index(names='mpa_row').drop_duplicates(subset=['mpa_row', 'regions']).groupby(['regions', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n",
+ "prot_region['last_updated'] = datetime.now().year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate area per establishment stage per region; an MPA shared by several countries of one region is counted once for that region\n",
+ "stage_region = mpatlas_iso.reset_index(names='mpa_row').drop_duplicates(subset=['mpa_row', 'regions']).groupby(['regions', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n",
+ "stage_region['last_updated'] = datetime.now().year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Concatenate all dataframes for protection stats and establishment stage stats\n",
+ "prot = pd.concat([prot_iso, prot_global, prot_region], ignore_index=True)\n",
+ "stage = pd.concat([stage_iso, stage_global, stage_region], ignore_index=True)\n",
+ "prot.to_csv(path_out + \"/tables/mpatlas_protection_level.csv\", index=False)\n",
+ "stage.to_csv(path_out + \"/tables/mpatlas_establishment_stage.csv\", index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb
new file mode 100644
index 00000000..54ae3293
--- /dev/null
+++ b/data/notebooks/protectedseas.ipynb
@@ -0,0 +1,546 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Processing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import shp containing geometries\n",
+ "ps = gpd.read_file(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import csv containing information\n",
+ "protectedseas = pd.read_csv(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only rows whose wdpa_id is not null and is not the string '0'\n",
+ "protectedseas = protectedseas[protectedseas['wdpa_id'].notna()]\n",
+ "protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Join csv with shapefile and keep only wdpa geometries\n",
+ "ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only columns of interest\n",
+ "ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]\n",
+ "ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ProtectedSeas only provides country names, not country codes. We need to add country codes to the dataframe\n",
+ "country_iso_dict = {\n",
+ " 'Antigua and Barbuda': 'ATG',\n",
+ " 'USA': 'USA',\n",
+ " 'Albania': 'ALB',\n",
+ " 'Netherlands Antilles': 'NLD',\n",
+ " 'United Arab Emirates': 'ARE',\n",
+ " 'Argentina': 'ARG',\n",
+ " 'France': 'FRA',\n",
+ " 'Australia': 'AUS',\n",
+ " 'Barbados': 'BRB',\n",
+ " 'Belgium': 'BEL',\n",
+ " 'Bangladesh': 'BGD',\n",
+ " 'Bulgaria': 'BGR',\n",
+ " 'Belize': 'BLZ',\n",
+ " 'Brazil': 'BRA',\n",
+ " 'Bahamas': 'BHS',\n",
+ " 'British Virgin Islands': 'GBR',\n",
+ " 'Canada': 'CAN',\n",
+ " 'Chile': 'CHL',\n",
+ " 'Cameroon': 'CMR',\n",
+ " 'Colombia': 'COL',\n",
+ " 'Comoros': 'COM',\n",
+ " 'Costa Rica': 'CRI',\n",
+ " 'Cuba': 'CUB',\n",
+ " 'Cyprus': 'CYP',\n",
+ " 'Germany': 'DEU',\n",
+ " 'Djibouti': 'DJI',\n",
+ " 'Djbouti': 'DJI',\n",
+ " 'Dominica': 'DMA',\n",
+ " 'Denmark': 'DNK',\n",
+ " 'Dominican Republic': 'DOM',\n",
+ " 'Algeria': 'DZA',\n",
+ " 'Ecuador': 'ECU',\n",
+ " 'Egypt': 'EGY',\n",
+ " 'Spain': 'ESP',\n",
+ " 'Estonia': 'EST',\n",
+ " 'Finland': 'FIN',\n",
+ " 'France, Italy, Monaco': 'FRA;ITA;MCO',\n",
+ " 'French Antilles': 'FRA',\n",
+ " 'Gabon': 'GAB',\n",
+ " 'United Kingdom': 'GBR',\n",
+ " 'Grenada': 'GRD',\n",
+ " 'Ghana': 'GHA',\n",
+ " 'Gibraltar': 'GBR',\n",
+ " 'Guinea': 'GIN',\n",
+ " 'The Gambia': 'GMB',\n",
+ " 'Guinea Bissau': 'GNB',\n",
+ " 'Greece': 'GRC',\n",
+ " 'Guatemala': 'GTM',\n",
+ " 'French Guyana': 'FRA',\n",
+ " 'Honduras': 'HND',\n",
+ " 'Croatia': 'HRV',\n",
+ " 'Indonesia': 'IDN',\n",
+ " 'Indonesia ': 'IDN',\n",
+ " 'India': 'IND',\n",
+ " 'Ireland': 'IRL',\n",
+ " 'Iceland': 'ISL',\n",
+ " 'Israel': 'ISR',\n",
+ " 'Italy': 'ITA',\n",
+ " 'Jamaica': 'JAM',\n",
+ " 'Jordan': 'JOR',\n",
+ " 'Japan': 'JPN',\n",
+ " 'Kenya': 'KEN',\n",
+ " 'Cambodia': 'KHM',\n",
+ " 'South Korea': 'KOR',\n",
+ " 'Cayman Islands': 'GBR',\n",
+ " 'Lebanon': 'LBN',\n",
+ " 'Liberia': 'LBR',\n",
+ " 'Saint Lucia': 'LCA',\n",
+ " 'Sri Lanka': 'LKA',\n",
+ " 'Lithuania': 'LTU',\n",
+ " 'Latvia': 'LVA',\n",
+ " 'Morocco': 'MAR',\n",
+ " 'Monaco': 'MCO',\n",
+ " 'Madagascar': 'MDG',\n",
+ " 'Republic of Maldives': 'MDV',\n",
+ " 'Malta': 'MLT',\n",
+ " 'Myanmar': 'MMR',\n",
+ " 'Mozambique': 'MOZ',\n",
+ " 'Mauritania': 'MRT',\n",
+ " 'Malaysia': 'MYS',\n",
+ " 'Namibia': 'NAM',\n",
+ " 'New Caledonia': 'FRA',\n",
+ " 'Niue': 'NIU',\n",
+ " 'The Netherlands': 'NLD',\n",
+ " 'Netherlands': 'NLD',\n",
+ " 'Norway': 'NOR',\n",
+ " 'New Zealand': 'NZL',\n",
+ " 'Panama': 'PAN',\n",
+ " 'British Overseas Territory - Pitcairn': 'GBR',\n",
+ " 'Peru': 'PER',\n",
+ " 'Philippines': 'PHL',\n",
+ " 'Republic of Palau': 'PLW',\n",
+ " 'Poland': 'POL',\n",
+ " 'Portugal': 'PRT',\n",
+ " 'Qatar': 'QAT',\n",
+ " 'Russia': 'RUS',\n",
+ " 'Senegal': 'SEN',\n",
+ " 'Saint Helena, Ascension and Tristan da Cunha Overseas Territory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n",
+ " 'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n",
+ " 'Solomon Islands': 'SLB',\n",
+ " 'El Salvador': 'SLV',\n",
+ " 'São Tomé and Príncipe': 'STP',\n",
+ " 'Suriname': 'SUR',\n",
+ " 'Slovenia': 'SVN',\n",
+ " 'Sweden': 'SWE',\n",
+ " 'Seychelles': 'SYC',\n",
+ " 'Turks and Caicos Islands': 'GBR',\n",
+ " 'Thailand': 'THA',\n",
+ " 'East Timor': 'TLS',\n",
+ " 'Tonga': 'TON',\n",
+ " 'Trinidad and Tobago': 'TTO',\n",
+ " 'Tunisia': 'TUN',\n",
+ " 'Tanzania': 'TZA',\n",
+ " 'Uruguay': 'URY',\n",
+ " 'Saint Vincent and the Grenadines': 'VCT',\n",
+ " 'Vietnam': 'VNM',\n",
+ " 'Yemen': 'YEM',\n",
+ " 'South Africa': 'ZAF',\n",
+ " 'USA; Haiti; Jamaica': 'USA;HTI;JAM',\n",
+ "}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add country code to the dataframe\n",
+ "def get_parent_iso(country):\n",
+ " return country_iso_dict.get(country, None)\n",
+ "\n",
+ "# Apply the function to create the 'parent_iso' column\n",
+ "ps_gdf['parent_iso'] = ps_gdf['country'].apply(get_parent_iso)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# One row has a country name that is missing from country_iso_dict; assign it 'FRA' (note: this fills every unmapped row, so re-check if the input data changes)\n",
+ "ps_gdf.loc[ps_gdf['parent_iso'].isna(), 'parent_iso'] = 'FRA'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " site_id | \n",
+ " site_name | \n",
+ " country | \n",
+ " wdpa_id | \n",
+ " FPS | \n",
+ " total_area | \n",
+ " geometry | \n",
+ " parent_iso | \n",
+ " FPS_cat | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AIAG10 | \n",
+ " Low Bay Sanctuary | \n",
+ " Antigua and Barbuda | \n",
+ " 555587197 | \n",
+ " 5.0 | \n",
+ " 48.321285 | \n",
+ " POLYGON ((-61.91090 17.57960, -61.91096 17.579... | \n",
+ " ATG | \n",
+ " highly | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AIAG11 | \n",
+ " Nelson's Dockyard National Park | \n",
+ " Antigua and Barbuda | \n",
+ " 555587192 | \n",
+ " 1.0 | \n",
+ " 40.705369 | \n",
+ " POLYGON ((-61.75807 17.03541, -61.73745 17.021... | \n",
+ " ATG | \n",
+ " less | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " site_id site_name country wdpa_id \\\n",
+ "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n",
+ "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n",
+ "\n",
+ " FPS total_area geometry \\\n",
+ "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n",
+ "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n",
+ "\n",
+ " parent_iso FPS_cat \n",
+ "0 ATG highly \n",
+ "1 ATG less "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reclassify FPS values\n",
+ "fps_classes = {\n",
+ " 1: 'less',\n",
+ " 2: 'less',\n",
+ " 3: 'moderately',\n",
+ " 4: 'highly',\n",
+ " 5: 'highly'\n",
+ "}\n",
+ "\n",
+ "# Create a new column 'FPS_cat' based on the mapping\n",
+ "ps_gdf['FPS_cat'] = ps_gdf['FPS'].apply(lambda x: fps_classes.get(x, None))\n",
+ "ps_gdf.head(2)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ps_gdf.to_file(path_out + \"protectedseas/protectedseas.shp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Global stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "global_area = ps_gdf.groupby(['FPS_cat'], as_index=False)['total_area'].sum().rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n",
+ "global_area['location_id'] = 'GLOB'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Country stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a mask for rows with multiple values in 'parent_iso'\n",
+ "mask = ps_gdf['parent_iso'].str.contains(';', na=False)\n",
+ "\n",
+ "# Split the 'parent_iso' values and create separate rows only for rows with multiple values\n",
+ "split_rows = ps_gdf[mask].copy()\n",
+ "split_rows['parent_iso'] = split_rows['parent_iso'].str.split(';')\n",
+ "split_rows = split_rows.explode('parent_iso')\n",
+ "\n",
+ "# Keep rows with a single value in 'parent_iso'\n",
+ "single_value_rows = ps_gdf[~mask]\n",
+ "\n",
+ "# Concatenate the exploded rows with the single value rows\n",
+ "ps_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+ " 'region_name': 'Antarctica',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATA\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Build a lookup that maps each country ISO3 code to its region code ('region_iso')\n",
+ "country_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_iso']\n",
+ "\n",
+ "# Create a new column 'region' based on the mapping\n",
+ "ps_iso['region'] = ps_iso['parent_iso'].map(country_to_region)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "country_area = ps_iso.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()\n",
+ "country_area = country_area.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "region_area = ps_iso.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()\n",
+ "region_area = region_area.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ps_coverage = pd.concat([country_area, region_area, global_area], ignore_index=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ps_coverage.to_csv(path_out + 'tables/fishing_protection_level.csv', index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb
index 8b38f3bf..921a2d53 100644
--- a/data/notebooks/wdpa_coverage.ipynb
+++ b/data/notebooks/wdpa_coverage.ipynb
@@ -28,7 +28,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -40,12 +40,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
- "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\""
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
]
},
{
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -71,9 +71,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "6033\n",
+ "172\n",
+ "6033\n",
+ "172\n",
+ "6033\n",
+ "171\n"
+ ]
+ }
+ ],
"source": [
"print(len(poly1))\n",
"print(len(point1))\n",
@@ -95,7 +108,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -114,15 +127,27 @@
" df = df[(df['REP_AREA'] != 0)]\n",
" \n",
" # Update the original dataframes in the list\n",
- " dataframes[i] = df\n",
- "\n"
+ " dataframes[i] = df"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "5999\n",
+ "157\n",
+ "6018\n",
+ "123\n",
+ "6014\n",
+ "135\n"
+ ]
+ }
+ ],
"source": [
"print(len(dataframes[0]))\n",
"print(len(dataframes[1]))\n",
@@ -141,7 +166,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -181,14 +206,22 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\" and those that are OECM"
+ "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\""
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "All gdf have the same crs: EPSG:4326\n"
+ ]
+ }
+ ],
"source": [
"# Check that all of them have the same crs\n",
"first_crs = dataframes[0].crs\n",
@@ -199,6 +232,28 @@
" print(\"gdf have different crs\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "18445"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Merge dataframes\n",
+ "merged_mpa = pd.concat(dataframes)\n",
+ "len(merged_mpa)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -211,7 +266,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -503,58 +558,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "**Countries per PARENT_ISO**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 52,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2023 = gpd.read_file(path_out + \"/wdpa/timeseries/protected_dissolved_2023.shp\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['ATG', 'BRB', 'BRA', 'GBR', 'CHL', 'COL', 'CRI', 'DOM', 'ECU',\n",
- " 'JAM', 'NLD', 'PER', 'PAN', 'SUR', 'VEN', 'USA', 'AUS', 'CAN',\n",
- " 'FRA', 'IRN', 'JPN', 'KEN', 'KOR', 'MYS', 'MRT', 'MOZ', 'NOR',\n",
- " 'PHL', 'POL', 'SAU', 'SEN', 'SWE', 'THA', 'TUN', 'CMR', 'IDN',\n",
- " 'MUS', 'PRT', 'SYC', 'ISL', 'NZL', 'EST', 'GEO', 'UKR', 'MEX',\n",
- " 'BHS', 'BLZ', 'GMB', 'MDG', 'HRV', 'FJI', 'LKA', 'ARG', 'ZAF',\n",
- " 'PNG', 'TON', 'PLW', 'COK', 'BGD', 'AGO', 'ALB', 'DNK', 'ITA',\n",
- " 'PAK', 'FIN', 'VNM', 'MMR', 'CHN', 'SGP', 'DEU', 'ROU', 'EGY',\n",
- " 'SLB', 'VUT', 'BGR', 'MAR', 'MLT', 'DMA', 'LCA', 'OMN', 'GTM',\n",
- " 'NIC', 'TTO', 'WSM', 'TZA', 'GRC', 'LBN', 'CUB', 'ISR', 'GRD',\n",
- " 'VCT', 'BRN', 'ESP', 'JOR', 'ARE', 'HND', 'GNQ', 'KNA', 'LTU',\n",
- " 'GNB', 'NGA', 'LVA', 'GUY', 'KAZ', 'BEL', 'GIN', 'IRL', 'RUS',\n",
- " 'KHM', 'QAT', 'GAB', 'MDV', 'AZE', 'NAM', 'TUR', 'CPV', 'COG',\n",
- " 'TUV', 'MCO', 'TKM', 'SVN', 'SLE', 'KIR', 'COM', 'NIU', 'FSM',\n",
- " 'GHA', 'IOT', 'IND', 'LBR', 'CIV', 'SDN', 'SHN', 'SJM', 'UMI',\n",
- " 'ATA', 'SYR', 'TLS', 'FRA;ITA;MCO', 'URY', 'ABNJ', 'NLD;DEU;DNK',\n",
- " 'FIN;SWE', 'MHL', 'SLV', 'DZA', 'STP', 'YEM', 'COD', 'CYP', 'KWT',\n",
- " 'HTI', 'MNE', 'BHR', 'LBY'], dtype=object)"
- ]
- },
- "execution_count": 53,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "p2023['PARENT_ISO'].unique()"
+ "### Global and country stats"
]
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -564,215 +573,55 @@
"# Create an empty list to store the results\n",
"results_list = []\n",
"\n",
+ "# Create a DataFrame to store the global coverage\n",
+ "global_coverage = pd.DataFrame(columns=['year', 'protection_type', 'location_id', 'cumsum_area'])\n",
+ "\n",
"for year in years_range:\n",
" filename = f'protected_dissolved_{year}.shp'\n",
" file_path = os.path.join(folder_path, filename)\n",
- " \n",
+ "\n",
" if os.path.exists(file_path):\n",
" gdf = gpd.read_file(file_path)\n",
- " grouped = gdf.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n",
- " \n",
- " # Create columns\n",
- " grouped['year'] = year\n",
- " grouped['protection_type'] = 'MPA+OECM'\n",
- " grouped.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n",
- " \n",
- " # Append the result to the list\n",
- " results_list.append(grouped)\n",
"\n",
- "# Concatenate the list of results into a single DataFrame\n",
- "final_df = pd.concat(results_list, ignore_index=True)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Convert area to km2\n",
- "final_df['cumsum_area'] =final_df['cumsum_area']/1000000"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Global**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " location_id | \n",
- " cumsum_area | \n",
- " year | \n",
- " protection_type | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " ABNJ | \n",
- " 594174.66 | \n",
- " 2000 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " AGO | \n",
- " 0.42 | \n",
- " 2000 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " ALB | \n",
- " 103.05 | \n",
- " 2000 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " ARE | \n",
- " 78.52 | \n",
- " 2000 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " ARG | \n",
- " 6155.67 | \n",
- " 2000 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 3571 | \n",
- " GLOB | \n",
- " 28125365.96 | \n",
- " 2019 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 3572 | \n",
- " GLOB | \n",
- " 29624663.84 | \n",
- " 2020 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 3573 | \n",
- " GLOB | \n",
- " 29739178.77 | \n",
- " 2021 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 3574 | \n",
- " GLOB | \n",
- " 29910678.77 | \n",
- " 2022 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- " 3575 | \n",
- " GLOB | \n",
- " 29910724.21 | \n",
- " 2023 | \n",
- " MPA+OECM | \n",
- "
\n",
- " \n",
- "
\n",
- "
3576 rows × 4 columns
\n",
- "
"
- ],
- "text/plain": [
- " location_id cumsum_area year protection_type\n",
- "0 ABNJ 594174.66 2000 MPA+OECM\n",
- "1 AGO 0.42 2000 MPA+OECM\n",
- "2 ALB 103.05 2000 MPA+OECM\n",
- "3 ARE 78.52 2000 MPA+OECM\n",
- "4 ARG 6155.67 2000 MPA+OECM\n",
- "... ... ... ... ...\n",
- "3571 GLOB 28125365.96 2019 MPA+OECM\n",
- "3572 GLOB 29624663.84 2020 MPA+OECM\n",
- "3573 GLOB 29739178.77 2021 MPA+OECM\n",
- "3574 GLOB 29910678.77 2022 MPA+OECM\n",
- "3575 GLOB 29910724.21 2023 MPA+OECM\n",
- "\n",
- "[3576 rows x 4 columns]"
- ]
- },
- "execution_count": 41,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Calculate global per year and append it\n",
- "glob_df = final_df.groupby(['year', 'protection_type'])['cumsum_area'].sum().reset_index()\n",
+ " # Calculate global coverage for each year and protection type\n",
+ " global_area = gdf['AREA'].sum()\n",
+ " global_row = pd.DataFrame({'year': [year], 'protection_type': ['MPA+OECM'], 'location_id': ['GLOB'], 'cumsum_area': [global_area]})\n",
+ " global_coverage = pd.concat([global_coverage, global_row], ignore_index=True)\n",
"\n",
- "glob_df['location_id'] = 'GLOB'\n",
+ " # Split rows with multiple ISO codes into separate rows\n",
+ " processed_df = gdf.copy()\n",
+ " processed_df['PARENT_ISO'] = processed_df['PARENT_ISO'].str.split(';')\n",
+ " processed_df = processed_df.explode('PARENT_ISO')\n",
"\n",
- "final_df2 = pd.concat([final_df, glob_df], ignore_index=True)\n",
- "final_df2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Reorder the columns and add column last updated\n",
- "final_df2 = final_df2[['location_id', 'year', 'protection_type', 'cumsum_area']]\n",
+ " # Group by 'PARENT_ISO' and aggregate area\n",
+ " iso_area = processed_df.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n",
"\n",
- "current_date = datetime.now().strftime('%Y-%m-%d')\n",
+ " # Create columns to match BE table\n",
+ " iso_area['year'] = year\n",
+ " iso_area['protection_type'] = 'MPA+OECM'\n",
+ " iso_area.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n",
"\n",
- "final_df2 = final_df2.copy()\n",
- "final_df2['last_updated'] = current_date"
+ " # Append the result to the list\n",
+ " results_list.append(iso_area)\n",
+ "\n",
+ "# Stack the per-country rows, then the GLOB rows (both still in the shapefile's raw AREA units)\n",
+ "final_df = pd.concat(results_list, ignore_index=True)\n",
+ "final_df = pd.concat([final_df, global_coverage], ignore_index=True)\n",
+ "\n",
+ "# Convert area to sq.km once, after GLOB is appended, so GLOB is scaled like every other row\n",
+ "final_df['cumsum_area'] = final_df['cumsum_area'] / 1000000\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "**Regions**"
+ "### Regional stats"
]
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -834,16 +683,18 @@
" ]\n",
" },\n",
" {\n",
- " 'region_iso': 'GL',\n",
- " 'region_name': 'Global',\n",
- " 'country_iso_3s': []\n",
- " },\n",
- " {\n",
" 'region_iso': 'WA',\n",
" 'region_name': 'West Asia',\n",
" 'country_iso_3s': [\n",
" \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
" ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+ " 'region_name': 'Antarctica',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATA\"\n",
+ " ]\n",
" }\n",
"]\n",
"\n",
@@ -851,12 +702,12 @@
"country_to_region = {}\n",
"for region in regions_data:\n",
" for country in region['country_iso_3s']:\n",
- " country_to_region[country] = region['region_name']"
+ " country_to_region[country] = region['region_iso']"
]
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -881,52 +732,46 @@
" \n",
" | \n",
" location_id | \n",
- " cumsum_area | \n",
" year | \n",
" protection_type | \n",
- " region | \n",
+ " cumsum_area | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " ABNJ | \n",
- " 594174.66 | \n",
+ " AF | \n",
" 2000 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 94507.122820 | \n",
"
\n",
" \n",
" 1 | \n",
- " AGO | \n",
- " 0.42 | \n",
- " 2000 | \n",
+ " AF | \n",
+ " 2001 | \n",
" MPA+OECM | \n",
- " Africa | \n",
+ " 94807.303100 | \n",
"
\n",
" \n",
" 2 | \n",
- " ALB | \n",
- " 103.05 | \n",
- " 2000 | \n",
+ " AF | \n",
+ " 2002 | \n",
" MPA+OECM | \n",
- " Europe | \n",
+ " 102859.393938 | \n",
"
\n",
" \n",
" 3 | \n",
- " ARE | \n",
- " 78.52 | \n",
- " 2000 | \n",
+ " AF | \n",
+ " 2003 | \n",
" MPA+OECM | \n",
- " West Asia | \n",
+ " 111143.352991 | \n",
"
\n",
" \n",
" 4 | \n",
- " ARG | \n",
- " 6155.67 | \n",
- " 2000 | \n",
+ " AF | \n",
+ " 2004 | \n",
" MPA+OECM | \n",
- " Latin America & Caribbean | \n",
+ " 119137.635862 | \n",
"
\n",
" \n",
" ... | \n",
@@ -934,83 +779,139 @@
" ... | \n",
" ... | \n",
" ... | \n",
- " ... | \n",
"
\n",
" \n",
- " 3547 | \n",
- " VNM | \n",
- " 5036.97 | \n",
- " 2023 | \n",
+ " 163 | \n",
+ " WA | \n",
+ " 2019 | \n",
" MPA+OECM | \n",
- " Asia & Pacific | \n",
+ " 30618.254664 | \n",
"
\n",
" \n",
- " 3548 | \n",
- " VUT | \n",
- " 83.83 | \n",
- " 2023 | \n",
+ " 164 | \n",
+ " WA | \n",
+ " 2020 | \n",
" MPA+OECM | \n",
- " Asia & Pacific | \n",
+ " 30624.636536 | \n",
"
\n",
" \n",
- " 3549 | \n",
- " WSM | \n",
- " 199.59 | \n",
- " 2023 | \n",
+ " 165 | \n",
+ " WA | \n",
+ " 2021 | \n",
" MPA+OECM | \n",
- " Asia & Pacific | \n",
+ " 30624.636536 | \n",
"
\n",
" \n",
- " 3550 | \n",
- " YEM | \n",
- " 4108.19 | \n",
- " 2023 | \n",
+ " 166 | \n",
+ " WA | \n",
+ " 2022 | \n",
" MPA+OECM | \n",
- " West Asia | \n",
+ " 31779.597984 | \n",
"
\n",
" \n",
- " 3551 | \n",
- " ZAF | \n",
- " 242387.88 | \n",
+ " 167 | \n",
+ " WA | \n",
" 2023 | \n",
" MPA+OECM | \n",
- " Africa | \n",
+ " 31779.597984 | \n",
"
\n",
" \n",
"\n",
- "3552 rows × 5 columns
\n",
+ "168 rows × 4 columns
\n",
""
],
"text/plain": [
- " location_id cumsum_area year protection_type region\n",
- "0 ABNJ 594174.66 2000 MPA+OECM NaN\n",
- "1 AGO 0.42 2000 MPA+OECM Africa\n",
- "2 ALB 103.05 2000 MPA+OECM Europe\n",
- "3 ARE 78.52 2000 MPA+OECM West Asia\n",
- "4 ARG 6155.67 2000 MPA+OECM Latin America & Caribbean\n",
- "... ... ... ... ... ...\n",
- "3547 VNM 5036.97 2023 MPA+OECM Asia & Pacific\n",
- "3548 VUT 83.83 2023 MPA+OECM Asia & Pacific\n",
- "3549 WSM 199.59 2023 MPA+OECM Asia & Pacific\n",
- "3550 YEM 4108.19 2023 MPA+OECM West Asia\n",
- "3551 ZAF 242387.88 2023 MPA+OECM Africa\n",
+ " location_id year protection_type cumsum_area\n",
+ "0 AF 2000 MPA+OECM 94507.122820\n",
+ "1 AF 2001 MPA+OECM 94807.303100\n",
+ "2 AF 2002 MPA+OECM 102859.393938\n",
+ "3 AF 2003 MPA+OECM 111143.352991\n",
+ "4 AF 2004 MPA+OECM 119137.635862\n",
+ ".. ... ... ... ...\n",
+ "163 WA 2019 MPA+OECM 30618.254664\n",
+ "164 WA 2020 MPA+OECM 30624.636536\n",
+ "165 WA 2021 MPA+OECM 30624.636536\n",
+ "166 WA 2022 MPA+OECM 31779.597984\n",
+ "167 WA 2023 MPA+OECM 31779.597984\n",
"\n",
- "[3552 rows x 5 columns]"
+ "[168 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Swap each country code for its region code, then total cumsum_area per region, year and\n",
+ "# protection type; ids with no entry in country_to_region map to NaN and fall out of the groupby\n",
+ "regions = final_df.assign(location_id=final_df['location_id'].map(country_to_region))\n",
+ "regions = regions.groupby(['location_id', 'year', 'protection_type'])['cumsum_area'].sum().reset_index()\n",
+ "\n",
+ "regions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)"
]
},
- "execution_count": 48,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "final_df['region'] = final_df['location_id'].map(country_to_region)\n",
- "final_df"
+ "regions['location_id'].unique()"
]
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n",
+ " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n",
+ " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n",
+ " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n",
+ " 'FIN', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB',\n",
+ " 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN', 'IRL',\n",
+ " 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN', 'KHM',\n",
+ " 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU', 'LVA',\n",
+ " 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE',\n",
+ " 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU', 'NLD',\n",
+ " 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG',\n",
+ " 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE',\n",
+ " 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS', 'TON',\n",
+ " 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT', 'VEN',\n",
+ " 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n",
+ " 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n",
+ " 'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n",
+ " 'AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "final_df2 = pd.concat([final_df, regions], ignore_index=True)\n",
+ "final_df2['location_id'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -1038,49 +939,49 @@
" cumsum_area | \n",
" year | \n",
" protection_type | \n",
- " region | \n",
+ " last_updated | \n",
" \n",
" \n",
" \n",
" \n",
" 0 | \n",
" ABNJ | \n",
- " 594174.66 | \n",
+ " 594174.659985 | \n",
" 2000 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 5 | \n",
- " ATA | \n",
- " 3594.42 | \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 0.415240 | \n",
" 2000 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 37 | \n",
- " FIN;SWE | \n",
- " 3541.14 | \n",
+ " 2 | \n",
+ " ALB | \n",
+ " 103.048347 | \n",
" 2000 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 131 | \n",
- " ABNJ | \n",
- " 594174.66 | \n",
- " 2001 | \n",
+ " 3 | \n",
+ " ARE | \n",
+ " 78.516519 | \n",
+ " 2000 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 136 | \n",
- " ATA | \n",
- " 3594.42 | \n",
- " 2001 | \n",
+ " 4 | \n",
+ " ARG | \n",
+ " 6155.668078 | \n",
+ " 2000 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
" ... | \n",
@@ -1091,116 +992,83 @@
" ... | \n",
"
\n",
" \n",
- " 3397 | \n",
- " ABNJ | \n",
- " 2811451.69 | \n",
- " 2023 | \n",
+ " 3677 | \n",
+ " WA | \n",
+ " 30618.254664 | \n",
+ " 2019 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 3402 | \n",
- " ATA | \n",
- " 3570.36 | \n",
- " 2023 | \n",
+ " 3678 | \n",
+ " WA | \n",
+ " 30624.636536 | \n",
+ " 2020 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 3439 | \n",
- " FIN;SWE | \n",
- " 3541.14 | \n",
- " 2023 | \n",
+ " 3679 | \n",
+ " WA | \n",
+ " 30624.636536 | \n",
+ " 2021 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 3442 | \n",
- " FRA;ITA;MCO | \n",
- " 87742.14 | \n",
- " 2023 | \n",
+ " 3680 | \n",
+ " WA | \n",
+ " 31779.597984 | \n",
+ " 2022 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
- " 3502 | \n",
- " NLD;DEU;DNK | \n",
- " 11550.01 | \n",
+ " 3681 | \n",
+ " WA | \n",
+ " 31779.597984 | \n",
" 2023 | \n",
" MPA+OECM | \n",
- " NaN | \n",
+ " 2023-10-18 | \n",
"
\n",
" \n",
"\n",
- "110 rows × 5 columns
\n",
+ "3682 rows × 5 columns
\n",
""
],
"text/plain": [
- " location_id cumsum_area year protection_type region\n",
- "0 ABNJ 594174.66 2000 MPA+OECM NaN\n",
- "5 ATA 3594.42 2000 MPA+OECM NaN\n",
- "37 FIN;SWE 3541.14 2000 MPA+OECM NaN\n",
- "131 ABNJ 594174.66 2001 MPA+OECM NaN\n",
- "136 ATA 3594.42 2001 MPA+OECM NaN\n",
- "... ... ... ... ... ...\n",
- "3397 ABNJ 2811451.69 2023 MPA+OECM NaN\n",
- "3402 ATA 3570.36 2023 MPA+OECM NaN\n",
- "3439 FIN;SWE 3541.14 2023 MPA+OECM NaN\n",
- "3442 FRA;ITA;MCO 87742.14 2023 MPA+OECM NaN\n",
- "3502 NLD;DEU;DNK 11550.01 2023 MPA+OECM NaN\n",
+ " location_id cumsum_area year protection_type last_updated\n",
+ "0 ABNJ 594174.659985 2000 MPA+OECM 2023-10-18\n",
+ "1 AGO 0.415240 2000 MPA+OECM 2023-10-18\n",
+ "2 ALB 103.048347 2000 MPA+OECM 2023-10-18\n",
+ "3 ARE 78.516519 2000 MPA+OECM 2023-10-18\n",
+ "4 ARG 6155.668078 2000 MPA+OECM 2023-10-18\n",
+ "... ... ... ... ... ...\n",
+ "3677 WA 30618.254664 2019 MPA+OECM 2023-10-18\n",
+ "3678 WA 30624.636536 2020 MPA+OECM 2023-10-18\n",
+ "3679 WA 30624.636536 2021 MPA+OECM 2023-10-18\n",
+ "3680 WA 31779.597984 2022 MPA+OECM 2023-10-18\n",
+ "3681 WA 31779.597984 2023 MPA+OECM 2023-10-18\n",
"\n",
- "[110 rows x 5 columns]"
- ]
- },
- "execution_count": 49,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "final_df[final_df.region.isnull()]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n",
- " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n",
- " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n",
- " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n",
- " 'FIN', 'FIN;SWE', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN',\n",
- " 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN',\n",
- " 'IRL', 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN',\n",
- " 'KHM', 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU',\n",
- " 'LVA', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',\n",
- " 'MNE', 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU',\n",
- " 'NLD', 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW',\n",
- " 'PNG', 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB',\n",
- " 'SLE', 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS',\n",
- " 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT',\n",
- " 'VEN', 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'FRA;ITA;MCO', 'IOT',\n",
- " 'GAB', 'IND', 'SGP', 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP',\n",
- " 'SHN', 'YEM', 'NLD;DEU;DNK', 'URY', 'CMR', 'COM', 'KWT', 'SJM',\n",
- " 'GUY', 'UMI', 'HTI', 'JOR'], dtype=object)"
+ "[3682 rows x 5 columns]"
]
},
- "execution_count": 55,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "final_df['location_id'].unique() "
+ "current_date = datetime.now().strftime('%Y-%m-%d')\n",
+ "\n",
+ "final_df2 = final_df2.copy()\n",
+ "final_df2['last_updated'] = current_date\n",
+ "final_df2"
]
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [