# %% [markdown]
# ### Set up

# %%
import geopandas as gpd
import pandas as pd

# %%
# Input / output folders. Both are plain strings WITHOUT a trailing slash;
# file names are appended with a leading "/" wherever they are used.
path_in = "/Users/sofia/Documents/Repos/skytruth_30x30/data/raw"
path_out = "/Users/sofia/Documents/Repos/skytruth_30x30/data/processed/mpatlas"

# %% [markdown]
# ### MPAtlas

# %%
# Load the MPAtlas shapefile into a GeoDataFrame
mpatlas = gpd.read_file(f"{path_in}/MPAtlas_largest100.shp")

# %%
# Attribute columns available in the shapefile
mpatlas.columns

# %%
# Distinct values of the protection-level field
mpatlas["Level_of_P"].unique()
"execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OBJECTIDWDPAIDWDPA_PIDNAMEEnglish_DePARENT_ISOISO3MPA_Marinempa_idZone_Marin...Stage_of_EDistant_MPLevel_of_PMost_ImpacDescrip_ImVerticallySHAPE_LengSHAPE_AreageometryP_LEVEL
01.0478053.0478053Hikurangi DeepBenthic Protection AreaNZLNZL54022.1525854022.1...ImplementedNaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...X12.3329525.833001POLYGON ((-175.00000 -42.16661, -175.00000 -42...Less Protected / Unknown
12.0555512062.0555512062KermadecBenthic Protection AreaNZLNZL619146.05428458540.5...ImplementedNaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...NaN25.62935242.963159POLYGON ((-174.02370 -29.22191, -174.02370 -29...Less Protected / Unknown
\n", - "

# %%
def map_protection_level(value):
    """Collapse an MPAtlas protection level into one of two reporting classes.

    Returns "Fully / Highly Protected" when `value` is "Fully" or "Highly";
    every other value (including missing ones) maps to "Less Protected / Unknown".
    """
    is_strongly_protected = value in ("Fully", "Highly")
    return "Fully / Highly Protected" if is_strongly_protected else "Less Protected / Unknown"


# Reclassify 'Level_of_P' row by row into the new 'P_LEVEL' column
mpatlas['P_LEVEL'] = mpatlas['Level_of_P'].apply(map_protection_level)
mpatlas.head(2)

# %%
# Regions as listed in Region_ISO3_PP.txt (the regions used in the Protected Planet
# database): one dictionary per region with its code, display name and member ISO3 codes.
regions_data = [
    {
        'region_iso': 'AS',
        'region_name': 'Asia & Pacific',
        'country_iso_3s': [
            "AFG", "ASM", "AUS", "BGD", "BRN", "BTN", "CCK", "CHN", "COK", "CXR",
            "FJI", "FSM", "GUM", "HKG", "IDN", "IND", "IOT", "IRN", "JPN", "KHM",
            "KIR", "KOR", "LAO", "LKA", "MAC", "MDV", "MHL", "MMR", "MNG", "MNP",
            "MYS", "NCL", "NFK", "NIU", "NPL", "NRU", "NZL", "PAK", "PCN", "PHL",
            "PLW", "PNG", "PRK", "PYF", "SGP", "SLB", "THA", "TKL", "TLS", "TON",
            "TUV", "TWN", "VNM", "VUT", "WLF", "WSM",
        ],
    },
    {
        'region_iso': 'AF',
        'region_name': 'Africa',
        'country_iso_3s': [
            "AGO", "BDI", "BEN", "BFA", "BWA", "CAF", "CIV", "CMR", "COD", "COG",
            "COM", "CPV", "DJI", "DZA", "EGY", "ERI", "ESH", "ETH", "GAB", "GHA",
            "GIN", "GMB", "GNB", "GNQ", "KEN", "LBR", "LBY", "LSO", "MAR", "MDG",
            "MLI", "MOZ", "MRT", "MUS", "MWI", "MYT", "NAM", "NER", "NGA", "REU",
            "RWA", "SDN", "SEN", "SHN", "SLE", "SOM", "SSD", "STP", "SWZ", "SYC",
            "TCD", "TGO", "TUN", "TZA", "UGA", "ZAF", "ZMB", "ZWE",
        ],
    },
    {
        'region_iso': 'EU',
        'region_name': 'Europe',
        'country_iso_3s': [
            "ALA", "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BGR", "BIH", "BLR",
            "CHE", "CYP", "CZE", "DEU", "DNK", "ESP", "EST", "FIN", "FRA", "FRO",
            "GBR", "GEO", "GGY", "GIB", "GRC", "HRV", "HUN", "IMN", "IRL", "ISL",
            "ISR", "ITA", "JEY", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA", "MCO",
            "MDA", "MKD", "MLT", "MNE", "NLD", "NOR", "POL", "PRT", "ROU", "RUS",
            "SJM", "SMR", "SRB", "SVK", "SVN", "SWE", "TJK", "TKM", "TUR", "UKR",
            "UZB", "VAT",
        ],
    },
    {
        'region_iso': 'SA',
        'region_name': 'Latin America & Caribbean',
        'country_iso_3s': [
            "ABW", "AIA", "ARG", "ATG", "BES", "BHS", "BLM", "BLZ", "BMU", "BOL",
            "BRA", "BRB", "CHL", "COL", "CRI", "CUB", "CUW", "CYM", "DMA", "DOM",
            "ECU", "FLK", "GLP", "GRD", "GTM", "GUF", "GUY", "HND", "HTI", "JAM",
            "KNA", "LCA", "MAF", "MEX", "MSR", "MTQ", "NIC", "PAN", "PER", "PRI",
            "PRY", "SLV", "SUR", "SXM", "TCA", "TTO", "UMI", "URY", "VCT", "VEN",
            "VGB", "VIR",
        ],
    },
    {
        'region_iso': 'PO',
        'region_name': 'Polar',
        'country_iso_3s': ["ATF", "BVT", "GRL", "HMD", "SGS"],
    },
    {
        'region_iso': 'NA',
        'region_name': 'North America',
        'country_iso_3s': ["CAN", "SPM", "USA"],
    },
    {
        'region_iso': 'GL',
        'region_name': 'Global',
        'country_iso_3s': [],
    },
    {
        'region_iso': 'WA',
        'region_name': 'West Asia',
        'country_iso_3s': [
            "ARE", "BHR", "IRQ", "JOR", "KWT", "LBN", "OMN", "PSE", "QAT", "SAU",
            "SYR", "YEM",
        ],
    },
]

# Flatten the region list into an ISO3 -> region *name* lookup.
# If a code were listed under two regions, the later region would win.
country_to_region = {
    iso3: entry['region_name']
    for entry in regions_data
    for iso3 in entry['country_iso_3s']
}
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OBJECTIDWDPAIDWDPA_PIDNAMEEnglish_DePARENT_ISOISO3MPA_Marinempa_idZone_Marin...Distant_MPLevel_of_PMost_ImpacDescrip_ImVerticallySHAPE_LengSHAPE_AreageometryP_LEVELREGIONS
01.0478053.0478053Hikurangi DeepBenthic Protection AreaNZLNZL54022.1525854022.1...NaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...X12.3329525.833001POLYGON ((-175.00000 -42.16661, -175.00000 -42...Less Protected / UnknownAsia & Pacific
12.0555512062.0555512062KermadecBenthic Protection AreaNZLNZL619146.05428458540.5...NaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...NaN25.62935242.963159POLYGON ((-174.02370 -29.22191, -174.02370 -29...Less Protected / UnknownAsia & Pacific
\n", - "

# %%
# Look up the region name of every MPA from its ISO3 code.
# Series.map leaves NaN for any code that is not a key of country_to_region.
region_per_mpa = mpatlas['ISO3'].map(country_to_region)
mpatlas['REGIONS'] = region_per_mpa
mpatlas.head(2)
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDNAMEAREA_MPATLASDESIG_ENGESTABLISHMENTIMPACTP_LEVELPARENT_ISOISO3REGIONSgeometry
0478053.0478053Hikurangi Deep54022.1Benthic Protection AreaImplementedMining, FishingLess Protected / UnknownNZLNZLAsia & PacificPOLYGON ((-175.00000 -42.16661, -175.00000 -42...
# %%
# Give the abbreviated shapefile fields readable names, then keep only the fields
# needed for the output table.
# 'Zone_Marin' (area of the individual geometry) is used as the area field rather than
# 'MPA_Marine', because one MPA can be split into several zones with different
# protection levels.
column_renames = {
    'English_De': 'DESIG_ENG',
    'Zone_Marin': 'AREA_MPATLAS',
    'Stage_of_E': 'ESTABLISHMENT',
    'Most_Impac': 'IMPACT',
}
kept_columns = [
    'WDPAID', 'WDPA_PID', 'NAME', 'AREA_MPATLAS', 'DESIG_ENG', 'ESTABLISHMENT',
    'IMPACT', 'P_LEVEL', 'PARENT_ISO', 'ISO3', 'REGIONS', 'geometry',
]

mpatlas = mpatlas.rename(columns=column_renames)
mpatlas2 = mpatlas[kept_columns]
mpatlas2.head(2)

# %%
# Export the table as an ESRI Shapefile.
# NOTE: the driver truncates field names to 10 characters (geopandas warns about it),
# so 'AREA_MPATLAS' and 'ESTABLISHMENT' are stored as 'AREA_MPATL' and 'ESTABLISHM'.
mpatlas2.to_file(f"{path_out}/mpatlas_table.shp")
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDNAMEAREA_MPATLDESIG_ENGESTABLISHMIMPACTP_LEVELPARENT_ISOISO3REGIONSgeometry
0478053.0478053Hikurangi Deep54022.1Benthic Protection AreaImplementedMining, FishingLess Protected / UnknownNZLNZLAsia & PacificPOLYGON ((-175.00000 -42.16661, -175.00000 -42...
# %%
# MPAtlas_table.ipynb (final cell): reload the exported shapefile to inspect what was written
mpatlas = gpd.read_file(path_out + "/mpatlas_table.shp")
mpatlas.head(2)

# ---------------------------------------------------------------------------
# habitats.ipynb
# ---------------------------------------------------------------------------
# %% [markdown]
# ### Set up

# %%
# openpyxl is the engine pandas uses to read the .xlsx file loaded below.
# Kept as a commented notebook magic (jupytext "percent" convention) so this block is
# valid Python; in the notebook, run it as `%pip` rather than `!pip` so the package is
# installed into the kernel's own environment. The pin is the version the original
# cell output reports as installed.
# %pip install -q openpyxl==3.1.2

# %%
import geopandas as gpd
import pandas as pd
import openpyxl
from datetime import datetime

# %%
# Input / output folders. Both are plain strings WITH a trailing slash; relative file
# names are appended directly wherever they are used.
# NOTE(review): these are absolute paths on one developer's machine - consider making
# them configurable.
path_in = "/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/"
path_out = "/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/"

# Columns holding the areas that are cleaned and summed below
AREA_COLUMNS = ['protected_area', 'total_area']

# %% [markdown]
# ### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)

# %%
# Raw Ocean+ downloads: one CSV per habitat plus a workbook with the global totals
cold = pd.read_csv(path_in + "Ocean+HabitatsDownload_Global/coldwatercorals.csv")
salt = pd.read_csv(path_in + "Ocean+HabitatsDownload_Global/saltmarshes.csv")
sea = pd.read_csv(path_in + "Ocean+HabitatsDownload_Global/seagrasses.csv")
warm = pd.read_csv(path_in + "Ocean+HabitatsDownload_Global/warmwatercorals.csv")
glob = pd.read_excel(path_in + "Ocean+HabitatsDownload_Global/global-stats.xlsx")


# %%
def _select_habitat_columns(raw):
    """Keep the ISO3 code and the two area columns, renaming 'ISO3' to 'location_id'."""
    return raw[['ISO3'] + AREA_COLUMNS].rename(columns={'ISO3': 'location_id'})


def _clean_area_columns(frame):
    """Return a copy of `frame` with numeric area columns and no placeholder rows.

    The source files mark missing areas with '-', which makes pandas read the whole
    column as text. Values are converted with `pd.to_numeric(errors='coerce')` and any
    row where either area could not be parsed is dropped. Unlike a substring test for
    '-', this keeps valid numbers written in scientific notation (e.g. '1e-05').
    """
    cleaned = frame.copy()
    for column in AREA_COLUMNS:
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')
    return cleaned.dropna(subset=AREA_COLUMNS)


def _sum_by_location(frame):
    """Sum the area columns per 'location_id' (rows with a missing id are dropped)."""
    return frame.groupby('location_id', as_index=False)[AREA_COLUMNS].sum()


# %%
cold2 = _select_habitat_columns(cold)
salt2 = _select_habitat_columns(salt)
sea2 = _select_habitat_columns(sea)
warm2 = _select_habitat_columns(warm)

# %%
# Remove the '-' placeholder rows and make the areas numeric BEFORE aggregating:
# summing text columns concatenates the strings instead of adding the values, which
# corrupts every location that is built from more than one ISO3 row.
cold2 = _clean_area_columns(cold2)
salt2 = _clean_area_columns(salt2)
sea2 = _clean_area_columns(sea2)
warm2 = _clean_area_columns(warm2)

# %%
# Bring the wdpa file to get the iso3 and parent_iso equivalences
wdpa = gpd.read_file(path_out + "wdpa/merged_mpa.shp")

# Filter out rows with multiple values (';'-separated) in either 'ISO3' or 'PARENT_ISO'
wdpa = wdpa[~wdpa['ISO3'].str.contains(';') & ~wdpa['PARENT_ISO'].str.contains(';')]

# Extract unique ISO3-PARENT_ISO pairs
unique_pairs = wdpa[['ISO3', 'PARENT_ISO']].drop_duplicates()

# %%
# ISO3 -> PARENT_ISO lookup, used to report every territory under its parent country.
# NOTE(review): an ISO3 code that does not occur in the WDPA file maps to NaN and is
# silently dropped by the groupby below - confirm that this is intended.
mapping_dict = dict(zip(unique_pairs['ISO3'], unique_pairs['PARENT_ISO']))
cold2['location_id'] = cold2['location_id'].map(mapping_dict)
salt2['location_id'] = salt2['location_id'].map(mapping_dict)
sea2['location_id'] = sea2['location_id'].map(mapping_dict)
warm2['location_id'] = warm2['location_id'].map(mapping_dict)

# %%
# Group by 'location_id' and calculate the sum of 'protected_area' and 'total_area'
cold2_grouped = _sum_by_location(cold2)
salt2_grouped = _sum_by_location(salt2)
sea2_grouped = _sum_by_location(sea2)
warm2_grouped = _sum_by_location(warm2)

# %%
# Add the 'habitat_name' column
cold2_grouped['habitat_name'] = 'cold-water corals'
salt2_grouped['habitat_name'] = 'saltmarshes'
sea2_grouped['habitat_name'] = 'seagrasses'
warm2_grouped['habitat_name'] = 'warm-water corals'
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293726799041874.98221422617cold-water corals2023
# %%
# Stack the four per-habitat tables and stamp every row with the year of this run
per_habitat_frames = [cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped]
habitats = pd.concat(per_habitat_frames).assign(year=datetime.now().year)
habitats.head(2)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarsh111638.252564224435.075094GLOB2023
1seagrass74787.449960314001.940600GLOB2023
2warmwater-corals63259.499130149886.974126GLOB2023
# %%
# Global totals per habitat, taken from the Ocean+ global-stats workbook and shaped
# like the per-country table: fixed location 'GLOB' and the year of this run.
global_stats = (
    glob[['habitat', 'protected_area', 'total_area']]
    .rename(columns={'habitat': 'habitat_name'})
    .assign(location_id='GLOB', year=datetime.now().year)
)

# Mangroves are excluded from this table
is_mangrove = global_stats['habitat_name'] == 'mangroves'
habitats_global = global_stats[~is_mangrove]
habitats_global
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarshes111638.252564224435.075094GLOB2023
1seagrasses74787.449960314001.940600GLOB2023
2warm-water corals63259.499130149886.974126GLOB2023
# %%
# Rename the habitats in the global table so they match the labels used in `habitats`
habitat_name_mapping = {
    'saltmarsh': 'saltmarshes',
    'seagrass': 'seagrasses',
    'warmwater-corals': 'warm-water corals',
    'coldwater-corals': 'cold-water corals'
}
# .assign builds a new frame instead of writing into `habitats_global` in place, so the
# result does not depend on whether that frame is a slice of another one.
habitats_global = habitats_global.assign(
    habitat_name=habitats_global['habitat_name'].replace(habitat_name_mapping)
)
habitats_global

# %%
# Append the global stats to the habitats dataframe.
# Any 'GLOB' rows already present are removed first, so re-running this cell does not
# append the global totals a second time.
habitats = pd.concat([habitats[habitats['location_id'] != 'GLOB'], habitats_global])
habitats['habitat_name'].unique()

# %%
# Regions as listed in Region_ISO3_PP.txt (the regions used in the Protected Planet
# database): one dictionary per region with its code, display name and member ISO3 codes.
regions_data = [
    {
        'region_iso': 'AS',
        'region_name': 'Asia & Pacific',
        'country_iso_3s': [
            "AFG", "ASM", "AUS", "BGD", "BRN", "BTN", "CCK", "CHN", "COK", "CXR",
            "FJI", "FSM", "GUM", "HKG", "IDN", "IND", "IOT", "IRN", "JPN", "KHM",
            "KIR", "KOR", "LAO", "LKA", "MAC", "MDV", "MHL", "MMR", "MNG", "MNP",
            "MYS", "NCL", "NFK", "NIU", "NPL", "NRU", "NZL", "PAK", "PCN", "PHL",
            "PLW", "PNG", "PRK", "PYF", "SGP", "SLB", "THA", "TKL", "TLS", "TON",
            "TUV", "TWN", "VNM", "VUT", "WLF", "WSM",
        ],
    },
    {
        'region_iso': 'AF',
        'region_name': 'Africa',
        'country_iso_3s': [
            "AGO", "BDI", "BEN", "BFA", "BWA", "CAF", "CIV", "CMR", "COD", "COG",
            "COM", "CPV", "DJI", "DZA", "EGY", "ERI", "ESH", "ETH", "GAB", "GHA",
            "GIN", "GMB", "GNB", "GNQ", "KEN", "LBR", "LBY", "LSO", "MAR", "MDG",
            "MLI", "MOZ", "MRT", "MUS", "MWI", "MYT", "NAM", "NER", "NGA", "REU",
            "RWA", "SDN", "SEN", "SHN", "SLE", "SOM", "SSD", "STP", "SWZ", "SYC",
            "TCD", "TGO", "TUN", "TZA", "UGA", "ZAF", "ZMB", "ZWE",
        ],
    },
    {
        'region_iso': 'EU',
        'region_name': 'Europe',
        'country_iso_3s': [
            "ALA", "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BGR", "BIH", "BLR",
            "CHE", "CYP", "CZE", "DEU", "DNK", "ESP", "EST", "FIN", "FRA", "FRO",
            "GBR", "GEO", "GGY", "GIB", "GRC", "HRV", "HUN", "IMN", "IRL", "ISL",
            "ISR", "ITA", "JEY", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA", "MCO",
            "MDA", "MKD", "MLT", "MNE", "NLD", "NOR", "POL", "PRT", "ROU", "RUS",
            "SJM", "SMR", "SRB", "SVK", "SVN", "SWE", "TJK", "TKM", "TUR", "UKR",
            "UZB", "VAT",
        ],
    },
    {
        'region_iso': 'SA',
        'region_name': 'Latin America & Caribbean',
        'country_iso_3s': [
            "ABW", "AIA", "ARG", "ATG", "BES", "BHS", "BLM", "BLZ", "BMU", "BOL",
            "BRA", "BRB", "CHL", "COL", "CRI", "CUB", "CUW", "CYM", "DMA", "DOM",
            "ECU", "FLK", "GLP", "GRD", "GTM", "GUF", "GUY", "HND", "HTI", "JAM",
            "KNA", "LCA", "MAF", "MEX", "MSR", "MTQ", "NIC", "PAN", "PER", "PRI",
            "PRY", "SLV", "SUR", "SXM", "TCA", "TTO", "UMI", "URY", "VCT", "VEN",
            "VGB", "VIR",
        ],
    },
    {
        'region_iso': 'PO',
        'region_name': 'Polar',
        'country_iso_3s': ["ATF", "BVT", "GRL", "HMD", "SGS"],
    },
    {
        'region_iso': 'NA',
        'region_name': 'North America',
        'country_iso_3s': ["CAN", "SPM", "USA"],
    },
    {
        'region_iso': 'WA',
        'region_name': 'West Asia',
        'country_iso_3s': [
            "ARE", "BHR", "IRQ", "JOR", "KWT", "LBN", "OMN", "PSE", "QAT", "SAU",
            "SYR", "YEM",
        ],
    },
    {
        'region_iso': 'AT',  # this region is not in the Protected Planet database
        'region_name': 'Antarctica',  # spelling corrected (was 'Antartica')
        'country_iso_3s': ["ATA"],
    },
]

# Flatten the region list into an ISO3 -> region *code* ('region_iso') lookup.
# Locations that are not countries (e.g. 'GLOB') have no entry here.
# If a code were listed under two regions, the later region would win.
country_to_region = {
    iso3: entry['region_iso']
    for entry in regions_data
    for iso3 in entry['country_iso_3s']
}
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFcold-water corals37.761626381.9932342023
1AFsaltmarshes6688.70287919845.9150002023
2AFseagrasses6319.09949161939.4849042023
3AFwarm-water corals6591.34008315216.3939472023
4AScold-water corals263.2514981332.2250802023
5ASsaltmarshes11721.43953939229.8888602023
6ASseagrasses28942.70566072666.4820522023
7ASwarm-water corals13895.87065967363.4866092023
8EUcold-water corals2183.0502666179.5264272023
9EUsaltmarshes7431.04371013274.3264782023
10EUseagrasses5840.37292510391.1899112023
11EUwarm-water corals0.6057630.7933572023
12NAcold-water corals22.960099204.2804332023
13NAsaltmarshes51092.64468368200.0819302023
14NAseagrasses70.012791301.9091412023
15NAwarm-water corals0.0000000.0000002023
16SAcold-water corals234.7313701416.2513232023
17SAsaltmarshes22969.81590635983.3927442023
18SAseagrasses16517.09766745847.4594122023
19SAwarm-water corals5597.36684512869.8012312023
20WAcold-water corals0.00000012.9707052023
21WAsaltmarshes1309.22573611798.8326192023
22WAseagrasses1053.44867325273.7274312023
23WAwarm-water corals547.9289574903.2303952023
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF cold-water corals 37.761626 381.993234 2023\n", + "1 AF saltmarshes 6688.702879 19845.915000 2023\n", + "2 AF seagrasses 6319.099491 61939.484904 2023\n", + "3 AF warm-water corals 6591.340083 15216.393947 2023\n", + "4 AS cold-water corals 263.251498 1332.225080 2023\n", + "5 AS saltmarshes 11721.439539 39229.888860 2023\n", + "6 AS seagrasses 28942.705660 72666.482052 2023\n", + "7 AS warm-water corals 13895.870659 67363.486609 2023\n", + "8 EU cold-water corals 2183.050266 6179.526427 2023\n", + "9 EU saltmarshes 7431.043710 13274.326478 2023\n", + "10 EU seagrasses 5840.372925 10391.189911 2023\n", + "11 EU warm-water corals 0.605763 0.793357 2023\n", + "12 NA cold-water corals 22.960099 204.280433 2023\n", + "13 NA saltmarshes 51092.644683 68200.081930 2023\n", + "14 NA seagrasses 70.012791 301.909141 2023\n", + "15 NA warm-water corals 0.000000 0.000000 2023\n", + "16 SA cold-water corals 234.731370 1416.251323 2023\n", + "17 SA saltmarshes 22969.815906 35983.392744 2023\n", + "18 SA seagrasses 16517.097667 45847.459412 2023\n", + "19 SA warm-water corals 5597.366845 12869.801231 2023\n", + "20 WA cold-water corals 0.000000 12.970705 2023\n", + "21 WA saltmarshes 1309.225736 11798.832619 2023\n", + "22 WA seagrasses 1053.448673 25273.727431 2023\n", + "23 WA warm-water corals 547.928957 4903.230395 2023" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add regions field\n", + "habitats_regions = habitats.copy()\n", + "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n", + "\n", + "# Convert fields to numeric\n", + "habitats_regions['protected_area'] = pd.to_numeric(habitats_regions['protected_area'], errors='coerce')\n", + "habitats_regions['total_area'] = pd.to_numeric(habitats_regions['total_area'], errors='coerce')\n", + "\n", + "# Calculate stats for each region\n", + 
"habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "habitats_regions['year'] = datetime.now().year\n", + "habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "habitats_regions\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate region statistics to the habitats dataframe\n", + "habitats = pd.concat([habitats, habitats_regions])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ABNJ', 'AGO', 'ALB', 'ARG', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n", + " 'BRB', 'CAN', 'CHL', 'CHN', 'CIV', 'COK', 'COL', 'CPV', 'CRI',\n", + " 'CUB', 'CYP', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'ERI', 'ESP',\n", + " 'FJI', 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC',\n", + " 'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL',\n", + " 'ISL', 'ITA', 'JAM', 'JPN', 'KEN', 'KIR', 'KNA', 'LBR', 'LCA',\n", + " 'LKA', 'MAR', 'MDG', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE', 'MOZ',\n", + " 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR', 'NZL',\n", + " 'OMN', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRT', 'RUS', 'SAU',\n", + " 'SEN', 'SHN', 'SJM', 'STP', 'SUR', 'SWE', 'SYC', 'THA', 'TLS',\n", + " 'TON', 'TTO', 'TUN', 'TUV', 'UMI', 'URY', 'USA', 'VCT', 'VEN',\n", + " 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ARE', 'AZE', 'BEL', 'BGR',\n", + " 'BHR', 'DEU', 'DJI', 'EGY', 'EST', 'FIN', 'GEO', 'GMB', 'IRN',\n", + " 'KHM', 'KOR', 'KWT', 'LBY', 'LTU', 'LVA', 'PAK', 'POL', 'QAT',\n", + " 'ROU', 'SDN', 'SVN', 'TUR', 'TZA', 'UKR', 'COM', 'ISR', 'JOR',\n", + " 'MCO', 'MDV', 'SGP', 'SLB', 'SLE', 'BGD', 'BRN', 'NIU', 'GLOB',\n", + " 'AF', 'AS', 'EU', 'NA', 'SA', 'WA'], dtype=object)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"habitats['location_id'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "habitats.to_csv(path_out + \"habitats/ocean+.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process seamounts from [UN WCMC](https://data.unep-wcmc.org/datasets/41)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "# Read required data\n", + "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n", + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", + "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep relevant fields in eez and hs and merge then in one dataframe\n", + "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", + "hs = hs[['geometry']]\n", + "hs['SOVEREIGN1'] = 'High Seas'\n", + "hs['ISO_SOV1'] = 'ABNJ'\n", + "eez_hs = eez.merge(hs, how='outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# Join eez info to seamounts falling within eez polygons\n", + "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n", + "# Drop those not associated with an eez or hs\n", + "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" with the iso_sov codes\n", + "def concatenate_iso(row):\n", + " iso_list = [row['ISO_SOV1']]\n", + " if not 
pd.isna(row['ISO_SOV2']):\n", + " iso_list.append(row['ISO_SOV2'])\n", + " if not pd.isna(row['ISO_SOV3']):\n", + " iso_list.append(row['ISO_SOV3'])\n", + " return ';'.join(iso_list)\n", + "\n", + "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "# Check which seamounts are protected\n", + "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", + "seamounts_wdpa['protection'] = \"no\" \n", + "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n", + "# Remove rows in which protection is \"no\"\n", + "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global stats" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nametotal_arealocation_idyear
0seamounts2.690810e+07GLOB2011
\n", + "
" + ], + "text/plain": [ + " habitat_name total_area location_id year\n", + "0 seamounts 2.690810e+07 GLOB 2011" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global area of seamounts\n", + "seamounts_eez['habitat_name'] = 'seamounts'\n", + "seamounts_global = seamounts_eez.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n", + "seamounts_global['location_id'] = 'GLOB'\n", + "seamounts_global['year'] = 2011\n", + "seamounts_global" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_area
0seamounts3.438552e+06
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area\n", + "0 seamounts 3.438552e+06" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global area of seamounts protected\n", + "seamounts_wdpa['habitat_name'] = 'seamounts'\n", + "seamounts_wdpa_global = seamounts_wdpa.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'protected_area'})\n", + "seamounts_wdpa_global" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nametotal_arealocation_idyearprotected_area
0seamounts2.690810e+07GLOB20113.438552e+06
\n", + "
" + ], + "text/plain": [ + " habitat_name total_area location_id year protected_area\n", + "0 seamounts 2.690810e+07 GLOB 2011 3.438552e+06" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Bring 'protected_area' field from seamounts_wdpa_global to seamounts_global\n", + "seamounts_global = seamounts_global.merge(seamounts_wdpa_global[['habitat_name', 'protected_area']], how='left', on='habitat_name')\n", + "seamounts_global" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country stats" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso' values and create separate rows only for those with multiple values\n", + "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", + "split_rows = seamounts_eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso'\n", + "single_value_rows = seamounts_eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyear
0ABNJ1.483098e+07seamounts2011
1AGO9.556242e+03seamounts2011
2ARG3.110730e+05seamounts2011
3ATA3.551629e+05seamounts2011
4ATG6.215895e+03seamounts2011
...............
88VNM4.421338e+04seamounts2011
89VUT1.199475e+05seamounts2011
90WSM4.117997e+04seamounts2011
91YEM6.294974e+04seamounts2011
92ZAF9.946306e+04seamounts2011
\n", + "

93 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year\n", + "0 ABNJ 1.483098e+07 seamounts 2011\n", + "1 AGO 9.556242e+03 seamounts 2011\n", + "2 ARG 3.110730e+05 seamounts 2011\n", + "3 ATA 3.551629e+05 seamounts 2011\n", + "4 ATG 6.215895e+03 seamounts 2011\n", + ".. ... ... ... ...\n", + "88 VNM 4.421338e+04 seamounts 2011\n", + "89 VUT 1.199475e+05 seamounts 2011\n", + "90 WSM 4.117997e+04 seamounts 2011\n", + "91 YEM 6.294974e+04 seamounts 2011\n", + "92 ZAF 9.946306e+04 seamounts 2011\n", + "\n", + "[93 rows x 4 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get area of seamounts per iso\n", + "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", + "seamounts_iso['habitat_name'] = 'seamounts'\n", + "seamounts_iso['year'] = 2011\n", + "seamounts_iso " + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso_code' values in seamounts_wdpa and create separate rows only for those with multiple values\n", + "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", + "split_rows = seamounts_wdpa[mask].copy()\n", + "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", + "split_rows = split_rows.explode('PARENT_ISO')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_wdpa[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_area
0ABNJ226253.932283
1ARG38773.659962
\n", + "
" + ], + "text/plain": [ + " location_id protected_area\n", + "0 ABNJ 226253.932283\n", + "1 ARG 38773.659962" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate area protected per iso\n", + "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", + "seamounts_protected.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyearprotected_area
0ABNJ1.483098e+07seamounts2011226253.932283
1AGO9.556242e+03seamounts2011NaN
\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year protected_area\n", + "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", + "1 AGO 9.556242e+03 seamounts 2011 NaN" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Join seamounts_iso and seamounts_protected\n", + "seamounts_iso = seamounts_iso.merge(seamounts_protected, how='left', on='location_id')\n", + "seamounts_iso.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regions stats" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFseamounts94385.1789586.162351e+052011
1ASseamounts832497.7839375.433433e+062011
2ATseamounts0.0000003.551629e+052011
3EUseamounts894514.9102552.641119e+062011
4NAseamounts555588.2107251.664794e+062011
5SAseamounts581172.1543891.655552e+062011
6WAseamounts2487.4280509.384765e+042011
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF seamounts 94385.178958 6.162351e+05 2011\n", + "1 AS seamounts 832497.783937 5.433433e+06 2011\n", + "2 AT seamounts 0.000000 3.551629e+05 2011\n", + "3 EU seamounts 894514.910255 2.641119e+06 2011\n", + "4 NA seamounts 555588.210725 1.664794e+06 2011\n", + "5 SA seamounts 581172.154389 1.655552e+06 2011\n", + "6 WA seamounts 2487.428050 9.384765e+04 2011" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_regions = seamounts_iso.copy()\n", + "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "seamounts_regions['year'] = 2011\n", + "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "seamounts_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate region and global stats to seamounts_iso\n", + "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "seamounts_all.to_csv(path_out + \"habitats/seamounts.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process mangroves from GMW" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "mangroves = pd.read_csv(path_out + \"habitats/mangroves.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0mangroves61287.20375147358.990971GLOB2020
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 mangroves 61287.20375 147358.990971 GLOB 2020" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global stats for mangroves\n", + "mangroves_global = mangroves.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "mangroves_global['location_id'] = 'GLOB'\n", + "mangroves_global['year'] = 2020\n", + "mangroves_global" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate the global stats to the mangroves dataframe\n", + "mangroves = pd.concat([mangroves, mangroves_global])" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFmangroves10006.9700029344.4043992020
1ASmangroves21378.7500074629.1944462020
2NAmangroves2055.400002329.1155052020
3POmangroves6.720006.7230182020
4SAmangroves27811.5337540875.9326662020
5WAmangroves27.83000173.6209382020
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF mangroves 10006.97000 29344.404399 2020\n", + "1 AS mangroves 21378.75000 74629.194446 2020\n", + "2 NA mangroves 2055.40000 2329.115505 2020\n", + "3 PO mangroves 6.72000 6.723018 2020\n", + "4 SA mangroves 27811.53375 40875.932666 2020\n", + "5 WA mangroves 27.83000 173.620938 2020" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mangroves_regions = mangroves.copy()\n", + "mangroves_regions['region'] = mangroves['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "mangroves_regions['year'] = 2020\n", + "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "mangroves_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate stats for regions with mangroves\n", + "mangroves = pd.concat([mangroves, mangroves_regions])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Concatenate all habitats" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293726799041874.98221422617cold-water corals2023
1AGO03.39567053773998cold-water corals2023
2ALB05.98647948252716cold-water corals2023
3ARG6.9842260206355761.8263440651753cold-water corals2023
4ATG00.997746538545076cold-water corals2023
..................
1AS21378.7574629.194446mangroves2020
2NA2055.42329.115505mangroves2020
3PO6.726.723018mangroves2020
4SA27811.5337540875.932666mangroves2020
5WA27.83173.620938mangroves2020
\n", + "

628 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n", + "1 AGO 0 3.39567053773998 cold-water corals 2023\n", + "2 ALB 0 5.98647948252716 cold-water corals 2023\n", + "3 ARG 6.98422602063557 61.8263440651753 cold-water corals 2023\n", + "4 ATG 0 0.997746538545076 cold-water corals 2023\n", + ".. ... ... ... ... ...\n", + "1 AS 21378.75 74629.194446 mangroves 2020\n", + "2 NA 2055.4 2329.115505 mangroves 2020\n", + "3 PO 6.72 6.723018 mangroves 2020\n", + "4 SA 27811.53375 40875.932666 mangroves 2020\n", + "5 WA 27.83 173.620938 mangroves 2020\n", + "\n", + "[628 rows x 5 columns]" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the dataframes\n", + "habitats_all = pd.concat([habitats, seamounts_all, mangroves])\n", + "habitats_all" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "habitats_all.to_csv(path_out + \"tables/habitats2.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/layers.ipynb b/data/notebooks/layers.ipynb index 3d9f2c16..232b762e 100644 --- a/data/notebooks/layers.ipynb +++ b/data/notebooks/layers.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -12,114 +12,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "path_in = 
\"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\"" + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MRGIDGEONAMEMRGID_TER1POL_TYPEMRGID_SOV1TERRITORY1ISO_TER1SOVEREIGN1MRGID_TER2MRGID_SOV2...ISO_SOV1ISO_SOV2ISO_SOV3UN_SOV1UN_SOV2UN_SOV3UN_TER1UN_TER2UN_TER3geometry
08444.0American Samoa Exclusive Economic Zone8670.0200NM2204.0American SamoaASMUnited States0.00.0...USANaNNaN840NaNNaN16.0NaNNaNPOLYGON ((-166.64112 -17.55527, -166.64194 -17...
\n", - "

1 rows × 32 columns

\n", - "
" - ], - "text/plain": [ - " MRGID GEONAME MRGID_TER1 POL_TYPE \\\n", - "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 200NM \n", - "\n", - " MRGID_SOV1 TERRITORY1 ISO_TER1 SOVEREIGN1 MRGID_TER2 MRGID_SOV2 \\\n", - "0 2204.0 American Samoa ASM United States 0.0 0.0 \n", - "\n", - " ... ISO_SOV1 ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 \\\n", - "0 ... USA NaN NaN 840 NaN NaN 16.0 NaN \n", - "\n", - " UN_TER3 geometry \n", - "0 NaN POLYGON ((-166.64112 -17.55527, -166.64194 -17... \n", - "\n", - "[1 rows x 32 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez = gpd.read_file(path_in + \"/World_EEZ_v11_20191118/eez_v11.shp\")\n", "eez.head(1)" @@ -127,63 +32,25 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n", - " 'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n", - " 'TERRITORY2', 'ISO_TER2', 'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n", - " 'TERRITORY3', 'ISO_TER3', 'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n", - " 'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n", - " 'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n", - " dtype='object')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez.columns" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "Name: WGS 84\n", - "Axis Info [ellipsoidal]:\n", - "- Lat[north]: Geodetic latitude (degree)\n", - "- Lon[east]: Geodetic longitude (degree)\n", - "Area of Use:\n", - "- name: World.\n", - "- bounds: (-180.0, -90.0, 180.0, 90.0)\n", - "Datum: World Geodetic System 1984 ensemble\n", - "- Ellipsoid: WGS 84\n", - "- 
Prime Meridian: Greenwich" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez.crs" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -193,223 +60,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MRGIDGEONAMEMRGID_TER1POL_TYPEMRGID_SOV1TERRITORY1ISO_TER1SOVEREIGN1MRGID_TER2MRGID_SOV2...ISO_SOV1ISO_SOV2ISO_SOV3UN_SOV1UN_SOV2UN_SOV3UN_TER1UN_TER2UN_TER3geometry
08444.0American Samoa Exclusive Economic Zone8670.0200NM2204.0American SamoaASMUnited States0.00.0...USANaNNaN840NaNNaN16.0NaNNaNPOLYGON ((-16216412.543 -2157569.856, -1621650...
18379.0Ascension Exclusive Economic Zone8620.0200NM2208.0AscensionSHNUnited Kingdom0.00.0...GBRNaNNaN826NaNNaN654.0NaNNaNPOLYGON ((-1089355.142 -974062.004, -1089348.4...
28446.0Cook Islands Exclusive Economic Zone8672.0200NM2227.0Cook IslandsCOKNew Zealand0.00.0...NZLNaNNaN554NaNNaN184.0NaNNaNPOLYGON ((-15912583.852 -716733.193, -15813064...
38389.0Overlapping claim Falkland / Malvinas Islands:...8623.0Overlapping claim2208.0Falkland / Malvinas IslandsFLKUnited Kingdom8623.02149.0...GBRARGNaN82632.0NaN238.0238.0NaNPOLYGON ((-4061728.309 -6509190.466, -4443979....
48440.0French Polynesian Exclusive Economic Zone8656.0200NM17.0French PolynesiaPYFFrance0.00.0...FRANaNNaN250NaNNaN258.0NaNNaNMULTIPOLYGON (((-13543804.433 -974376.651, -13...
\n", - "

5 rows × 32 columns

\n", - "
" - ], - "text/plain": [ - " MRGID GEONAME MRGID_TER1 \\\n", - "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 \n", - "1 8379.0 Ascension Exclusive Economic Zone 8620.0 \n", - "2 8446.0 Cook Islands Exclusive Economic Zone 8672.0 \n", - "3 8389.0 Overlapping claim Falkland / Malvinas Islands:... 8623.0 \n", - "4 8440.0 French Polynesian Exclusive Economic Zone 8656.0 \n", - "\n", - " POL_TYPE MRGID_SOV1 TERRITORY1 ISO_TER1 \\\n", - "0 200NM 2204.0 American Samoa ASM \n", - "1 200NM 2208.0 Ascension SHN \n", - "2 200NM 2227.0 Cook Islands COK \n", - "3 Overlapping claim 2208.0 Falkland / Malvinas Islands FLK \n", - "4 200NM 17.0 French Polynesia PYF \n", - "\n", - " SOVEREIGN1 MRGID_TER2 MRGID_SOV2 ... ISO_SOV1 ISO_SOV2 ISO_SOV3 \\\n", - "0 United States 0.0 0.0 ... USA NaN NaN \n", - "1 United Kingdom 0.0 0.0 ... GBR NaN NaN \n", - "2 New Zealand 0.0 0.0 ... NZL NaN NaN \n", - "3 United Kingdom 8623.0 2149.0 ... GBR ARG NaN \n", - "4 France 0.0 0.0 ... FRA NaN NaN \n", - "\n", - " UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n", - "0 840 NaN NaN 16.0 NaN NaN \n", - "1 826 NaN NaN 654.0 NaN NaN \n", - "2 554 NaN NaN 184.0 NaN NaN \n", - "3 826 32.0 NaN 238.0 238.0 NaN \n", - "4 250 NaN NaN 258.0 NaN NaN \n", - "\n", - " geometry \n", - "0 POLYGON ((-16216412.543 -2157569.856, -1621650... \n", - "1 POLYGON ((-1089355.142 -974062.004, -1089348.4... \n", - "2 POLYGON ((-15912583.852 -716733.193, -15813064... \n", - "3 POLYGON ((-4061728.309 -6509190.466, -4443979.... \n", - "4 MULTIPOLYGON (((-13543804.433 -974376.651, -13... 
\n", - "\n", - "[5 rows x 32 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez = eez.to_crs('ESRI:54009')\n", "eez['AREA_KM2']= eez.geometry.area/ 1000000\n", @@ -418,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -427,29 +80,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shx\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.dbf\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.prj\n" - ] - } - ], + "outputs": [], "source": [ - "# Reproject to 4626\n", + "# Reproject to 4326\n", "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -533,7 +174,425 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n", + "eez.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eez['REGIONS'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dissolve by relevant fields: REGIONS\n", + "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n", + "regions.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions.crs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions['REGIONS'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area of each region\n", + "regions['AREA_KM2']= regions.geometry.area/ 1000000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reproject to 4326\n", + "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force 
/Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions['REGIONS'].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extract marine areas" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\", driver=\"ESRI Shapefile\")\n", + "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\", driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" that has the field \"ISO_SOV1\" for all rows except those in which ISO_SOV2 and ISO_SOV3 are not null. In such cases concatenate ISO_SOV1, ISO_SOV2 and ISO_SOV3\n", + "eez['iso'] = eez['ISO_SOV1']\n", + "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n", + "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "49" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(eez[eez['iso'].str.contains(';')])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(281, 33)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eez.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(337, 33)" + ] 
+ }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a mask for rows with multiple values in 'iso_code'\n", + "mask = eez['iso'].str.contains(';', na=False)\n", + "\n", + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "split_rows = eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "\n", + "eez_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(eez_new[eez_new['iso'].str.contains(';')])" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "iso_country_mapping = {\n", + " 'USA': 'United States',\n", + " 'GBR': 'United Kingdom',\n", + " 'NZL': 'New Zealand',\n", + " 'FRA': 'France',\n", + " 'WSM': 'Samoa',\n", + " 'TON': 'Tonga',\n", + " 'CHL': 'Chile',\n", + " 'URY': 'Uruguay',\n", + " 'PER': 'Peru',\n", + " 'BRA': 'Brazil',\n", + " 'KIR': 'Kiribati',\n", + " 'ARG': 'Argentina',\n", + " 'AUS': 'Australia',\n", + " 'COM': 'Comoros',\n", + " 'MDG': 'Madagascar',\n", + " 'ZAF': 'South Africa',\n", + " 'MUS': 'Mauritius',\n", + " 'VUT': 'Vanuatu',\n", + " 'NAM': 'Namibia',\n", + " 'TLS': 'Timor-Leste',\n", + " 'COG': 'Republic of the Congo',\n", + " 'AGO': 'Angola',\n", + " 'MOZ': 'Mozambique',\n", + " 'KEN': 'Kenya',\n", + " 'PNG': 'Papua New Guinea',\n", + " 'TZA': 'Tanzania',\n", + " 'SLB': 'Solomon Islands',\n", + " 'SYC': 'Seychelles',\n", + " 'COD': 'Democratic 
Republic of the Congo',\n", + " 'ATG': 'Antigua and Barbuda',\n", + " 'NLD': 'Netherlands',\n", + " 'PRT': 'Portugal',\n", + " 'BHS': 'The Bahamas',\n", + " 'BRB': 'Barbados',\n", + " 'MEX': 'Mexico',\n", + " 'CPV': 'Cape Verde',\n", + " 'ESP': 'Spain',\n", + " 'PAN': 'Panama',\n", + " 'CRI': 'Costa Rica',\n", + " 'DMA': 'Dominica',\n", + " 'DOM': 'Dominican Republic',\n", + " 'GTM': 'Guatemala',\n", + " 'DNK': 'Denmark',\n", + " 'GMB': 'Gambia',\n", + " 'GIB': 'Gibraltar',\n", + " 'GRD': 'Grenada',\n", + " 'SLE': 'Sierra Leone',\n", + " 'ISL': 'Iceland',\n", + " 'JAM': 'Jamaica',\n", + " 'MRT': 'Mauritania',\n", + " 'HTI': 'Haiti',\n", + " 'KNA': 'Saint Kitts and Nevis',\n", + " 'LCA': 'Saint Lucia',\n", + " 'VCT': 'Saint Vincent and the Grenadines',\n", + " 'TTO': 'Trinidad and Tobago',\n", + " 'SLV': 'El Salvador',\n", + " 'BLZ': 'Belize',\n", + " 'CUB': 'Cuba',\n", + " 'SEN': 'Senegal',\n", + " 'VEN': 'Venezuela',\n", + " 'CAN': 'Canada',\n", + " 'NIC': 'Nicaragua',\n", + " 'GUY': 'Guyana',\n", + " 'COL': 'Colombia',\n", + " 'IRL': 'Ireland',\n", + " 'GNB': 'Guinea-Bissau',\n", + " 'GIN': 'Guinea',\n", + " 'CIV': 'Ivory Coast',\n", + " 'LBR': 'Liberia',\n", + " 'HND': 'Honduras',\n", + " 'ECU': 'Ecuador',\n", + " 'ESH': 'Western Sahara',\n", + " 'SUR': 'Suriname',\n", + " 'MAR': 'Morocco',\n", + " 'ARE': 'United Arab Emirates',\n", + " 'CYP': 'Cyprus',\n", + " 'ERI': 'Eritrea',\n", + " 'EGY': 'Egypt',\n", + " 'GEO': 'Georgia',\n", + " 'IRN': 'Iran',\n", + " 'LBN': 'Lebanon',\n", + " 'LBY': 'Libya',\n", + " 'MLT': 'Malta',\n", + " 'OMN': 'Oman',\n", + " 'SAU': 'Saudi Arabia',\n", + " 'LKA': 'Sri Lanka',\n", + " 'SDN': 'Sudan',\n", + " 'SYR': 'Syria',\n", + " 'TGO': 'Togo',\n", + " 'GRC': 'Greece',\n", + " 'TUR': 'Turkey',\n", + " 'MCO': 'Monaco',\n", + " 'TUN': 'Tunisia',\n", + " 'MNE': 'Montenegro',\n", + " 'ALB': 'Albania',\n", + " 'BGR': 'Bulgaria',\n", + " 'PSE': 'Palestine',\n", + " 'KWT': 'Kuwait',\n", + " 'IRQ': 'Iraq',\n", + " 'BHR': 'Bahrain',\n", + " 
'QAT': 'Qatar',\n", + " 'YEM': 'Yemen',\n", + " 'ISR': 'Israel',\n", + " 'JOR': 'Jordan',\n", + " 'DJI': 'Djibouti',\n", + " 'BGD': 'Bangladesh',\n", + " 'NGA': 'Nigeria',\n", + " 'CMR': 'Cameroon',\n", + " 'STP': 'São Tomé and Príncipe',\n", + " 'BIH': 'Bosnia and Herzegovina',\n", + " 'MHL': 'Marshall Islands',\n", + " 'PLW': 'Palau',\n", + " 'PHL': 'Philippines',\n", + " 'TWN': 'Taiwan',\n", + " 'SGP': 'Singapore',\n", + " 'THA': 'Thailand',\n", + " 'VNM': 'Vietnam',\n", + " 'KOR': 'South Korea',\n", + " 'BRN': 'Brunei',\n", + " 'PRK': 'North Korea',\n", + " 'KHM': 'Cambodia',\n", + " 'CHN': 'China',\n", + " 'EST': 'Estonia',\n", + " 'FIN': 'Finland',\n", + " 'SWE': 'Sweden',\n", + " 'LTU': 'Lithuania',\n", + " 'NOR': 'Norway',\n", + " 'BEL': 'Belgium',\n", + " 'DEU': 'Germany',\n", + " 'LVA': 'Latvia',\n", + " 'HRV': 'Croatia',\n", + " 'ITA': 'Italy',\n", + " 'UKR': 'Ukraine',\n", + " 'ROU': 'Romania',\n", + " 'JPN': 'Japan',\n", + " 'IND': 'India',\n", + " 'PAK': 'Pakistan',\n", + " 'TKM': 'Turkmenistan',\n", + " 'AZE': 'Azerbaijan',\n", + " 'KAZ': 'Kazakhstan',\n", + " 'MMR': 'Myanmar',\n", + " 'POL': 'Poland',\n", + " 'BEN': 'Benin',\n", + " 'SVN': 'Slovenia',\n", + " 'MYS': 'Malaysia',\n", + " 'ATA': 'Antarctica',\n", + " 'TUV': 'Tuvalu',\n", + " 'FJI': 'Fiji',\n", + " 'FSM': 'Micronesia',\n", + " 'GNQ': 'Equatorial Guinea',\n", + " 'MDV': 'Maldives',\n", + " 'SOM': 'Somalia',\n", + " 'NRU': 'Nauru',\n", + " 'GAB': 'Gabon',\n", + " 'IDN': 'Indonesia',\n", + " 'DZA': 'Algeria',\n", + " 'GHA': 'Ghana',\n", + " 'RUS': 'Russia'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "def get_name(country):\n", + " return iso_country_mapping.get(country, None)\n", + "\n", + "# Apply the function to create the 'PARENT_ISO' column\n", + "eez_new['name_iso'] = eez_new['iso'].apply(get_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -568,7 +627,6 
@@ " MRGID_TER2\n", " MRGID_SOV2\n", " ...\n", - " ISO_SOV2\n", " ISO_SOV3\n", " UN_SOV1\n", " UN_SOV2\n", @@ -577,7 +635,8 @@ " UN_TER2\n", " UN_TER3\n", " geometry\n", - " REGIONS\n", + " iso\n", + " name_iso\n", " \n", " \n", " \n", @@ -595,15 +654,15 @@ " 0.0\n", " ...\n", " NaN\n", - " NaN\n", " 840\n", " NaN\n", " NaN\n", " 16.0\n", " NaN\n", " NaN\n", - " POLYGON ((-16216412.543 -2157569.856, -1621650...\n", - " North America\n", + " POLYGON ((-166.64112 -17.55527, -166.64194 -17...\n", + " USA\n", + " United States\n", " \n", " \n", " 1\n", @@ -619,19 +678,19 @@ " 0.0\n", " ...\n", " NaN\n", - " NaN\n", " 826\n", " NaN\n", " NaN\n", " 654.0\n", " NaN\n", " NaN\n", - " POLYGON ((-1089355.142 -974062.004, -1089348.4...\n", - " Europe\n", + " POLYGON ((-10.93328 -7.88745, -10.93324 -7.889...\n", + " GBR\n", + " United Kingdom\n", " \n", " \n", "\n", - "

2 rows × 33 columns

\n", + "

2 rows × 34 columns

\n", "" ], "text/plain": [ @@ -643,90 +702,161 @@ "0 2204.0 American Samoa ASM United States 0.0 \n", "1 2208.0 Ascension SHN United Kingdom 0.0 \n", "\n", - " MRGID_SOV2 ... ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 \\\n", - "0 0.0 ... NaN NaN 840 NaN NaN 16.0 \n", - "1 0.0 ... NaN NaN 826 NaN NaN 654.0 \n", - "\n", - " UN_TER2 UN_TER3 geometry \\\n", - "0 NaN NaN POLYGON ((-16216412.543 -2157569.856, -1621650... \n", - "1 NaN NaN POLYGON ((-1089355.142 -974062.004, -1089348.4... \n", + " MRGID_SOV2 ... ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n", + "0 0.0 ... NaN 840 NaN NaN 16.0 NaN NaN \n", + "1 0.0 ... NaN 826 NaN NaN 654.0 NaN NaN \n", "\n", - " REGIONS \n", - "0 North America \n", - "1 Europe \n", + " geometry iso name_iso \n", + "0 POLYGON ((-166.64112 -17.55527, -166.64194 -17... USA United States \n", + "1 POLYGON ((-10.93328 -7.88745, -10.93324 -7.889... GBR United Kingdom \n", "\n", - "[2 rows x 33 columns]" + "[2 rows x 34 columns]" ] }, - "execution_count": 16, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n", - "eez.head(2)" + "eez_new.head(2)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ - "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")" + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", 
\"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", 
\"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "name_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + " name_to_region[country] = region['region_name']" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['North America', 'Europe', 'Asia & Pacific',\n", - " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n", - " dtype=object)" + "array(['NA', 'EU', 'AS', 'SA', 'AF', 'WA', 'AT'], dtype=object)" ] }, - "execution_count": 18, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "eez['REGIONS'].unique()" + "eez_new['region'] = eez_new['iso'].map(country_to_region)\n", + "eez_new['region'].unique()" ] }, { 
"cell_type": "code", - "execution_count": 20, + "execution_count": 54, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[dissolve2] Removed 127,740 / 218,614 slivers using 0.033+ sqkm variable threshold\n", - "[dissolve2] Dissolved 281 features into 7 features\n", - "[explode] Exploded 7 features into 83 features\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n" - ] + "data": { + "text/plain": [ + "array(['North America', 'Europe', 'Asia & Pacific',\n", + " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antartica'],\n", + " dtype=object)" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Dissolve by relevant fields: REGIONS\n", - "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + "eez_new['region_name'] = eez_new['iso'].map(name_to_region)\n", + "eez_new['region_name'].unique()" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -750,177 +880,407 @@ " \n", " \n", " \n", - " REGIONS\n", - " geometry\n", + " location_id\n", + " location_name\n", + " total_marine_area\n", + " location_type\n", " \n", " \n", " \n", " \n", " 0\n", - " North America\n", - " POLYGON ((-16216412.543 -2157569.856, -1621650...\n", + " AGO\n", + " Angola\n", + " 498908.577009\n", + " country\n", " \n", " \n", " 1\n", - 
" North America\n", - " POLYGON ((-15875617.974 972834.674, -15887321....\n", + " ALB\n", + " Albania\n", + " 12177.287755\n", + " country\n", " \n", " \n", "\n", "" ], "text/plain": [ - " REGIONS geometry\n", - "0 North America POLYGON ((-16216412.543 -2157569.856, -1621650...\n", - "1 North America POLYGON ((-15875617.974 972834.674, -15887321...." + " location_id location_name total_marine_area location_type\n", + "0 AGO Angola 498908.577009 country\n", + "1 ALB Albania 12177.287755 country" ] }, - "execution_count": 21, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n", - "regions.head(2)" + "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "marine_areas['location_type'] = 'country'\n", + "marine_areas.head(2)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idlocation_nametotal_marine_arealocation_type
0AFAfrica1.495538e+07region
1ASAsia & Pacific5.269208e+07region
\n", + "
" + ], "text/plain": [ - "\n", - "Name: World_Mollweide\n", - "Axis Info [cartesian]:\n", - "- [east]: Easting (metre)\n", - "- [north]: Northing (metre)\n", - "Area of Use:\n", - "- undefined\n", - "Coordinate Operation:\n", - "- name: unnamed\n", - "- method: Mollweide\n", - "Datum: World Geodetic System 1984\n", - "- Ellipsoid: WGS 84\n", - "- Prime Meridian: Greenwich" + " location_id location_name total_marine_area location_type\n", + "0 AF Africa 1.495538e+07 region\n", + "1 AS Asia & Pacific 5.269208e+07 region" ] }, - "execution_count": 22, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "regions.crs" + "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "regions_areas['location_type'] = 'region'\n", + "regions_areas.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n", + "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 63, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idlocation_nametotal_marine_arealocation_type
0AGOAngola4.989086e+05country
1ALBAlbania1.217729e+04country
2AREUnited Arab Emirates5.821593e+04country
3ARGArgentina2.897629e+06country
4ATAAntarctica8.842860e+06country
...............
162NANorth America1.791826e+07region
163SALatin America & Caribbean2.107800e+07region
164WAWest Asia1.456969e+06region
165GLOBWorldwide3.610000e+08worldwide
166ABNJHigh Seas2.128814e+08country
\n", + "

167 rows × 4 columns

\n", + "
" + ], "text/plain": [ - "array(['North America', 'Europe', 'Asia & Pacific',\n", - " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n", - " dtype=object)" + " location_id location_name total_marine_area location_type\n", + "0 AGO Angola 4.989086e+05 country\n", + "1 ALB Albania 1.217729e+04 country\n", + "2 ARE United Arab Emirates 5.821593e+04 country\n", + "3 ARG Argentina 2.897629e+06 country\n", + "4 ATA Antarctica 8.842860e+06 country\n", + ".. ... ... ... ...\n", + "162 NA North America 1.791826e+07 region\n", + "163 SA Latin America & Caribbean 2.107800e+07 region\n", + "164 WA West Asia 1.456969e+06 region\n", + "165 GLOB Worldwide 3.610000e+08 worldwide\n", + "166 ABNJ High Seas 2.128814e+08 country\n", + "\n", + "[167 rows x 4 columns]" ] }, - "execution_count": 23, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "regions['REGIONS'].unique()" + "# concat gl_df and hs_df to marine_areas\n", + "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n", + "marine_areas2\n" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 64, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_27590/1686611470.py:1: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartic'\n" - ] + "data": { + "text/plain": [ + "array(['country', 'region', 'worldwide'], dtype=object)" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - 
"metadata": {}, - "outputs": [], - "source": [ - "# Calculate area of each region\n", - "regions['AREA_KM2']= regions.geometry.area/ 1000000" + "marine_areas2['location_type'].unique()" ] }, { - "cell_type": "code", - "execution_count": 29, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")" + "### Clean WDPA dataset" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb Cell 42\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m# Read WDPA data\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m poly1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39;49mread_file(path_in \u001b[39m+\u001b[39;49m \u001b[39m\"\u001b[39;49m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 3\u001b[0m point1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m 
\u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m poly2 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:281\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 279\u001b[0m path_or_bytes \u001b[39m=\u001b[39m filename\n\u001b[0;32m--> 281\u001b[0m \u001b[39mreturn\u001b[39;00m _read_file_fiona(\n\u001b[1;32m 282\u001b[0m path_or_bytes, from_bytes, bbox\u001b[39m=\u001b[39;49mbbox, mask\u001b[39m=\u001b[39;49mmask, rows\u001b[39m=\u001b[39;49mrows, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 283\u001b[0m )\n\u001b[1;32m 285\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 286\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39munknown engine \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mengine\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:379\u001b[0m, in \u001b[0;36m_read_file_fiona\u001b[0;34m(path_or_bytes, from_bytes, bbox, mask, rows, where, **kwargs)\u001b[0m\n\u001b[1;32m 375\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(\n\u001b[1;32m 376\u001b[0m [record[\u001b[39m\"\u001b[39m\u001b[39mproperties\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mfor\u001b[39;00m record \u001b[39min\u001b[39;00m f_filt], 
columns\u001b[39m=\u001b[39mcolumns\n\u001b[1;32m 377\u001b[0m )\n\u001b[1;32m 378\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 379\u001b[0m df \u001b[39m=\u001b[39m GeoDataFrame\u001b[39m.\u001b[39;49mfrom_features(\n\u001b[1;32m 380\u001b[0m f_filt, crs\u001b[39m=\u001b[39;49mcrs, columns\u001b[39m=\u001b[39;49mcolumns \u001b[39m+\u001b[39;49m [\u001b[39m\"\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n\u001b[1;32m 381\u001b[0m )\n\u001b[1;32m 382\u001b[0m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m datetime_fields:\n\u001b[1;32m 383\u001b[0m as_dt \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mto_datetime(df[k], errors\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/geodataframe.py:635\u001b[0m, in \u001b[0;36mGeoDataFrame.from_features\u001b[0;34m(cls, features, crs, columns)\u001b[0m\n\u001b[1;32m 632\u001b[0m features_lst \u001b[39m=\u001b[39m features\n\u001b[1;32m 634\u001b[0m rows \u001b[39m=\u001b[39m []\n\u001b[0;32m--> 635\u001b[0m \u001b[39mfor\u001b[39;00m feature \u001b[39min\u001b[39;00m features_lst:\n\u001b[1;32m 636\u001b[0m \u001b[39m# load geometry\u001b[39;00m\n\u001b[1;32m 637\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(feature, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[1;32m 638\u001b[0m feature \u001b[39m=\u001b[39m feature\u001b[39m.\u001b[39m__geo_interface__\n", + "File \u001b[0;32mfiona/ogrext.pyx:1739\u001b[0m, in \u001b[0;36mfiona.ogrext.Iterator.__next__\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/ogrext.pyx:389\u001b[0m, in \u001b[0;36mfiona.ogrext.FeatureBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:193\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build_from_feature\u001b[0;34m()\u001b[0m\n", + "File 
\u001b[0;32mfiona/_geometry.pyx:249\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:169\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildMultiPolygon\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:243\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:157\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildPolygon\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:259\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/fiona/model.py:201\u001b[0m, in \u001b[0;36mGeometry.from_dict\u001b[0;34m(cls, ob, **kwargs)\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_delegate \u001b[39m=\u001b[39m _Geometry(\n\u001b[1;32m 197\u001b[0m coordinates\u001b[39m=\u001b[39mcoordinates, \u001b[39mtype\u001b[39m\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m, geometries\u001b[39m=\u001b[39mgeometries\n\u001b[1;32m 198\u001b[0m )\n\u001b[1;32m 199\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mdata)\n\u001b[0;32m--> 201\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 202\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfrom_dict\u001b[39m(\u001b[39mcls\u001b[39m, ob\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 203\u001b[0m \u001b[39mif\u001b[39;00m ob \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m 
\u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 204\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mdict\u001b[39m(\u001b[39mgetattr\u001b[39m(ob, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m, ob))\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ - "# Reproject to 4626\n", - "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + "# Read WDPA data\n", + "poly1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n", + "point1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n", + "poly2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n", + "point2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n", + "poly3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n", + "point3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n", + "dataframes = [poly1, point1, poly2, point2, poly3, point3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Convert points to polygons and merge all wdpa in one dataset**" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['North America', 'Europe', 'Asia 
& Pacific',\n", - " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antarctica'],\n", - " dtype=object)" + "18613" ] }, - "execution_count": 32, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "regions['REGIONS'].unique()" + "# Calculate radius based on REP_AREA\n", + "def calculate_radius(rep_area):\n", + " return (rep_area / 3.14159265358979323846) ** 0.5\n", + "\n", + "# Iterate through the list and process the desired dataframes\n", + "for idx in [1, 3, 5]:\n", + " # Get the dataframe at the specified index\n", + " gdf = dataframes[idx]\n", + "\n", + " # Reproject to Mollweide\n", + " gdf = gdf.to_crs('ESRI:54009')\n", + "\n", + " # Transform the reported area from square kilometers to square meters\n", + " gdf['REP_AREA_m'] = gdf['REP_AREA'] * 1000000\n", + "\n", + " # Create the \"radius\" column by applying the calculate_radius function to the \"REP_AREA_m\" column (area in square meters, so the radius is in meters)\n", + " gdf['radius'] = gdf['REP_AREA_m'].apply(calculate_radius)\n", + "\n", + " # Create buffers around the points using the \"radius\" column\n", + " gdf_buffered = gdf.copy()\n", + " gdf_buffered['geometry'] = gdf.apply(lambda row: row.geometry.buffer(row['radius']), axis=1)\n", + "\n", + " # Reproject back to WGS84\n", + " gdf_buffered = gdf_buffered.to_crs('EPSG:4326')\n", + "\n", + " # Remove rows with invalid geometries\n", + " gdf_buffered = gdf_buffered[gdf_buffered['geometry'].is_valid]\n", + " \n", + " # Update the original dataframe with the buffered data\n", + " dataframes[idx] = gdf_buffered\n", + "\n", + "# Merge all dataframes\n", + "merged_mpa_all = pd.concat(dataframes)\n", + "len(merged_mpa_all)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the wdpa dataframe as a shapefile\n", + "merged_mpa_all.to_file(path_out + \"/wdpa/merged_wdpa_all.shp\")" ] } ], diff --git a/data/notebooks/location_areas.ipynb b/data/notebooks/location_areas.ipynb new file 
mode 100644 index 00000000..c9c933d7 --- /dev/null +++ b/data/notebooks/location_areas.ipynb @@ -0,0 +1,574 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create locations table" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", + "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" that has the field \"ISO_SOV1\" for all rows except those in which ISO_SOV2 and ISO_SOV3 are not null. 
In such cases concatenate ISO_SOV1, ISO_SOV2 and ISO_SOV3\n", + "eez['iso'] = eez['ISO_SOV1']\n", + "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n", + "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(337, 33)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a mask for rows with multiple values in 'iso'\n", + "mask = eez['iso'].str.contains(';', na=False)\n", + "\n", + "# Split the 'iso' values and create separate rows only for rows with multiple values\n", + "split_rows = eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso'\n", + "single_value_rows = eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "\n", + "eez_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "iso_country_mapping = {\n", + " 'USA': 'United States',\n", + " 'GBR': 'United Kingdom',\n", + " 'NZL': 'New Zealand',\n", + " 'FRA': 'France',\n", + " 'WSM': 'Samoa',\n", + " 'TON': 'Tonga',\n", + " 'CHL': 'Chile',\n", + " 'URY': 'Uruguay',\n", + " 'PER': 'Peru',\n", + " 'BRA': 'Brazil',\n", + " 'KIR': 'Kiribati',\n", + " 'ARG': 'Argentina',\n", + " 'AUS': 'Australia',\n", + " 'COM': 'Comoros',\n", + " 'MDG': 'Madagascar',\n", + " 'ZAF': 'South Africa',\n", + " 'MUS': 'Mauritius',\n", + " 'VUT': 'Vanuatu',\n", + " 'NAM': 'Namibia',\n", + " 'TLS': 'Timor-Leste',\n", + " 'COG': 'Republic of the Congo',\n", + " 'AGO': 'Angola',\n", + " 'MOZ': 'Mozambique',\n", + " 'KEN': 
'Kenya',\n", + " 'PNG': 'Papua New Guinea',\n", + " 'TZA': 'Tanzania',\n", + " 'SLB': 'Solomon Islands',\n", + " 'SYC': 'Seychelles',\n", + " 'COD': 'Democratic Republic of the Congo',\n", + " 'ATG': 'Antigua and Barbuda',\n", + " 'NLD': 'Netherlands',\n", + " 'PRT': 'Portugal',\n", + " 'BHS': 'The Bahamas',\n", + " 'BRB': 'Barbados',\n", + " 'MEX': 'Mexico',\n", + " 'CPV': 'Cape Verde',\n", + " 'ESP': 'Spain',\n", + " 'PAN': 'Panama',\n", + " 'CRI': 'Costa Rica',\n", + " 'DMA': 'Dominica',\n", + " 'DOM': 'Dominican Republic',\n", + " 'GTM': 'Guatemala',\n", + " 'DNK': 'Denmark',\n", + " 'GMB': 'Gambia',\n", + " 'GIB': 'Gibraltar',\n", + " 'GRD': 'Grenada',\n", + " 'SLE': 'Sierra Leone',\n", + " 'ISL': 'Iceland',\n", + " 'JAM': 'Jamaica',\n", + " 'MRT': 'Mauritania',\n", + " 'HTI': 'Haiti',\n", + " 'KNA': 'Saint Kitts and Nevis',\n", + " 'LCA': 'Saint Lucia',\n", + " 'VCT': 'Saint Vincent and the Grenadines',\n", + " 'TTO': 'Trinidad and Tobago',\n", + " 'SLV': 'El Salvador',\n", + " 'BLZ': 'Belize',\n", + " 'CUB': 'Cuba',\n", + " 'SEN': 'Senegal',\n", + " 'VEN': 'Venezuela',\n", + " 'CAN': 'Canada',\n", + " 'NIC': 'Nicaragua',\n", + " 'GUY': 'Guyana',\n", + " 'COL': 'Colombia',\n", + " 'IRL': 'Ireland',\n", + " 'GNB': 'Guinea-Bissau',\n", + " 'GIN': 'Guinea',\n", + " 'CIV': 'Ivory Coast',\n", + " 'LBR': 'Liberia',\n", + " 'HND': 'Honduras',\n", + " 'ECU': 'Ecuador',\n", + " 'ESH': 'Western Sahara',\n", + " 'SUR': 'Suriname',\n", + " 'MAR': 'Morocco',\n", + " 'ARE': 'United Arab Emirates',\n", + " 'CYP': 'Cyprus',\n", + " 'ERI': 'Eritrea',\n", + " 'EGY': 'Egypt',\n", + " 'GEO': 'Georgia',\n", + " 'IRN': 'Iran',\n", + " 'LBN': 'Lebanon',\n", + " 'LBY': 'Libya',\n", + " 'MLT': 'Malta',\n", + " 'OMN': 'Oman',\n", + " 'SAU': 'Saudi Arabia',\n", + " 'LKA': 'Sri Lanka',\n", + " 'SDN': 'Sudan',\n", + " 'SYR': 'Syria',\n", + " 'TGO': 'Togo',\n", + " 'GRC': 'Greece',\n", + " 'TUR': 'Turkey',\n", + " 'MCO': 'Monaco',\n", + " 'TUN': 'Tunisia',\n", + " 'MNE': 'Montenegro',\n", 
+ " 'ALB': 'Albania',\n", + " 'BGR': 'Bulgaria',\n", + " 'PSE': 'Palestine',\n", + " 'KWT': 'Kuwait',\n", + " 'IRQ': 'Iraq',\n", + " 'BHR': 'Bahrain',\n", + " 'QAT': 'Qatar',\n", + " 'YEM': 'Yemen',\n", + " 'ISR': 'Israel',\n", + " 'JOR': 'Jordan',\n", + " 'DJI': 'Djibouti',\n", + " 'BGD': 'Bangladesh',\n", + " 'NGA': 'Nigeria',\n", + " 'CMR': 'Cameroon',\n", + " 'STP': 'São Tomé and Príncipe',\n", + " 'BIH': 'Bosnia and Herzegovina',\n", + " 'MHL': 'Marshall Islands',\n", + " 'PLW': 'Palau',\n", + " 'PHL': 'Philippines',\n", + " 'TWN': 'Taiwan',\n", + " 'SGP': 'Singapore',\n", + " 'THA': 'Thailand',\n", + " 'VNM': 'Vietnam',\n", + " 'KOR': 'South Korea',\n", + " 'BRN': 'Brunei',\n", + " 'PRK': 'North Korea',\n", + " 'KHM': 'Cambodia',\n", + " 'CHN': 'China',\n", + " 'EST': 'Estonia',\n", + " 'FIN': 'Finland',\n", + " 'SWE': 'Sweden',\n", + " 'LTU': 'Lithuania',\n", + " 'NOR': 'Norway',\n", + " 'BEL': 'Belgium',\n", + " 'DEU': 'Germany',\n", + " 'LVA': 'Latvia',\n", + " 'HRV': 'Croatia',\n", + " 'ITA': 'Italy',\n", + " 'UKR': 'Ukraine',\n", + " 'ROU': 'Romania',\n", + " 'JPN': 'Japan',\n", + " 'IND': 'India',\n", + " 'PAK': 'Pakistan',\n", + " 'TKM': 'Turkmenistan',\n", + " 'AZE': 'Azerbaijan',\n", + " 'KAZ': 'Kazakhstan',\n", + " 'MMR': 'Myanmar',\n", + " 'POL': 'Poland',\n", + " 'BEN': 'Benin',\n", + " 'SVN': 'Slovenia',\n", + " 'MYS': 'Malaysia',\n", + " 'ATA': 'Antarctica',\n", + " 'TUV': 'Tuvalu',\n", + " 'FJI': 'Fiji',\n", + " 'FSM': 'Micronesia',\n", + " 'GNQ': 'Equatorial Guinea',\n", + " 'MDV': 'Maldives',\n", + " 'SOM': 'Somalia',\n", + " 'NRU': 'Nauru',\n", + " 'GAB': 'Gabon',\n", + " 'IDN': 'Indonesia',\n", + " 'DZA': 'Algeria',\n", + " 'GHA': 'Ghana',\n", + " 'RUS': 'Russia'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_name(country):\n", + " return iso_country_mapping.get(country, None)\n", + "\n", + "# Apply the function to create the 'name_iso' column (country name looked up from the ISO3 code; None when the code is not in the mapping)\n", + 
"eez_new['name_iso'] = eez_new['iso'].apply(get_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", 
\"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "name_to_region = {}\n", + "for region in regions_data:\n", + " for country in 
region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + " name_to_region[country] = region['region_name']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "eez_new['region'] = eez_new['iso'].map(country_to_region)\n", + "eez_new['region_name'] = eez_new['iso'].map(name_to_region)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "marine_areas['location_type'] = 'country'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "regions_areas['location_type'] = 'region'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n", + "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate the country (marine_areas), region (regions_areas), global (global_area) and high-seas (hs_area) tables into one locations table\n", + "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the table 
as csv\n", + "marine_areas2.to_csv(path_out + \"/tables/locations.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create region_locations table" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
region_idlocation_id
0ASAFG
1ASASM
2ASAUS
3ASBGD
4ASBRN
.........
244WAQAT
245WASAU
246WASYR
247WAYEM
248ATATA
\n", + "

249 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " region_id location_id\n", + "0 AS AFG\n", + "1 AS ASM\n", + "2 AS AUS\n", + "3 AS BGD\n", + "4 AS BRN\n", + ".. ... ...\n", + "244 WA QAT\n", + "245 WA SAU\n", + "246 WA SYR\n", + "247 WA YEM\n", + "248 AT ATA\n", + "\n", + "[249 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regions_df = pd.DataFrame([{'region_id': data['region_iso'], 'location_id': iso} for data in regions_data for iso in data['country_iso_3s']])\n", + "regions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "regions_df.to_csv(path_out + '/tables/region_locations.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/mpas_table.ipynb b/data/notebooks/mpas_table.ipynb new file mode 100644 index 00000000..b846ab4c --- /dev/null +++ b/data/notebooks/mpas_table.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "### Read relevant datasets: MPAtlas, WDPA, and ProtectedSeas" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Read mpatlas data\n", + "mpatlas = gpd.read_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\")\n", + "mpatlas = mpatlas.drop_duplicates(subset=['wdpa_id', 'designation','location_id','establishment_stage', 'protection_level','year'], keep='first')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ps = gpd.read_file(path_out + \"/protectedseas/protectedseas.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "wdpa = gpd.read_file(path_out + \"/wdpa/merged_wdpa_all.shp\")\n", + "wdpa = wdpa[['WDPA_PID', 'NAME','PA_DEF', 'GIS_M_AREA','PARENT_ISO']].rename(columns={'WDPA_PID': 'wdpa_id', 'NAME': 'name', 'PA_DEF':'protection_type', 'GIS_M_AREA': 'area', 'PARENT_ISO': 'location_id'})\n", + "wdpa['protection_type'] = wdpa['protection_type'].astype(int).replace({1: 'mpa', 0: 'oecm'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combine information from different tables" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Add protected_level info from mpatlas and protectedseas to wdpa df\n", + "table_prot = wdpa.merge(mpatlas[['wdpa_id','area_km2','protection_level']], on='wdpa_id', how='left').rename(columns={'area_km2':'area_mpatlas','protection_level': 'mpatlas_prot_lvl'})\n", + "table_prot = table_prot.merge(ps[['wdpa_id','FPS_cat', 'total_area']], on='wdpa_id', how='left').rename(columns={'FPS_cat': 'fpl', 'total_area': 'area_ps'})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "table_prot['area'] = 
table_prot['area_mpatlas'].combine_first(table_prot['area_ps']).combine_first(table_prot['area'])\n", + "table_prot = table_prot.drop(columns=['area_mpatlas', 'area_ps'])\n", + "table_prot = table_prot.drop(columns={'name', 'protection_type'})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Add establishment info to wdpa df\n", + "table_est = wdpa.merge(mpatlas[['wdpa_id','establishment_stage', 'year']], on='wdpa_id', how='left')\n", + "table_est = table_est.drop(columns={'area', 'location_id'})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Save tables as csv\n", + "table_prot.to_csv(path_out + \"/tables/mpas_table.csv\", index=False)\n", + "table_est.to_csv(path_out + \"/tables/mpas_table_establishment.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/mpatlas_stats.ipynb b/data/notebooks/mpatlas_stats.ipynb new file mode 100644 index 00000000..afdf6408 --- /dev/null +++ b/data/notebooks/mpatlas_stats.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = 
\"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read and prepare data" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# Read data from MPAtlas\n", + "mpatlas = gpd.read_file(path_in + \"/mpatlas_assess_zone.geojson\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Fill missing wdpa_pid with the wdpa_id\n", + "mpatlas['wdpa_pid'] = mpatlas['wdpa_pid'].fillna(mpatlas['wdpa_id'])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column with protection level reclassified\n", + "def map_protection_level(value):\n", + " if value in [\"full\", \"high\"]:\n", + " return \"fully or highly protected\"\n", + " else:\n", + " return \"less protected or unknown\"\n", + "\n", + "# Create a new column based on column1\n", + "mpatlas['protection_level'] = mpatlas['protection_mpaguide_level'].apply(map_protection_level)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# replace proposed/committed with proposed or committed\n", + "mpatlas['establishment_stage'] = mpatlas['establishment_stage'].replace(['proposed/committed'], 'proposed or committed')" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Take only year from 'proposed_date', 'designated_date', 'implemented_date'\n", + "mpatlas['proposed_date'] = mpatlas['proposed_date'].str[:4].astype('Int64')\n", + "mpatlas['designated_date'] = mpatlas['designated_date'].str[:4].astype('Int64')\n", + "mpatlas['implemented_date'] = mpatlas['implemented_date'].str[:4].astype('Int64')\n", + "\n", + "# Create column 'year' with 
the most recent year from 'proposed_date', 'designated_date', 'implemented_date'\n", + "mpatlas['year'] = mpatlas[['proposed_date', 'designated_date', 'implemented_date']].max(axis=1)\n", + "\n", + "# Convert year to int to be able to save it later (Int64 not allowed)\n", + "mpatlas['year'].fillna(0, inplace=True)\n", + "mpatlas['year'] = mpatlas['year'].astype(int)\n", + "mpatlas['year'] = mpatlas['year'].replace(0, pd.NaT)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area in km2\n", + "mpatlas.to_crs('ESRI:54009', inplace=True)\n", + "mpatlas['area_km2'] = mpatlas['geometry'].area / 10**6\n", + "mpatlas.to_crs('EPSG:4326', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep relevant columns \n", + "mpatlas2 = mpatlas[['wdpa_pid', 'name', 'designation', 'sovereign', 'area_km2', 'establishment_stage', 'protection_level', 'year', 'geometry']].rename(columns={'sovereign': 'location_id', 'wdpa_pid': 'wdpa_id'})\n", + "\n", + "# Save as geojson (to keep full names)\n", + "mpatlas2.to_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\", driver='GeoJSON')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# For those with multiple countries, split them\n", + "mpatlas_iso = mpatlas2.copy()\n", + "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(';')\n", + "mpatlas_iso = mpatlas_iso.explode('location_id')\n", + "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(':')\n", + "mpatlas_iso = mpatlas_iso.explode('location_id')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global stats" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate global area per protection level\n", + "prot_global = 
mpatlas2.groupby('protection_level').agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "prot_global['location_id'] = 'GLOB'\n", + "prot_global['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate global area per establishment stage\n", + "stage_global = mpatlas2.groupby(['establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "stage_global['location_id'] = 'GLOB'\n", + "stage_global['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country stats" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "prot_iso = mpatlas_iso.groupby(['location_id', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "prot_iso['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "stage_iso = mpatlas_iso.groupby(['location_id', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "stage_iso['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Region stats" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", 
\"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", 
\"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + "\n", + "# Add region column to mpatlas_iso\n", + "mpatlas_iso['regions'] = mpatlas_iso['location_id'].map(country_to_region)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area per protection level per region\n", + "prot_region = mpatlas_iso.groupby(['regions', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n", + "prot_region['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area per establishment stage per region\n", + "stage_region = mpatlas_iso.groupby(['regions', 
'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n", + "stage_region['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate all dataframes for protection stats and establishment stage stats\n", + "prot = pd.concat([prot_iso, prot_global, prot_region], ignore_index=True)\n", + "stage = pd.concat([stage_iso, stage_global, stage_region], ignore_index=True)\n", + "prot.to_csv(path_out + \"/tables/mpatlas_protection_level.csv\", index=False)\n", + "stage.to_csv(path_out + \"/tables/mpatlas_establishment_stage.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb new file mode 100644 index 00000000..54ae3293 --- /dev/null +++ b/data/notebooks/protectedseas.ipynb @@ -0,0 +1,546 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Processing" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Import shp containing geometries\n", + "ps = gpd.read_file(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Import csv containing information\n", + "protectedseas = pd.read_csv(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep only rows in which wdpa_id is not null and it's different than 0\n", + "protectedseas = protectedseas[protectedseas['wdpa_id'].notna()]\n", + "protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Join csv with shapefile and keep only wdpa geometries\n", + "ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep only columns of interest\n", + "ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]\n", + "ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# ProtectedSeas only provides country names, not country codes. 
We need to add country codes to the dataframe\n", + "country_iso_dict = {\n", + " 'Antigua and Barbuda': 'ATG',\n", + " 'USA': 'USA',\n", + " 'Albania': 'ALB',\n", + " 'Netherlands Antilles': 'NLD',\n", + " 'United Arab Emirates': 'ARE',\n", + " 'Argentina': 'ARG',\n", + " 'France': 'FRA',\n", + " 'Australia': 'AUS',\n", + " 'Barbados': 'BRB',\n", + " 'Belgium': 'BEL',\n", + " 'Bangladesh': 'BGD',\n", + " 'Bulgaria': 'BGR',\n", + " 'Belize': 'BLZ',\n", + " 'Brazil': 'BRA',\n", + " 'Bahamas': 'BHS',\n", + " 'British Virgin Islands': 'GBR',\n", + " 'Canada': 'CAN',\n", + " 'Chile': 'CHL',\n", + " 'Cameroon': 'CMR',\n", + " 'Colombia': 'COL',\n", + " 'Comoros': 'COM',\n", + " 'Costa Rica': 'CRI',\n", + " 'Cuba': 'CUB',\n", + " 'Cyprus': 'CYP',\n", + " 'Germany': 'DEU',\n", + " 'Djibouti': 'DJI',\n", + " 'Djbouti': 'DJI',\n", + " 'Dominica': 'DMA',\n", + " 'Denmark': 'DNK',\n", + " 'Dominican Republic': 'DOM',\n", + " 'Algeria': 'DZA',\n", + " 'Ecuador': 'ECU',\n", + " 'Egypt': 'EGY',\n", + " 'Spain': 'ESP',\n", + " 'Estonia': 'EST',\n", + " 'Finland': 'FIN',\n", + " 'France, Italy, Monaco': 'FRA;ITA;MCO',\n", + " 'French Antilles': 'FRA',\n", + " 'Gabon': 'GAB',\n", + " 'United Kingdom': 'GBR',\n", + " 'Grenada': 'GRD',\n", + " 'Ghana': 'GHA',\n", + " 'Gibraltar': 'GBR',\n", + " 'Guinea': 'GIN',\n", + " 'The Gambia': 'GMB',\n", + " 'Guinea Bissau': 'GNB',\n", + " 'Greece': 'GRC',\n", + " 'Guatemala': 'GTM',\n", + " 'French Guyana': 'FRA',\n", + " 'Honduras': 'HND',\n", + " 'Croatia': 'HRV',\n", + " 'Indonesia': 'IDN',\n", + " 'Indonesia ': 'IDN',\n", + " 'India': 'IND',\n", + " 'Ireland': 'IRL',\n", + " 'Iceland': 'ISL',\n", + " 'Israel': 'ISR',\n", + " 'Italy': 'ITA',\n", + " 'Jamaica': 'JAM',\n", + " 'Jordan': 'JOR',\n", + " 'Japan': 'JPN',\n", + " 'Kenya': 'KEN',\n", + " 'Cambodia': 'KHM',\n", + " 'South Korea': 'KOR',\n", + " 'Cayman Islands': 'GBR',\n", + " 'Lebanon': 'LBN',\n", + " 'Liberia': 'LBR',\n", + " 'Saint Lucia': 'LCA',\n", + " 'Sri Lanka': 'LKA',\n", + 
" 'Lithuania': 'LTU',\n", + " 'Latvia': 'LVA',\n", + " 'Morocco': 'MAR',\n", + " 'Monaco': 'MCO',\n", + " 'Madagascar': 'MDG',\n", + " 'Republic of Maldives': 'MDV',\n", + " 'Malta': 'MLT',\n", + " 'Myanmar': 'MMR',\n", + " 'Mozambique': 'MOZ',\n", + " 'Mauritania': 'MRT',\n", + " 'Malaysia': 'MYS',\n", + " 'Namibia': 'NAM',\n", + " 'New Caledonia': 'FRA',\n", + " 'Niue': 'NIU',\n", + " 'The Netherlands': 'NLD',\n", + " 'Netherlands': 'NLD',\n", + " 'Norway': 'NOR',\n", + " 'New Zealand': 'NZL',\n", + " 'Panama': 'PAN',\n", + " 'British Overseas Territory - Pitcairn': 'GBR',\n", + " 'Peru': 'PER',\n", + " 'Philippines': 'PHL',\n", + " 'Republic of Palau': 'PLW',\n", + " 'Poland': 'POL',\n", + " 'Portugal': 'PRT',\n", + " 'Qatar': 'QAT',\n", + " 'Russia': 'RUS',\n", + " 'Senegal': 'SEN',\n", + " 'Saint Helena, Ascension and Tristan da Cunha Overseas Territory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n", + " 'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n", + " 'Solomon Islands': 'SLB',\n", + " 'El Salvador': 'SLV',\n", + " 'São Tomé and Príncipe': 'STP',\n", + " 'Suriname': 'SUR',\n", + " 'Slovenia': 'SVN',\n", + " 'Sweden': 'SWE',\n", + " 'Seychelles': 'SYC',\n", + " 'Turks and Caicos Islands': 'GBR',\n", + " 'Thailand': 'THA',\n", + " 'East Timor': 'TLS',\n", + " 'Tonga': 'TON',\n", + " 'Trinidad and Tobago': 'TTO',\n", + " 'Tunisia': 'TUN',\n", + " 'Tanzania': 'TZA',\n", + " 'Uruguay': 'URY',\n", + " 'Saint Vincent and the Grenadines': 'VCT',\n", + " 'Vietnam': 'VNM',\n", + " 'Yemen': 'YEM',\n", + " 'South Africa': 'ZAF',\n", + " 'USA; Haiti; Jamaica': 'USA;HTI;JAM',\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Add country code to the dataframe\n", + "def get_parent_iso(country):\n", + " return country_iso_dict.get(country, None)\n", + "\n", + "# Apply the function to 
create the 'parent_iso' column\n", + "ps_gdf['parent_iso'] = ps_gdf['country'].apply(get_parent_iso)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# There is one row with no parent_iso so let's give it the corresponding country code\n", + "ps_gdf.loc[ps_gdf['parent_iso'].isna(), 'parent_iso'] = 'FRA'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_isoFPS_cat
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...ATGhighly
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...ATGless
\n", + "
" + ], + "text/plain": [ + " site_id site_name country wdpa_id \\\n", + "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", + "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", + "\n", + " FPS total_area geometry \\\n", + "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", + "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", + "\n", + " parent_iso FPS_cat \n", + "0 ATG highly \n", + "1 ATG less " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reclassify FPS values\n", + "fps_classes = {\n", + " 1: 'less',\n", + " 2: 'less',\n", + " 3: 'moderately',\n", + " 4: 'highly',\n", + " 5: 'highly'\n", + "}\n", + "\n", + "# Create a new column 'FPS_cat' based on the mapping\n", + "ps_gdf['FPS_cat'] = ps_gdf['FPS'].apply(lambda x: fps_classes.get(x, None))\n", + "ps_gdf.head(2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "ps_gdf.to_file(path_out + \"protectedseas/protectedseas.shp\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global stats" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "global_area = ps_gdf.groupby(['FPS_cat'], as_index=False)['total_area'].sum().rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", + "global_area['location_id'] = 'GLOB'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country stats" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a mask for rows with multiple values in 'iso_code'\n", + "mask = ps_gdf['parent_iso'].str.contains(';', na=False)\n", + "\n", + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "split_rows = ps_gdf[mask].copy()\n", + 
"split_rows['parent_iso'] = split_rows['parent_iso'].str.split(';')\n", + "split_rows = split_rows.explode('parent_iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = ps_gdf[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "ps_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", 
+ " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 
'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region ISO code\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + "\n", + "# Create a new column 'region' based on the mapping\n", + "ps_iso['region'] = ps_iso['parent_iso'].map(country_to_region)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "country_area = ps_iso.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()\n", + "country_area = country_area.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "region_area = ps_iso.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()\n", + "region_area = region_area.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "ps_coverage = pd.concat([country_area, region_area, global_area], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "ps_coverage.to_csv(path_out + '/tables/fishing_protection_level.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}
diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb index 8b38f3bf..921a2d53 100644 --- a/data/notebooks/wdpa_coverage.ipynb +++ b/data/notebooks/wdpa_coverage.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,12 +40,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\"" + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" ] }, { @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -71,9 +71,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6033\n", + "172\n", + "6033\n", + "172\n", + "6033\n", + "171\n" + ] + } + ], "source": [ "print(len(poly1))\n", "print(len(point1))\n", @@ -95,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -114,15 +127,27 @@ " df = df[(df['REP_AREA'] != 0)]\n", " \n", " # Update the original dataframes in the list\n", - " dataframes[i] = df\n", - "\n" + " dataframes[i] = df" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5999\n", + "157\n", + "6018\n", + "123\n", + "6014\n", + "135\n" + ] + } + ], "source": [ "print(len(dataframes[0]))\n", "print(len(dataframes[1]))\n", @@ -141,7 +166,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -181,14 +206,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\" and those that are OECM" + "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All gdf have the same crs: EPSG:4326\n" + ] + } + ], "source": [ "# Check that all of them have the same crs\n", "first_crs = dataframes[0].crs\n", @@ -199,6 +232,28 @@ " print(\"gdf have different crs\")" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18445" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge dataframes\n", + "merged_mpa = pd.concat(dataframes)\n", + "len(merged_mpa)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -211,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -503,58 +558,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Countries per PARENT_ISO**" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "p2023 = gpd.read_file(path_out + \"/wdpa/timeseries/protected_dissolved_2023.shp\")" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ATG', 'BRB', 'BRA', 'GBR', 'CHL', 'COL', 'CRI', 'DOM', 'ECU',\n", - " 'JAM', 'NLD', 'PER', 'PAN', 'SUR', 'VEN', 'USA', 'AUS', 'CAN',\n", - " 'FRA', 'IRN', 'JPN', 'KEN', 'KOR', 'MYS', 'MRT', 
'MOZ', 'NOR',\n", - " 'PHL', 'POL', 'SAU', 'SEN', 'SWE', 'THA', 'TUN', 'CMR', 'IDN',\n", - " 'MUS', 'PRT', 'SYC', 'ISL', 'NZL', 'EST', 'GEO', 'UKR', 'MEX',\n", - " 'BHS', 'BLZ', 'GMB', 'MDG', 'HRV', 'FJI', 'LKA', 'ARG', 'ZAF',\n", - " 'PNG', 'TON', 'PLW', 'COK', 'BGD', 'AGO', 'ALB', 'DNK', 'ITA',\n", - " 'PAK', 'FIN', 'VNM', 'MMR', 'CHN', 'SGP', 'DEU', 'ROU', 'EGY',\n", - " 'SLB', 'VUT', 'BGR', 'MAR', 'MLT', 'DMA', 'LCA', 'OMN', 'GTM',\n", - " 'NIC', 'TTO', 'WSM', 'TZA', 'GRC', 'LBN', 'CUB', 'ISR', 'GRD',\n", - " 'VCT', 'BRN', 'ESP', 'JOR', 'ARE', 'HND', 'GNQ', 'KNA', 'LTU',\n", - " 'GNB', 'NGA', 'LVA', 'GUY', 'KAZ', 'BEL', 'GIN', 'IRL', 'RUS',\n", - " 'KHM', 'QAT', 'GAB', 'MDV', 'AZE', 'NAM', 'TUR', 'CPV', 'COG',\n", - " 'TUV', 'MCO', 'TKM', 'SVN', 'SLE', 'KIR', 'COM', 'NIU', 'FSM',\n", - " 'GHA', 'IOT', 'IND', 'LBR', 'CIV', 'SDN', 'SHN', 'SJM', 'UMI',\n", - " 'ATA', 'SYR', 'TLS', 'FRA;ITA;MCO', 'URY', 'ABNJ', 'NLD;DEU;DNK',\n", - " 'FIN;SWE', 'MHL', 'SLV', 'DZA', 'STP', 'YEM', 'COD', 'CYP', 'KWT',\n", - " 'HTI', 'MNE', 'BHR', 'LBY'], dtype=object)" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p2023['PARENT_ISO'].unique()" + "### Global and country stats" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -564,215 +573,55 @@ "# Create an empty list to store the results\n", "results_list = []\n", "\n", + "# Create a DataFrame to store the global coverage\n", + "global_coverage = pd.DataFrame(columns=['year', 'protection_type', 'location_id', 'cumsum_area'])\n", + "\n", "for year in years_range:\n", " filename = f'protected_dissolved_{year}.shp'\n", " file_path = os.path.join(folder_path, filename)\n", - " \n", + "\n", " if os.path.exists(file_path):\n", " gdf = gpd.read_file(file_path)\n", - " grouped = gdf.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n", - " \n", - " # Create columns\n", - " grouped['year'] = year\n", - " 
grouped['protection_type'] = 'MPA+OECM'\n", - " grouped.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n", - " \n", - " # Append the result to the list\n", - " results_list.append(grouped)\n", "\n", - "# Concatenate the list of results into a single DataFrame\n", - "final_df = pd.concat(results_list, ignore_index=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# Convert area to km2\n", - "final_df['cumsum_area'] =final_df['cumsum_area']/1000000" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Global**" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idcumsum_areayearprotection_type
0ABNJ594174.662000MPA+OECM
1AGO0.422000MPA+OECM
2ALB103.052000MPA+OECM
3ARE78.522000MPA+OECM
4ARG6155.672000MPA+OECM
...............
3571GLOB28125365.962019MPA+OECM
3572GLOB29624663.842020MPA+OECM
3573GLOB29739178.772021MPA+OECM
3574GLOB29910678.772022MPA+OECM
3575GLOB29910724.212023MPA+OECM
\n", - "

3576 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " location_id cumsum_area year protection_type\n", - "0 ABNJ 594174.66 2000 MPA+OECM\n", - "1 AGO 0.42 2000 MPA+OECM\n", - "2 ALB 103.05 2000 MPA+OECM\n", - "3 ARE 78.52 2000 MPA+OECM\n", - "4 ARG 6155.67 2000 MPA+OECM\n", - "... ... ... ... ...\n", - "3571 GLOB 28125365.96 2019 MPA+OECM\n", - "3572 GLOB 29624663.84 2020 MPA+OECM\n", - "3573 GLOB 29739178.77 2021 MPA+OECM\n", - "3574 GLOB 29910678.77 2022 MPA+OECM\n", - "3575 GLOB 29910724.21 2023 MPA+OECM\n", - "\n", - "[3576 rows x 4 columns]" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global per year and append it\n", - "glob_df = final_df.groupby(['year', 'protection_type'])['cumsum_area'].sum().reset_index()\n", + " # Calculate global coverage for each year and protection type\n", + " global_area = gdf['AREA'].sum()\n", + " global_row = pd.DataFrame({'year': [year], 'protection_type': ['MPA+OECM'], 'location_id': ['GLOB'], 'cumsum_area': [global_area]})\n", + " global_coverage = pd.concat([global_coverage, global_row], ignore_index=True)\n", "\n", - "glob_df['location_id'] = 'GLOB'\n", + " # Split rows with multiple ISO codes into separate rows\n", + " processed_df = gdf.copy()\n", + " processed_df['PARENT_ISO'] = processed_df['PARENT_ISO'].str.split(';')\n", + " processed_df = processed_df.explode('PARENT_ISO')\n", "\n", - "final_df2 = pd.concat([final_df, glob_df], ignore_index=True)\n", - "final_df2" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "# Reorder the columns and add column last updated\n", - "final_df2 = final_df2[['location_id', 'year', 'protection_type', 'cumsum_area']]\n", + " # Group by 'PARENT_ISO' and aggregate area\n", + " iso_area = processed_df.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n", "\n", - "current_date = datetime.now().strftime('%Y-%m-%d')\n", + " # Create columns to match BE table\n", + " 
iso_area['year'] = year\n", + " iso_area['protection_type'] = 'MPA+OECM'\n", + " iso_area.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n", "\n", - "final_df2 = final_df2.copy()\n", - "final_df2['last_updated'] = current_date" + " # Append the result to the list\n", + " results_list.append(iso_area)\n", + "\n", + "# Concatenate the list of results into a single DataFrame and convert area to sq.km\n", + "final_df = pd.concat(results_list, ignore_index=True)\n", + "final_df['cumsum_area'] = final_df['cumsum_area'] / 1000000\n", + "\n", + "# Append global coverage to the final_df\n", + "final_df = pd.concat([final_df, global_coverage], ignore_index=True)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Regions**" + "### Regional stats" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -834,16 +683,18 @@ " ]\n", " },\n", " {\n", - " 'region_iso': 'GL',\n", - " 'region_name': 'Global',\n", - " 'country_iso_3s': []\n", - " },\n", - " {\n", " 'region_iso': 'WA',\n", " 'region_name': 'West Asia',\n", " 'country_iso_3s': [\n", " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", " }\n", "]\n", "\n", @@ -851,12 +702,12 @@ "country_to_region = {}\n", "for region in regions_data:\n", " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_name']" + " country_to_region[country] = region['region_iso']" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -881,52 +732,46 @@ " \n", " \n", " location_id\n", - " cumsum_area\n", " year\n", " protection_type\n", - " region\n", + " cumsum_area\n", " \n", " 
\n", " \n", " \n", " 0\n", - " ABNJ\n", - " 594174.66\n", + " AF\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 94507.122820\n", " \n", " \n", " 1\n", - " AGO\n", - " 0.42\n", - " 2000\n", + " AF\n", + " 2001\n", " MPA+OECM\n", - " Africa\n", + " 94807.303100\n", " \n", " \n", " 2\n", - " ALB\n", - " 103.05\n", - " 2000\n", + " AF\n", + " 2002\n", " MPA+OECM\n", - " Europe\n", + " 102859.393938\n", " \n", " \n", " 3\n", - " ARE\n", - " 78.52\n", - " 2000\n", + " AF\n", + " 2003\n", " MPA+OECM\n", - " West Asia\n", + " 111143.352991\n", " \n", " \n", " 4\n", - " ARG\n", - " 6155.67\n", - " 2000\n", + " AF\n", + " 2004\n", " MPA+OECM\n", - " Latin America & Caribbean\n", + " 119137.635862\n", " \n", " \n", " ...\n", @@ -934,83 +779,139 @@ " ...\n", " ...\n", " ...\n", - " ...\n", " \n", " \n", - " 3547\n", - " VNM\n", - " 5036.97\n", - " 2023\n", + " 163\n", + " WA\n", + " 2019\n", " MPA+OECM\n", - " Asia & Pacific\n", + " 30618.254664\n", " \n", " \n", - " 3548\n", - " VUT\n", - " 83.83\n", - " 2023\n", + " 164\n", + " WA\n", + " 2020\n", " MPA+OECM\n", - " Asia & Pacific\n", + " 30624.636536\n", " \n", " \n", - " 3549\n", - " WSM\n", - " 199.59\n", - " 2023\n", + " 165\n", + " WA\n", + " 2021\n", " MPA+OECM\n", - " Asia & Pacific\n", + " 30624.636536\n", " \n", " \n", - " 3550\n", - " YEM\n", - " 4108.19\n", - " 2023\n", + " 166\n", + " WA\n", + " 2022\n", " MPA+OECM\n", - " West Asia\n", + " 31779.597984\n", " \n", " \n", - " 3551\n", - " ZAF\n", - " 242387.88\n", + " 167\n", + " WA\n", " 2023\n", " MPA+OECM\n", - " Africa\n", + " 31779.597984\n", " \n", " \n", "\n", - "

3552 rows × 5 columns

\n", + "

168 rows × 4 columns

\n", "" ], "text/plain": [ - " location_id cumsum_area year protection_type region\n", - "0 ABNJ 594174.66 2000 MPA+OECM NaN\n", - "1 AGO 0.42 2000 MPA+OECM Africa\n", - "2 ALB 103.05 2000 MPA+OECM Europe\n", - "3 ARE 78.52 2000 MPA+OECM West Asia\n", - "4 ARG 6155.67 2000 MPA+OECM Latin America & Caribbean\n", - "... ... ... ... ... ...\n", - "3547 VNM 5036.97 2023 MPA+OECM Asia & Pacific\n", - "3548 VUT 83.83 2023 MPA+OECM Asia & Pacific\n", - "3549 WSM 199.59 2023 MPA+OECM Asia & Pacific\n", - "3550 YEM 4108.19 2023 MPA+OECM West Asia\n", - "3551 ZAF 242387.88 2023 MPA+OECM Africa\n", + " location_id year protection_type cumsum_area\n", + "0 AF 2000 MPA+OECM 94507.122820\n", + "1 AF 2001 MPA+OECM 94807.303100\n", + "2 AF 2002 MPA+OECM 102859.393938\n", + "3 AF 2003 MPA+OECM 111143.352991\n", + "4 AF 2004 MPA+OECM 119137.635862\n", + ".. ... ... ... ...\n", + "163 WA 2019 MPA+OECM 30618.254664\n", + "164 WA 2020 MPA+OECM 30624.636536\n", + "165 WA 2021 MPA+OECM 30624.636536\n", + "166 WA 2022 MPA+OECM 31779.597984\n", + "167 WA 2023 MPA+OECM 31779.597984\n", "\n", - "[3552 rows x 5 columns]" + "[168 rows x 4 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regions = final_df.copy()\n", + "regions['location_id'] = regions['location_id'].map(country_to_region)\n", + "\n", + "# group by region and year to get sum of cumsum_area\n", + "regions = regions.groupby(['location_id', 'year', 'protection_type'])['cumsum_area'].sum().reset_index()\n", + "regions" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)" ] }, - "execution_count": 48, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df['region'] = final_df['location_id'].map(country_to_region)\n", - "final_df" + "regions['location_id'].unique()" ] }, { 
"cell_type": "code", - "execution_count": 49, + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n", + " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n", + " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n", + " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n", + " 'FIN', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB',\n", + " 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN', 'IRL',\n", + " 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN', 'KHM',\n", + " 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU', 'LVA',\n", + " 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE',\n", + " 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU', 'NLD',\n", + " 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG',\n", + " 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE',\n", + " 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS', 'TON',\n", + " 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT', 'VEN',\n", + " 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n", + " 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n", + " 'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n", + " 'AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df2 = pd.concat([final_df, regions], ignore_index=True)\n", + "final_df2['location_id'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1038,49 +939,49 @@ " cumsum_area\n", " year\n", " protection_type\n", - " region\n", + " last_updated\n", " \n", " \n", " \n", " \n", " 0\n", " ABNJ\n", - " 594174.66\n", + " 594174.659985\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 
5\n", - " ATA\n", - " 3594.42\n", + " 1\n", + " AGO\n", + " 0.415240\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 37\n", - " FIN;SWE\n", - " 3541.14\n", + " 2\n", + " ALB\n", + " 103.048347\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 131\n", - " ABNJ\n", - " 594174.66\n", - " 2001\n", + " 3\n", + " ARE\n", + " 78.516519\n", + " 2000\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 136\n", - " ATA\n", - " 3594.42\n", - " 2001\n", + " 4\n", + " ARG\n", + " 6155.668078\n", + " 2000\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", " ...\n", @@ -1091,116 +992,83 @@ " ...\n", " \n", " \n", - " 3397\n", - " ABNJ\n", - " 2811451.69\n", - " 2023\n", + " 3677\n", + " WA\n", + " 30618.254664\n", + " 2019\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 3402\n", - " ATA\n", - " 3570.36\n", - " 2023\n", + " 3678\n", + " WA\n", + " 30624.636536\n", + " 2020\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 3439\n", - " FIN;SWE\n", - " 3541.14\n", - " 2023\n", + " 3679\n", + " WA\n", + " 30624.636536\n", + " 2021\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 3442\n", - " FRA;ITA;MCO\n", - " 87742.14\n", - " 2023\n", + " 3680\n", + " WA\n", + " 31779.597984\n", + " 2022\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", - " 3502\n", - " NLD;DEU;DNK\n", - " 11550.01\n", + " 3681\n", + " WA\n", + " 31779.597984\n", " 2023\n", " MPA+OECM\n", - " NaN\n", + " 2023-10-18\n", " \n", " \n", "\n", - "

110 rows × 5 columns

\n", + "

3682 rows × 5 columns

\n", "" ], "text/plain": [ - " location_id cumsum_area year protection_type region\n", - "0 ABNJ 594174.66 2000 MPA+OECM NaN\n", - "5 ATA 3594.42 2000 MPA+OECM NaN\n", - "37 FIN;SWE 3541.14 2000 MPA+OECM NaN\n", - "131 ABNJ 594174.66 2001 MPA+OECM NaN\n", - "136 ATA 3594.42 2001 MPA+OECM NaN\n", - "... ... ... ... ... ...\n", - "3397 ABNJ 2811451.69 2023 MPA+OECM NaN\n", - "3402 ATA 3570.36 2023 MPA+OECM NaN\n", - "3439 FIN;SWE 3541.14 2023 MPA+OECM NaN\n", - "3442 FRA;ITA;MCO 87742.14 2023 MPA+OECM NaN\n", - "3502 NLD;DEU;DNK 11550.01 2023 MPA+OECM NaN\n", + " location_id cumsum_area year protection_type last_updated\n", + "0 ABNJ 594174.659985 2000 MPA+OECM 2023-10-18\n", + "1 AGO 0.415240 2000 MPA+OECM 2023-10-18\n", + "2 ALB 103.048347 2000 MPA+OECM 2023-10-18\n", + "3 ARE 78.516519 2000 MPA+OECM 2023-10-18\n", + "4 ARG 6155.668078 2000 MPA+OECM 2023-10-18\n", + "... ... ... ... ... ...\n", + "3677 WA 30618.254664 2019 MPA+OECM 2023-10-18\n", + "3678 WA 30624.636536 2020 MPA+OECM 2023-10-18\n", + "3679 WA 30624.636536 2021 MPA+OECM 2023-10-18\n", + "3680 WA 31779.597984 2022 MPA+OECM 2023-10-18\n", + "3681 WA 31779.597984 2023 MPA+OECM 2023-10-18\n", "\n", - "[110 rows x 5 columns]" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df[final_df.region.isnull()]" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n", - " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n", - " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n", - " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n", - " 'FIN', 'FIN;SWE', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN',\n", - " 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN',\n", - " 'IRL', 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN',\n", - " 
'KHM', 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU',\n", - " 'LVA', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',\n", - " 'MNE', 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU',\n", - " 'NLD', 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW',\n", - " 'PNG', 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB',\n", - " 'SLE', 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS',\n", - " 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT',\n", - " 'VEN', 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'FRA;ITA;MCO', 'IOT',\n", - " 'GAB', 'IND', 'SGP', 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP',\n", - " 'SHN', 'YEM', 'NLD;DEU;DNK', 'URY', 'CMR', 'COM', 'KWT', 'SJM',\n", - " 'GUY', 'UMI', 'HTI', 'JOR'], dtype=object)" + "[3682 rows x 5 columns]" ] }, - "execution_count": 55, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df['location_id'].unique() " + "current_date = datetime.now().strftime('%Y-%m-%d')\n", + "\n", + "final_df2 = final_df2.copy()\n", + "final_df2['last_updated'] = current_date\n", + "final_df2" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [