diff --git a/data/notebooks/MPAtlas_table.ipynb b/data/notebooks/MPAtlas_table.ipynb
deleted file mode 100644
index 40a2613b..00000000
--- a/data/notebooks/MPAtlas_table.ipynb
+++ /dev/null
@@ -1,717 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Set up"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import geopandas as gpd\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
-    "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed/mpatlas\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### MPAtlas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Read data\n",
-    "mpatlas = gpd.read_file(path_in + \"/MPAtlas_largest100.shp\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['OBJECTID', 'WDPAID', 'WDPA_PID', 'NAME', 'English_De', 'PARENT_ISO',\n",
-       "       'ISO3', 'MPA_Marine', 'mpa_id', 'Zone_Marin', 'IUCN_Cat', 'Stage_of_E',\n",
-       "       'Distant_MP', 'Level_of_P', 'Most_Impac', 'Descrip_Im', 'Vertically',\n",
-       "       'SHAPE_Leng', 'SHAPE_Area', 'geometry'],\n",
-       "      dtype='object')"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mpatlas.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array(['Incompatible', 'Highly', 'TBD', 'Fully', 'Lightly', 'Unknown'],\n",
-       "      dtype=object)"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mpatlas.Level_of_P.unique()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>OBJECTID</th>\n",
-       "      <th>WDPAID</th>\n",
-       "      <th>WDPA_PID</th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>English_De</th>\n",
-       "      <th>PARENT_ISO</th>\n",
-       "      <th>ISO3</th>\n",
-       "      <th>MPA_Marine</th>\n",
-       "      <th>mpa_id</th>\n",
-       "      <th>Zone_Marin</th>\n",
-       "      <th>...</th>\n",
-       "      <th>Stage_of_E</th>\n",
-       "      <th>Distant_MP</th>\n",
-       "      <th>Level_of_P</th>\n",
-       "      <th>Most_Impac</th>\n",
-       "      <th>Descrip_Im</th>\n",
-       "      <th>Vertically</th>\n",
-       "      <th>SHAPE_Leng</th>\n",
-       "      <th>SHAPE_Area</th>\n",
-       "      <th>geometry</th>\n",
-       "      <th>P_LEVEL</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>478053.0</td>\n",
-       "      <td>478053</td>\n",
-       "      <td>Hikurangi Deep</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>54022.1</td>\n",
-       "      <td>5258</td>\n",
-       "      <td>54022.1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>Implemented</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Incompatible</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Benthic protections only. Deep sea mining allo...</td>\n",
-       "      <td>X</td>\n",
-       "      <td>12.332952</td>\n",
-       "      <td>5.833001</td>\n",
-       "      <td>POLYGON ((-175.00000 -42.16661, -175.00000 -42...</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2.0</td>\n",
-       "      <td>555512062.0</td>\n",
-       "      <td>555512062</td>\n",
-       "      <td>Kermadec</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>619146.0</td>\n",
-       "      <td>5428</td>\n",
-       "      <td>458540.5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>Implemented</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Incompatible</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Benthic protections only. Deep sea mining allo...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>25.629352</td>\n",
-       "      <td>42.963159</td>\n",
-       "      <td>POLYGON ((-174.02370 -29.22191, -174.02370 -29...</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2 rows × 21 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   OBJECTID       WDPAID   WDPA_PID            NAME               English_De  \\\n",
-       "0       1.0     478053.0     478053  Hikurangi Deep  Benthic Protection Area   \n",
-       "1       2.0  555512062.0  555512062        Kermadec  Benthic Protection Area   \n",
-       "\n",
-       "  PARENT_ISO ISO3  MPA_Marine mpa_id  Zone_Marin  ...   Stage_of_E Distant_MP  \\\n",
-       "0        NZL  NZL     54022.1   5258     54022.1  ...  Implemented        NaN   \n",
-       "1        NZL  NZL    619146.0   5428    458540.5  ...  Implemented        NaN   \n",
-       "\n",
-       "     Level_of_P       Most_Impac  \\\n",
-       "0  Incompatible  Mining, Fishing   \n",
-       "1  Incompatible  Mining, Fishing   \n",
-       "\n",
-       "                                          Descrip_Im Vertically SHAPE_Leng  \\\n",
-       "0  Benthic protections only. Deep sea mining allo...          X  12.332952   \n",
-       "1  Benthic protections only. Deep sea mining allo...        NaN  25.629352   \n",
-       "\n",
-       "   SHAPE_Area                                           geometry  \\\n",
-       "0    5.833001  POLYGON ((-175.00000 -42.16661, -175.00000 -42...   \n",
-       "1   42.963159  POLYGON ((-174.02370 -29.22191, -174.02370 -29...   \n",
-       "\n",
-       "                    P_LEVEL  \n",
-       "0  Less Protected / Unknown  \n",
-       "1  Less Protected / Unknown  \n",
-       "\n",
-       "[2 rows x 21 columns]"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Create new column with protection level reclassified\n",
-    "def map_protection_level(value):\n",
-    "    if value in [\"Fully\", \"Highly\"]:\n",
-    "        return \"Fully / Highly Protected\"\n",
-    "    else:\n",
-    "        return \"Less Protected / Unknown\"\n",
-    "\n",
-    "# Create a new column based on column1\n",
-    "mpatlas['P_LEVEL'] = mpatlas['Level_of_P'].apply(map_protection_level)\n",
-    "mpatlas.head(2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
-    "regions_data = [\n",
-    "    {\n",
-    "        'region_iso': 'AS',\n",
-    "        'region_name': 'Asia & Pacific',\n",
-    "        'country_iso_3s': [\n",
-    "            \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
-    "            \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
-    "            \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
-    "            \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
-    "        ]\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'AF',\n",
-    "        'region_name': 'Africa',\n",
-    "        'country_iso_3s': [\n",
-    "            \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
-    "            \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
-    "            \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
-    "            \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
-    "        ]\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'EU',\n",
-    "        'region_name': 'Europe',\n",
-    "        'country_iso_3s': [\n",
-    "            \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
-    "            \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
-    "            \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
-    "            \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
-    "            \"UZB\", \"VAT\"\n",
-    "        ]\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'SA',\n",
-    "        'region_name': 'Latin America & Caribbean',\n",
-    "        'country_iso_3s': [\n",
-    "            \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
-    "            \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
-    "            \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
-    "            \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
-    "        ]\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'PO',\n",
-    "        'region_name': 'Polar',\n",
-    "        'country_iso_3s': [\n",
-    "            \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
-    "        ]\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'NA',\n",
-    "        'region_name': 'North America',\n",
-    "        'country_iso_3s': [\n",
-    "            \"CAN\", \"SPM\", \"USA\"\n",
-    "        ]\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'GL',\n",
-    "        'region_name': 'Global',\n",
-    "        'country_iso_3s': []\n",
-    "    },\n",
-    "    {\n",
-    "        'region_iso': 'WA',\n",
-    "        'region_name': 'West Asia',\n",
-    "        'country_iso_3s': [\n",
-    "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
-    "        ]\n",
-    "    }\n",
-    "]\n",
-    "\n",
-    "# Convert the region data to a dictionary that maps each country to its region name\n",
-    "country_to_region = {}\n",
-    "for region in regions_data:\n",
-    "    for country in region['country_iso_3s']:\n",
-    "        country_to_region[country] = region['region_name']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>OBJECTID</th>\n",
-       "      <th>WDPAID</th>\n",
-       "      <th>WDPA_PID</th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>English_De</th>\n",
-       "      <th>PARENT_ISO</th>\n",
-       "      <th>ISO3</th>\n",
-       "      <th>MPA_Marine</th>\n",
-       "      <th>mpa_id</th>\n",
-       "      <th>Zone_Marin</th>\n",
-       "      <th>...</th>\n",
-       "      <th>Distant_MP</th>\n",
-       "      <th>Level_of_P</th>\n",
-       "      <th>Most_Impac</th>\n",
-       "      <th>Descrip_Im</th>\n",
-       "      <th>Vertically</th>\n",
-       "      <th>SHAPE_Leng</th>\n",
-       "      <th>SHAPE_Area</th>\n",
-       "      <th>geometry</th>\n",
-       "      <th>P_LEVEL</th>\n",
-       "      <th>REGIONS</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>478053.0</td>\n",
-       "      <td>478053</td>\n",
-       "      <td>Hikurangi Deep</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>54022.1</td>\n",
-       "      <td>5258</td>\n",
-       "      <td>54022.1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Incompatible</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Benthic protections only. Deep sea mining allo...</td>\n",
-       "      <td>X</td>\n",
-       "      <td>12.332952</td>\n",
-       "      <td>5.833001</td>\n",
-       "      <td>POLYGON ((-175.00000 -42.16661, -175.00000 -42...</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2.0</td>\n",
-       "      <td>555512062.0</td>\n",
-       "      <td>555512062</td>\n",
-       "      <td>Kermadec</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>619146.0</td>\n",
-       "      <td>5428</td>\n",
-       "      <td>458540.5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Incompatible</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Benthic protections only. Deep sea mining allo...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>25.629352</td>\n",
-       "      <td>42.963159</td>\n",
-       "      <td>POLYGON ((-174.02370 -29.22191, -174.02370 -29...</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2 rows × 22 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   OBJECTID       WDPAID   WDPA_PID            NAME               English_De  \\\n",
-       "0       1.0     478053.0     478053  Hikurangi Deep  Benthic Protection Area   \n",
-       "1       2.0  555512062.0  555512062        Kermadec  Benthic Protection Area   \n",
-       "\n",
-       "  PARENT_ISO ISO3  MPA_Marine mpa_id  Zone_Marin  ... Distant_MP  \\\n",
-       "0        NZL  NZL     54022.1   5258     54022.1  ...        NaN   \n",
-       "1        NZL  NZL    619146.0   5428    458540.5  ...        NaN   \n",
-       "\n",
-       "     Level_of_P       Most_Impac  \\\n",
-       "0  Incompatible  Mining, Fishing   \n",
-       "1  Incompatible  Mining, Fishing   \n",
-       "\n",
-       "                                          Descrip_Im Vertically SHAPE_Leng  \\\n",
-       "0  Benthic protections only. Deep sea mining allo...          X  12.332952   \n",
-       "1  Benthic protections only. Deep sea mining allo...        NaN  25.629352   \n",
-       "\n",
-       "  SHAPE_Area                                           geometry  \\\n",
-       "0   5.833001  POLYGON ((-175.00000 -42.16661, -175.00000 -42...   \n",
-       "1  42.963159  POLYGON ((-174.02370 -29.22191, -174.02370 -29...   \n",
-       "\n",
-       "                    P_LEVEL         REGIONS  \n",
-       "0  Less Protected / Unknown  Asia & Pacific  \n",
-       "1  Less Protected / Unknown  Asia & Pacific  \n",
-       "\n",
-       "[2 rows x 22 columns]"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mpatlas['REGIONS'] = mpatlas['ISO3'].map(country_to_region)\n",
-    "mpatlas.head(2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>WDPAID</th>\n",
-       "      <th>WDPA_PID</th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>AREA_MPATLAS</th>\n",
-       "      <th>DESIG_ENG</th>\n",
-       "      <th>ESTABLISHMENT</th>\n",
-       "      <th>IMPACT</th>\n",
-       "      <th>P_LEVEL</th>\n",
-       "      <th>PARENT_ISO</th>\n",
-       "      <th>ISO3</th>\n",
-       "      <th>REGIONS</th>\n",
-       "      <th>geometry</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>478053.0</td>\n",
-       "      <td>478053</td>\n",
-       "      <td>Hikurangi Deep</td>\n",
-       "      <td>54022.1</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>Implemented</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
-       "      <td>POLYGON ((-175.00000 -42.16661, -175.00000 -42...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>555512062.0</td>\n",
-       "      <td>555512062</td>\n",
-       "      <td>Kermadec</td>\n",
-       "      <td>458540.5</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>Implemented</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
-       "      <td>POLYGON ((-174.02370 -29.22191, -174.02370 -29...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        WDPAID   WDPA_PID            NAME  AREA_MPATLAS  \\\n",
-       "0     478053.0     478053  Hikurangi Deep       54022.1   \n",
-       "1  555512062.0  555512062        Kermadec      458540.5   \n",
-       "\n",
-       "                 DESIG_ENG ESTABLISHMENT           IMPACT  \\\n",
-       "0  Benthic Protection Area   Implemented  Mining, Fishing   \n",
-       "1  Benthic Protection Area   Implemented  Mining, Fishing   \n",
-       "\n",
-       "                    P_LEVEL PARENT_ISO ISO3         REGIONS  \\\n",
-       "0  Less Protected / Unknown        NZL  NZL  Asia & Pacific   \n",
-       "1  Less Protected / Unknown        NZL  NZL  Asia & Pacific   \n",
-       "\n",
-       "                                            geometry  \n",
-       "0  POLYGON ((-175.00000 -42.16661, -175.00000 -42...  \n",
-       "1  POLYGON ((-174.02370 -29.22191, -174.02370 -29...  "
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Rename columns and keep only relevant ones. \n",
-    "# Note: We keep \"Zone_Marine\" (area of the geometry), instead of \"MPA_Marine\" (as MPAs can be divided in smaller pieces according to their protection levels)\n",
-    "\n",
-    "mpatlas = mpatlas.rename(columns={'English_De': 'DESIG_ENG', 'Zone_Marin': 'AREA_MPATLAS', 'Stage_of_E': 'ESTABLISHMENT', 'Most_Impac': 'IMPACT' }) \n",
-    "mpatlas2 = mpatlas[['WDPAID', 'WDPA_PID', 'NAME', 'AREA_MPATLAS', 'DESIG_ENG', 'ESTABLISHMENT', 'IMPACT', 'P_LEVEL', 'PARENT_ISO', 'ISO3','REGIONS', 'geometry']]\n",
-    "mpatlas2.head(2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_25742/67511564.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
-      "  mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")\n"
-     ]
-    }
-   ],
-   "source": [
-    "mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>WDPAID</th>\n",
-       "      <th>WDPA_PID</th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>AREA_MPATL</th>\n",
-       "      <th>DESIG_ENG</th>\n",
-       "      <th>ESTABLISHM</th>\n",
-       "      <th>IMPACT</th>\n",
-       "      <th>P_LEVEL</th>\n",
-       "      <th>PARENT_ISO</th>\n",
-       "      <th>ISO3</th>\n",
-       "      <th>REGIONS</th>\n",
-       "      <th>geometry</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>478053.0</td>\n",
-       "      <td>478053</td>\n",
-       "      <td>Hikurangi Deep</td>\n",
-       "      <td>54022.1</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>Implemented</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
-       "      <td>POLYGON ((-175.00000 -42.16661, -175.00000 -42...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>555512062.0</td>\n",
-       "      <td>555512062</td>\n",
-       "      <td>Kermadec</td>\n",
-       "      <td>458540.5</td>\n",
-       "      <td>Benthic Protection Area</td>\n",
-       "      <td>Implemented</td>\n",
-       "      <td>Mining, Fishing</td>\n",
-       "      <td>Less Protected / Unknown</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
-       "      <td>POLYGON ((-174.02370 -29.22191, -174.02370 -29...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        WDPAID   WDPA_PID            NAME  AREA_MPATL  \\\n",
-       "0     478053.0     478053  Hikurangi Deep     54022.1   \n",
-       "1  555512062.0  555512062        Kermadec    458540.5   \n",
-       "\n",
-       "                 DESIG_ENG   ESTABLISHM           IMPACT  \\\n",
-       "0  Benthic Protection Area  Implemented  Mining, Fishing   \n",
-       "1  Benthic Protection Area  Implemented  Mining, Fishing   \n",
-       "\n",
-       "                    P_LEVEL PARENT_ISO ISO3         REGIONS  \\\n",
-       "0  Less Protected / Unknown        NZL  NZL  Asia & Pacific   \n",
-       "1  Less Protected / Unknown        NZL  NZL  Asia & Pacific   \n",
-       "\n",
-       "                                            geometry  \n",
-       "0  POLYGON ((-175.00000 -42.16661, -175.00000 -42...  \n",
-       "1  POLYGON ((-174.02370 -29.22191, -174.02370 -29...  "
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mpatlas = gpd.read_file(path_out + \"/mpatlas_table.shp\")\n",
-    "mpatlas.head(2)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "skytruth",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/data/notebooks/habitats.ipynb b/data/notebooks/habitats.ipynb
new file mode 100644
index 00000000..1185d367
--- /dev/null
+++ b/data/notebooks/habitats.ipynb
@@ -0,0 +1,2003 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Set up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting openpyxl\n",
+      "  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.0/250.0 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hCollecting et-xmlfile (from openpyxl)\n",
+      "  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n",
+      "Installing collected packages: et-xmlfile, openpyxl\n",
+      "Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2\n"
+     ]
+    }
+   ],
+   "source": [
+    "# %pip installs into the environment of the running kernel, which a bare !pip does not guarantee.\n",
+    "# The version is pinned to the one recorded in this cell's output.\n",
+    "%pip install openpyxl==3.1.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "\n",
+    "import geopandas as gpd\n",
+    "import openpyxl\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Root of the data folder. It defaults to the original local checkout; set the SKYTRUTH_DATA_DIR\n",
+    "# environment variable to run the notebook on another machine without editing this cell.\n",
+    "data_dir = os.environ.get(\"SKYTRUTH_DATA_DIR\", \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data\")\n",
+    "\n",
+    "# Both paths end with a separator because later cells append file names with plain string concatenation.\n",
+    "path_in = os.path.join(data_dir, \"raw\", \"\")\n",
+    "path_out = os.path.join(data_dir, \"processed\", \"\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the four Ocean+ habitat CSVs and the global statistics workbook\n",
+    "cold, salt, sea, warm = (\n",
+    "    pd.read_csv(path_in + csv_path)\n",
+    "    for csv_path in (\n",
+    "        \"Ocean+HabitatsDownload_Global/coldwatercorals.csv\",\n",
+    "        \"Ocean+HabitatsDownload_Global/saltmarshes.csv\",\n",
+    "        \"Ocean+HabitatsDownload_Global/seagrasses.csv\",\n",
+    "        \"Ocean+HabitatsDownload_Global/warmwatercorals.csv\",\n",
+    "    )\n",
+    ")\n",
+    "glob = pd.read_excel(path_in + \"Ocean+HabitatsDownload_Global/global-stats.xlsx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep the country code and the two area columns; the code column is renamed to 'location_id'\n",
+    "wanted_cols = ['ISO3', 'protected_area', 'total_area']\n",
+    "cold2, salt2, sea2, warm2 = (\n",
+    "    table[wanted_cols].rename(columns={'ISO3': 'location_id'})\n",
+    "    for table in (cold, salt, sea, warm)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Parse both area columns as numbers. Placeholder entries such as '-' become NaN and those rows are dropped.\n",
+    "# Numeric columns are needed so that the per-location sums computed below add values instead of joining strings,\n",
+    "# and parsing (rather than searching for '-') keeps valid values written like '1e-05'.\n",
+    "area_cols = ['protected_area', 'total_area']\n",
+    "cold2, salt2, sea2, warm2 = (\n",
+    "    table.assign(**{col: pd.to_numeric(table[col], errors='coerce') for col in area_cols}).dropna(subset=area_cols)\n",
+    "    for table in (cold2, salt2, sea2, warm2)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the merged WDPA layer, which provides the ISO3 -> PARENT_ISO equivalences\n",
+    "wdpa = gpd.read_file(path_out + \"wdpa/merged_mpa.shp\")\n",
+    "\n",
+    "# Discard records that list several codes (separated by ';') in either column\n",
+    "lists_several_codes = wdpa['ISO3'].str.contains(';') | wdpa['PARENT_ISO'].str.contains(';')\n",
+    "wdpa = wdpa[~lists_several_codes]\n",
+    "\n",
+    "# One row per distinct (ISO3, PARENT_ISO) combination\n",
+    "unique_pairs = wdpa[['ISO3', 'PARENT_ISO']].drop_duplicates()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the ISO3 -> PARENT_ISO lookup and use it to replace each table's 'location_id'.\n",
+    "# NOTE: codes missing from the lookup become NaN, and groupby() leaves NaN keys out by default.\n",
+    "mapping_dict = dict(zip(unique_pairs['ISO3'], unique_pairs['PARENT_ISO']))\n",
+    "for table in (cold2, salt2, sea2, warm2):\n",
+    "    table['location_id'] = table['location_id'].map(mapping_dict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Total protected and overall area per 'location_id', one grouped table per habitat\n",
+    "cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped = (\n",
+    "    table.groupby('location_id').sum().reset_index()\n",
+    "    for table in (cold2, salt2, sea2, warm2)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Label every grouped table with the habitat it describes\n",
+    "habitat_labels = (\n",
+    "    (cold2_grouped, 'cold-water corals'),\n",
+    "    (salt2_grouped, 'saltmarshes'),\n",
+    "    (sea2_grouped, 'seagrasses'),\n",
+    "    (warm2_grouped, 'warm-water corals'),\n",
+    ")\n",
+    "for grouped, label in habitat_labels:\n",
+    "    grouped['habitat_name'] = label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABNJ</td>\n",
+       "      <td>421.629372679904</td>\n",
+       "      <td>1874.98221422617</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AGO</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.39567053773998</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  location_id    protected_area        total_area       habitat_name  year\n",
+       "0        ABNJ  421.629372679904  1874.98221422617  cold-water corals  2023\n",
+       "1         AGO                 0  3.39567053773998  cold-water corals  2023"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Stack the four habitat tables and stamp every row with the year in which the notebook is run\n",
+    "grouped_tables = [cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped]\n",
+    "habitats = pd.concat(grouped_tables).assign(year=datetime.now().year)\n",
+    "habitats.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>saltmarsh</td>\n",
+       "      <td>111638.252564</td>\n",
+       "      <td>224435.075094</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>seagrass</td>\n",
+       "      <td>74787.449960</td>\n",
+       "      <td>314001.940600</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>warmwater-corals</td>\n",
+       "      <td>63259.499130</td>\n",
+       "      <td>149886.974126</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>coldwater-corals</td>\n",
+       "      <td>4400.140842</td>\n",
+       "      <td>15336.975280</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       habitat_name  protected_area     total_area location_id  year\n",
+       "0         saltmarsh   111638.252564  224435.075094        GLOB  2023\n",
+       "1          seagrass    74787.449960  314001.940600        GLOB  2023\n",
+       "2  warmwater-corals    63259.499130  149886.974126        GLOB  2023\n",
+       "4  coldwater-corals     4400.140842   15336.975280        GLOB  2023"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Global figures are taken from the Ocean+ summary workbook; mangroves are left out of this table\n",
+    "habitats_global = (\n",
+    "    glob[['habitat', 'protected_area', 'total_area']]\n",
+    "    .rename(columns={'habitat': 'habitat_name'})\n",
+    "    .assign(location_id='GLOB', year=datetime.now().year)\n",
+    "    .loc[lambda table: table['habitat_name'] != 'mangroves']\n",
+    ")\n",
+    "habitats_global"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>111638.252564</td>\n",
+       "      <td>224435.075094</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>74787.449960</td>\n",
+       "      <td>314001.940600</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>63259.499130</td>\n",
+       "      <td>149886.974126</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>4400.140842</td>\n",
+       "      <td>15336.975280</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        habitat_name  protected_area     total_area location_id  year\n",
+       "0        saltmarshes   111638.252564  224435.075094        GLOB  2023\n",
+       "1         seagrasses    74787.449960  314001.940600        GLOB  2023\n",
+       "2  warm-water corals    63259.499130  149886.974126        GLOB  2023\n",
+       "4  cold-water corals     4400.140842   15336.975280        GLOB  2023"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The summary workbook names the habitats differently from the per-country tables, so align the labels\n",
+    "habitat_name_mapping = {\n",
+    "    'saltmarsh': 'saltmarshes',\n",
+    "    'seagrass': 'seagrasses',\n",
+    "    'warmwater-corals': 'warm-water corals',\n",
+    "    'coldwater-corals': 'cold-water corals'\n",
+    "}\n",
+    "habitats_global = habitats_global.assign(\n",
+    "    habitat_name=habitats_global['habitat_name'].replace(habitat_name_mapping)\n",
+    ")\n",
+    "habitats_global"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['cold-water corals', 'saltmarshes', 'seagrasses',\n",
+       "       'warm-water corals'], dtype=object)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Append the global rows. Any 'GLOB' rows already present are dropped first so that re-running this cell\n",
+    "# does not add the global statistics twice.\n",
+    "habitats = pd.concat([habitats[habitats['location_id'] != 'GLOB'], habitats_global])\n",
+    "habitats['habitat_name'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Regions used in the Protected Planet database (taken from Region_ISO3_PP.txt), each with its member ISO3 codes\n",
+    "regions_data = [\n",
+    "    {\n",
+    "        'region_iso': 'AS',\n",
+    "        'region_name': 'Asia & Pacific',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+    "            \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+    "            \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+    "            \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AF',\n",
+    "        'region_name': 'Africa',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+    "            \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+    "            \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+    "            \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'EU',\n",
+    "        'region_name': 'Europe',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+    "            \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+    "            \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+    "            \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+    "            \"UZB\", \"VAT\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'SA',\n",
+    "        'region_name': 'Latin America & Caribbean',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+    "            \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+    "            \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+    "            \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'PO',\n",
+    "        'region_name': 'Polar',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'NA',\n",
+    "        'region_name': 'North America',\n",
+    "        'country_iso_3s': [\n",
+    "            \"CAN\", \"SPM\", \"USA\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'WA',\n",
+    "        'region_name': 'West Asia',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+    "        'region_name': 'Antarctica',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATA\"\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Lookup from each country's ISO3 code to the code ('region_iso') of the region that lists it\n",
+    "country_to_region = {\n",
+    "    country: region['region_iso']\n",
+    "    for region in regions_data\n",
+    "    for country in region['country_iso_3s']\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>37.761626</td>\n",
+       "      <td>381.993234</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>6688.702879</td>\n",
+       "      <td>19845.915000</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>6319.099491</td>\n",
+       "      <td>61939.484904</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>6591.340083</td>\n",
+       "      <td>15216.393947</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>263.251498</td>\n",
+       "      <td>1332.225080</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>11721.439539</td>\n",
+       "      <td>39229.888860</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>28942.705660</td>\n",
+       "      <td>72666.482052</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>13895.870659</td>\n",
+       "      <td>67363.486609</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>EU</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2183.050266</td>\n",
+       "      <td>6179.526427</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>EU</td>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>7431.043710</td>\n",
+       "      <td>13274.326478</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>EU</td>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>5840.372925</td>\n",
+       "      <td>10391.189911</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>EU</td>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>0.605763</td>\n",
+       "      <td>0.793357</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>22.960099</td>\n",
+       "      <td>204.280433</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>51092.644683</td>\n",
+       "      <td>68200.081930</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>70.012791</td>\n",
+       "      <td>301.909141</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>234.731370</td>\n",
+       "      <td>1416.251323</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>22969.815906</td>\n",
+       "      <td>35983.392744</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>16517.097667</td>\n",
+       "      <td>45847.459412</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>5597.366845</td>\n",
+       "      <td>12869.801231</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>12.970705</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>saltmarshes</td>\n",
+       "      <td>1309.225736</td>\n",
+       "      <td>11798.832619</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>seagrasses</td>\n",
+       "      <td>1053.448673</td>\n",
+       "      <td>25273.727431</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>warm-water corals</td>\n",
+       "      <td>547.928957</td>\n",
+       "      <td>4903.230395</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   location_id       habitat_name  protected_area    total_area  year\n",
+       "0           AF  cold-water corals       37.761626    381.993234  2023\n",
+       "1           AF        saltmarshes     6688.702879  19845.915000  2023\n",
+       "2           AF         seagrasses     6319.099491  61939.484904  2023\n",
+       "3           AF  warm-water corals     6591.340083  15216.393947  2023\n",
+       "4           AS  cold-water corals      263.251498   1332.225080  2023\n",
+       "5           AS        saltmarshes    11721.439539  39229.888860  2023\n",
+       "6           AS         seagrasses    28942.705660  72666.482052  2023\n",
+       "7           AS  warm-water corals    13895.870659  67363.486609  2023\n",
+       "8           EU  cold-water corals     2183.050266   6179.526427  2023\n",
+       "9           EU        saltmarshes     7431.043710  13274.326478  2023\n",
+       "10          EU         seagrasses     5840.372925  10391.189911  2023\n",
+       "11          EU  warm-water corals        0.605763      0.793357  2023\n",
+       "12          NA  cold-water corals       22.960099    204.280433  2023\n",
+       "13          NA        saltmarshes    51092.644683  68200.081930  2023\n",
+       "14          NA         seagrasses       70.012791    301.909141  2023\n",
+       "15          NA  warm-water corals        0.000000      0.000000  2023\n",
+       "16          SA  cold-water corals      234.731370   1416.251323  2023\n",
+       "17          SA        saltmarshes    22969.815906  35983.392744  2023\n",
+       "18          SA         seagrasses    16517.097667  45847.459412  2023\n",
+       "19          SA  warm-water corals     5597.366845  12869.801231  2023\n",
+       "20          WA  cold-water corals        0.000000     12.970705  2023\n",
+       "21          WA        saltmarshes     1309.225736  11798.832619  2023\n",
+       "22          WA         seagrasses     1053.448673  25273.727431  2023\n",
+       "23          WA  warm-water corals      547.928957   4903.230395  2023"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Roll the location rows up into regions: tag each row with its region code, make sure the\n",
+    "# area columns are numeric, then total both areas per region and habitat\n",
+    "habitats_regions = (\n",
+    "    habitats\n",
+    "    .assign(\n",
+    "        region=habitats['location_id'].map(country_to_region),\n",
+    "        protected_area=pd.to_numeric(habitats['protected_area'], errors='coerce'),\n",
+    "        total_area=pd.to_numeric(habitats['total_area'], errors='coerce'),\n",
+    "    )\n",
+    "    .groupby(['region', 'habitat_name'])[['protected_area', 'total_area']]\n",
+    "    .sum()\n",
+    "    .reset_index()\n",
+    "    .assign(year=datetime.now().year)\n",
+    "    .rename(columns={'region': 'location_id'})\n",
+    ")\n",
+    "habitats_regions\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Append the regional rows. Rows that already carry one of the region codes are dropped first so that\n",
+    "# re-running this cell does not duplicate the regional statistics.\n",
+    "is_region_row = habitats['location_id'].isin(habitats_regions['location_id'])\n",
+    "habitats = pd.concat([habitats[~is_region_row], habitats_regions])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['ABNJ', 'AGO', 'ALB', 'ARG', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n",
+       "       'BRB', 'CAN', 'CHL', 'CHN', 'CIV', 'COK', 'COL', 'CPV', 'CRI',\n",
+       "       'CUB', 'CYP', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'ERI', 'ESP',\n",
+       "       'FJI', 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC',\n",
+       "       'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL',\n",
+       "       'ISL', 'ITA', 'JAM', 'JPN', 'KEN', 'KIR', 'KNA', 'LBR', 'LCA',\n",
+       "       'LKA', 'MAR', 'MDG', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE', 'MOZ',\n",
+       "       'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR', 'NZL',\n",
+       "       'OMN', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRT', 'RUS', 'SAU',\n",
+       "       'SEN', 'SHN', 'SJM', 'STP', 'SUR', 'SWE', 'SYC', 'THA', 'TLS',\n",
+       "       'TON', 'TTO', 'TUN', 'TUV', 'UMI', 'URY', 'USA', 'VCT', 'VEN',\n",
+       "       'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ARE', 'AZE', 'BEL', 'BGR',\n",
+       "       'BHR', 'DEU', 'DJI', 'EGY', 'EST', 'FIN', 'GEO', 'GMB', 'IRN',\n",
+       "       'KHM', 'KOR', 'KWT', 'LBY', 'LTU', 'LVA', 'PAK', 'POL', 'QAT',\n",
+       "       'ROU', 'SDN', 'SVN', 'TUR', 'TZA', 'UKR', 'COM', 'ISR', 'JOR',\n",
+       "       'MCO', 'MDV', 'SGP', 'SLB', 'SLE', 'BGD', 'BRN', 'NIU', 'GLOB',\n",
+       "       'AF', 'AS', 'EU', 'NA', 'SA', 'WA'], dtype=object)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# List every location code present in the table\n",
+    "pd.unique(habitats['location_id'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the combined table to disk without the index column.\n",
+    "# NOTE: pandas.read_csv treats the text 'NA' as missing by default, so the North America code needs\n",
+    "# keep_default_na=False when this file is read back.\n",
+    "ocean_plus_csv = path_out + \"habitats/ocean+.csv\"\n",
+    "habitats.to_csv(ocean_plus_csv, index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Process seamounts from [UNEP-WCMC](https://data.unep-wcmc.org/datasets/41)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read required data\n",
+    "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n",
+    "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n",
+    "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n",
+    "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep relevant fields in eez and hs and merge them into one dataframe\n",
+    "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n",
+    "hs = hs[['geometry']]\n",
+    "hs['SOVEREIGN1'] = 'High Seas'\n",
+    "hs['ISO_SOV1'] = 'ABNJ'\n",
+    "eez_hs = eez.merge(hs, how='outer')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Join eez info to seamounts falling within eez polygons\n",
+    "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n",
+    "# Drop those not associated with an eez or hs\n",
+    "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create new column \"iso\" with the iso_sov codes\n",
+    "def concatenate_iso(row):\n",
+    "    iso_list = [row['ISO_SOV1']]\n",
+    "    if not pd.isna(row['ISO_SOV2']):\n",
+    "        iso_list.append(row['ISO_SOV2'])\n",
+    "    if not pd.isna(row['ISO_SOV3']):\n",
+    "        iso_list.append(row['ISO_SOV3'])\n",
+    "    return ';'.join(iso_list)\n",
+    "\n",
+    "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check which seamounts are protected\n",
+    "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n",
+    "seamounts_wdpa['protection'] = \"no\"  \n",
+    "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n",
+    "# Remove rows in which protection is \"no\"\n",
+    "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Global stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2.690810e+07</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  habitat_name    total_area location_id  year\n",
+       "0    seamounts  2.690810e+07        GLOB  2011"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Calculate global area of seamounts\n",
+    "seamounts_eez['habitat_name'] = 'seamounts'\n",
+    "seamounts_global = seamounts_eez.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n",
+    "seamounts_global['location_id'] = 'GLOB'\n",
+    "seamounts_global['year'] = 2011\n",
+    "seamounts_global"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>3.438552e+06</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  habitat_name  protected_area\n",
+       "0    seamounts    3.438552e+06"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Calculate global area of seamounts protected\n",
+    "seamounts_wdpa['habitat_name'] = 'seamounts'\n",
+    "seamounts_wdpa_global = seamounts_wdpa.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'protected_area'})\n",
+    "seamounts_wdpa_global"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>year</th>\n",
+       "      <th>protected_area</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2.690810e+07</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>3.438552e+06</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  habitat_name    total_area location_id  year  protected_area\n",
+       "0    seamounts  2.690810e+07        GLOB  2011    3.438552e+06"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Bring 'protected_area' field from seamounts_wdpa_global to seamounts_global\n",
+    "seamounts_global = seamounts_global.merge(seamounts_wdpa_global[['habitat_name', 'protected_area']], how='left', on='habitat_name')\n",
+    "seamounts_global"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Country stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the 'iso' values and create separate rows only for those with multiple values\n",
+    "mask = seamounts_eez['iso'].str.contains(';', na=False)\n",
+    "split_rows = seamounts_eez[mask].copy()\n",
+    "split_rows['iso'] = split_rows['iso'].str.split(';')\n",
+    "split_rows = split_rows.explode('iso')\n",
+    "\n",
+    "# Keep rows with a single value in 'iso'\n",
+    "single_value_rows = seamounts_eez[~mask]\n",
+    "\n",
+    "# Concatenate the exploded rows with the single value rows\n",
+    "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABNJ</td>\n",
+       "      <td>1.483098e+07</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AGO</td>\n",
+       "      <td>9.556242e+03</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>ARG</td>\n",
+       "      <td>3.110730e+05</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>ATA</td>\n",
+       "      <td>3.551629e+05</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ATG</td>\n",
+       "      <td>6.215895e+03</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>88</th>\n",
+       "      <td>VNM</td>\n",
+       "      <td>4.421338e+04</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>89</th>\n",
+       "      <td>VUT</td>\n",
+       "      <td>1.199475e+05</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>90</th>\n",
+       "      <td>WSM</td>\n",
+       "      <td>4.117997e+04</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>91</th>\n",
+       "      <td>YEM</td>\n",
+       "      <td>6.294974e+04</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>92</th>\n",
+       "      <td>ZAF</td>\n",
+       "      <td>9.946306e+04</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>93 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   location_id    total_area habitat_name  year\n",
+       "0         ABNJ  1.483098e+07    seamounts  2011\n",
+       "1          AGO  9.556242e+03    seamounts  2011\n",
+       "2          ARG  3.110730e+05    seamounts  2011\n",
+       "3          ATA  3.551629e+05    seamounts  2011\n",
+       "4          ATG  6.215895e+03    seamounts  2011\n",
+       "..         ...           ...          ...   ...\n",
+       "88         VNM  4.421338e+04    seamounts  2011\n",
+       "89         VUT  1.199475e+05    seamounts  2011\n",
+       "90         WSM  4.117997e+04    seamounts  2011\n",
+       "91         YEM  6.294974e+04    seamounts  2011\n",
+       "92         ZAF  9.946306e+04    seamounts  2011\n",
+       "\n",
+       "[93 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Get area of seamounts per iso\n",
+    "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n",
+    "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n",
+    "seamounts_iso['habitat_name'] = 'seamounts'\n",
+    "seamounts_iso['year'] = 2011\n",
+    "seamounts_iso "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the 'PARENT_ISO' values in seamounts_wdpa and create separate rows only for those with multiple values\n",
+    "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n",
+    "split_rows = seamounts_wdpa[mask].copy()\n",
+    "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n",
+    "split_rows = split_rows.explode('PARENT_ISO')\n",
+    "\n",
+    "# Keep rows with a single value in 'PARENT_ISO'\n",
+    "single_value_rows = seamounts_wdpa[~mask]\n",
+    "\n",
+    "# Concatenate the exploded rows with the single value rows\n",
+    "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>protected_area</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABNJ</td>\n",
+       "      <td>226253.932283</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>ARG</td>\n",
+       "      <td>38773.659962</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  location_id  protected_area\n",
+       "0        ABNJ   226253.932283\n",
+       "1         ARG    38773.659962"
+      ]
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Calculate area protected per iso\n",
+    "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n",
+    "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n",
+    "seamounts_protected.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>protected_area</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABNJ</td>\n",
+       "      <td>1.483098e+07</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>226253.932283</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AGO</td>\n",
+       "      <td>9.556242e+03</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  location_id    total_area habitat_name  year  protected_area\n",
+       "0        ABNJ  1.483098e+07    seamounts  2011   226253.932283\n",
+       "1         AGO  9.556242e+03    seamounts  2011             NaN"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Join seamounts_iso and seamounts_protected\n",
+    "seamounts_iso = seamounts_iso.merge(seamounts_protected, how='left', on='location_id')\n",
+    "seamounts_iso.head(2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Regions stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>94385.178958</td>\n",
+       "      <td>6.162351e+05</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>832497.783937</td>\n",
+       "      <td>5.433433e+06</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AT</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>3.551629e+05</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>EU</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>894514.910255</td>\n",
+       "      <td>2.641119e+06</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>555588.210725</td>\n",
+       "      <td>1.664794e+06</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>581172.154389</td>\n",
+       "      <td>1.655552e+06</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>seamounts</td>\n",
+       "      <td>2487.428050</td>\n",
+       "      <td>9.384765e+04</td>\n",
+       "      <td>2011</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  location_id habitat_name  protected_area    total_area  year\n",
+       "0          AF    seamounts    94385.178958  6.162351e+05  2011\n",
+       "1          AS    seamounts   832497.783937  5.433433e+06  2011\n",
+       "2          AT    seamounts        0.000000  3.551629e+05  2011\n",
+       "3          EU    seamounts   894514.910255  2.641119e+06  2011\n",
+       "4          NA    seamounts   555588.210725  1.664794e+06  2011\n",
+       "5          SA    seamounts   581172.154389  1.655552e+06  2011\n",
+       "6          WA    seamounts     2487.428050  9.384765e+04  2011"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "seamounts_regions = seamounts_iso.copy()\n",
+    "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n",
+    "\n",
+    "# Calculate stats for each region\n",
+    "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+    "seamounts_regions['year'] = 2011\n",
+    "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n",
+    "seamounts_regions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate country, region and global stats into seamounts_all\n",
+    "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seamounts_all.to_csv(path_out + \"habitats/seamounts.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Process mangroves from GMW"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mangroves = pd.read_csv(path_out + \"habitats/mangroves.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>61287.20375</td>\n",
+       "      <td>147358.990971</td>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  habitat_name  protected_area     total_area location_id  year\n",
+       "0    mangroves     61287.20375  147358.990971        GLOB  2020"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Calculate global stats for mangroves\n",
+    "mangroves_global = mangroves.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+    "mangroves_global['location_id'] = 'GLOB'\n",
+    "mangroves_global['year'] = 2020\n",
+    "mangroves_global"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate the global stats to the mangroves dataframe\n",
+    "mangroves = pd.concat([mangroves, mangroves_global])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>10006.97000</td>\n",
+       "      <td>29344.404399</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>21378.75000</td>\n",
+       "      <td>74629.194446</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>2055.40000</td>\n",
+       "      <td>2329.115505</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>PO</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>6.72000</td>\n",
+       "      <td>6.723018</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>27811.53375</td>\n",
+       "      <td>40875.932666</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>27.83000</td>\n",
+       "      <td>173.620938</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  location_id habitat_name  protected_area    total_area  year\n",
+       "0          AF    mangroves     10006.97000  29344.404399  2020\n",
+       "1          AS    mangroves     21378.75000  74629.194446  2020\n",
+       "2          NA    mangroves      2055.40000   2329.115505  2020\n",
+       "3          PO    mangroves         6.72000      6.723018  2020\n",
+       "4          SA    mangroves     27811.53375  40875.932666  2020\n",
+       "5          WA    mangroves        27.83000    173.620938  2020"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mangroves_regions = mangroves.copy()\n",
+    "mangroves_regions['region'] = mangroves['location_id'].map(country_to_region)\n",
+    "\n",
+    "# Calculate stats for each region\n",
+    "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n",
+    "mangroves_regions['year'] = 2020\n",
+    "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n",
+    "mangroves_regions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate stats for regions with mangroves\n",
+    "mangroves = pd.concat([mangroves, mangroves_regions])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Concatenate all habitats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>protected_area</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>habitat_name</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ABNJ</td>\n",
+       "      <td>421.629372679904</td>\n",
+       "      <td>1874.98221422617</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AGO</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.39567053773998</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>ALB</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5.98647948252716</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>ARG</td>\n",
+       "      <td>6.98422602063557</td>\n",
+       "      <td>61.8263440651753</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ATG</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.997746538545076</td>\n",
+       "      <td>cold-water corals</td>\n",
+       "      <td>2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>21378.75</td>\n",
+       "      <td>74629.194446</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>2055.4</td>\n",
+       "      <td>2329.115505</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>PO</td>\n",
+       "      <td>6.72</td>\n",
+       "      <td>6.723018</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>27811.53375</td>\n",
+       "      <td>40875.932666</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>27.83</td>\n",
+       "      <td>173.620938</td>\n",
+       "      <td>mangroves</td>\n",
+       "      <td>2020</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>628 rows × 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   location_id    protected_area         total_area       habitat_name  year\n",
+       "0         ABNJ  421.629372679904   1874.98221422617  cold-water corals  2023\n",
+       "1          AGO                 0   3.39567053773998  cold-water corals  2023\n",
+       "2          ALB                 0   5.98647948252716  cold-water corals  2023\n",
+       "3          ARG  6.98422602063557   61.8263440651753  cold-water corals  2023\n",
+       "4          ATG                 0  0.997746538545076  cold-water corals  2023\n",
+       "..         ...               ...                ...                ...   ...\n",
+       "1           AS          21378.75       74629.194446          mangroves  2020\n",
+       "2           NA            2055.4        2329.115505          mangroves  2020\n",
+       "3           PO              6.72           6.723018          mangroves  2020\n",
+       "4           SA       27811.53375       40875.932666          mangroves  2020\n",
+       "5           WA             27.83         173.620938          mangroves  2020\n",
+       "\n",
+       "[628 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 85,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Concatenate the dataframes\n",
+    "habitats_all = pd.concat([habitats, seamounts_all, mangroves])\n",
+    "habitats_all"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "habitats_all.to_csv(path_out + \"/tables/habitats2.csv\", index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "skytruth",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/layers.ipynb b/data/notebooks/layers.ipynb
index 3d9f2c16..232b762e 100644
--- a/data/notebooks/layers.ipynb
+++ b/data/notebooks/layers.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12,114 +12,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
-    "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\""
+    "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+    "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>MRGID</th>\n",
-       "      <th>GEONAME</th>\n",
-       "      <th>MRGID_TER1</th>\n",
-       "      <th>POL_TYPE</th>\n",
-       "      <th>MRGID_SOV1</th>\n",
-       "      <th>TERRITORY1</th>\n",
-       "      <th>ISO_TER1</th>\n",
-       "      <th>SOVEREIGN1</th>\n",
-       "      <th>MRGID_TER2</th>\n",
-       "      <th>MRGID_SOV2</th>\n",
-       "      <th>...</th>\n",
-       "      <th>ISO_SOV1</th>\n",
-       "      <th>ISO_SOV2</th>\n",
-       "      <th>ISO_SOV3</th>\n",
-       "      <th>UN_SOV1</th>\n",
-       "      <th>UN_SOV2</th>\n",
-       "      <th>UN_SOV3</th>\n",
-       "      <th>UN_TER1</th>\n",
-       "      <th>UN_TER2</th>\n",
-       "      <th>UN_TER3</th>\n",
-       "      <th>geometry</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>8444.0</td>\n",
-       "      <td>American Samoa Exclusive Economic Zone</td>\n",
-       "      <td>8670.0</td>\n",
-       "      <td>200NM</td>\n",
-       "      <td>2204.0</td>\n",
-       "      <td>American Samoa</td>\n",
-       "      <td>ASM</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>840</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>16.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-166.64112 -17.55527, -166.64194 -17...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>1 rows × 32 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    MRGID                                 GEONAME  MRGID_TER1 POL_TYPE  \\\n",
-       "0  8444.0  American Samoa Exclusive Economic Zone      8670.0    200NM   \n",
-       "\n",
-       "   MRGID_SOV1      TERRITORY1 ISO_TER1     SOVEREIGN1  MRGID_TER2  MRGID_SOV2  \\\n",
-       "0      2204.0  American Samoa      ASM  United States         0.0         0.0   \n",
-       "\n",
-       "   ... ISO_SOV1 ISO_SOV2 ISO_SOV3  UN_SOV1  UN_SOV2 UN_SOV3 UN_TER1 UN_TER2  \\\n",
-       "0  ...      USA      NaN      NaN      840      NaN     NaN    16.0     NaN   \n",
-       "\n",
-       "   UN_TER3                                           geometry  \n",
-       "0      NaN  POLYGON ((-166.64112 -17.55527, -166.64194 -17...  \n",
-       "\n",
-       "[1 rows x 32 columns]"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "eez = gpd.read_file(path_in + \"/World_EEZ_v11_20191118/eez_v11.shp\")\n",
     "eez.head(1)"
@@ -127,63 +32,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n",
-       "       'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n",
-       "       'TERRITORY2', 'ISO_TER2', 'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n",
-       "       'TERRITORY3', 'ISO_TER3', 'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n",
-       "       'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n",
-       "       'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n",
-       "      dtype='object')"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "eez.columns"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<Geographic 2D CRS: EPSG:4326>\n",
-       "Name: WGS 84\n",
-       "Axis Info [ellipsoidal]:\n",
-       "- Lat[north]: Geodetic latitude (degree)\n",
-       "- Lon[east]: Geodetic longitude (degree)\n",
-       "Area of Use:\n",
-       "- name: World.\n",
-       "- bounds: (-180.0, -90.0, 180.0, 90.0)\n",
-       "Datum: World Geodetic System 1984 ensemble\n",
-       "- Ellipsoid: WGS 84\n",
-       "- Prime Meridian: Greenwich"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "eez.crs"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -193,223 +60,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>MRGID</th>\n",
-       "      <th>GEONAME</th>\n",
-       "      <th>MRGID_TER1</th>\n",
-       "      <th>POL_TYPE</th>\n",
-       "      <th>MRGID_SOV1</th>\n",
-       "      <th>TERRITORY1</th>\n",
-       "      <th>ISO_TER1</th>\n",
-       "      <th>SOVEREIGN1</th>\n",
-       "      <th>MRGID_TER2</th>\n",
-       "      <th>MRGID_SOV2</th>\n",
-       "      <th>...</th>\n",
-       "      <th>ISO_SOV1</th>\n",
-       "      <th>ISO_SOV2</th>\n",
-       "      <th>ISO_SOV3</th>\n",
-       "      <th>UN_SOV1</th>\n",
-       "      <th>UN_SOV2</th>\n",
-       "      <th>UN_SOV3</th>\n",
-       "      <th>UN_TER1</th>\n",
-       "      <th>UN_TER2</th>\n",
-       "      <th>UN_TER3</th>\n",
-       "      <th>geometry</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>8444.0</td>\n",
-       "      <td>American Samoa Exclusive Economic Zone</td>\n",
-       "      <td>8670.0</td>\n",
-       "      <td>200NM</td>\n",
-       "      <td>2204.0</td>\n",
-       "      <td>American Samoa</td>\n",
-       "      <td>ASM</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>840</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>16.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-16216412.543 -2157569.856, -1621650...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>8379.0</td>\n",
-       "      <td>Ascension Exclusive Economic Zone</td>\n",
-       "      <td>8620.0</td>\n",
-       "      <td>200NM</td>\n",
-       "      <td>2208.0</td>\n",
-       "      <td>Ascension</td>\n",
-       "      <td>SHN</td>\n",
-       "      <td>United Kingdom</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>GBR</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>826</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>654.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-1089355.142 -974062.004, -1089348.4...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>8446.0</td>\n",
-       "      <td>Cook Islands Exclusive Economic Zone</td>\n",
-       "      <td>8672.0</td>\n",
-       "      <td>200NM</td>\n",
-       "      <td>2227.0</td>\n",
-       "      <td>Cook Islands</td>\n",
-       "      <td>COK</td>\n",
-       "      <td>New Zealand</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>NZL</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>554</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-15912583.852 -716733.193, -15813064...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>8389.0</td>\n",
-       "      <td>Overlapping claim Falkland / Malvinas Islands:...</td>\n",
-       "      <td>8623.0</td>\n",
-       "      <td>Overlapping claim</td>\n",
-       "      <td>2208.0</td>\n",
-       "      <td>Falkland / Malvinas Islands</td>\n",
-       "      <td>FLK</td>\n",
-       "      <td>United Kingdom</td>\n",
-       "      <td>8623.0</td>\n",
-       "      <td>2149.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>GBR</td>\n",
-       "      <td>ARG</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>826</td>\n",
-       "      <td>32.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>238.0</td>\n",
-       "      <td>238.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-4061728.309 -6509190.466, -4443979....</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>8440.0</td>\n",
-       "      <td>French Polynesian Exclusive Economic Zone</td>\n",
-       "      <td>8656.0</td>\n",
-       "      <td>200NM</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>French Polynesia</td>\n",
-       "      <td>PYF</td>\n",
-       "      <td>France</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>FRA</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>250</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>258.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>MULTIPOLYGON (((-13543804.433 -974376.651, -13...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 32 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    MRGID                                            GEONAME  MRGID_TER1  \\\n",
-       "0  8444.0             American Samoa Exclusive Economic Zone      8670.0   \n",
-       "1  8379.0                  Ascension Exclusive Economic Zone      8620.0   \n",
-       "2  8446.0               Cook Islands Exclusive Economic Zone      8672.0   \n",
-       "3  8389.0  Overlapping claim Falkland / Malvinas Islands:...      8623.0   \n",
-       "4  8440.0          French Polynesian Exclusive Economic Zone      8656.0   \n",
-       "\n",
-       "            POL_TYPE  MRGID_SOV1                   TERRITORY1 ISO_TER1  \\\n",
-       "0              200NM      2204.0               American Samoa      ASM   \n",
-       "1              200NM      2208.0                    Ascension      SHN   \n",
-       "2              200NM      2227.0                 Cook Islands      COK   \n",
-       "3  Overlapping claim      2208.0  Falkland / Malvinas Islands      FLK   \n",
-       "4              200NM        17.0             French Polynesia      PYF   \n",
-       "\n",
-       "       SOVEREIGN1  MRGID_TER2  MRGID_SOV2  ... ISO_SOV1 ISO_SOV2 ISO_SOV3  \\\n",
-       "0   United States         0.0         0.0  ...      USA      NaN      NaN   \n",
-       "1  United Kingdom         0.0         0.0  ...      GBR      NaN      NaN   \n",
-       "2     New Zealand         0.0         0.0  ...      NZL      NaN      NaN   \n",
-       "3  United Kingdom      8623.0      2149.0  ...      GBR      ARG      NaN   \n",
-       "4          France         0.0         0.0  ...      FRA      NaN      NaN   \n",
-       "\n",
-       "   UN_SOV1  UN_SOV2 UN_SOV3 UN_TER1 UN_TER2  UN_TER3  \\\n",
-       "0      840      NaN     NaN    16.0     NaN      NaN   \n",
-       "1      826      NaN     NaN   654.0     NaN      NaN   \n",
-       "2      554      NaN     NaN   184.0     NaN      NaN   \n",
-       "3      826     32.0     NaN   238.0   238.0      NaN   \n",
-       "4      250      NaN     NaN   258.0     NaN      NaN   \n",
-       "\n",
-       "                                            geometry  \n",
-       "0  POLYGON ((-16216412.543 -2157569.856, -1621650...  \n",
-       "1  POLYGON ((-1089355.142 -974062.004, -1089348.4...  \n",
-       "2  POLYGON ((-15912583.852 -716733.193, -15813064...  \n",
-       "3  POLYGON ((-4061728.309 -6509190.466, -4443979....  \n",
-       "4  MULTIPOLYGON (((-13543804.433 -974376.651, -13...  \n",
-       "\n",
-       "[5 rows x 32 columns]"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "eez = eez.to_crs('ESRI:54009')\n",
     "eez['AREA_KM2']= eez.geometry.area/ 1000000\n",
@@ -418,7 +71,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -427,29 +80,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Allocating 16 GB of heap memory\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shx\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.dbf\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.prj\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "# Reproject to 4626\n",
+    "# Reproject to 4326\n",
     "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -533,7 +174,425 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n",
+    "eez.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eez['REGIONS'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dissolve by relevant fields: REGIONS (paths are derived from path_out so they match the to_file/read_file cells)\n",
+    "!mapshaper-xl 16gb {path_out}/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o {path_out}/administrative/eez_regions.shp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
+    "regions.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions.crs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions['REGIONS'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions.loc[regions['REGIONS'].isna(), 'REGIONS'] = 'Antartica'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate area of each region\n",
+    "regions['AREA_KM2']= regions.geometry.area/ 1000000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reproject to EPSG:4326 in place (path is derived from path_out so it matches the file written by regions.to_file)\n",
+    "!mapshaper-xl 16gb {path_out}/administrative/eez_regions.shp -proj EPSG:4326 -o force {path_out}/administrative/eez_regions.shp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions['REGIONS'].unique()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Extract marine areas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\", driver=\"ESRI Shapefile\")\n",
+    "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
+    "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\", driver=\"ESRI Shapefile\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create new column \"iso\" that has the field \"ISO_SOV1\" for all rows except those in which ISO_SOV2 and ISO_SOV3 are not null. In such cases concatenate ISO_SOV1, ISO_SOV2 and ISO_SOV3\n",
+    "eez['iso'] = eez['ISO_SOV1']\n",
+    "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n",
+    "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "49"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(eez[eez['iso'].str.contains(';', na=False)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(281, 33)"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "eez.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(337, 33)"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create a mask for rows with multiple values in 'iso'\n",
+    "mask = eez['iso'].str.contains(';', na=False)\n",
+    "\n",
+    "# Split the 'iso' values and create separate rows only for rows with multiple values\n",
+    "split_rows = eez[mask].copy()\n",
+    "split_rows['iso'] = split_rows['iso'].str.split(';')\n",
+    "split_rows = split_rows.explode('iso')\n",
+    "\n",
+    "# Keep rows with single values in 'iso'\n",
+    "single_value_rows = eez[~mask]\n",
+    "\n",
+    "# Concatenate the exploded rows with the single value rows\n",
+    "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n",
+    "\n",
+    "eez_new.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(eez_new[eez_new['iso'].str.contains(';', na=False)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iso_country_mapping = {\n",
+    "    'USA': 'United States',\n",
+    "    'GBR': 'United Kingdom',\n",
+    "    'NZL': 'New Zealand',\n",
+    "    'FRA': 'France',\n",
+    "    'WSM': 'Samoa',\n",
+    "    'TON': 'Tonga',\n",
+    "    'CHL': 'Chile',\n",
+    "    'URY': 'Uruguay',\n",
+    "    'PER': 'Peru',\n",
+    "    'BRA': 'Brazil',\n",
+    "    'KIR': 'Kiribati',\n",
+    "    'ARG': 'Argentina',\n",
+    "    'AUS': 'Australia',\n",
+    "    'COM': 'Comoros',\n",
+    "    'MDG': 'Madagascar',\n",
+    "    'ZAF': 'South Africa',\n",
+    "    'MUS': 'Mauritius',\n",
+    "    'VUT': 'Vanuatu',\n",
+    "    'NAM': 'Namibia',\n",
+    "    'TLS': 'Timor-Leste',\n",
+    "    'COG': 'Republic of the Congo',\n",
+    "    'AGO': 'Angola',\n",
+    "    'MOZ': 'Mozambique',\n",
+    "    'KEN': 'Kenya',\n",
+    "    'PNG': 'Papua New Guinea',\n",
+    "    'TZA': 'Tanzania',\n",
+    "    'SLB': 'Solomon Islands',\n",
+    "    'SYC': 'Seychelles',\n",
+    "    'COD': 'Democratic Republic of the Congo',\n",
+    "    'ATG': 'Antigua and Barbuda',\n",
+    "    'NLD': 'Netherlands',\n",
+    "    'PRT': 'Portugal',\n",
+    "    'BHS': 'The Bahamas',\n",
+    "    'BRB': 'Barbados',\n",
+    "    'MEX': 'Mexico',\n",
+    "    'CPV': 'Cape Verde',\n",
+    "    'ESP': 'Spain',\n",
+    "    'PAN': 'Panama',\n",
+    "    'CRI': 'Costa Rica',\n",
+    "    'DMA': 'Dominica',\n",
+    "    'DOM': 'Dominican Republic',\n",
+    "    'GTM': 'Guatemala',\n",
+    "    'DNK': 'Denmark',\n",
+    "    'GMB': 'Gambia',\n",
+    "    'GIB': 'Gibraltar',\n",
+    "    'GRD': 'Grenada',\n",
+    "    'SLE': 'Sierra Leone',\n",
+    "    'ISL': 'Iceland',\n",
+    "    'JAM': 'Jamaica',\n",
+    "    'MRT': 'Mauritania',\n",
+    "    'HTI': 'Haiti',\n",
+    "    'KNA': 'Saint Kitts and Nevis',\n",
+    "    'LCA': 'Saint Lucia',\n",
+    "    'VCT': 'Saint Vincent and the Grenadines',\n",
+    "    'TTO': 'Trinidad and Tobago',\n",
+    "    'SLV': 'El Salvador',\n",
+    "    'BLZ': 'Belize',\n",
+    "    'CUB': 'Cuba',\n",
+    "    'SEN': 'Senegal',\n",
+    "    'VEN': 'Venezuela',\n",
+    "    'CAN': 'Canada',\n",
+    "    'NIC': 'Nicaragua',\n",
+    "    'GUY': 'Guyana',\n",
+    "    'COL': 'Colombia',\n",
+    "    'IRL': 'Ireland',\n",
+    "    'GNB': 'Guinea-Bissau',\n",
+    "    'GIN': 'Guinea',\n",
+    "    'CIV': 'Ivory Coast',\n",
+    "    'LBR': 'Liberia',\n",
+    "    'HND': 'Honduras',\n",
+    "    'ECU': 'Ecuador',\n",
+    "    'ESH': 'Western Sahara',\n",
+    "    'SUR': 'Suriname',\n",
+    "    'MAR': 'Morocco',\n",
+    "    'ARE': 'United Arab Emirates',\n",
+    "    'CYP': 'Cyprus',\n",
+    "    'ERI': 'Eritrea',\n",
+    "    'EGY': 'Egypt',\n",
+    "    'GEO': 'Georgia',\n",
+    "    'IRN': 'Iran',\n",
+    "    'LBN': 'Lebanon',\n",
+    "    'LBY': 'Libya',\n",
+    "    'MLT': 'Malta',\n",
+    "    'OMN': 'Oman',\n",
+    "    'SAU': 'Saudi Arabia',\n",
+    "    'LKA': 'Sri Lanka',\n",
+    "    'SDN': 'Sudan',\n",
+    "    'SYR': 'Syria',\n",
+    "    'TGO': 'Togo',\n",
+    "    'GRC': 'Greece',\n",
+    "    'TUR': 'Turkey',\n",
+    "    'MCO': 'Monaco',\n",
+    "    'TUN': 'Tunisia',\n",
+    "    'MNE': 'Montenegro',\n",
+    "    'ALB': 'Albania',\n",
+    "    'BGR': 'Bulgaria',\n",
+    "    'PSE': 'Palestine',\n",
+    "    'KWT': 'Kuwait',\n",
+    "    'IRQ': 'Iraq',\n",
+    "    'BHR': 'Bahrain',\n",
+    "    'QAT': 'Qatar',\n",
+    "    'YEM': 'Yemen',\n",
+    "    'ISR': 'Israel',\n",
+    "    'JOR': 'Jordan',\n",
+    "    'DJI': 'Djibouti',\n",
+    "    'BGD': 'Bangladesh',\n",
+    "    'NGA': 'Nigeria',\n",
+    "    'CMR': 'Cameroon',\n",
+    "    'STP': 'São Tomé and Príncipe',\n",
+    "    'BIH': 'Bosnia and Herzegovina',\n",
+    "    'MHL': 'Marshall Islands',\n",
+    "    'PLW': 'Palau',\n",
+    "    'PHL': 'Philippines',\n",
+    "    'TWN': 'Taiwan',\n",
+    "    'SGP': 'Singapore',\n",
+    "    'THA': 'Thailand',\n",
+    "    'VNM': 'Vietnam',\n",
+    "    'KOR': 'South Korea',\n",
+    "    'BRN': 'Brunei',\n",
+    "    'PRK': 'North Korea',\n",
+    "    'KHM': 'Cambodia',\n",
+    "    'CHN': 'China',\n",
+    "    'EST': 'Estonia',\n",
+    "    'FIN': 'Finland',\n",
+    "    'SWE': 'Sweden',\n",
+    "    'LTU': 'Lithuania',\n",
+    "    'NOR': 'Norway',\n",
+    "    'BEL': 'Belgium',\n",
+    "    'DEU': 'Germany',\n",
+    "    'LVA': 'Latvia',\n",
+    "    'HRV': 'Croatia',\n",
+    "    'ITA': 'Italy',\n",
+    "    'UKR': 'Ukraine',\n",
+    "    'ROU': 'Romania',\n",
+    "    'JPN': 'Japan',\n",
+    "    'IND': 'India',\n",
+    "    'PAK': 'Pakistan',\n",
+    "    'TKM': 'Turkmenistan',\n",
+    "    'AZE': 'Azerbaijan',\n",
+    "    'KAZ': 'Kazakhstan',\n",
+    "    'MMR': 'Myanmar',\n",
+    "    'POL': 'Poland',\n",
+    "    'BEN': 'Benin',\n",
+    "    'SVN': 'Slovenia',\n",
+    "    'MYS': 'Malaysia',\n",
+    "    'ATA': 'Antarctica',\n",
+    "    'TUV': 'Tuvalu',\n",
+    "    'FJI': 'Fiji',\n",
+    "    'FSM': 'Micronesia',\n",
+    "    'GNQ': 'Equatorial Guinea',\n",
+    "    'MDV': 'Maldives',\n",
+    "    'SOM': 'Somalia',\n",
+    "    'NRU': 'Nauru',\n",
+    "    'GAB': 'Gabon',\n",
+    "    'IDN': 'Indonesia',\n",
+    "    'DZA': 'Algeria',\n",
+    "    'GHA': 'Ghana',\n",
+    "    'RUS': 'Russia'\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_name(country):\n",
+    "    return iso_country_mapping.get(country, None)\n",
+    "\n",
+    "# Apply the function to create the 'name_iso' column\n",
+    "eez_new['name_iso'] = eez_new['iso'].apply(get_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
    "metadata": {},
    "outputs": [
     {
@@ -568,7 +627,6 @@
        "      <th>MRGID_TER2</th>\n",
        "      <th>MRGID_SOV2</th>\n",
        "      <th>...</th>\n",
-       "      <th>ISO_SOV2</th>\n",
        "      <th>ISO_SOV3</th>\n",
        "      <th>UN_SOV1</th>\n",
        "      <th>UN_SOV2</th>\n",
@@ -577,7 +635,8 @@
        "      <th>UN_TER2</th>\n",
        "      <th>UN_TER3</th>\n",
        "      <th>geometry</th>\n",
-       "      <th>REGIONS</th>\n",
+       "      <th>iso</th>\n",
+       "      <th>name_iso</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -595,15 +654,15 @@
        "      <td>0.0</td>\n",
        "      <td>...</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
        "      <td>840</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>16.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-16216412.543 -2157569.856, -1621650...</td>\n",
-       "      <td>North America</td>\n",
+       "      <td>POLYGON ((-166.64112 -17.55527, -166.64194 -17...</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>United States</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -619,19 +678,19 @@
        "      <td>0.0</td>\n",
        "      <td>...</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
        "      <td>826</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>654.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>POLYGON ((-1089355.142 -974062.004, -1089348.4...</td>\n",
-       "      <td>Europe</td>\n",
+       "      <td>POLYGON ((-10.93328 -7.88745, -10.93324 -7.889...</td>\n",
+       "      <td>GBR</td>\n",
+       "      <td>United Kingdom</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>2 rows × 33 columns</p>\n",
+       "<p>2 rows × 34 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
@@ -643,90 +702,161 @@
        "0      2204.0  American Samoa      ASM   United States         0.0   \n",
        "1      2208.0       Ascension      SHN  United Kingdom         0.0   \n",
        "\n",
-       "   MRGID_SOV2  ... ISO_SOV2 ISO_SOV3 UN_SOV1  UN_SOV2  UN_SOV3 UN_TER1  \\\n",
-       "0         0.0  ...      NaN      NaN     840      NaN      NaN    16.0   \n",
-       "1         0.0  ...      NaN      NaN     826      NaN      NaN   654.0   \n",
-       "\n",
-       "  UN_TER2 UN_TER3                                           geometry  \\\n",
-       "0     NaN     NaN  POLYGON ((-16216412.543 -2157569.856, -1621650...   \n",
-       "1     NaN     NaN  POLYGON ((-1089355.142 -974062.004, -1089348.4...   \n",
+       "   MRGID_SOV2  ... ISO_SOV3 UN_SOV1 UN_SOV2  UN_SOV3  UN_TER1 UN_TER2 UN_TER3  \\\n",
+       "0         0.0  ...      NaN     840     NaN      NaN     16.0     NaN     NaN   \n",
+       "1         0.0  ...      NaN     826     NaN      NaN    654.0     NaN     NaN   \n",
        "\n",
-       "         REGIONS  \n",
-       "0  North America  \n",
-       "1         Europe  \n",
+       "                                            geometry  iso        name_iso  \n",
+       "0  POLYGON ((-166.64112 -17.55527, -166.64194 -17...  USA   United States  \n",
+       "1  POLYGON ((-10.93328 -7.88745, -10.93324 -7.889...  GBR  United Kingdom  \n",
        "\n",
-       "[2 rows x 33 columns]"
+       "[2 rows x 34 columns]"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 47,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n",
-    "eez.head(2)"
+    "eez_new.head(2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [],
    "source": [
-    "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")"
+    "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database).\n",
+    "# Each entry holds a region code ('region_iso'), a display name ('region_name') and its member ISO3 codes.\n",
+    "regions_data = [\n",
+    "    {\n",
+    "        'region_iso': 'AS',\n",
+    "        'region_name': 'Asia & Pacific',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+    "            \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+    "            \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+    "            \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AF',\n",
+    "        'region_name': 'Africa',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+    "            \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+    "            \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+    "            \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'EU',\n",
+    "        'region_name': 'Europe',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+    "            \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+    "            \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+    "            \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+    "            \"UZB\", \"VAT\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'SA',\n",
+    "        'region_name': 'Latin America & Caribbean',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+    "            \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+    "            \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+    "            \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'PO',\n",
+    "        'region_name': 'Polar',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'NA',\n",
+    "        'region_name': 'North America',\n",
+    "        'country_iso_3s': [\n",
+    "            \"CAN\", \"SPM\", \"USA\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'WA',\n",
+    "        'region_name': 'West Asia',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+    "        'region_name': 'Antarctica',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATA\"\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Build two lookups, both keyed by country ISO3 code:\n",
+    "#   country_to_region -> region code ('region_iso'), name_to_region -> region display name ('region_name')\n",
+    "country_to_region = {}\n",
+    "name_to_region = {}\n",
+    "for region in regions_data:\n",
+    "    for country in region['country_iso_3s']:\n",
+    "        country_to_region[country] = region['region_iso']\n",
+    "        name_to_region[country] = region['region_name']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array(['North America', 'Europe', 'Asia & Pacific',\n",
-       "       'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n",
-       "      dtype=object)"
+       "array(['NA', 'EU', 'AS', 'SA', 'AF', 'WA', 'AT'], dtype=object)"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 49,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "eez['REGIONS'].unique()"
+    "# Map each row's ISO3 code to its region code; codes absent from country_to_region become NaN\n",
+    "eez_new['region'] = eez_new['iso'].map(country_to_region)\n",
+    "eez_new['region'].unique()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 54,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Allocating 16 GB of heap memory\n",
-      "[dissolve2] Removed 127,740 / 218,614 slivers using 0.033+ sqkm variable threshold\n",
-      "[dissolve2] Dissolved 281 features into 7 features\n",
-      "[explode] Exploded 7 features into 83 features\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n"
-     ]
+     "data": {
+      "text/plain": [
+       "array(['North America', 'Europe', 'Asia & Pacific',\n",
+       "       'Latin America & Caribbean', 'Africa', 'West Asia', 'Antartica'],\n",
+       "      dtype=object)"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "# Dissolve by relevant fields: REGIONS\n",
-    "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+    "# name_to_region is keyed by ISO3 code (not by name), so the lookup uses the 'iso' column\n",
+    "eez_new['region_name'] = eez_new['iso'].map(name_to_region)\n",
+    "eez_new['region_name'].unique()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [
     {
@@ -750,177 +880,407 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>REGIONS</th>\n",
-       "      <th>geometry</th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>location_name</th>\n",
+       "      <th>total_marine_area</th>\n",
+       "      <th>location_type</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>North America</td>\n",
-       "      <td>POLYGON ((-16216412.543 -2157569.856, -1621650...</td>\n",
+       "      <td>AGO</td>\n",
+       "      <td>Angola</td>\n",
+       "      <td>498908.577009</td>\n",
+       "      <td>country</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>North America</td>\n",
-       "      <td>POLYGON ((-15875617.974 972834.674, -15887321....</td>\n",
+       "      <td>ALB</td>\n",
+       "      <td>Albania</td>\n",
+       "      <td>12177.287755</td>\n",
+       "      <td>country</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "         REGIONS                                           geometry\n",
-       "0  North America  POLYGON ((-16216412.543 -2157569.856, -1621650...\n",
-       "1  North America  POLYGON ((-15875617.974 972834.674, -15887321...."
+       "  location_id location_name  total_marine_area location_type\n",
+       "0         AGO        Angola      498908.577009       country\n",
+       "1         ALB       Albania       12177.287755       country"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 55,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
-    "regions.head(2)"
+    "# Sum AREA_KM2 per (iso, name_iso) pair to get one total-area row per country, then rename to the output schema.\n",
+    "# NOTE(review): groupby drops NA keys by default, so rows whose name_iso is None (ISO code missing from\n",
+    "# iso_country_mapping) are silently excluded - confirm no country is lost this way.\n",
+    "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+    "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+    "marine_areas['location_type'] = 'country'\n",
+    "marine_areas.head(2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 57,
    "metadata": {},
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>location_name</th>\n",
+       "      <th>total_marine_area</th>\n",
+       "      <th>location_type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AF</td>\n",
+       "      <td>Africa</td>\n",
+       "      <td>1.495538e+07</td>\n",
+       "      <td>region</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>Asia &amp; Pacific</td>\n",
+       "      <td>5.269208e+07</td>\n",
+       "      <td>region</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
       "text/plain": [
-       "<Projected CRS: ESRI:54009>\n",
-       "Name: World_Mollweide\n",
-       "Axis Info [cartesian]:\n",
-       "- [east]: Easting (metre)\n",
-       "- [north]: Northing (metre)\n",
-       "Area of Use:\n",
-       "- undefined\n",
-       "Coordinate Operation:\n",
-       "- name: unnamed\n",
-       "- method: Mollweide\n",
-       "Datum: World Geodetic System 1984\n",
-       "- Ellipsoid: WGS 84\n",
-       "- Prime Meridian: Greenwich"
+       "  location_id   location_name  total_marine_area location_type\n",
+       "0          AF          Africa       1.495538e+07        region\n",
+       "1          AS  Asia & Pacific       5.269208e+07        region"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 57,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "regions.crs"
+    "# Sum AREA_KM2 per (region, region_name) pair to get one total-area row per region, then rename to the output schema.\n",
+    "# Rows with a missing region or region_name are dropped by groupby's default NA handling.\n",
+    "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+    "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+    "regions_areas['location_type'] = 'region'\n",
+    "regions_areas.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Single-row frames (location_id, location_name, total_marine_area, location_type) for the global total and the high seas.\n",
+    "# NOTE(review): 361000000 is hard-coded - presumably the global ocean surface in km2; confirm the source.\n",
+    "# NOTE(review): the High Seas row is tagged location_type 'country' - confirm this is intended.\n",
+    "# The high-seas area is read from the first row of hs['area_km2'].\n",
+    "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n",
+    "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 63,
    "metadata": {},
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location_id</th>\n",
+       "      <th>location_name</th>\n",
+       "      <th>total_marine_area</th>\n",
+       "      <th>location_type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AGO</td>\n",
+       "      <td>Angola</td>\n",
+       "      <td>4.989086e+05</td>\n",
+       "      <td>country</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>ALB</td>\n",
+       "      <td>Albania</td>\n",
+       "      <td>1.217729e+04</td>\n",
+       "      <td>country</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>ARE</td>\n",
+       "      <td>United Arab Emirates</td>\n",
+       "      <td>5.821593e+04</td>\n",
+       "      <td>country</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>ARG</td>\n",
+       "      <td>Argentina</td>\n",
+       "      <td>2.897629e+06</td>\n",
+       "      <td>country</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ATA</td>\n",
+       "      <td>Antarctica</td>\n",
+       "      <td>8.842860e+06</td>\n",
+       "      <td>country</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>162</th>\n",
+       "      <td>NA</td>\n",
+       "      <td>North America</td>\n",
+       "      <td>1.791826e+07</td>\n",
+       "      <td>region</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>163</th>\n",
+       "      <td>SA</td>\n",
+       "      <td>Latin America &amp; Caribbean</td>\n",
+       "      <td>2.107800e+07</td>\n",
+       "      <td>region</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>164</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>West Asia</td>\n",
+       "      <td>1.456969e+06</td>\n",
+       "      <td>region</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>165</th>\n",
+       "      <td>GLOB</td>\n",
+       "      <td>Worldwide</td>\n",
+       "      <td>3.610000e+08</td>\n",
+       "      <td>worldwide</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>166</th>\n",
+       "      <td>ABNJ</td>\n",
+       "      <td>High Seas</td>\n",
+       "      <td>2.128814e+08</td>\n",
+       "      <td>country</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>167 rows × 4 columns</p>\n",
+       "</div>"
+      ],
       "text/plain": [
-       "array(['North America', 'Europe', 'Asia & Pacific',\n",
-       "       'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n",
-       "      dtype=object)"
+       "    location_id              location_name  total_marine_area location_type\n",
+       "0           AGO                     Angola       4.989086e+05       country\n",
+       "1           ALB                    Albania       1.217729e+04       country\n",
+       "2           ARE       United Arab Emirates       5.821593e+04       country\n",
+       "3           ARG                  Argentina       2.897629e+06       country\n",
+       "4           ATA                 Antarctica       8.842860e+06       country\n",
+       "..          ...                        ...                ...           ...\n",
+       "162          NA              North America       1.791826e+07        region\n",
+       "163          SA  Latin America & Caribbean       2.107800e+07        region\n",
+       "164          WA                  West Asia       1.456969e+06        region\n",
+       "165        GLOB                  Worldwide       3.610000e+08     worldwide\n",
+       "166        ABNJ                  High Seas       2.128814e+08       country\n",
+       "\n",
+       "[167 rows x 4 columns]"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 63,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "regions['REGIONS'].unique()"
+    "# Stack the country, region, global and high-seas rows into one table with a fresh 0..n-1 index\n",
+    "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n",
+    "marine_areas2\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 64,
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_27590/1686611470.py:1: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartic'\n"
-     ]
+     "data": {
+      "text/plain": [
+       "array(['country', 'region', 'worldwide'], dtype=object)"
+      ]
+     },
+     "execution_count": 64,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Calculate area of each region\n",
-    "regions['AREA_KM2']= regions.geometry.area/ 1000000"
+    "marine_areas2['location_type'].unique()"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 29,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")"
+    "### Clean WDPA dataset"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Allocating 16 GB of heap memory\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n",
-      "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n"
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[1;32m/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb Cell 42\u001b[0m line \u001b[0;36m2\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb#X56sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m# Read WDPA data\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb#X56sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m poly1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39;49mread_file(path_in \u001b[39m+\u001b[39;49m \u001b[39m\"\u001b[39;49m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb#X56sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m point1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb#X56sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m poly2 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39m\u001b[39m\"\u001b[39m)\n",
+      "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:281\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m    278\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    279\u001b[0m         path_or_bytes \u001b[39m=\u001b[39m filename\n\u001b[0;32m--> 281\u001b[0m     \u001b[39mreturn\u001b[39;00m _read_file_fiona(\n\u001b[1;32m    282\u001b[0m         path_or_bytes, from_bytes, bbox\u001b[39m=\u001b[39;49mbbox, mask\u001b[39m=\u001b[39;49mmask, rows\u001b[39m=\u001b[39;49mrows, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m    283\u001b[0m     )\n\u001b[1;32m    285\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    286\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39munknown engine \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mengine\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n",
+      "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:379\u001b[0m, in \u001b[0;36m_read_file_fiona\u001b[0;34m(path_or_bytes, from_bytes, bbox, mask, rows, where, **kwargs)\u001b[0m\n\u001b[1;32m    375\u001b[0m     df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(\n\u001b[1;32m    376\u001b[0m         [record[\u001b[39m\"\u001b[39m\u001b[39mproperties\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mfor\u001b[39;00m record \u001b[39min\u001b[39;00m f_filt], columns\u001b[39m=\u001b[39mcolumns\n\u001b[1;32m    377\u001b[0m     )\n\u001b[1;32m    378\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 379\u001b[0m     df \u001b[39m=\u001b[39m GeoDataFrame\u001b[39m.\u001b[39;49mfrom_features(\n\u001b[1;32m    380\u001b[0m         f_filt, crs\u001b[39m=\u001b[39;49mcrs, columns\u001b[39m=\u001b[39;49mcolumns \u001b[39m+\u001b[39;49m [\u001b[39m\"\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n\u001b[1;32m    381\u001b[0m     )\n\u001b[1;32m    382\u001b[0m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m datetime_fields:\n\u001b[1;32m    383\u001b[0m     as_dt \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mto_datetime(df[k], errors\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m)\n",
+      "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/geodataframe.py:635\u001b[0m, in \u001b[0;36mGeoDataFrame.from_features\u001b[0;34m(cls, features, crs, columns)\u001b[0m\n\u001b[1;32m    632\u001b[0m     features_lst \u001b[39m=\u001b[39m features\n\u001b[1;32m    634\u001b[0m rows \u001b[39m=\u001b[39m []\n\u001b[0;32m--> 635\u001b[0m \u001b[39mfor\u001b[39;00m feature \u001b[39min\u001b[39;00m features_lst:\n\u001b[1;32m    636\u001b[0m     \u001b[39m# load geometry\u001b[39;00m\n\u001b[1;32m    637\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(feature, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[1;32m    638\u001b[0m         feature \u001b[39m=\u001b[39m feature\u001b[39m.\u001b[39m__geo_interface__\n",
+      "File \u001b[0;32mfiona/ogrext.pyx:1739\u001b[0m, in \u001b[0;36mfiona.ogrext.Iterator.__next__\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/ogrext.pyx:389\u001b[0m, in \u001b[0;36mfiona.ogrext.FeatureBuilder.build\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:193\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build_from_feature\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:249\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:169\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildMultiPolygon\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:243\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:157\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildPolygon\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mfiona/_geometry.pyx:259\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/fiona/model.py:201\u001b[0m, in \u001b[0;36mGeometry.from_dict\u001b[0;34m(cls, ob, **kwargs)\u001b[0m\n\u001b[1;32m    196\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_delegate \u001b[39m=\u001b[39m _Geometry(\n\u001b[1;32m    197\u001b[0m         coordinates\u001b[39m=\u001b[39mcoordinates, \u001b[39mtype\u001b[39m\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m, geometries\u001b[39m=\u001b[39mgeometries\n\u001b[1;32m    198\u001b[0m     )\n\u001b[1;32m    199\u001b[0m     \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mdata)\n\u001b[0;32m--> 201\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m    202\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfrom_dict\u001b[39m(\u001b[39mcls\u001b[39m, ob\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m    203\u001b[0m     \u001b[39mif\u001b[39;00m ob \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    204\u001b[0m         data \u001b[39m=\u001b[39m \u001b[39mdict\u001b[39m(\u001b[39mgetattr\u001b[39m(ob, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m, ob))\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
      ]
     }
    ],
    "source": [
-    "# Reproject to 4626\n",
-    "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+    "# Read WDPA data\n",
+    "poly1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+    "point1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+    "poly2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+    "point2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+    "poly3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+    "point3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+    "dataframes = [poly1, point1, poly2, point2, poly3, point3]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Convert points to polygons and merge all WDPA layers into one dataset**"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array(['North America', 'Europe', 'Asia & Pacific',\n",
-       "       'Latin America & Caribbean', 'Africa', 'West Asia', 'Antarctica'],\n",
-       "      dtype=object)"
+       "18613"
       ]
      },
-     "execution_count": 32,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
     }
    ],
    "source": [
-    "regions['REGIONS'].unique()"
+    "# Calculate the radius of a circle whose area equals rep_area\n",
+    "def calculate_radius(rep_area):\n",
+    "    return (rep_area / 3.14159265358979323846) ** 0.5\n",
+    "\n",
+    "# Process only the point dataframes (indices 1, 3 and 5 of `dataframes`)\n",
+    "for idx in [1, 3, 5]:\n",
+    "    # Get the dataframe at the specified index\n",
+    "    gdf = dataframes[idx]\n",
+    "\n",
+    "    # Reproject to Mollweide (ESRI:54009)\n",
+    "    gdf = gdf.to_crs('ESRI:54009')\n",
+    "\n",
+    "    # Transform the reported area from square kilometers to square meters\n",
+    "    gdf['REP_AREA_m'] = gdf['REP_AREA'] * 1000000\n",
+    "\n",
+    "    # Create the \"radius\" column by applying the calculate_radius function to the \"REP_AREA_m\" column\n",
+    "    gdf['radius'] = gdf['REP_AREA_m'].apply(calculate_radius)\n",
+    "\n",
+    "    # Create buffers around the points using the \"radius\" column\n",
+    "    gdf_buffered = gdf.copy()\n",
+    "    gdf_buffered['geometry'] = gdf.apply(lambda row: row.geometry.buffer(row['radius']), axis=1)\n",
+    "\n",
+    "    # Reproject back to WGS84\n",
+    "    gdf_buffered = gdf_buffered.to_crs('EPSG:4326')\n",
+    "\n",
+    "    # Remove rows with invalid geometries\n",
+    "    gdf_buffered = gdf_buffered[gdf_buffered['geometry'].is_valid]\n",
+    "    \n",
+    "    # Update the original dataframe with the buffered data\n",
+    "    dataframes[idx] = gdf_buffered\n",
+    "\n",
+    "# Merge all dataframes\n",
+    "merged_mpa_all = pd.concat(dataframes)\n",
+    "len(merged_mpa_all)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the wdpa dataframe as a shapefile\n",
+    "merged_mpa_all.to_file(path_out + \"/wdpa/merged_wdpa_all.shp\")"
    ]
   }
  ],
diff --git a/data/notebooks/location_areas.ipynb b/data/notebooks/location_areas.ipynb
new file mode 100644
index 00000000..c9c933d7
--- /dev/null
+++ b/data/notebooks/location_areas.ipynb
@@ -0,0 +1,574 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import geopandas as gpd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n",
+    "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create locations table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n",
+    "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\")\n",
+    "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a new column \"iso\" that defaults to ISO_SOV1. Where ISO_SOV2 is not null, use \"ISO_SOV1;ISO_SOV2\"; where ISO_SOV3 is not null, use \"ISO_SOV1;ISO_SOV2;ISO_SOV3\"\n",
+    "eez['iso'] = eez['ISO_SOV1']\n",
+    "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n",
+    "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(337, 33)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create a mask for rows with multiple values in 'iso'\n",
+    "mask = eez['iso'].str.contains(';', na=False)\n",
+    "\n",
+    "# Split the 'iso' values and create separate rows only for rows with multiple values\n",
+    "split_rows = eez[mask].copy()\n",
+    "split_rows['iso'] = split_rows['iso'].str.split(';')\n",
+    "split_rows = split_rows.explode('iso')\n",
+    "\n",
+    "# Keep rows with single values in 'iso'\n",
+    "single_value_rows = eez[~mask]\n",
+    "\n",
+    "# Concatenate the exploded rows with the single value rows\n",
+    "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n",
+    "\n",
+    "eez_new.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iso_country_mapping = {\n",
+    "    'USA': 'United States',\n",
+    "    'GBR': 'United Kingdom',\n",
+    "    'NZL': 'New Zealand',\n",
+    "    'FRA': 'France',\n",
+    "    'WSM': 'Samoa',\n",
+    "    'TON': 'Tonga',\n",
+    "    'CHL': 'Chile',\n",
+    "    'URY': 'Uruguay',\n",
+    "    'PER': 'Peru',\n",
+    "    'BRA': 'Brazil',\n",
+    "    'KIR': 'Kiribati',\n",
+    "    'ARG': 'Argentina',\n",
+    "    'AUS': 'Australia',\n",
+    "    'COM': 'Comoros',\n",
+    "    'MDG': 'Madagascar',\n",
+    "    'ZAF': 'South Africa',\n",
+    "    'MUS': 'Mauritius',\n",
+    "    'VUT': 'Vanuatu',\n",
+    "    'NAM': 'Namibia',\n",
+    "    'TLS': 'Timor-Leste',\n",
+    "    'COG': 'Republic of the Congo',\n",
+    "    'AGO': 'Angola',\n",
+    "    'MOZ': 'Mozambique',\n",
+    "    'KEN': 'Kenya',\n",
+    "    'PNG': 'Papua New Guinea',\n",
+    "    'TZA': 'Tanzania',\n",
+    "    'SLB': 'Solomon Islands',\n",
+    "    'SYC': 'Seychelles',\n",
+    "    'COD': 'Democratic Republic of the Congo',\n",
+    "    'ATG': 'Antigua and Barbuda',\n",
+    "    'NLD': 'Netherlands',\n",
+    "    'PRT': 'Portugal',\n",
+    "    'BHS': 'The Bahamas',\n",
+    "    'BRB': 'Barbados',\n",
+    "    'MEX': 'Mexico',\n",
+    "    'CPV': 'Cape Verde',\n",
+    "    'ESP': 'Spain',\n",
+    "    'PAN': 'Panama',\n",
+    "    'CRI': 'Costa Rica',\n",
+    "    'DMA': 'Dominica',\n",
+    "    'DOM': 'Dominican Republic',\n",
+    "    'GTM': 'Guatemala',\n",
+    "    'DNK': 'Denmark',\n",
+    "    'GMB': 'Gambia',\n",
+    "    'GIB': 'Gibraltar',\n",
+    "    'GRD': 'Grenada',\n",
+    "    'SLE': 'Sierra Leone',\n",
+    "    'ISL': 'Iceland',\n",
+    "    'JAM': 'Jamaica',\n",
+    "    'MRT': 'Mauritania',\n",
+    "    'HTI': 'Haiti',\n",
+    "    'KNA': 'Saint Kitts and Nevis',\n",
+    "    'LCA': 'Saint Lucia',\n",
+    "    'VCT': 'Saint Vincent and the Grenadines',\n",
+    "    'TTO': 'Trinidad and Tobago',\n",
+    "    'SLV': 'El Salvador',\n",
+    "    'BLZ': 'Belize',\n",
+    "    'CUB': 'Cuba',\n",
+    "    'SEN': 'Senegal',\n",
+    "    'VEN': 'Venezuela',\n",
+    "    'CAN': 'Canada',\n",
+    "    'NIC': 'Nicaragua',\n",
+    "    'GUY': 'Guyana',\n",
+    "    'COL': 'Colombia',\n",
+    "    'IRL': 'Ireland',\n",
+    "    'GNB': 'Guinea-Bissau',\n",
+    "    'GIN': 'Guinea',\n",
+    "    'CIV': 'Ivory Coast',\n",
+    "    'LBR': 'Liberia',\n",
+    "    'HND': 'Honduras',\n",
+    "    'ECU': 'Ecuador',\n",
+    "    'ESH': 'Western Sahara',\n",
+    "    'SUR': 'Suriname',\n",
+    "    'MAR': 'Morocco',\n",
+    "    'ARE': 'United Arab Emirates',\n",
+    "    'CYP': 'Cyprus',\n",
+    "    'ERI': 'Eritrea',\n",
+    "    'EGY': 'Egypt',\n",
+    "    'GEO': 'Georgia',\n",
+    "    'IRN': 'Iran',\n",
+    "    'LBN': 'Lebanon',\n",
+    "    'LBY': 'Libya',\n",
+    "    'MLT': 'Malta',\n",
+    "    'OMN': 'Oman',\n",
+    "    'SAU': 'Saudi Arabia',\n",
+    "    'LKA': 'Sri Lanka',\n",
+    "    'SDN': 'Sudan',\n",
+    "    'SYR': 'Syria',\n",
+    "    'TGO': 'Togo',\n",
+    "    'GRC': 'Greece',\n",
+    "    'TUR': 'Turkey',\n",
+    "    'MCO': 'Monaco',\n",
+    "    'TUN': 'Tunisia',\n",
+    "    'MNE': 'Montenegro',\n",
+    "    'ALB': 'Albania',\n",
+    "    'BGR': 'Bulgaria',\n",
+    "    'PSE': 'Palestine',\n",
+    "    'KWT': 'Kuwait',\n",
+    "    'IRQ': 'Iraq',\n",
+    "    'BHR': 'Bahrain',\n",
+    "    'QAT': 'Qatar',\n",
+    "    'YEM': 'Yemen',\n",
+    "    'ISR': 'Israel',\n",
+    "    'JOR': 'Jordan',\n",
+    "    'DJI': 'Djibouti',\n",
+    "    'BGD': 'Bangladesh',\n",
+    "    'NGA': 'Nigeria',\n",
+    "    'CMR': 'Cameroon',\n",
+    "    'STP': 'São Tomé and Príncipe',\n",
+    "    'BIH': 'Bosnia and Herzegovina',\n",
+    "    'MHL': 'Marshall Islands',\n",
+    "    'PLW': 'Palau',\n",
+    "    'PHL': 'Philippines',\n",
+    "    'TWN': 'Taiwan',\n",
+    "    'SGP': 'Singapore',\n",
+    "    'THA': 'Thailand',\n",
+    "    'VNM': 'Vietnam',\n",
+    "    'KOR': 'South Korea',\n",
+    "    'BRN': 'Brunei',\n",
+    "    'PRK': 'North Korea',\n",
+    "    'KHM': 'Cambodia',\n",
+    "    'CHN': 'China',\n",
+    "    'EST': 'Estonia',\n",
+    "    'FIN': 'Finland',\n",
+    "    'SWE': 'Sweden',\n",
+    "    'LTU': 'Lithuania',\n",
+    "    'NOR': 'Norway',\n",
+    "    'BEL': 'Belgium',\n",
+    "    'DEU': 'Germany',\n",
+    "    'LVA': 'Latvia',\n",
+    "    'HRV': 'Croatia',\n",
+    "    'ITA': 'Italy',\n",
+    "    'UKR': 'Ukraine',\n",
+    "    'ROU': 'Romania',\n",
+    "    'JPN': 'Japan',\n",
+    "    'IND': 'India',\n",
+    "    'PAK': 'Pakistan',\n",
+    "    'TKM': 'Turkmenistan',\n",
+    "    'AZE': 'Azerbaijan',\n",
+    "    'KAZ': 'Kazakhstan',\n",
+    "    'MMR': 'Myanmar',\n",
+    "    'POL': 'Poland',\n",
+    "    'BEN': 'Benin',\n",
+    "    'SVN': 'Slovenia',\n",
+    "    'MYS': 'Malaysia',\n",
+    "    'ATA': 'Antarctica',\n",
+    "    'TUV': 'Tuvalu',\n",
+    "    'FJI': 'Fiji',\n",
+    "    'FSM': 'Micronesia',\n",
+    "    'GNQ': 'Equatorial Guinea',\n",
+    "    'MDV': 'Maldives',\n",
+    "    'SOM': 'Somalia',\n",
+    "    'NRU': 'Nauru',\n",
+    "    'GAB': 'Gabon',\n",
+    "    'IDN': 'Indonesia',\n",
+    "    'DZA': 'Algeria',\n",
+    "    'GHA': 'Ghana',\n",
+    "    'RUS': 'Russia'\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_name(country):\n",
+    "    return iso_country_mapping.get(country, None)\n",
+    "\n",
+    "# Apply the function to create the 'name_iso' column\n",
+    "eez_new['name_iso'] = eez_new['iso'].apply(get_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+    "regions_data = [\n",
+    "    {\n",
+    "        'region_iso': 'AS',\n",
+    "        'region_name': 'Asia & Pacific',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+    "            \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+    "            \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+    "            \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AF',\n",
+    "        'region_name': 'Africa',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+    "            \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+    "            \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+    "            \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'EU',\n",
+    "        'region_name': 'Europe',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+    "            \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+    "            \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+    "            \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+    "            \"UZB\", \"VAT\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'SA',\n",
+    "        'region_name': 'Latin America & Caribbean',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+    "            \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+    "            \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+    "            \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'PO',\n",
+    "        'region_name': 'Polar',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'NA',\n",
+    "        'region_name': 'North America',\n",
+    "        'country_iso_3s': [\n",
+    "            \"CAN\", \"SPM\", \"USA\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'WA',\n",
+    "        'region_name': 'West Asia',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+    "        'region_name': 'Antartica',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATA\"\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Build two lookups keyed by country ISO3 code: one to the region code and one to the region name\n",
+    "country_to_region = {}\n",
+    "name_to_region = {}\n",
+    "for region in regions_data:\n",
+    "    for country in region['country_iso_3s']:\n",
+    "        country_to_region[country] = region['region_iso']\n",
+    "        name_to_region[country] = region['region_name']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eez_new['region'] = eez_new['iso'].map(country_to_region)\n",
+    "eez_new['region_name'] = eez_new['iso'].map(name_to_region)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+    "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+    "marine_areas['location_type'] = 'country'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n",
+    "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n",
+    "regions_areas['location_type'] = 'region'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n",
+    "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate the country, region, global and high-seas rows into a single table\n",
+    "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the table as csv\n",
+    "marine_areas2.to_csv(path_out + \"/tables/locations.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create region_locations table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>region_id</th>\n",
+       "      <th>location_id</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>AFG</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>ASM</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>AUS</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>BGD</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>AS</td>\n",
+       "      <td>BRN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>244</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>QAT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>245</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>SAU</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>246</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>SYR</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>247</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>YEM</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>248</th>\n",
+       "      <td>AT</td>\n",
+       "      <td>ATA</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>249 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    region_id location_id\n",
+       "0          AS         AFG\n",
+       "1          AS         ASM\n",
+       "2          AS         AUS\n",
+       "3          AS         BGD\n",
+       "4          AS         BRN\n",
+       "..        ...         ...\n",
+       "244        WA         QAT\n",
+       "245        WA         SAU\n",
+       "246        WA         SYR\n",
+       "247        WA         YEM\n",
+       "248        AT         ATA\n",
+       "\n",
+       "[249 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regions_df = pd.DataFrame([{'region_id': data['region_iso'], 'location_id': iso} for data in regions_data for iso in data['country_iso_3s']])\n",
+    "regions_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regions_df.to_csv(path_out + '/tables/region_locations.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "skytruth",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/mpas_table.ipynb b/data/notebooks/mpas_table.ipynb
new file mode 100644
index 00000000..b846ab4c
--- /dev/null
+++ b/data/notebooks/mpas_table.ipynb
@@ -0,0 +1,143 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Set up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import geopandas as gpd\n",
+    "import pandas as pd\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+    "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Read relevant datasets: MPAtlas, WDPA, and ProtectedSeas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read mpatlas data\n",
+    "mpatlas = gpd.read_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\")\n",
+    "mpatlas = mpatlas.drop_duplicates(subset=['wdpa_id', 'designation','location_id','establishment_stage', 'protection_level','year'], keep='first')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ps = gpd.read_file(path_out + \"/protectedseas/protectedseas.shp\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wdpa = gpd.read_file(path_out + \"/wdpa/merged_wdpa_all.shp\")\n",
+    "wdpa = wdpa[['WDPA_PID', 'NAME','PA_DEF', 'GIS_M_AREA','PARENT_ISO']].rename(columns={'WDPA_PID': 'wdpa_id', 'NAME': 'name', 'PA_DEF':'protection_type', 'GIS_M_AREA': 'area', 'PARENT_ISO': 'location_id'})\n",
+    "wdpa['protection_type'] = wdpa['protection_type'].astype(int).replace({1: 'mpa', 0: 'oecm'})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Combine information from different tables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add protection level and area info from mpatlas and protectedseas to the wdpa df\n",
+    "table_prot = wdpa.merge(mpatlas[['wdpa_id','area_km2','protection_level']], on='wdpa_id', how='left').rename(columns={'area_km2':'area_mpatlas','protection_level': 'mpatlas_prot_lvl'})\n",
+    "table_prot = table_prot.merge(ps[['wdpa_id','FPS_cat', 'total_area']], on='wdpa_id', how='left').rename(columns={'FPS_cat': 'fpl', 'total_area': 'area_ps'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table_prot['area'] = table_prot['area_mpatlas'].combine_first(table_prot['area_ps']).combine_first(table_prot['area'])\n",
+    "table_prot = table_prot.drop(columns=['area_mpatlas', 'area_ps'])\n",
+    "table_prot = table_prot.drop(columns={'name', 'protection_type'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add establishment info to wdpa df\n",
+    "table_est = wdpa.merge(mpatlas[['wdpa_id','establishment_stage', 'year']], on='wdpa_id', how='left')\n",
+    "table_est = table_est.drop(columns={'area', 'location_id'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save tables as csv\n",
+    "table_prot.to_csv(path_out + \"/tables/mpas_table.csv\", index=False)\n",
+    "table_est.to_csv(path_out + \"/tables/mpas_table_establishment.csv\", index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "skytruth",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/mpatlas_stats.ipynb b/data/notebooks/mpatlas_stats.ipynb
new file mode 100644
index 00000000..afdf6408
--- /dev/null
+++ b/data/notebooks/mpatlas_stats.ipynb
@@ -0,0 +1,349 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Set up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import geopandas as gpd\n",
+    "import pandas as pd\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+    "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Read and prepare data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read data from MPAtlas\n",
+    "mpatlas = gpd.read_file(path_in + \"/mpatlas_assess_zone.geojson\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fill missing wdpa_pid with the wdpa_id\n",
+    "mpatlas['wdpa_pid'] = mpatlas['wdpa_pid'].fillna(mpatlas['wdpa_id'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create new column with protection level reclassified\n",
+    "def map_protection_level(value):\n",
+    "    if value in [\"full\", \"high\"]:\n",
+    "        return \"fully or highly protected\"\n",
+    "    else:\n",
+    "        return \"less protected or unknown\"\n",
+    "\n",
+    "# Create 'protection_level' by applying map_protection_level to 'protection_mpaguide_level'\n",
+    "mpatlas['protection_level'] = mpatlas['protection_mpaguide_level'].apply(map_protection_level)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# replace proposed/committed with proposed or committed\n",
+    "mpatlas['establishment_stage'] = mpatlas['establishment_stage'].replace(['proposed/committed'], 'proposed or committed')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Take only year from 'proposed_date', 'designated_date', 'implemented_date'\n",
+    "mpatlas['proposed_date'] = mpatlas['proposed_date'].str[:4].astype('Int64')\n",
+    "mpatlas['designated_date'] = mpatlas['designated_date'].str[:4].astype('Int64')\n",
+    "mpatlas['implemented_date'] = mpatlas['implemented_date'].str[:4].astype('Int64')\n",
+    "\n",
+    "# Create column 'year' with the most recent year from 'proposed_date', 'designated_date', 'implemented_date'\n",
+    "mpatlas['year'] = mpatlas[['proposed_date', 'designated_date', 'implemented_date']].max(axis=1)\n",
+    "\n",
+    "# Convert year from Int64 to int so it can be saved later (Int64 not allowed): missing years are filled with 0 for the cast, then replaced with pd.NaT\n",
+    "mpatlas['year'].fillna(0, inplace=True)\n",
+    "mpatlas['year'] = mpatlas['year'].astype(int)\n",
+    "mpatlas['year'] = mpatlas['year'].replace(0, pd.NaT)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate area in km2\n",
+    "mpatlas.to_crs('ESRI:54009', inplace=True)\n",
+    "mpatlas['area_km2'] = mpatlas['geometry'].area / 10**6\n",
+    "mpatlas.to_crs('EPSG:4326', inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep relevant columns \n",
+    "mpatlas2 = mpatlas[['wdpa_pid', 'name', 'designation', 'sovereign', 'area_km2', 'establishment_stage', 'protection_level', 'year', 'geometry']].rename(columns={'sovereign': 'location_id', 'wdpa_pid': 'wdpa_id'})\n",
+    "\n",
+    "# Save as geojson (to keep full names)\n",
+    "mpatlas2.to_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\", driver='GeoJSON')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split rows that list multiple countries (separated by ';' or ':') into one row per country\n",
+    "mpatlas_iso = mpatlas2.copy()\n",
+    "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(';')\n",
+    "mpatlas_iso = mpatlas_iso.explode('location_id')\n",
+    "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(':')\n",
+    "mpatlas_iso = mpatlas_iso.explode('location_id')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Global stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate global area per protection level\n",
+    "prot_global = mpatlas2.groupby('protection_level').agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+    "prot_global['location_id'] = 'GLOB'\n",
+    "prot_global['last_updated'] = datetime.now().year"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate global area per establishment stage\n",
+    "stage_global = mpatlas2.groupby(['establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+    "stage_global['location_id'] = 'GLOB'\n",
+    "stage_global['last_updated'] = datetime.now().year"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Country stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prot_iso = mpatlas_iso.groupby(['location_id', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+    "prot_iso['last_updated'] = datetime.now().year"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stage_iso = mpatlas_iso.groupby(['location_id', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n",
+    "stage_iso['last_updated'] = datetime.now().year"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Region stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+    "regions_data = [\n",
+    "    {\n",
+    "        'region_iso': 'AS',\n",
+    "        'region_name': 'Asia & Pacific',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+    "            \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+    "            \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+    "            \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AF',\n",
+    "        'region_name': 'Africa',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+    "            \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+    "            \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+    "            \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'EU',\n",
+    "        'region_name': 'Europe',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+    "            \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+    "            \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+    "            \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+    "            \"UZB\", \"VAT\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'SA',\n",
+    "        'region_name': 'Latin America & Caribbean',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+    "            \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+    "            \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+    "            \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'PO',\n",
+    "        'region_name': 'Polar',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'NA',\n",
+    "        'region_name': 'North America',\n",
+    "        'country_iso_3s': [\n",
+    "            \"CAN\", \"SPM\", \"USA\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'WA',\n",
+    "        'region_name': 'West Asia',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Convert the region data to a dictionary that maps each country ISO3 code to its region code ('region_iso')\n",
+    "country_to_region = {}\n",
+    "for region in regions_data:\n",
+    "    for country in region['country_iso_3s']:\n",
+    "        country_to_region[country] = region['region_iso']\n",
+    "\n",
+    "# Add region column to mpatlas_iso\n",
+    "mpatlas_iso['regions'] = mpatlas_iso['location_id'].map(country_to_region)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate area per protection level per region\n",
+    "prot_region = mpatlas_iso.groupby(['regions', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n",
+    "prot_region['last_updated'] = datetime.now().year"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate area per establishment stage per region\n",
+    "stage_region = mpatlas_iso.groupby(['regions', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n",
+    "stage_region['last_updated'] = datetime.now().year"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate all dataframes for protection stats and establishment stage stats\n",
+    "prot = pd.concat([prot_iso, prot_global, prot_region], ignore_index=True)\n",
+    "stage = pd.concat([stage_iso, stage_global, stage_region], ignore_index=True)\n",
+    "prot.to_csv(path_out + \"/tables/mpatlas_protection_level.csv\", index=False)\n",
+    "stage.to_csv(path_out + \"/tables/mpatlas_establishment_stage.csv\", index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "skytruth",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb
new file mode 100644
index 00000000..54ae3293
--- /dev/null
+++ b/data/notebooks/protectedseas.ipynb
@@ -0,0 +1,546 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Set up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import geopandas as gpd\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n",
+    "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Processing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import shp containing geometries\n",
+    "ps = gpd.read_file(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import csv containing information\n",
+    "protectedseas = pd.read_csv(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only rows where wdpa_id is not null and is not '0'\n",
+    "protectedseas = protectedseas[protectedseas['wdpa_id'].notna()]\n",
+    "protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Join csv with shapefile and keep only wdpa geometries\n",
+    "ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only columns of interest\n",
+    "ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]\n",
+    "ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ProtectedSeas only provides country names, not country codes. We need to add country codes to the dataframe\n",
+    "country_iso_dict = {\n",
+    "    'Antigua and Barbuda': 'ATG',\n",
+    "    'USA': 'USA',\n",
+    "    'Albania': 'ALB',\n",
+    "    'Netherlands Antilles': 'NLD',\n",
+    "    'United Arab Emirates': 'ARE',\n",
+    "    'Argentina': 'ARG',\n",
+    "    'France': 'FRA',\n",
+    "    'Australia': 'AUS',\n",
+    "    'Barbados': 'BRB',\n",
+    "    'Belgium': 'BEL',\n",
+    "    'Bangladesh': 'BGD',\n",
+    "    'Bulgaria': 'BGR',\n",
+    "    'Belize': 'BLZ',\n",
+    "    'Brazil': 'BRA',\n",
+    "    'Bahamas': 'BHS',\n",
+    "    'British Virgin Islands': 'GBR',\n",
+    "    'Canada': 'CAN',\n",
+    "    'Chile': 'CHL',\n",
+    "    'Cameroon': 'CMR',\n",
+    "    'Colombia': 'COL',\n",
+    "    'Comoros': 'COM',\n",
+    "    'Costa Rica': 'CRI',\n",
+    "    'Cuba': 'CUB',\n",
+    "    'Cyprus': 'CYP',\n",
+    "    'Germany': 'DEU',\n",
+    "    'Djibouti': 'DJI',\n",
+    "    'Djbouti': 'DJI',\n",
+    "    'Dominica': 'DMA',\n",
+    "    'Denmark': 'DNK',\n",
+    "    'Dominican Republic': 'DOM',\n",
+    "    'Algeria': 'DZA',\n",
+    "    'Ecuador': 'ECU',\n",
+    "    'Egypt': 'EGY',\n",
+    "    'Spain': 'ESP',\n",
+    "    'Estonia': 'EST',\n",
+    "    'Finland': 'FIN',\n",
+    "    'France, Italy, Monaco': 'FRA;ITA;MCO',\n",
+    "    'French Antilles': 'FRA',\n",
+    "    'Gabon': 'GAB',\n",
+    "    'United Kingdom': 'GBR',\n",
+    "    'Grenada': 'GRD',\n",
+    "    'Ghana': 'GHA',\n",
+    "    'Gibraltar': 'GBR',\n",
+    "    'Guinea': 'GIN',\n",
+    "    'The Gambia': 'GMB',\n",
+    "    'Guinea Bissau': 'GNB',\n",
+    "    'Greece': 'GRC',\n",
+    "    'Guatemala': 'GTM',\n",
+    "    'French Guyana': 'FRA',\n",
+    "    'Honduras': 'HND',\n",
+    "    'Croatia': 'HRV',\n",
+    "    'Indonesia': 'IDN',\n",
+    "    'Indonesia ': 'IDN',\n",
+    "    'India': 'IND',\n",
+    "    'Ireland': 'IRL',\n",
+    "    'Iceland': 'ISL',\n",
+    "    'Israel': 'ISR',\n",
+    "    'Italy': 'ITA',\n",
+    "    'Jamaica': 'JAM',\n",
+    "    'Jordan': 'JOR',\n",
+    "    'Japan': 'JPN',\n",
+    "    'Kenya': 'KEN',\n",
+    "    'Cambodia': 'KHM',\n",
+    "    'South Korea': 'KOR',\n",
+    "    'Cayman Islands': 'GBR',\n",
+    "    'Lebanon': 'LBN',\n",
+    "    'Liberia': 'LBR',\n",
+    "    'Saint Lucia': 'LCA',\n",
+    "    'Sri Lanka': 'LKA',\n",
+    "    'Lithuania': 'LTU',\n",
+    "    'Latvia': 'LVA',\n",
+    "    'Morocco': 'MAR',\n",
+    "    'Monaco': 'MCO',\n",
+    "    'Madagascar': 'MDG',\n",
+    "    'Republic of Maldives': 'MDV',\n",
+    "    'Malta': 'MLT',\n",
+    "    'Myanmar': 'MMR',\n",
+    "    'Mozambique': 'MOZ',\n",
+    "    'Mauritania': 'MRT',\n",
+    "    'Malaysia': 'MYS',\n",
+    "    'Namibia': 'NAM',\n",
+    "    'New Caledonia': 'FRA',\n",
+    "    'Niue': 'NIU',\n",
+    "    'The Netherlands': 'NLD',\n",
+    "    'Netherlands': 'NLD',\n",
+    "    'Norway': 'NOR',\n",
+    "    'New Zealand': 'NZL',\n",
+    "    'Panama': 'PAN',\n",
+    "    'British Overseas Territory - Pitcairn': 'GBR',\n",
+    "    'Peru': 'PER',\n",
+    "    'Philippines': 'PHL',\n",
+    "    'Republic of Palau': 'PLW',\n",
+    "    'Poland': 'POL',\n",
+    "    'Portugal': 'PRT',\n",
+    "    'Qatar': 'QAT',\n",
+    "    'Russia': 'RUS',\n",
+    "    'Senegal': 'SEN',\n",
+    "    'Saint Helena, Ascension and Tristan da Cunha Overseas Territory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n",
+    "    'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n",
+    "    'Solomon Islands': 'SLB',\n",
+    "    'El Salvador': 'SLV',\n",
+    "    'São Tomé and Príncipe': 'STP',\n",
+    "    'Suriname': 'SUR',\n",
+    "    'Slovenia': 'SVN',\n",
+    "    'Sweden': 'SWE',\n",
+    "    'Seychelles': 'SYC',\n",
+    "    'Turks and Caicos Islands': 'GBR',\n",
+    "    'Thailand': 'THA',\n",
+    "    'East Timor': 'TLS',\n",
+    "    'Tonga': 'TON',\n",
+    "    'Trinidad and Tobago': 'TTO',\n",
+    "    'Tunisia': 'TUN',\n",
+    "    'Tanzania': 'TZA',\n",
+    "    'Uruguay': 'URY',\n",
+    "    'Saint Vincent and the Grenadines': 'VCT',\n",
+    "    'Vietnam': 'VNM',\n",
+    "    'Yemen': 'YEM',\n",
+    "    'South Africa': 'ZAF',\n",
+    "    'USA; Haiti; Jamaica': 'USA;HTI;JAM',\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add country code to the dataframe\n",
+    "def get_parent_iso(country):\n",
+    "    return country_iso_dict.get(country, None)\n",
+    "\n",
+    "# Apply the function to create the 'parent_iso' column\n",
+    "ps_gdf['parent_iso'] = ps_gdf['country'].apply(get_parent_iso)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# There is one row with no parent_iso, so give it the corresponding country code (note: this assigns 'FRA' to every row where parent_iso is missing)\n",
+    "ps_gdf.loc[ps_gdf['parent_iso'].isna(), 'parent_iso'] = 'FRA'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>site_id</th>\n",
+       "      <th>site_name</th>\n",
+       "      <th>country</th>\n",
+       "      <th>wdpa_id</th>\n",
+       "      <th>FPS</th>\n",
+       "      <th>total_area</th>\n",
+       "      <th>geometry</th>\n",
+       "      <th>parent_iso</th>\n",
+       "      <th>FPS_cat</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AIAG10</td>\n",
+       "      <td>Low Bay Sanctuary</td>\n",
+       "      <td>Antigua and Barbuda</td>\n",
+       "      <td>555587197</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>48.321285</td>\n",
+       "      <td>POLYGON ((-61.91090 17.57960, -61.91096 17.579...</td>\n",
+       "      <td>ATG</td>\n",
+       "      <td>highly</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AIAG11</td>\n",
+       "      <td>Nelson's Dockyard National Park</td>\n",
+       "      <td>Antigua and Barbuda</td>\n",
+       "      <td>555587192</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>40.705369</td>\n",
+       "      <td>POLYGON ((-61.75807 17.03541, -61.73745 17.021...</td>\n",
+       "      <td>ATG</td>\n",
+       "      <td>less</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  site_id                        site_name              country    wdpa_id  \\\n",
+       "0  AIAG10                Low Bay Sanctuary  Antigua and Barbuda  555587197   \n",
+       "1  AIAG11  Nelson's Dockyard National Park  Antigua and Barbuda  555587192   \n",
+       "\n",
+       "   FPS  total_area                                           geometry  \\\n",
+       "0  5.0   48.321285  POLYGON ((-61.91090 17.57960, -61.91096 17.579...   \n",
+       "1  1.0   40.705369  POLYGON ((-61.75807 17.03541, -61.73745 17.021...   \n",
+       "\n",
+       "  parent_iso FPS_cat  \n",
+       "0        ATG  highly  \n",
+       "1        ATG    less  "
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Reclassify FPS values\n",
+    "fps_classes = {\n",
+    "    1: 'less',\n",
+    "    2: 'less',\n",
+    "    3: 'moderately',\n",
+    "    4: 'highly',\n",
+    "    5: 'highly'\n",
+    "}\n",
+    "\n",
+    "# Create a new column 'FPS_cat' based on the mapping\n",
+    "ps_gdf['FPS_cat'] = ps_gdf['FPS'].apply(lambda x: fps_classes.get(x, None))\n",
+    "ps_gdf.head(2)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ps_gdf.to_file(path_out + \"protectedseas/protectedseas.shp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Global stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "global_area = ps_gdf.groupby(['FPS_cat'], as_index=False)['total_area'].sum().rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n",
+    "global_area['location_id'] = 'GLOB'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Country stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a mask for rows with multiple values in 'parent_iso'\n",
+    "mask = ps_gdf['parent_iso'].str.contains(';', na=False)\n",
+    "\n",
+    "# Split the 'parent_iso' values and create separate rows only for rows with multiple values\n",
+    "split_rows = ps_gdf[mask].copy()\n",
+    "split_rows['parent_iso'] = split_rows['parent_iso'].str.split(';')\n",
+    "split_rows = split_rows.explode('parent_iso')\n",
+    "\n",
+    "# Keep rows with single values in 'parent_iso'\n",
+    "single_value_rows = ps_gdf[~mask]\n",
+    "\n",
+    "# Concatenate the exploded rows with the single value rows\n",
+    "ps_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+    "regions_data = [\n",
+    "    {\n",
+    "        'region_iso': 'AS',\n",
+    "        'region_name': 'Asia & Pacific',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+    "            \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+    "            \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+    "            \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AF',\n",
+    "        'region_name': 'Africa',\n",
+    "        'country_iso_3s': [\n",
+    "            \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+    "            \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+    "            \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+    "            \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'EU',\n",
+    "        'region_name': 'Europe',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+    "            \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+    "            \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+    "            \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+    "            \"UZB\", \"VAT\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'SA',\n",
+    "        'region_name': 'Latin America & Caribbean',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+    "            \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+    "            \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+    "            \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'PO',\n",
+    "        'region_name': 'Polar',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'NA',\n",
+    "        'region_name': 'North America',\n",
+    "        'country_iso_3s': [\n",
+    "            \"CAN\", \"SPM\", \"USA\"\n",
+    "        ]\n",
+    "    },\n",
+    "        {\n",
+    "        'region_iso': 'WA',\n",
+    "        'region_name': 'West Asia',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+    "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+    "        'region_name': 'Antartica',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATA\"\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Convert the region data to a dictionary that maps each country ISO3 code to its region code ('region_iso')\n",
+    "country_to_region = {}\n",
+    "for region in regions_data:\n",
+    "    for country in region['country_iso_3s']:\n",
+    "        country_to_region[country] = region['region_iso']\n",
+    "\n",
+    "# Create a new column 'region' based on the mapping\n",
+    "ps_iso['region'] = ps_iso['parent_iso'].map(country_to_region)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "country_area = ps_iso.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()\n",
+    "country_area = country_area.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "region_area = ps_iso.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()\n",
+    "region_area = region_area.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ps_coverage = pd.concat([country_area, region_area, global_area], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ps_coverage.to_csv(path_out + '/tables/fishing_protection_level.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "skytruth",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb
index 8b38f3bf..921a2d53 100644
--- a/data/notebooks/wdpa_coverage.ipynb
+++ b/data/notebooks/wdpa_coverage.ipynb
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,12 +40,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
-    "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\""
+    "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n",
+    "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\""
    ]
   },
   {
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -71,9 +71,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "6033\n",
+      "172\n",
+      "6033\n",
+      "172\n",
+      "6033\n",
+      "171\n"
+     ]
+    }
+   ],
    "source": [
     "print(len(poly1))\n",
     "print(len(point1))\n",
@@ -95,7 +108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -114,15 +127,27 @@
     "        df = df[(df['REP_AREA'] != 0)]\n",
     "    \n",
     "    # Update the original dataframes in the list\n",
-    "    dataframes[i] = df\n",
-    "\n"
+    "    dataframes[i] = df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "5999\n",
+      "157\n",
+      "6018\n",
+      "123\n",
+      "6014\n",
+      "135\n"
+     ]
+    }
+   ],
    "source": [
     "print(len(dataframes[0]))\n",
     "print(len(dataframes[1]))\n",
@@ -141,7 +166,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -181,14 +206,22 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\" and those that are OECM"
+    "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "All gdf have the same crs: EPSG:4326\n"
+     ]
+    }
+   ],
    "source": [
     "# Check that all of them have the same crs\n",
     "first_crs = dataframes[0].crs\n",
@@ -199,6 +232,28 @@
     "        print(\"gdf have different crs\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "18445"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Merge dataframes\n",
+    "merged_mpa = pd.concat(dataframes)\n",
+    "len(merged_mpa)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -211,7 +266,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -503,58 +558,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Countries per PARENT_ISO**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "p2023 = gpd.read_file(path_out + \"/wdpa/timeseries/protected_dissolved_2023.shp\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array(['ATG', 'BRB', 'BRA', 'GBR', 'CHL', 'COL', 'CRI', 'DOM', 'ECU',\n",
-       "       'JAM', 'NLD', 'PER', 'PAN', 'SUR', 'VEN', 'USA', 'AUS', 'CAN',\n",
-       "       'FRA', 'IRN', 'JPN', 'KEN', 'KOR', 'MYS', 'MRT', 'MOZ', 'NOR',\n",
-       "       'PHL', 'POL', 'SAU', 'SEN', 'SWE', 'THA', 'TUN', 'CMR', 'IDN',\n",
-       "       'MUS', 'PRT', 'SYC', 'ISL', 'NZL', 'EST', 'GEO', 'UKR', 'MEX',\n",
-       "       'BHS', 'BLZ', 'GMB', 'MDG', 'HRV', 'FJI', 'LKA', 'ARG', 'ZAF',\n",
-       "       'PNG', 'TON', 'PLW', 'COK', 'BGD', 'AGO', 'ALB', 'DNK', 'ITA',\n",
-       "       'PAK', 'FIN', 'VNM', 'MMR', 'CHN', 'SGP', 'DEU', 'ROU', 'EGY',\n",
-       "       'SLB', 'VUT', 'BGR', 'MAR', 'MLT', 'DMA', 'LCA', 'OMN', 'GTM',\n",
-       "       'NIC', 'TTO', 'WSM', 'TZA', 'GRC', 'LBN', 'CUB', 'ISR', 'GRD',\n",
-       "       'VCT', 'BRN', 'ESP', 'JOR', 'ARE', 'HND', 'GNQ', 'KNA', 'LTU',\n",
-       "       'GNB', 'NGA', 'LVA', 'GUY', 'KAZ', 'BEL', 'GIN', 'IRL', 'RUS',\n",
-       "       'KHM', 'QAT', 'GAB', 'MDV', 'AZE', 'NAM', 'TUR', 'CPV', 'COG',\n",
-       "       'TUV', 'MCO', 'TKM', 'SVN', 'SLE', 'KIR', 'COM', 'NIU', 'FSM',\n",
-       "       'GHA', 'IOT', 'IND', 'LBR', 'CIV', 'SDN', 'SHN', 'SJM', 'UMI',\n",
-       "       'ATA', 'SYR', 'TLS', 'FRA;ITA;MCO', 'URY', 'ABNJ', 'NLD;DEU;DNK',\n",
-       "       'FIN;SWE', 'MHL', 'SLV', 'DZA', 'STP', 'YEM', 'COD', 'CYP', 'KWT',\n",
-       "       'HTI', 'MNE', 'BHR', 'LBY'], dtype=object)"
-      ]
-     },
-     "execution_count": 53,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "p2023['PARENT_ISO'].unique()"
+    "### Global and country stats"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -564,215 +573,55 @@
     "# Create an empty list to store the results\n",
     "results_list = []\n",
     "\n",
+    "# Create a DataFrame to store the global coverage\n",
+    "global_coverage = pd.DataFrame(columns=['year', 'protection_type', 'location_id', 'cumsum_area'])\n",
+    "\n",
     "for year in years_range:\n",
     "    filename = f'protected_dissolved_{year}.shp'\n",
     "    file_path = os.path.join(folder_path, filename)\n",
-    "    \n",
+    "\n",
     "    if os.path.exists(file_path):\n",
     "        gdf = gpd.read_file(file_path)\n",
-    "        grouped = gdf.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n",
-    "        \n",
-    "        # Create columns\n",
-    "        grouped['year'] = year\n",
-    "        grouped['protection_type'] = 'MPA+OECM'\n",
-    "        grouped.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n",
-    "        \n",
-    "        # Append the result to the list\n",
-    "        results_list.append(grouped)\n",
     "\n",
-    "# Concatenate the list of results into a single DataFrame\n",
-    "final_df = pd.concat(results_list, ignore_index=True)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Convert area to km2\n",
-    "final_df['cumsum_area'] =final_df['cumsum_area']/1000000"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Global**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>location_id</th>\n",
-       "      <th>cumsum_area</th>\n",
-       "      <th>year</th>\n",
-       "      <th>protection_type</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>ABNJ</td>\n",
-       "      <td>594174.66</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>AGO</td>\n",
-       "      <td>0.42</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>ALB</td>\n",
-       "      <td>103.05</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>ARE</td>\n",
-       "      <td>78.52</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>ARG</td>\n",
-       "      <td>6155.67</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3571</th>\n",
-       "      <td>GLOB</td>\n",
-       "      <td>28125365.96</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3572</th>\n",
-       "      <td>GLOB</td>\n",
-       "      <td>29624663.84</td>\n",
-       "      <td>2020</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3573</th>\n",
-       "      <td>GLOB</td>\n",
-       "      <td>29739178.77</td>\n",
-       "      <td>2021</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3574</th>\n",
-       "      <td>GLOB</td>\n",
-       "      <td>29910678.77</td>\n",
-       "      <td>2022</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3575</th>\n",
-       "      <td>GLOB</td>\n",
-       "      <td>29910724.21</td>\n",
-       "      <td>2023</td>\n",
-       "      <td>MPA+OECM</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>3576 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "     location_id  cumsum_area  year protection_type\n",
-       "0           ABNJ    594174.66  2000        MPA+OECM\n",
-       "1            AGO         0.42  2000        MPA+OECM\n",
-       "2            ALB       103.05  2000        MPA+OECM\n",
-       "3            ARE        78.52  2000        MPA+OECM\n",
-       "4            ARG      6155.67  2000        MPA+OECM\n",
-       "...          ...          ...   ...             ...\n",
-       "3571        GLOB  28125365.96  2019        MPA+OECM\n",
-       "3572        GLOB  29624663.84  2020        MPA+OECM\n",
-       "3573        GLOB  29739178.77  2021        MPA+OECM\n",
-       "3574        GLOB  29910678.77  2022        MPA+OECM\n",
-       "3575        GLOB  29910724.21  2023        MPA+OECM\n",
-       "\n",
-       "[3576 rows x 4 columns]"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Calculate global per year and append it\n",
-    "glob_df = final_df.groupby(['year', 'protection_type'])['cumsum_area'].sum().reset_index()\n",
+    "        # Global coverage for this year; AREA is summed in sq.m, so convert to sq.km here to match the per-country values (divided by 1000000 after the loop)\n",
+    "        global_area = gdf['AREA'].sum() / 1000000\n",
+    "        global_row = pd.DataFrame({'year': [year], 'protection_type': ['MPA+OECM'], 'location_id': ['GLOB'], 'cumsum_area': [global_area]})\n",
+    "        global_coverage = pd.concat([global_coverage, global_row], ignore_index=True)\n",
     "\n",
-    "glob_df['location_id'] = 'GLOB'\n",
+    "        # Split rows with multiple ISO codes into separate rows\n",
+    "        processed_df = gdf.copy()\n",
+    "        processed_df['PARENT_ISO'] = processed_df['PARENT_ISO'].str.split(';')\n",
+    "        processed_df = processed_df.explode('PARENT_ISO')\n",
     "\n",
-    "final_df2 = pd.concat([final_df, glob_df], ignore_index=True)\n",
-    "final_df2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Reorder the columns and add column last updated\n",
-    "final_df2 = final_df2[['location_id', 'year', 'protection_type', 'cumsum_area']]\n",
+    "        # Group by 'PARENT_ISO' and aggregate area\n",
+    "        iso_area = processed_df.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n",
     "\n",
-    "current_date = datetime.now().strftime('%Y-%m-%d')\n",
+    "        # Create columns to match BE table\n",
+    "        iso_area['year'] = year\n",
+    "        iso_area['protection_type'] = 'MPA+OECM'\n",
+    "        iso_area.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n",
     "\n",
-    "final_df2 = final_df2.copy()\n",
-    "final_df2['last_updated'] = current_date"
+    "        # Append the result to the list\n",
+    "        results_list.append(iso_area)\n",
+    "\n",
+    "# Concatenate the list of results into a single DataFrame and convert area to sq.km\n",
+    "final_df = pd.concat(results_list, ignore_index=True)\n",
+    "final_df['cumsum_area'] = final_df['cumsum_area'] / 1000000\n",
+    "\n",
+    "# Append global coverage to the final_df\n",
+    "final_df = pd.concat([final_df, global_coverage], ignore_index=True)\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Regions**"
+    "### Regional stats"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -834,16 +683,18 @@
     "        ]\n",
     "    },\n",
     "    {\n",
-    "        'region_iso': 'GL',\n",
-    "        'region_name': 'Global',\n",
-    "        'country_iso_3s': []\n",
-    "    },\n",
-    "    {\n",
     "        'region_iso': 'WA',\n",
     "        'region_name': 'West Asia',\n",
     "        'country_iso_3s': [\n",
     "            \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
     "        ]\n",
+    "    },\n",
+    "    {\n",
+    "        'region_iso': 'AT', # this region is not in the Protected Planet database\n",
+    "        'region_name': 'Antarctica',\n",
+    "        'country_iso_3s': [\n",
+    "            \"ATA\"\n",
+    "        ]\n",
     "    }\n",
     "]\n",
     "\n",
@@ -851,12 +702,12 @@
     "country_to_region = {}\n",
     "for region in regions_data:\n",
     "    for country in region['country_iso_3s']:\n",
-    "        country_to_region[country] = region['region_name']"
+    "        country_to_region[country] = region['region_iso']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -881,52 +732,46 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>location_id</th>\n",
-       "      <th>cumsum_area</th>\n",
        "      <th>year</th>\n",
        "      <th>protection_type</th>\n",
-       "      <th>region</th>\n",
+       "      <th>cumsum_area</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>ABNJ</td>\n",
-       "      <td>594174.66</td>\n",
+       "      <td>AF</td>\n",
        "      <td>2000</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>94507.122820</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>AGO</td>\n",
-       "      <td>0.42</td>\n",
-       "      <td>2000</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>2001</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Africa</td>\n",
+       "      <td>94807.303100</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>ALB</td>\n",
-       "      <td>103.05</td>\n",
-       "      <td>2000</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>2002</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Europe</td>\n",
+       "      <td>102859.393938</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>ARE</td>\n",
-       "      <td>78.52</td>\n",
-       "      <td>2000</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>2003</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>West Asia</td>\n",
+       "      <td>111143.352991</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>ARG</td>\n",
-       "      <td>6155.67</td>\n",
-       "      <td>2000</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>2004</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Latin America &amp; Caribbean</td>\n",
+       "      <td>119137.635862</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -934,83 +779,139 @@
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
-       "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3547</th>\n",
-       "      <td>VNM</td>\n",
-       "      <td>5036.97</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>163</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>2019</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
+       "      <td>30618.254664</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3548</th>\n",
-       "      <td>VUT</td>\n",
-       "      <td>83.83</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>164</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>2020</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
+       "      <td>30624.636536</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3549</th>\n",
-       "      <td>WSM</td>\n",
-       "      <td>199.59</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>165</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>2021</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Asia &amp; Pacific</td>\n",
+       "      <td>30624.636536</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3550</th>\n",
-       "      <td>YEM</td>\n",
-       "      <td>4108.19</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>166</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>2022</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>West Asia</td>\n",
+       "      <td>31779.597984</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3551</th>\n",
-       "      <td>ZAF</td>\n",
-       "      <td>242387.88</td>\n",
+       "      <th>167</th>\n",
+       "      <td>WA</td>\n",
        "      <td>2023</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>Africa</td>\n",
+       "      <td>31779.597984</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>3552 rows × 5 columns</p>\n",
+       "<p>168 rows × 4 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "     location_id  cumsum_area  year protection_type                     region\n",
-       "0           ABNJ    594174.66  2000        MPA+OECM                        NaN\n",
-       "1            AGO         0.42  2000        MPA+OECM                     Africa\n",
-       "2            ALB       103.05  2000        MPA+OECM                     Europe\n",
-       "3            ARE        78.52  2000        MPA+OECM                  West Asia\n",
-       "4            ARG      6155.67  2000        MPA+OECM  Latin America & Caribbean\n",
-       "...          ...          ...   ...             ...                        ...\n",
-       "3547         VNM      5036.97  2023        MPA+OECM             Asia & Pacific\n",
-       "3548         VUT        83.83  2023        MPA+OECM             Asia & Pacific\n",
-       "3549         WSM       199.59  2023        MPA+OECM             Asia & Pacific\n",
-       "3550         YEM      4108.19  2023        MPA+OECM                  West Asia\n",
-       "3551         ZAF    242387.88  2023        MPA+OECM                     Africa\n",
+       "    location_id  year protection_type    cumsum_area\n",
+       "0            AF  2000        MPA+OECM   94507.122820\n",
+       "1            AF  2001        MPA+OECM   94807.303100\n",
+       "2            AF  2002        MPA+OECM  102859.393938\n",
+       "3            AF  2003        MPA+OECM  111143.352991\n",
+       "4            AF  2004        MPA+OECM  119137.635862\n",
+       "..          ...   ...             ...            ...\n",
+       "163          WA  2019        MPA+OECM   30618.254664\n",
+       "164          WA  2020        MPA+OECM   30624.636536\n",
+       "165          WA  2021        MPA+OECM   30624.636536\n",
+       "166          WA  2022        MPA+OECM   31779.597984\n",
+       "167          WA  2023        MPA+OECM   31779.597984\n",
        "\n",
-       "[3552 rows x 5 columns]"
+       "[168 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regions = final_df.copy()\n",
+    "regions['location_id'] = regions['location_id'].map(country_to_region)\n",
+    "\n",
+    "# group by region and year to get sum of cumsum_area\n",
+    "regions = regions.groupby(['location_id', 'year', 'protection_type'])['cumsum_area'].sum().reset_index()\n",
+    "regions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "final_df['region'] = final_df['location_id'].map(country_to_region)\n",
-    "final_df"
+    "regions['location_id'].unique()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n",
+       "       'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n",
+       "       'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n",
+       "       'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n",
+       "       'FIN', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB',\n",
+       "       'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN', 'IRL',\n",
+       "       'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN', 'KHM',\n",
+       "       'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU', 'LVA',\n",
+       "       'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE',\n",
+       "       'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU', 'NLD',\n",
+       "       'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG',\n",
+       "       'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE',\n",
+       "       'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS', 'TON',\n",
+       "       'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT', 'VEN',\n",
+       "       'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n",
+       "       'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n",
+       "       'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n",
+       "       'AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "final_df2 = pd.concat([final_df, regions], ignore_index=True)\n",
+    "final_df2['location_id'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -1038,49 +939,49 @@
        "      <th>cumsum_area</th>\n",
        "      <th>year</th>\n",
        "      <th>protection_type</th>\n",
-       "      <th>region</th>\n",
+       "      <th>last_updated</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>ABNJ</td>\n",
-       "      <td>594174.66</td>\n",
+       "      <td>594174.659985</td>\n",
        "      <td>2000</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>ATA</td>\n",
-       "      <td>3594.42</td>\n",
+       "      <th>1</th>\n",
+       "      <td>AGO</td>\n",
+       "      <td>0.415240</td>\n",
        "      <td>2000</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>37</th>\n",
-       "      <td>FIN;SWE</td>\n",
-       "      <td>3541.14</td>\n",
+       "      <th>2</th>\n",
+       "      <td>ALB</td>\n",
+       "      <td>103.048347</td>\n",
        "      <td>2000</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>131</th>\n",
-       "      <td>ABNJ</td>\n",
-       "      <td>594174.66</td>\n",
-       "      <td>2001</td>\n",
+       "      <th>3</th>\n",
+       "      <td>ARE</td>\n",
+       "      <td>78.516519</td>\n",
+       "      <td>2000</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>136</th>\n",
-       "      <td>ATA</td>\n",
-       "      <td>3594.42</td>\n",
-       "      <td>2001</td>\n",
+       "      <th>4</th>\n",
+       "      <td>ARG</td>\n",
+       "      <td>6155.668078</td>\n",
+       "      <td>2000</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -1091,116 +992,83 @@
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3397</th>\n",
-       "      <td>ABNJ</td>\n",
-       "      <td>2811451.69</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>3677</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>30618.254664</td>\n",
+       "      <td>2019</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3402</th>\n",
-       "      <td>ATA</td>\n",
-       "      <td>3570.36</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>3678</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>30624.636536</td>\n",
+       "      <td>2020</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3439</th>\n",
-       "      <td>FIN;SWE</td>\n",
-       "      <td>3541.14</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>3679</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>30624.636536</td>\n",
+       "      <td>2021</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3442</th>\n",
-       "      <td>FRA;ITA;MCO</td>\n",
-       "      <td>87742.14</td>\n",
-       "      <td>2023</td>\n",
+       "      <th>3680</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>31779.597984</td>\n",
+       "      <td>2022</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3502</th>\n",
-       "      <td>NLD;DEU;DNK</td>\n",
-       "      <td>11550.01</td>\n",
+       "      <th>3681</th>\n",
+       "      <td>WA</td>\n",
+       "      <td>31779.597984</td>\n",
        "      <td>2023</td>\n",
        "      <td>MPA+OECM</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>2023-10-18</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>110 rows × 5 columns</p>\n",
+       "<p>3682 rows × 5 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "      location_id  cumsum_area  year protection_type region\n",
-       "0            ABNJ    594174.66  2000        MPA+OECM    NaN\n",
-       "5             ATA      3594.42  2000        MPA+OECM    NaN\n",
-       "37        FIN;SWE      3541.14  2000        MPA+OECM    NaN\n",
-       "131          ABNJ    594174.66  2001        MPA+OECM    NaN\n",
-       "136           ATA      3594.42  2001        MPA+OECM    NaN\n",
-       "...           ...          ...   ...             ...    ...\n",
-       "3397         ABNJ   2811451.69  2023        MPA+OECM    NaN\n",
-       "3402          ATA      3570.36  2023        MPA+OECM    NaN\n",
-       "3439      FIN;SWE      3541.14  2023        MPA+OECM    NaN\n",
-       "3442  FRA;ITA;MCO     87742.14  2023        MPA+OECM    NaN\n",
-       "3502  NLD;DEU;DNK     11550.01  2023        MPA+OECM    NaN\n",
+       "     location_id    cumsum_area  year protection_type last_updated\n",
+       "0           ABNJ  594174.659985  2000        MPA+OECM   2023-10-18\n",
+       "1            AGO       0.415240  2000        MPA+OECM   2023-10-18\n",
+       "2            ALB     103.048347  2000        MPA+OECM   2023-10-18\n",
+       "3            ARE      78.516519  2000        MPA+OECM   2023-10-18\n",
+       "4            ARG    6155.668078  2000        MPA+OECM   2023-10-18\n",
+       "...          ...            ...   ...             ...          ...\n",
+       "3677          WA   30618.254664  2019        MPA+OECM   2023-10-18\n",
+       "3678          WA   30624.636536  2020        MPA+OECM   2023-10-18\n",
+       "3679          WA   30624.636536  2021        MPA+OECM   2023-10-18\n",
+       "3680          WA   31779.597984  2022        MPA+OECM   2023-10-18\n",
+       "3681          WA   31779.597984  2023        MPA+OECM   2023-10-18\n",
        "\n",
-       "[110 rows x 5 columns]"
-      ]
-     },
-     "execution_count": 49,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "final_df[final_df.region.isnull()]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n",
-       "       'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n",
-       "       'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n",
-       "       'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n",
-       "       'FIN', 'FIN;SWE', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN',\n",
-       "       'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN',\n",
-       "       'IRL', 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN',\n",
-       "       'KHM', 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU',\n",
-       "       'LVA', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',\n",
-       "       'MNE', 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU',\n",
-       "       'NLD', 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW',\n",
-       "       'PNG', 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB',\n",
-       "       'SLE', 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS',\n",
-       "       'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT',\n",
-       "       'VEN', 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'FRA;ITA;MCO', 'IOT',\n",
-       "       'GAB', 'IND', 'SGP', 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP',\n",
-       "       'SHN', 'YEM', 'NLD;DEU;DNK', 'URY', 'CMR', 'COM', 'KWT', 'SJM',\n",
-       "       'GUY', 'UMI', 'HTI', 'JOR'], dtype=object)"
+       "[3682 rows x 5 columns]"
       ]
      },
-     "execution_count": 55,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "final_df['location_id'].unique() "
+    "current_date = datetime.now().strftime('%Y-%m-%d')\n",
+    "\n",
+    "final_df2 = final_df2.copy()\n",
+    "final_df2['last_updated'] = current_date\n",
+    "final_df2"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [

	OBJECTID	WDPAID	WDPA_PID	NAME	English_De	PARENT_ISO	ISO3	MPA_Marine	mpa_id	Zone_Marin	...	Stage_of_E	Distant_MP	Level_of_P	Most_Impac	Descrip_Im	Vertically	SHAPE_Leng	SHAPE_Area	geometry	P_LEVEL
0	1.0	478053.0	478053	Hikurangi Deep	Benthic Protection Area	NZL	NZL	54022.1	5258	54022.1	...	Implemented	NaN	Incompatible	Mining, Fishing	Benthic protections only. Deep sea mining allo...	X	12.332952	5.833001	POLYGON ((-175.00000 -42.16661, -175.00000 -42...	Less Protected / Unknown
1	2.0	555512062.0	555512062	Kermadec	Benthic Protection Area	NZL	NZL	619146.0	5428	458540.5	...	Implemented	NaN	Incompatible	Mining, Fishing	Benthic protections only. Deep sea mining allo...	NaN	25.629352	42.963159	POLYGON ((-174.02370 -29.22191, -174.02370 -29...	Less Protected / Unknown
	location_id	protected_area	total_area	habitat_name	year
0	ABNJ	421.629372679904	1874.98221422617	cold-water corals	2023
1	AGO	0	3.39567053773998	cold-water corals	2023
	habitat_name	protected_area	total_area	location_id	year
0	saltmarsh	111638.252564	224435.075094	GLOB	2023
1	seagrass	74787.449960	314001.940600	GLOB	2023
2	warmwater-corals	63259.499130	149886.974126	GLOB	2023
4	coldwater-corals	4400.140842	15336.975280	GLOB	2023
	habitat_name	protected_area	total_area	location_id	year
0	saltmarshes	111638.252564	224435.075094	GLOB	2023
1	seagrasses	74787.449960	314001.940600	GLOB	2023
2	warm-water corals	63259.499130	149886.974126	GLOB	2023
4	cold-water corals	4400.140842	15336.975280	GLOB	2023
	location_id	habitat_name	protected_area	total_area	year
0	AF	cold-water corals	37.761626	381.993234	2023
1	AF	saltmarshes	6688.702879	19845.915000	2023
2	AF	seagrasses	6319.099491	61939.484904	2023
3	AF	warm-water corals	6591.340083	15216.393947	2023
4	AS	cold-water corals	263.251498	1332.225080	2023
5	AS	saltmarshes	11721.439539	39229.888860	2023
6	AS	seagrasses	28942.705660	72666.482052	2023
7	AS	warm-water corals	13895.870659	67363.486609	2023
8	EU	cold-water corals	2183.050266	6179.526427	2023
9	EU	saltmarshes	7431.043710	13274.326478	2023
10	EU	seagrasses	5840.372925	10391.189911	2023
11	EU	warm-water corals	0.605763	0.793357	2023
12	NA	cold-water corals	22.960099	204.280433	2023
13	NA	saltmarshes	51092.644683	68200.081930	2023
14	NA	seagrasses	70.012791	301.909141	2023
15	NA	warm-water corals	0.000000	0.000000	2023
16	SA	cold-water corals	234.731370	1416.251323	2023
17	SA	saltmarshes	22969.815906	35983.392744	2023
18	SA	seagrasses	16517.097667	45847.459412	2023
19	SA	warm-water corals	5597.366845	12869.801231	2023
20	WA	cold-water corals	0.000000	12.970705	2023
21	WA	saltmarshes	1309.225736	11798.832619	2023
22	WA	seagrasses	1053.448673	25273.727431	2023
23	WA	warm-water corals	547.928957	4903.230395	2023
	location_id	total_area	habitat_name	year
0	ABNJ	1.483098e+07	seamounts	2011
1	AGO	9.556242e+03	seamounts	2011
2	ARG	3.110730e+05	seamounts	2011
3	ATA	3.551629e+05	seamounts	2011
4	ATG	6.215895e+03	seamounts	2011
...	...	...	...	...
88	VNM	4.421338e+04	seamounts	2011
89	VUT	1.199475e+05	seamounts	2011
90	WSM	4.117997e+04	seamounts	2011
91	YEM	6.294974e+04	seamounts	2011
92	ZAF	9.946306e+04	seamounts	2011
	location_id	habitat_name	protected_area	total_area	year
0	AF	seamounts	94385.178958	6.162351e+05	2011
1	AS	seamounts	832497.783937	5.433433e+06	2011
2	AT	seamounts	0.000000	3.551629e+05	2011
3	EU	seamounts	894514.910255	2.641119e+06	2011
4	NA	seamounts	555588.210725	1.664794e+06	2011
5	SA	seamounts	581172.154389	1.655552e+06	2011
6	WA	seamounts	2487.428050	9.384765e+04	2011
	location_id	habitat_name	protected_area	total_area	year
0	AF	mangroves	10006.97000	29344.404399	2020
1	AS	mangroves	21378.75000	74629.194446	2020
2	NA	mangroves	2055.40000	2329.115505	2020
3	PO	mangroves	6.72000	6.723018	2020
4	SA	mangroves	27811.53375	40875.932666	2020
5	WA	mangroves	27.83000	173.620938	2020
	MRGID	GEONAME	MRGID_TER1	POL_TYPE	MRGID_SOV1	TERRITORY1	ISO_TER1	SOVEREIGN1	MRGID_TER2	MRGID_SOV2	...	ISO_SOV1	ISO_SOV2	ISO_SOV3	UN_SOV1	UN_SOV2	UN_SOV3	UN_TER1	UN_TER2	UN_TER3	geometry
0	8444.0	American Samoa Exclusive Economic Zone	8670.0	200NM	2204.0	American Samoa	ASM	United States	0.0	0.0	...	USA	NaN	NaN	840	NaN	NaN	16.0	NaN	NaN	POLYGON ((-16216412.543 -2157569.856, -1621650...
1	8379.0	Ascension Exclusive Economic Zone	8620.0	200NM	2208.0	Ascension	SHN	United Kingdom	0.0	0.0	...	GBR	NaN	NaN	826	NaN	NaN	654.0	NaN	NaN	POLYGON ((-1089355.142 -974062.004, -1089348.4...
2	8446.0	Cook Islands Exclusive Economic Zone	8672.0	200NM	2227.0	Cook Islands	COK	New Zealand	0.0	0.0	...	NZL	NaN	NaN	554	NaN	NaN	184.0	NaN	NaN	POLYGON ((-15912583.852 -716733.193, -15813064...
3	8389.0	Overlapping claim Falkland / Malvinas Islands:...	8623.0	Overlapping claim	2208.0	Falkland / Malvinas Islands	FLK	United Kingdom	8623.0	2149.0	...	GBR	ARG	NaN	826	32.0	NaN	238.0	238.0	NaN	POLYGON ((-4061728.309 -6509190.466, -4443979....
4	8440.0	French Polynesian Exclusive Economic Zone	8656.0	200NM	17.0	French Polynesia	PYF	France	0.0	0.0	...	FRA	NaN	NaN	250	NaN	NaN	258.0	NaN	NaN	MULTIPOLYGON (((-13543804.433 -974376.651, -13...
	location_id	location_name	total_marine_area	location_type
0	AF	Africa	1.495538e+07	region
1	AS	Asia & Pacific	5.269208e+07	region