From a5d4b0277bdc327663d3a2b228eca404804617c0 Mon Sep 17 00:00:00 2001 From: sofia Date: Fri, 6 Oct 2023 12:11:17 +0200 Subject: [PATCH 1/9] coverage calculations wdpa --- data/notebooks/wdpa_coverage.ipynb | 579 ++++++++++------------------- 1 file changed, 201 insertions(+), 378 deletions(-) diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb index 8b38f3bf..07bfca74 100644 --- a/data/notebooks/wdpa_coverage.ipynb +++ b/data/notebooks/wdpa_coverage.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,12 +40,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\"" + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" ] }, { @@ -508,53 +508,7 @@ }, { "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "p2023 = gpd.read_file(path_out + \"/wdpa/timeseries/protected_dissolved_2023.shp\")" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ATG', 'BRB', 'BRA', 'GBR', 'CHL', 'COL', 'CRI', 'DOM', 'ECU',\n", - " 'JAM', 'NLD', 'PER', 'PAN', 'SUR', 'VEN', 'USA', 'AUS', 'CAN',\n", - " 'FRA', 'IRN', 'JPN', 'KEN', 'KOR', 'MYS', 'MRT', 'MOZ', 'NOR',\n", - " 'PHL', 'POL', 'SAU', 'SEN', 'SWE', 'THA', 'TUN', 'CMR', 'IDN',\n", - " 'MUS', 'PRT', 'SYC', 'ISL', 'NZL', 'EST', 'GEO', 'UKR', 'MEX',\n", - " 'BHS', 'BLZ', 'GMB', 'MDG', 'HRV', 'FJI', 'LKA', 'ARG', 'ZAF',\n", - " 'PNG', 'TON', 'PLW', 'COK', 'BGD', 'AGO', 'ALB', 'DNK', 'ITA',\n", - " 'PAK', 'FIN', 'VNM', 'MMR', 'CHN', 'SGP', 'DEU', 'ROU', 'EGY',\n", - " 'SLB', 'VUT', 'BGR', 'MAR', 'MLT', 'DMA', 'LCA', 'OMN', 'GTM',\n", - " 'NIC', 'TTO', 'WSM', 'TZA', 'GRC', 'LBN', 'CUB', 'ISR', 'GRD',\n", - " 'VCT', 'BRN', 'ESP', 'JOR', 'ARE', 'HND', 'GNQ', 'KNA', 'LTU',\n", - " 'GNB', 'NGA', 'LVA', 'GUY', 'KAZ', 'BEL', 'GIN', 'IRL', 'RUS',\n", - " 'KHM', 'QAT', 'GAB', 'MDV', 'AZE', 'NAM', 'TUR', 'CPV', 'COG',\n", - " 'TUV', 'MCO', 'TKM', 'SVN', 'SLE', 'KIR', 'COM', 'NIU', 'FSM',\n", - " 'GHA', 'IOT', 'IND', 'LBR', 'CIV', 'SDN', 'SHN', 'SJM', 'UMI',\n", - " 'ATA', 'SYR', 'TLS', 'FRA;ITA;MCO', 'URY', 'ABNJ', 'NLD;DEU;DNK',\n", - " 'FIN;SWE', 'MHL', 'SLV', 'DZA', 'STP', 'YEM', 'COD', 'CYP', 'KWT',\n", - " 'HTI', 'MNE', 'BHR', 'LBY'], dtype=object)" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p2023['PARENT_ISO'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -570,9 +524,16 @@ " \n", " if os.path.exists(file_path):\n", " gdf = gpd.read_file(file_path)\n", - " grouped = gdf.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n", " \n", - " # Create columns\n", + " # Split rows with multiple ISO codes into separate rows\n", + " processed_df = gdf.copy()\n", + " processed_df['PARENT_ISO'] = processed_df['PARENT_ISO'].str.split(';')\n", + " processed_df = processed_df.explode('PARENT_ISO')\n", + " \n", + " # Group by 'PARENT_ISO' and aggregate area\n", + " grouped = processed_df.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n", + " \n", + " # Create columns to match BE table\n", " grouped['year'] = year\n", " grouped['protection_type'] = 'MPA+OECM'\n", " grouped.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n", @@ -580,17 +541,8 @@ " # Append the result to the list\n", " results_list.append(grouped)\n", "\n", - "# Concatenate the list of results into a single DataFrame\n", - "final_df = pd.concat(results_list, ignore_index=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# Convert area to km2\n", + "# Concatenate the list of results into a single DataFrame and convert area to sq.km\n", + "final_df = pd.concat(results_list, ignore_index=True)\n", "final_df['cumsum_area'] =final_df['cumsum_area']/1000000" ] }, @@ -603,164 +555,24 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idcumsum_areayearprotection_type
0ABNJ594174.662000MPA+OECM
1AGO0.422000MPA+OECM
2ALB103.052000MPA+OECM
3ARE78.522000MPA+OECM
4ARG6155.672000MPA+OECM
...............
3571GLOB28125365.962019MPA+OECM
3572GLOB29624663.842020MPA+OECM
3573GLOB29739178.772021MPA+OECM
3574GLOB29910678.772022MPA+OECM
3575GLOB29910724.212023MPA+OECM
\n", - "

3576 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " location_id cumsum_area year protection_type\n", - "0 ABNJ 594174.66 2000 MPA+OECM\n", - "1 AGO 0.42 2000 MPA+OECM\n", - "2 ALB 103.05 2000 MPA+OECM\n", - "3 ARE 78.52 2000 MPA+OECM\n", - "4 ARG 6155.67 2000 MPA+OECM\n", - "... ... ... ... ...\n", - "3571 GLOB 28125365.96 2019 MPA+OECM\n", - "3572 GLOB 29624663.84 2020 MPA+OECM\n", - "3573 GLOB 29739178.77 2021 MPA+OECM\n", - "3574 GLOB 29910678.77 2022 MPA+OECM\n", - "3575 GLOB 29910724.21 2023 MPA+OECM\n", - "\n", - "[3576 rows x 4 columns]" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Calculate global per year and append it\n", + "# Calculate global per year and append it to df\n", "glob_df = final_df.groupby(['year', 'protection_type'])['cumsum_area'].sum().reset_index()\n", - "\n", "glob_df['location_id'] = 'GLOB'\n", "\n", - "final_df2 = pd.concat([final_df, glob_df], ignore_index=True)\n", - "final_df2" + "final_df = pd.concat([final_df, glob_df], ignore_index=True)" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ - "# Reorder the columns and add column last updated\n", - "final_df2 = final_df2[['location_id', 'year', 'protection_type', 'cumsum_area']]\n", - "\n", - "current_date = datetime.now().strftime('%Y-%m-%d')\n", - "\n", - "final_df2 = final_df2.copy()\n", - "final_df2['last_updated'] = current_date" + "final_df = final_df[['location_id', 'year', 'protection_type', 'cumsum_area']]" ] }, { @@ -772,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -836,7 +648,7 @@ " {\n", " 'region_iso': 'GL',\n", " 'region_name': 'Global',\n", - " 'country_iso_3s': []\n", + " 'country_iso_3s': ['GLOB']\n", " },\n", " {\n", " 'region_iso': 'WA',\n", @@ -844,6 +656,20 @@ " 'country_iso_3s': [\n", " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'HS', # this region is not in the Protected Planet database\n", + " 'region_name': 'Areas Beyond National Jurisdiction',\n", + " 'country_iso_3s': [\n", + " \"ABNJ\"\n", + " ]\n", " }\n", "]\n", "\n", @@ -851,12 +677,12 @@ "country_to_region = {}\n", "for region in regions_data:\n", " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_name']" + " country_to_region[country] = region['region_iso']" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -881,52 +707,46 @@ " \n", " \n", " location_id\n", - " cumsum_area\n", " year\n", " protection_type\n", - " region\n", + " cumsum_area\n", " \n", " \n", " \n", " \n", " 0\n", - " ABNJ\n", - " 594174.66\n", + " AF\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 94507.122820\n", " \n", " \n", " 1\n", - " AGO\n", - " 0.42\n", - " 2000\n", + " AF\n", + " 2001\n", " MPA+OECM\n", - " Africa\n", + " 94807.303100\n", " \n", " \n", " 2\n", - " ALB\n", - " 103.05\n", - " 2000\n", + " AF\n", + " 2002\n", " MPA+OECM\n", - " Europe\n", + " 102859.393938\n", " \n", " \n", " 3\n", - " ARE\n", - " 78.52\n", - " 2000\n", + " AF\n", + " 2003\n", " MPA+OECM\n", - " West Asia\n", + " 111143.352991\n", " \n", " \n", " 4\n", - " ARG\n", - " 6155.67\n", - " 2000\n", + " AF\n", + " 2004\n", " MPA+OECM\n", - " Latin America & Caribbean\n", + " 119137.635862\n", " \n", " \n", " ...\n", @@ -934,83 +754,119 @@ " ...\n", " ...\n", " ...\n", - " ...\n", " \n", " \n", - " 3547\n", - " VNM\n", - " 5036.97\n", - " 2023\n", + " 211\n", + " WA\n", + " 2019\n", " MPA+OECM\n", - " Asia & Pacific\n", + " 30618.254664\n", " \n", " \n", - " 3548\n", - " VUT\n", - " 83.83\n", - " 2023\n", + " 212\n", + " WA\n", + " 2020\n", " MPA+OECM\n", - " Asia & Pacific\n", + " 30624.636536\n", " \n", " \n", - " 3549\n", - " WSM\n", - " 199.59\n", - " 2023\n", + " 213\n", + " WA\n", + " 2021\n", " MPA+OECM\n", - " Asia & Pacific\n", + " 30624.636536\n", " \n", " \n", - " 3550\n", - " YEM\n", - " 4108.19\n", - " 2023\n", + " 214\n", + " WA\n", + " 2022\n", " MPA+OECM\n", - " West Asia\n", + " 31779.597984\n", " \n", " \n", - " 3551\n", - " ZAF\n", - " 242387.88\n", + " 215\n", + " WA\n", " 2023\n", " MPA+OECM\n", - " Africa\n", + " 31779.597984\n", " \n", " \n", "\n", - "

3552 rows × 5 columns

\n", + "

216 rows × 4 columns

\n", "" ], "text/plain": [ - " location_id cumsum_area year protection_type region\n", - "0 ABNJ 594174.66 2000 MPA+OECM NaN\n", - "1 AGO 0.42 2000 MPA+OECM Africa\n", - "2 ALB 103.05 2000 MPA+OECM Europe\n", - "3 ARE 78.52 2000 MPA+OECM West Asia\n", - "4 ARG 6155.67 2000 MPA+OECM Latin America & Caribbean\n", - "... ... ... ... ... ...\n", - "3547 VNM 5036.97 2023 MPA+OECM Asia & Pacific\n", - "3548 VUT 83.83 2023 MPA+OECM Asia & Pacific\n", - "3549 WSM 199.59 2023 MPA+OECM Asia & Pacific\n", - "3550 YEM 4108.19 2023 MPA+OECM West Asia\n", - "3551 ZAF 242387.88 2023 MPA+OECM Africa\n", + " location_id year protection_type cumsum_area\n", + "0 AF 2000 MPA+OECM 94507.122820\n", + "1 AF 2001 MPA+OECM 94807.303100\n", + "2 AF 2002 MPA+OECM 102859.393938\n", + "3 AF 2003 MPA+OECM 111143.352991\n", + "4 AF 2004 MPA+OECM 119137.635862\n", + ".. ... ... ... ...\n", + "211 WA 2019 MPA+OECM 30618.254664\n", + "212 WA 2020 MPA+OECM 30624.636536\n", + "213 WA 2021 MPA+OECM 30624.636536\n", + "214 WA 2022 MPA+OECM 31779.597984\n", + "215 WA 2023 MPA+OECM 31779.597984\n", "\n", - "[3552 rows x 5 columns]" + "[216 rows x 4 columns]" ] }, - "execution_count": 48, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df['region'] = final_df['location_id'].map(country_to_region)\n", - "final_df" + "regions = final_df.copy()\n", + "regions['location_id'] = regions['location_id'].map(country_to_region)\n", + "\n", + "# group by region and year to get sum of cumsum_area\n", + "regions = regions.groupby(['location_id', 'year', 'protection_type'])['cumsum_area'].sum().reset_index()\n", + "regions\n" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n", + " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n", + " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n", + " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n", + " 'FIN', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB',\n", + " 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN', 'IRL',\n", + " 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN', 'KHM',\n", + " 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU', 'LVA',\n", + " 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE',\n", + " 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU', 'NLD',\n", + " 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG',\n", + " 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE',\n", + " 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS', 'TON',\n", + " 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT', 'VEN',\n", + " 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n", + " 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n", + " 'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n", + " 'AF', 'AS', 'AT', 'EU', 'GL', 'HS', 'NA', 'SA', 'WA'], dtype=object)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df2 = pd.concat([final_df, regions], ignore_index=True)\n", + "final_df2['location_id'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -1035,52 +891,52 @@ " \n", " \n", " location_id\n", - " cumsum_area\n", " year\n", " protection_type\n", - " region\n", + " cumsum_area\n", + " last_updated\n", " \n", " \n", " \n", " \n", " 0\n", " ABNJ\n", - " 594174.66\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 594174.659985\n", + " 2023-10-06\n", " \n", " \n", - " 5\n", - " ATA\n", - " 3594.42\n", + " 1\n", + " AGO\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 0.415240\n", + " 2023-10-06\n", " \n", " \n", - " 37\n", - " FIN;SWE\n", - " 3541.14\n", + " 2\n", + " ALB\n", " 2000\n", " MPA+OECM\n", - " NaN\n", + " 103.048347\n", + " 2023-10-06\n", " \n", " \n", - " 131\n", - " ABNJ\n", - " 594174.66\n", - " 2001\n", + " 3\n", + " ARE\n", + " 2000\n", " MPA+OECM\n", - " NaN\n", + " 78.516519\n", + " 2023-10-06\n", " \n", " \n", - " 136\n", - " ATA\n", - " 3594.42\n", - " 2001\n", + " 4\n", + " ARG\n", + " 2000\n", " MPA+OECM\n", - " NaN\n", + " 6155.668078\n", + " 2023-10-06\n", " \n", " \n", " ...\n", @@ -1091,116 +947,83 @@ " ...\n", " \n", " \n", - " 3397\n", - " ABNJ\n", - " 2811451.69\n", - " 2023\n", + " 3725\n", + " WA\n", + " 2019\n", " MPA+OECM\n", - " NaN\n", + " 30618.254664\n", + " 2023-10-06\n", " \n", " \n", - " 3402\n", - " ATA\n", - " 3570.36\n", - " 2023\n", + " 3726\n", + " WA\n", + " 2020\n", " MPA+OECM\n", - " NaN\n", + " 30624.636536\n", + " 2023-10-06\n", " \n", " \n", - " 3439\n", - " FIN;SWE\n", - " 3541.14\n", - " 2023\n", + " 3727\n", + " WA\n", + " 2021\n", " MPA+OECM\n", - " NaN\n", + " 30624.636536\n", + " 2023-10-06\n", " \n", " \n", - " 3442\n", - " FRA;ITA;MCO\n", - " 87742.14\n", - " 2023\n", + " 3728\n", + " WA\n", + " 2022\n", " MPA+OECM\n", - " NaN\n", + " 31779.597984\n", + " 2023-10-06\n", " \n", " \n", - " 3502\n", - " NLD;DEU;DNK\n", - " 11550.01\n", + " 3729\n", + " WA\n", " 2023\n", " MPA+OECM\n", - " NaN\n", + " 31779.597984\n", + " 2023-10-06\n", " \n", " \n", "\n", - "

110 rows × 5 columns

\n", + "

3730 rows × 5 columns

\n", "" ], "text/plain": [ - " location_id cumsum_area year protection_type region\n", - "0 ABNJ 594174.66 2000 MPA+OECM NaN\n", - "5 ATA 3594.42 2000 MPA+OECM NaN\n", - "37 FIN;SWE 3541.14 2000 MPA+OECM NaN\n", - "131 ABNJ 594174.66 2001 MPA+OECM NaN\n", - "136 ATA 3594.42 2001 MPA+OECM NaN\n", - "... ... ... ... ... ...\n", - "3397 ABNJ 2811451.69 2023 MPA+OECM NaN\n", - "3402 ATA 3570.36 2023 MPA+OECM NaN\n", - "3439 FIN;SWE 3541.14 2023 MPA+OECM NaN\n", - "3442 FRA;ITA;MCO 87742.14 2023 MPA+OECM NaN\n", - "3502 NLD;DEU;DNK 11550.01 2023 MPA+OECM NaN\n", + " location_id year protection_type cumsum_area last_updated\n", + "0 ABNJ 2000 MPA+OECM 594174.659985 2023-10-06\n", + "1 AGO 2000 MPA+OECM 0.415240 2023-10-06\n", + "2 ALB 2000 MPA+OECM 103.048347 2023-10-06\n", + "3 ARE 2000 MPA+OECM 78.516519 2023-10-06\n", + "4 ARG 2000 MPA+OECM 6155.668078 2023-10-06\n", + "... ... ... ... ... ...\n", + "3725 WA 2019 MPA+OECM 30618.254664 2023-10-06\n", + "3726 WA 2020 MPA+OECM 30624.636536 2023-10-06\n", + "3727 WA 2021 MPA+OECM 30624.636536 2023-10-06\n", + "3728 WA 2022 MPA+OECM 31779.597984 2023-10-06\n", + "3729 WA 2023 MPA+OECM 31779.597984 2023-10-06\n", "\n", - "[110 rows x 5 columns]" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df[final_df.region.isnull()]" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n", - " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n", - " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n", - " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n", - " 'FIN', 'FIN;SWE', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN',\n", - " 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN',\n", - " 'IRL', 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN',\n", - " 'KHM', 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU',\n", - " 'LVA', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',\n", - " 'MNE', 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU',\n", - " 'NLD', 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW',\n", - " 'PNG', 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB',\n", - " 'SLE', 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS',\n", - " 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT',\n", - " 'VEN', 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'FRA;ITA;MCO', 'IOT',\n", - " 'GAB', 'IND', 'SGP', 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP',\n", - " 'SHN', 'YEM', 'NLD;DEU;DNK', 'URY', 'CMR', 'COM', 'KWT', 'SJM',\n", - " 'GUY', 'UMI', 'HTI', 'JOR'], dtype=object)" + "[3730 rows x 5 columns]" ] }, - "execution_count": 55, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df['location_id'].unique() " + "current_date = datetime.now().strftime('%Y-%m-%d')\n", + "\n", + "final_df2 = final_df2.copy()\n", + "final_df2['last_updated'] = current_date\n", + "final_df2" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ From aefb44fcac22694ae7f98d2195ea616f334dcfe0 Mon Sep 17 00:00:00 2001 From: sofia Date: Tue, 10 Oct 2023 08:20:47 +0200 Subject: [PATCH 2/9] remove region for global --- data/notebooks/wdpa_coverage.ipynb | 217 ++++++++++++++++++++++++----- 1 file changed, 180 insertions(+), 37 deletions(-) diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb index 07bfca74..12fd1337 100644 --- a/data/notebooks/wdpa_coverage.ipynb +++ b/data/notebooks/wdpa_coverage.ipynb @@ -568,7 +568,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -584,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -682,7 +682,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -812,7 +812,7 @@ "[216 rows x 4 columns]" ] }, - "execution_count": 44, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -828,7 +828,150 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idyearprotection_typecumsum_area
0AF2000MPA+OECM94507.122820
1AF2001MPA+OECM94807.303100
2AF2002MPA+OECM102859.393938
3AF2003MPA+OECM111143.352991
4AF2004MPA+OECM119137.635862
...............
211WA2019MPA+OECM30618.254664
212WA2020MPA+OECM30624.636536
213WA2021MPA+OECM30624.636536
214WA2022MPA+OECM31779.597984
215WA2023MPA+OECM31779.597984
\n", + "

192 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " location_id year protection_type cumsum_area\n", + "0 AF 2000 MPA+OECM 94507.122820\n", + "1 AF 2001 MPA+OECM 94807.303100\n", + "2 AF 2002 MPA+OECM 102859.393938\n", + "3 AF 2003 MPA+OECM 111143.352991\n", + "4 AF 2004 MPA+OECM 119137.635862\n", + ".. ... ... ... ...\n", + "211 WA 2019 MPA+OECM 30618.254664\n", + "212 WA 2020 MPA+OECM 30624.636536\n", + "213 WA 2021 MPA+OECM 30624.636536\n", + "214 WA 2022 MPA+OECM 31779.597984\n", + "215 WA 2023 MPA+OECM 31779.597984\n", + "\n", + "[192 rows x 4 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove rows for region GL (Global)\n", + "regions = regions[regions['location_id'] != 'GL']\n", + "regions" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -851,10 +994,10 @@ " 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n", " 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n", " 'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n", - " 'AF', 'AS', 'AT', 'EU', 'GL', 'HS', 'NA', 'SA', 'WA'], dtype=object)" + " 'AF', 'AS', 'AT', 'EU', 'HS', 'NA', 'SA', 'WA'], dtype=object)" ] }, - "execution_count": 45, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -866,7 +1009,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -904,7 +1047,7 @@ " 2000\n", " MPA+OECM\n", " 594174.659985\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", " 1\n", @@ -912,7 +1055,7 @@ " 2000\n", " MPA+OECM\n", " 0.415240\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", " 2\n", @@ -920,7 +1063,7 @@ " 2000\n", " MPA+OECM\n", " 103.048347\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", " 3\n", @@ -928,7 +1071,7 @@ " 2000\n", " MPA+OECM\n", " 78.516519\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", " 4\n", @@ -936,7 +1079,7 @@ " 2000\n", " MPA+OECM\n", " 6155.668078\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", " ...\n", @@ -947,68 +1090,68 @@ " ...\n", " \n", " \n", - " 3725\n", + " 3701\n", " WA\n", " 2019\n", " MPA+OECM\n", " 30618.254664\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", - " 3726\n", + " 3702\n", " WA\n", " 2020\n", " MPA+OECM\n", " 30624.636536\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", - " 3727\n", + " 3703\n", " WA\n", " 2021\n", " MPA+OECM\n", " 30624.636536\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", - " 3728\n", + " 3704\n", " WA\n", " 2022\n", " MPA+OECM\n", " 31779.597984\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", - " 3729\n", + " 3705\n", " WA\n", " 2023\n", " MPA+OECM\n", " 31779.597984\n", - " 2023-10-06\n", + " 2023-10-10\n", " \n", " \n", "\n", - "

3730 rows × 5 columns

\n", + "

3706 rows × 5 columns

\n", "" ], "text/plain": [ " location_id year protection_type cumsum_area last_updated\n", - "0 ABNJ 2000 MPA+OECM 594174.659985 2023-10-06\n", - "1 AGO 2000 MPA+OECM 0.415240 2023-10-06\n", - "2 ALB 2000 MPA+OECM 103.048347 2023-10-06\n", - "3 ARE 2000 MPA+OECM 78.516519 2023-10-06\n", - "4 ARG 2000 MPA+OECM 6155.668078 2023-10-06\n", + "0 ABNJ 2000 MPA+OECM 594174.659985 2023-10-10\n", + "1 AGO 2000 MPA+OECM 0.415240 2023-10-10\n", + "2 ALB 2000 MPA+OECM 103.048347 2023-10-10\n", + "3 ARE 2000 MPA+OECM 78.516519 2023-10-10\n", + "4 ARG 2000 MPA+OECM 6155.668078 2023-10-10\n", "... ... ... ... ... ...\n", - "3725 WA 2019 MPA+OECM 30618.254664 2023-10-06\n", - "3726 WA 2020 MPA+OECM 30624.636536 2023-10-06\n", - "3727 WA 2021 MPA+OECM 30624.636536 2023-10-06\n", - "3728 WA 2022 MPA+OECM 31779.597984 2023-10-06\n", - "3729 WA 2023 MPA+OECM 31779.597984 2023-10-06\n", + "3701 WA 2019 MPA+OECM 30618.254664 2023-10-10\n", + "3702 WA 2020 MPA+OECM 30624.636536 2023-10-10\n", + "3703 WA 2021 MPA+OECM 30624.636536 2023-10-10\n", + "3704 WA 2022 MPA+OECM 31779.597984 2023-10-10\n", + "3705 WA 2023 MPA+OECM 31779.597984 2023-10-10\n", "\n", - "[3730 rows x 5 columns]" + "[3706 rows x 5 columns]" ] }, - "execution_count": 46, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1023,7 +1166,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ From 04b44b7c7597111d4f663b9c31193d7598e6d2bd Mon Sep 17 00:00:00 2001 From: sofia Date: Tue, 10 Oct 2023 17:23:31 +0200 Subject: [PATCH 3/9] protectedseas coverage table --- data/notebooks/protectedseas.ipynb | 1726 ++++++++++++++++++++++++++++ 1 file changed, 1726 insertions(+) create mode 100644 data/notebooks/protectedseas.ipynb diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb new file mode 100644 index 00000000..85909d31 --- /dev/null +++ b/data/notebooks/protectedseas.ipynb @@ -0,0 +1,1726 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Processing" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SITE_IDSITE_NAMEgeometry
0AIAG1Antigua & Barbuda EEZ (0-200NM)POLYGON ((-58.87762 19.58008, -58.82167 19.502...
1AIAG10Low Bay SanctuaryPOLYGON ((-61.91090 17.57960, -61.91096 17.579...
\n", + "
" + ], + "text/plain": [ + " SITE_ID SITE_NAME \\\n", + "0 AIAG1 Antigua & Barbuda EEZ (0-200NM) \n", + "1 AIAG10 Low Bay Sanctuary \n", + "\n", + " geometry \n", + "0 POLYGON ((-58.87762 19.58008, -58.82167 19.502... \n", + "1 POLYGON ((-61.91090 17.57960, -61.91096 17.579... " + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Import shp containing geometries\n", + "ps = gpd.read_file(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp\")\n", + "ps.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['site_id', 'site_name', 'url', 'country', 'state', 'managing_authority',\n", + " 'designation', 'purpose', 'restrictions', 'allowed', 'season',\n", + " 'effective_from', 'effective_to', 'report_violations', 'latest_updates',\n", + " 'protection_focus', 'species_of_concern',\n", + " 'removal_of_marine_life_is_prohibited', 'entry_prohibited',\n", + " 'speed_restricted', 'discharge_prohibited', 'diving_prohibited',\n", + " 'removal_of_historic_artifacts_prohibited', 'stopping_prohibited',\n", + " 'anchoring_prohibited', 'landing_prohibited', 'dragging_prohibited',\n", + " 'dredging_prohibited', 'industrial_or_mineral_exploration_prohibited',\n", + " 'construction_prohibited', 'drilling_prohibited',\n", + " 'overflight_or_drones_prohibited', 'tribal', 'bottom_trawl',\n", + " 'gillnetting', 'hook_n_line', 'trolling', 'nets', 'traps_n_pots',\n", + " 'spear_fishing', 'longlining', 'misc_gear', 'recreational_restrictions',\n", + " 'commercial_restrictions', 'wdpa_id', 'iucn_cat', 'year_est',\n", + " 'gov_level', 'inshore_only', 'coastline_match', 'boundary_source',\n", + " 'modification_level', 'total_area'],\n", + " dtype='object')" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Import csv containing information\n", + "protectedseas = pd.read_csv(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv\")\n", + "protectedseas.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(21197, 53)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protectedseas.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10879, 53)" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Keep only rows in which wdpa_id is not null and it's different than 0\n", + "protectedseas = protectedseas[protectedseas['wdpa_id'].notna()]\n", + "protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']\n", + "protectedseas.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10879, 56)" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Join csv with shapefile and keep only wdpa geometries\n", + "ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')\n", + "ps_gdf.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometry
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...
2AIAG13Palastar Reef SanctuaryAntigua and Barbuda5555871955.022.754514POLYGON ((-61.73350 17.52440, -61.78360 17.497...
3AIAG14Palaster Reef Marine National ParkAntigua and Barbuda21.03.207554POLYGON ((-61.74275 17.51737, -61.77440 17.523...
4AIAG16Two Foot Bay SanctuaryAntigua and Barbuda5555871985.047.933200POLYGON ((-61.70064 17.66752, -61.70039 17.667...
\n", + "
" + ], + "text/plain": [ + " site_id site_name country wdpa_id \\\n", + "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", + "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", + "2 AIAG13 Palastar Reef Sanctuary Antigua and Barbuda 555587195 \n", + "3 AIAG14 Palaster Reef Marine National Park Antigua and Barbuda 2 \n", + "4 AIAG16 Two Foot Bay Sanctuary Antigua and Barbuda 555587198 \n", + "\n", + " FPS total_area geometry \n", + "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", + "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", + "2 5.0 22.754514 POLYGON ((-61.73350 17.52440, -61.78360 17.497... \n", + "3 1.0 3.207554 POLYGON ((-61.74275 17.51737, -61.77440 17.523... \n", + "4 5.0 47.933200 POLYGON ((-61.70064 17.66752, -61.70039 17.667... " + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]\n", + "ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})\n", + "ps_gdf.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Antigua and Barbuda', 'USA', 'Albania', 'Netherlands Antilles',\n", + " 'United Arab Emirates', 'Argentina', 'France', 'Australia',\n", + " 'Barbados', 'Belgium', 'Bangladesh', 'Bulgaria', 'Belize',\n", + " 'Brazil', 'Bahamas', 'British Virgin Islands', 'Canada', 'Chile',\n", + " 'Cameroon', 'Colombia', 'Comoros', 'Costa Rica', 'Cuba', 'Cyprus',\n", + " 'Germany', 'Djbouti', 'Dominica', 'Denmark', 'Dominican Republic',\n", + " 'Algeria', 'Ecuador', 'Egypt', 'Spain', 'Estonia', 'Finland',\n", + " 'France, Italy, Monaco', 'French Antilles', nan, 'Gabon',\n", + " 'United Kingdom', 'Grenada', 'Ghana', 'Gibraltar', 'Guinea',\n", + " 'The Gambia', 'Guinea Bissau', 'Greece', 'Guatemala',\n", + " 'French Guyana', 'Honduras', 'Croatia', 'Indonesia', 'Indonesia ',\n", + " 'India', 'Ireland', 'Iceland', 'Israel', 'Italy', 'Jamaica',\n", + " 'Jordan', 'Japan', 'Kenya', 'Cambodia', 'South Korea',\n", + " 'Cayman Islands', 'Lebanon', 'Liberia', 'Saint Lucia', 'Sri Lanka',\n", + " 'Lithuania', 'Latvia', 'Morocco', 'Monaco', 'Madagascar',\n", + " 'Republic of Maldives', 'Malta', 'Myanmar', 'Mozambique',\n", + " 'Mauritania', 'Malaysia', 'Namibia', 'New Caledonia', 'Niue',\n", + " 'The Netherlands', 'Netherlands', 'Norway', 'New Zealand',\n", + " 'Panama', 'British Overseas Territory - Pitcairn', 'Peru',\n", + " 'Philippines', 'Republic of Palau', 'Poland', 'Portugal', 'Qatar',\n", + " 'Russia', 'Senegal',\n", + " 'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland',\n", + " 'Solomon Islands', 'El Salvador', 'São Tomé and Príncipe',\n", + " 'Suriname', 'Slovenia', 'Sweden', 'Seychelles',\n", + " 'Turks and Caicos Islands', 'Thailand', 'East Timor', 'Tonga',\n", + " 'Trinidad and Tobago', 'Tunisia', 'Tanzania', 'Uruguay',\n", + " 'Saint Vincent and the Grenadines', 'Vietnam', 'Yemen',\n", + " 'South Africa', 'USA; Haiti; Jamaica'], dtype=object)" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Save txt file with unique names for countries\n", + "countries = ps_gdf['country'].unique()\n", + "countries" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "country_iso_dict = {\n", + " 'Antigua and Barbuda': 'ATG',\n", + " 'USA': 'USA',\n", + " 'Albania': 'ALB',\n", + " 'Netherlands Antilles': 'NLD',\n", + " 'United Arab Emirates': 'ARE',\n", + " 'Argentina': 'ARG',\n", + " 'France': 'FRA',\n", + " 'Australia': 'AUS',\n", + " 'Barbados': 'BRB',\n", + " 'Belgium': 'BEL',\n", + " 'Bangladesh': 'BGD',\n", + " 'Bulgaria': 'BGR',\n", + " 'Belize': 'BLZ',\n", + " 'Brazil': 'BRA',\n", + " 'Bahamas': 'BHS',\n", + " 'British Virgin Islands': 'VGB',\n", + " 'Canada': 'CAN',\n", + " 'Chile': 'CHL',\n", + " 'Cameroon': 'CMR',\n", + " 'Colombia': 'COL',\n", + " 'Comoros': 'COM',\n", + " 'Costa Rica': 'CRI',\n", + " 'Cuba': 'CUB',\n", + " 'Cyprus': 'CYP',\n", + " 'Germany': 'DEU',\n", + " 'Djibouti': 'DJI',\n", + " 'Djbouti': 'DJI',\n", + " 'Dominica': 'DMA',\n", + " 'Denmark': 'DNK',\n", + " 'Dominican Republic': 'DOM',\n", + " 'Algeria': 'DZA',\n", + " 'Ecuador': 'ECU',\n", + " 'Egypt': 'EGY',\n", + " 'Spain': 'ESP',\n", + " 'Estonia': 'EST',\n", + " 'Finland': 'FIN',\n", + " 'France, Italy, Monaco': 'FRA;ITA;MCO',\n", + " 'French Antilles': 'FRA',\n", + " 'Gabon': 'GAB',\n", + " 'United Kingdom': 'GBR',\n", + " 'Grenada': 'GRD',\n", + " 'Ghana': 'GHA',\n", + " 'Gibraltar': 'GIB',\n", + " 'Guinea': 'GIN',\n", + " 'The Gambia': 'GMB',\n", + " 'Guinea Bissau': 'GNB',\n", + " 'Greece': 'GRC',\n", + " 'Guatemala': 'GTM',\n", + " 'French Guyana': 'GUF',\n", + " 'Honduras': 'HND',\n", + " 'Croatia': 'HRV',\n", + " 'Indonesia': 'IDN',\n", + " 'Indonesia ': 'IDN',\n", + " 'India': 'IND',\n", + " 'Ireland': 'IRL',\n", + " 'Iceland': 'ISL',\n", + " 'Israel': 'ISR',\n", + " 'Italy': 'ITA',\n", + " 'Jamaica': 'JAM',\n", + " 'Jordan': 'JOR',\n", + " 'Japan': 'JPN',\n", + " 'Kenya': 'KEN',\n", + " 'Cambodia': 'KHM',\n", + " 'South Korea': 'KOR',\n", + " 'Cayman Islands': 'CYM',\n", + " 'Lebanon': 'LBN',\n", + " 'Liberia': 'LBR',\n", + " 'Saint Lucia': 'LCA',\n", + " 'Sri Lanka': 'LKA',\n", + " 'Lithuania': 'LTU',\n", + " 'Latvia': 'LVA',\n", + " 'Morocco': 'MAR',\n", + " 'Monaco': 'MCO',\n", + " 'Madagascar': 'MDG',\n", + " 'Republic of Maldives': 'MDV',\n", + " 'Malta': 'MLT',\n", + " 'Myanmar': 'MMR',\n", + " 'Mozambique': 'MOZ',\n", + " 'Mauritania': 'MRT',\n", + " 'Malaysia': 'MYS',\n", + " 'Namibia': 'NAM',\n", + " 'New Caledonia': 'NCL',\n", + " 'Niue': 'NIU',\n", + " 'The Netherlands': 'NLD',\n", + " 'Netherlands': 'NLD',\n", + " 'Norway': 'NOR',\n", + " 'New Zealand': 'NZL',\n", + " 'Panama': 'PAN',\n", + " 'British Overseas Territory - Pitcairn': 'GBR',\n", + " 'Peru': 'PER',\n", + " 'Philippines': 'PHL',\n", + " 'Republic of Palau': 'PLW',\n", + " 'Poland': 'POL',\n", + " 'Portugal': 'PRT',\n", + " 'Qatar': 'QAT',\n", + " 'Russia': 'RUS',\n", + " 'Senegal': 'SEN',\n", + " 'Saint Helena, Ascension and Tristan da Cunha Overseas Territory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n", + " 'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',\n", + " 'Solomon Islands': 'SLB',\n", + " 'El Salvador': 'SLV',\n", + " 'São Tomé and Príncipe': 'STP',\n", + " 'Suriname': 'SUR',\n", + " 'Slovenia': 'SVN',\n", + " 'Sweden': 'SWE',\n", + " 'Seychelles': 'SYC',\n", + " 'Turks and Caicos Islands': 'TCA',\n", + " 'Thailand': 'THA',\n", + " 'East Timor': 'TLS',\n", + " 'Tonga': 'TON',\n", + " 'Trinidad and Tobago': 'TTO',\n", + " 'Tunisia': 'TUN',\n", + " 'Tanzania': 'TZA',\n", + " 'Uruguay': 'URY',\n", + " 'Saint Vincent and the Grenadines': 'VCT',\n", + " 'Vietnam': 'VNM',\n", + " 'Yemen': 'YEM',\n", + " 'South Africa': 'ZAF',\n", + " 'USA; Haiti; Jamaica': 'USA;HTI;JAM',\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "def get_parent_iso(country):\n", + " return country_iso_dict.get(country, None)\n", + "\n", + "# Apply the function to create the 'PARENT_ISO' column\n", + "ps_gdf['parent_iso'] = ps_gdf['country'].apply(get_parent_iso)" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10879, 8)" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_gdf.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
3372AIFRCAR91pointe Pasquereau - pointe Saint-VaastNaN3921072.01.642913MULTIPOLYGON (((-61.59887 16.27728, -61.59861 ...None
\n", + "
" + ], + "text/plain": [ + " site_id site_name country wdpa_id FPS \\\n", + "3372 AIFRCAR91 pointe Pasquereau - pointe Saint-Vaast NaN 392107 2.0 \n", + "\n", + " total_area geometry parent_iso \n", + "3372 1.642913 MULTIPOLYGON (((-61.59887 16.27728, -61.59861 ... None " + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_gdf[ps_gdf['parent_iso'].isna()]" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_4067/3237173437.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " ps_gdf['parent_iso'][ps_gdf['parent_iso'].isna()] = 'FRA'\n" + ] + } + ], + "source": [ + "ps_gdf['parent_iso'][ps_gdf['parent_iso'].isna()] = 'FRA'" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(ps_gdf['country'][ps_gdf['parent_iso'].isna()])" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
3246AIFRA3Pelagos / Pelagos Sanctuary For The Conservati...France, Italy, Monaco3650151.087830.341648POLYGON ((8.76729 44.42728, 8.77066 44.42690, ...FRA;ITA;MCO
10598NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...USA;HTI;JAM
\n", + "
" + ], + "text/plain": [ + " site_id site_name \\\n", + "3246 AIFRA3 Pelagos / Pelagos Sanctuary For The Conservati... \n", + "10598 NWR184 Navassa Island National Wildlife Refuge \n", + "\n", + " country wdpa_id FPS total_area \\\n", + "3246 France, Italy, Monaco 365015 1.0 87830.341648 \n", + "10598 USA; Haiti; Jamaica 555608120 5.0 1473.656765 \n", + "\n", + " geometry parent_iso \n", + "3246 POLYGON ((8.76729 44.42728, 8.77066 44.42690, ... FRA;ITA;MCO \n", + "10598 POLYGON ((-75.00384 18.20744, -75.01645 18.207... USA;HTI;JAM " + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_gdf[ps_gdf['parent_iso'].str.contains(';')]" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10883, 8)" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a mask for rows with multiple values in 'iso_code'\n", + "mask = ps_gdf['parent_iso'].str.contains(';', na=False)\n", + "\n", + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "split_rows = ps_gdf[mask].copy()\n", + "split_rows['parent_iso'] = split_rows['parent_iso'].str.split(';')\n", + "split_rows = split_rows.explode('parent_iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = ps_gdf[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "ps_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "\n", + "ps_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
\n", + "
" + ], + "text/plain": [ + "Empty GeoDataFrame\n", + "Columns: [site_id, site_name, country, wdpa_id, FPS, total_area, geometry, parent_iso]\n", + "Index: []" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_new[ps_new['parent_iso'].str.contains(';')]" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
10880NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...USA
10881NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...HTI
10882NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...JAM
\n", + "
" + ], + "text/plain": [ + " site_id site_name country \\\n", + "10880 NWR184 Navassa Island National Wildlife Refuge USA; Haiti; Jamaica \n", + "10881 NWR184 Navassa Island National Wildlife Refuge USA; Haiti; Jamaica \n", + "10882 NWR184 Navassa Island National Wildlife Refuge USA; Haiti; Jamaica \n", + "\n", + " wdpa_id FPS total_area \\\n", + "10880 555608120 5.0 1473.656765 \n", + "10881 555608120 5.0 1473.656765 \n", + "10882 555608120 5.0 1473.656765 \n", + "\n", + " geometry parent_iso \n", + "10880 POLYGON ((-75.00384 18.20744, -75.01645 18.207... USA \n", + "10881 POLYGON ((-75.00384 18.20744, -75.01645 18.207... HTI \n", + "10882 POLYGON ((-75.00384 18.20744, -75.01645 18.207... JAM " + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_new[ps_new['country']=='USA; Haiti; Jamaica']" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_isoFPS_cat
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...ATGHighly
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...ATGLess
\n", + "
" + ], + "text/plain": [ + " site_id site_name country wdpa_id \\\n", + "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", + "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", + "\n", + " FPS total_area geometry \\\n", + "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", + "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", + "\n", + " parent_iso FPS_cat \n", + "0 ATG Highly \n", + "1 ATG Less " + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Reclassify FPS values\n", + "fps_classes = {\n", + " 1: 'Less',\n", + " 2: 'Less',\n", + " 3: 'Moderately',\n", + " 4: 'Highly',\n", + " 5: 'Highly'\n", + "}\n", + "\n", + "# Create a new column 'FPS_cat' based on the mapping\n", + "ps_new['FPS_cat'] = ps_new['FPS'].apply(lambda x: fps_classes.get(x, None))\n", + "ps_new.head(2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'GL',\n", + " 'region_name': 'Global',\n", + " 'country_iso_3s': ['GLOB']\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'HS', # this region is not in the Protected Planet database\n", + " 'region_name': 'Areas Beyond National Jurisdiction',\n", + " 'country_iso_3s': [\n", + " \"ABNJ\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10883, 10)" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_new['region'] = ps_new['parent_iso'].map(country_to_region)\n", + "ps_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_isoFPS_catregion
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...ATGHighlySA
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...ATGLessSA
\n", + "
" + ], + "text/plain": [ + " site_id site_name country wdpa_id \\\n", + "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", + "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", + "\n", + " FPS total_area geometry \\\n", + "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", + "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", + "\n", + " parent_iso FPS_cat region \n", + "0 ATG Highly SA \n", + "1 ATG Less SA " + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_new.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [], + "source": [ + "ps_new.to_file(path_out + \"protectedseas/protectedseas.shp\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculate coverage" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idfishing_protection_levelarea
0ALBLess197.296039
1ALBModerately344.028483
2AREHighly6962.497530
3ARGHighly5958.615237
4ARGLess38419.623899
5ARGModerately7546.694893
6ATGHighly168.263212
7ATGLess45.247978
8ATGModerately21.163152
9AUSHighly723824.463100
\n", + "
" + ], + "text/plain": [ + " location_id fishing_protection_level area\n", + "0 ALB Less 197.296039\n", + "1 ALB Moderately 344.028483\n", + "2 ARE Highly 6962.497530\n", + "3 ARG Highly 5958.615237\n", + "4 ARG Less 38419.623899\n", + "5 ARG Moderately 7546.694893\n", + "6 ATG Highly 168.263212\n", + "7 ATG Less 45.247978\n", + "8 ATG Moderately 21.163152\n", + "9 AUS Highly 723824.463100" + ] + }, + "execution_count": 175, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_country = ps_new.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()\n", + "ps_country = ps_country.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", + "ps_country.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idfishing_protection_levelarea
0AFHighly3.518155e+04
1AFLess2.862643e+05
2AFModerately4.253144e+04
3ASHighly8.916216e+05
4ASLess2.950564e+06
5ASModerately2.856941e+05
6EUHighly2.671565e+06
7EULess2.468697e+06
8EUModerately1.881583e+06
9NAHighly3.961031e+06
\n", + "
" + ], + "text/plain": [ + " location_id fishing_protection_level area\n", + "0 AF Highly 3.518155e+04\n", + "1 AF Less 2.862643e+05\n", + "2 AF Moderately 4.253144e+04\n", + "3 AS Highly 8.916216e+05\n", + "4 AS Less 2.950564e+06\n", + "5 AS Moderately 2.856941e+05\n", + "6 EU Highly 2.671565e+06\n", + "7 EU Less 2.468697e+06\n", + "8 EU Moderately 1.881583e+06\n", + "9 NA Highly 3.961031e+06" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_region = ps_new.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()\n", + "ps_region = ps_region.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", + "ps_region.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fishing_protection_levelarealocation_id
0Highly8.180599e+06GLOB
1Less9.942402e+06GLOB
2Moderately3.177137e+06GLOB
\n", + "
" + ], + "text/plain": [ + " fishing_protection_level area location_id\n", + "0 Highly 8.180599e+06 GLOB\n", + "1 Less 9.942402e+06 GLOB\n", + "2 Moderately 3.177137e+06 GLOB" + ] + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_global = ps_new.groupby(['FPS_cat'], as_index=False)['total_area'].sum()\n", + "ps_global['location_id'] = 'GLOB'\n", + "ps_global = ps_global.rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", + "ps_global" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idfishing_protection_levelarea
0ALBLess1.972960e+02
1ALBModerately3.440285e+02
2AREHighly6.962498e+03
3ARGHighly5.958615e+03
4ARGLess3.841962e+04
............
227WAHighly1.024296e+04
228WALess2.129971e+04
229GLOBHighly8.180599e+06
230GLOBLess9.942402e+06
231GLOBModerately3.177137e+06
\n", + "

232 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " location_id fishing_protection_level area\n", + "0 ALB Less 1.972960e+02\n", + "1 ALB Moderately 3.440285e+02\n", + "2 ARE Highly 6.962498e+03\n", + "3 ARG Highly 5.958615e+03\n", + "4 ARG Less 3.841962e+04\n", + ".. ... ... ...\n", + "227 WA Highly 1.024296e+04\n", + "228 WA Less 2.129971e+04\n", + "229 GLOB Highly 8.180599e+06\n", + "230 GLOB Less 9.942402e+06\n", + "231 GLOB Moderately 3.177137e+06\n", + "\n", + "[232 rows x 3 columns]" + ] + }, + "execution_count": 183, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ps_coverage = pd.concat([ps_country, ps_region, ps_global], ignore_index=True)\n", + "ps_coverage" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "metadata": {}, + "outputs": [], + "source": [ + "ps_coverage.to_csv(path_out + '/tables/fishing_protection_level.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d2ecd0f83b6cc5062e8f86bc5e11342128c631ab Mon Sep 17 00:00:00 2001 From: sofia Date: Tue, 10 Oct 2023 17:34:54 +0200 Subject: [PATCH 4/9] adding region_locations table --- data/notebooks/protectedseas.ipynb | 127 +++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb index 85909d31..396cf9e3 100644 --- a/data/notebooks/protectedseas.ipynb +++ b/data/notebooks/protectedseas.ipynb @@ -1700,6 +1700,133 @@ "source": [ "ps_coverage.to_csv(path_out + '/tables/fishing_protection_level.csv', index=False)" ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
region_idlocation_id
0ASAFG
1ASASM
2ASAUS
3ASBGD
4ASBRN
.........
246WASAU
247WASYR
248WAYEM
249ATATA
250HSABNJ
\n", + "

251 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " region_id location_id\n", + "0 AS AFG\n", + "1 AS ASM\n", + "2 AS AUS\n", + "3 AS BGD\n", + "4 AS BRN\n", + ".. ... ...\n", + "246 WA SAU\n", + "247 WA SYR\n", + "248 WA YEM\n", + "249 AT ATA\n", + "250 HS ABNJ\n", + "\n", + "[251 rows x 2 columns]" + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regions_df = pd.DataFrame([{'region_id': data['region_iso'], 'location_id': iso} for data in regions_data for iso in data['country_iso_3s']])\n", + "regions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "metadata": {}, + "outputs": [], + "source": [ + "regions_df.to_csv(path_out + '/tables/region_locations.csv', index=False)" + ] } ], "metadata": { From 418a6c6e9f15f02f802f633c87dd11a2080fedc1 Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 11 Oct 2023 16:59:10 +0200 Subject: [PATCH 5/9] new notebooks to generate lookup tables and seamounts --- data/notebooks/layers.ipynb | 1304 ++++++++++++++++----------- data/notebooks/location_areas.ipynb | 574 ++++++++++++ data/notebooks/protectedseas.ipynb | 127 --- data/notebooks/seamounts.ipynb | 1000 ++++++++++++++++++++ 4 files changed, 2347 insertions(+), 658 deletions(-) create mode 100644 data/notebooks/location_areas.ipynb create mode 100644 data/notebooks/seamounts.ipynb diff --git a/data/notebooks/layers.ipynb b/data/notebooks/layers.ipynb index 3d9f2c16..27bbc7be 100644 --- a/data/notebooks/layers.ipynb +++ b/data/notebooks/layers.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12,114 +12,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\"" + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MRGIDGEONAMEMRGID_TER1POL_TYPEMRGID_SOV1TERRITORY1ISO_TER1SOVEREIGN1MRGID_TER2MRGID_SOV2...ISO_SOV1ISO_SOV2ISO_SOV3UN_SOV1UN_SOV2UN_SOV3UN_TER1UN_TER2UN_TER3geometry
08444.0American Samoa Exclusive Economic Zone8670.0200NM2204.0American SamoaASMUnited States0.00.0...USANaNNaN840NaNNaN16.0NaNNaNPOLYGON ((-166.64112 -17.55527, -166.64194 -17...
\n", - "

1 rows × 32 columns

\n", - "
" - ], - "text/plain": [ - " MRGID GEONAME MRGID_TER1 POL_TYPE \\\n", - "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 200NM \n", - "\n", - " MRGID_SOV1 TERRITORY1 ISO_TER1 SOVEREIGN1 MRGID_TER2 MRGID_SOV2 \\\n", - "0 2204.0 American Samoa ASM United States 0.0 0.0 \n", - "\n", - " ... ISO_SOV1 ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 \\\n", - "0 ... USA NaN NaN 840 NaN NaN 16.0 NaN \n", - "\n", - " UN_TER3 geometry \n", - "0 NaN POLYGON ((-166.64112 -17.55527, -166.64194 -17... \n", - "\n", - "[1 rows x 32 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez = gpd.read_file(path_in + \"/World_EEZ_v11_20191118/eez_v11.shp\")\n", "eez.head(1)" @@ -127,63 +32,25 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n", - " 'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n", - " 'TERRITORY2', 'ISO_TER2', 'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n", - " 'TERRITORY3', 'ISO_TER3', 'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n", - " 'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n", - " 'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n", - " dtype='object')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez.columns" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "Name: WGS 84\n", - "Axis Info [ellipsoidal]:\n", - "- Lat[north]: Geodetic latitude (degree)\n", - "- Lon[east]: Geodetic longitude (degree)\n", - "Area of Use:\n", - "- name: World.\n", - "- bounds: (-180.0, -90.0, 180.0, 90.0)\n", - "Datum: World Geodetic System 1984 ensemble\n", - "- Ellipsoid: WGS 84\n", - "- Prime Meridian: Greenwich" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez.crs" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -193,223 +60,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MRGIDGEONAMEMRGID_TER1POL_TYPEMRGID_SOV1TERRITORY1ISO_TER1SOVEREIGN1MRGID_TER2MRGID_SOV2...ISO_SOV1ISO_SOV2ISO_SOV3UN_SOV1UN_SOV2UN_SOV3UN_TER1UN_TER2UN_TER3geometry
08444.0American Samoa Exclusive Economic Zone8670.0200NM2204.0American SamoaASMUnited States0.00.0...USANaNNaN840NaNNaN16.0NaNNaNPOLYGON ((-16216412.543 -2157569.856, -1621650...
18379.0Ascension Exclusive Economic Zone8620.0200NM2208.0AscensionSHNUnited Kingdom0.00.0...GBRNaNNaN826NaNNaN654.0NaNNaNPOLYGON ((-1089355.142 -974062.004, -1089348.4...
28446.0Cook Islands Exclusive Economic Zone8672.0200NM2227.0Cook IslandsCOKNew Zealand0.00.0...NZLNaNNaN554NaNNaN184.0NaNNaNPOLYGON ((-15912583.852 -716733.193, -15813064...
38389.0Overlapping claim Falkland / Malvinas Islands:...8623.0Overlapping claim2208.0Falkland / Malvinas IslandsFLKUnited Kingdom8623.02149.0...GBRARGNaN82632.0NaN238.0238.0NaNPOLYGON ((-4061728.309 -6509190.466, -4443979....
48440.0French Polynesian Exclusive Economic Zone8656.0200NM17.0French PolynesiaPYFFrance0.00.0...FRANaNNaN250NaNNaN258.0NaNNaNMULTIPOLYGON (((-13543804.433 -974376.651, -13...
\n", - "

5 rows × 32 columns

\n", - "
" - ], - "text/plain": [ - " MRGID GEONAME MRGID_TER1 \\\n", - "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 \n", - "1 8379.0 Ascension Exclusive Economic Zone 8620.0 \n", - "2 8446.0 Cook Islands Exclusive Economic Zone 8672.0 \n", - "3 8389.0 Overlapping claim Falkland / Malvinas Islands:... 8623.0 \n", - "4 8440.0 French Polynesian Exclusive Economic Zone 8656.0 \n", - "\n", - " POL_TYPE MRGID_SOV1 TERRITORY1 ISO_TER1 \\\n", - "0 200NM 2204.0 American Samoa ASM \n", - "1 200NM 2208.0 Ascension SHN \n", - "2 200NM 2227.0 Cook Islands COK \n", - "3 Overlapping claim 2208.0 Falkland / Malvinas Islands FLK \n", - "4 200NM 17.0 French Polynesia PYF \n", - "\n", - " SOVEREIGN1 MRGID_TER2 MRGID_SOV2 ... ISO_SOV1 ISO_SOV2 ISO_SOV3 \\\n", - "0 United States 0.0 0.0 ... USA NaN NaN \n", - "1 United Kingdom 0.0 0.0 ... GBR NaN NaN \n", - "2 New Zealand 0.0 0.0 ... NZL NaN NaN \n", - "3 United Kingdom 8623.0 2149.0 ... GBR ARG NaN \n", - "4 France 0.0 0.0 ... FRA NaN NaN \n", - "\n", - " UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n", - "0 840 NaN NaN 16.0 NaN NaN \n", - "1 826 NaN NaN 654.0 NaN NaN \n", - "2 554 NaN NaN 184.0 NaN NaN \n", - "3 826 32.0 NaN 238.0 238.0 NaN \n", - "4 250 NaN NaN 258.0 NaN NaN \n", - "\n", - " geometry \n", - "0 POLYGON ((-16216412.543 -2157569.856, -1621650... \n", - "1 POLYGON ((-1089355.142 -974062.004, -1089348.4... \n", - "2 POLYGON ((-15912583.852 -716733.193, -15813064... \n", - "3 POLYGON ((-4061728.309 -6509190.466, -4443979.... \n", - "4 MULTIPOLYGON (((-13543804.433 -974376.651, -13... \n", - "\n", - "[5 rows x 32 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "eez = eez.to_crs('ESRI:54009')\n", "eez['AREA_KM2']= eez.geometry.area/ 1000000\n", @@ -418,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -427,29 +80,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shx\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.dbf\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.prj\n" - ] - } - ], + "outputs": [], "source": [ - "# Reproject to 4626\n", + "# Reproject to 4326\n", "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -524,16 +165,434 @@ " }\n", "]\n", "\n", - "# Convert the region data to a dictionary that maps each country to its region name\n", - "country_to_region = {}\n", - "for region in regions_data:\n", - " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_name']" + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_name']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n", + "eez.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eez['REGIONS'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dissolve by relevant fields: REGIONS\n", + "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n", + "regions.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions.crs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions['REGIONS'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area of each region\n", + "regions['AREA_KM2']= regions.geometry.area/ 1000000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reproject to 4326\n", + "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions['REGIONS'].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extract marine areas" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\", driver=\"ESRI Shapefile\")\n", + "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\", driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" that has the field \"ISO_SOV1\" for all rows except those in which ISO_SOV2 and ISO_SOV3 are not null. In such cases concatenate ISO_SOV1, ISO_SOV2 and ISO_SOV3\n", + "eez['iso'] = eez['ISO_SOV1']\n", + "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n", + "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "49" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(eez[eez['iso'].str.contains(';')])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(281, 33)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eez.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(337, 33)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a mask for rows with multiple values in 'iso_code'\n", + "mask = eez['iso'].str.contains(';', na=False)\n", + "\n", + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "split_rows = eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "\n", + "eez_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(eez_new[eez_new['iso'].str.contains(';')])" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "iso_country_mapping = {\n", + " 'USA': 'United States',\n", + " 'GBR': 'United Kingdom',\n", + " 'NZL': 'New Zealand',\n", + " 'FRA': 'France',\n", + " 'WSM': 'Samoa',\n", + " 'TON': 'Tonga',\n", + " 'CHL': 'Chile',\n", + " 'URY': 'Uruguay',\n", + " 'PER': 'Peru',\n", + " 'BRA': 'Brazil',\n", + " 'KIR': 'Kiribati',\n", + " 'ARG': 'Argentina',\n", + " 'AUS': 'Australia',\n", + " 'COM': 'Comoros',\n", + " 'MDG': 'Madagascar',\n", + " 'ZAF': 'South Africa',\n", + " 'MUS': 'Mauritius',\n", + " 'VUT': 'Vanuatu',\n", + " 'NAM': 'Namibia',\n", + " 'TLS': 'Timor-Leste',\n", + " 'COG': 'Republic of the Congo',\n", + " 'AGO': 'Angola',\n", + " 'MOZ': 'Mozambique',\n", + " 'KEN': 'Kenya',\n", + " 'PNG': 'Papua New Guinea',\n", + " 'TZA': 'Tanzania',\n", + " 'SLB': 'Solomon Islands',\n", + " 'SYC': 'Seychelles',\n", + " 'COD': 'Democratic Republic of the Congo',\n", + " 'ATG': 'Antigua and Barbuda',\n", + " 'NLD': 'Netherlands',\n", + " 'PRT': 'Portugal',\n", + " 'BHS': 'The Bahamas',\n", + " 'BRB': 'Barbados',\n", + " 'MEX': 'Mexico',\n", + " 'CPV': 'Cape Verde',\n", + " 'ESP': 'Spain',\n", + " 'PAN': 'Panama',\n", + " 'CRI': 'Costa Rica',\n", + " 'DMA': 'Dominica',\n", + " 'DOM': 'Dominican Republic',\n", + " 'GTM': 'Guatemala',\n", + " 'DNK': 'Denmark',\n", + " 'GMB': 'Gambia',\n", + " 'GIB': 'Gibraltar',\n", + " 'GRD': 'Grenada',\n", + " 'SLE': 'Sierra Leone',\n", + " 'ISL': 'Iceland',\n", + " 'JAM': 'Jamaica',\n", + " 'MRT': 'Mauritania',\n", + " 'HTI': 'Haiti',\n", + " 'KNA': 'Saint Kitts and Nevis',\n", + " 'LCA': 'Saint Lucia',\n", + " 'VCT': 'Saint Vincent and the Grenadines',\n", + " 'TTO': 'Trinidad and Tobago',\n", + " 'SLV': 'El Salvador',\n", + " 'BLZ': 'Belize',\n", + " 'CUB': 'Cuba',\n", + " 'SEN': 'Senegal',\n", + " 'VEN': 'Venezuela',\n", + " 'CAN': 'Canada',\n", + " 'NIC': 'Nicaragua',\n", + " 'GUY': 'Guyana',\n", + " 'COL': 'Colombia',\n", + " 'IRL': 'Ireland',\n", + " 'GNB': 'Guinea-Bissau',\n", + " 'GIN': 'Guinea',\n", + " 'CIV': 'Ivory Coast',\n", + " 'LBR': 'Liberia',\n", + " 'HND': 'Honduras',\n", + " 'ECU': 'Ecuador',\n", + " 'ESH': 'Western Sahara',\n", + " 'SUR': 'Suriname',\n", + " 'MAR': 'Morocco',\n", + " 'ARE': 'United Arab Emirates',\n", + " 'CYP': 'Cyprus',\n", + " 'ERI': 'Eritrea',\n", + " 'EGY': 'Egypt',\n", + " 'GEO': 'Georgia',\n", + " 'IRN': 'Iran',\n", + " 'LBN': 'Lebanon',\n", + " 'LBY': 'Libya',\n", + " 'MLT': 'Malta',\n", + " 'OMN': 'Oman',\n", + " 'SAU': 'Saudi Arabia',\n", + " 'LKA': 'Sri Lanka',\n", + " 'SDN': 'Sudan',\n", + " 'SYR': 'Syria',\n", + " 'TGO': 'Togo',\n", + " 'GRC': 'Greece',\n", + " 'TUR': 'Turkey',\n", + " 'MCO': 'Monaco',\n", + " 'TUN': 'Tunisia',\n", + " 'MNE': 'Montenegro',\n", + " 'ALB': 'Albania',\n", + " 'BGR': 'Bulgaria',\n", + " 'PSE': 'Palestine',\n", + " 'KWT': 'Kuwait',\n", + " 'IRQ': 'Iraq',\n", + " 'BHR': 'Bahrain',\n", + " 'QAT': 'Qatar',\n", + " 'YEM': 'Yemen',\n", + " 'ISR': 'Israel',\n", + " 'JOR': 'Jordan',\n", + " 'DJI': 'Djibouti',\n", + " 'BGD': 'Bangladesh',\n", + " 'NGA': 'Nigeria',\n", + " 'CMR': 'Cameroon',\n", + " 'STP': 'São Tomé and Príncipe',\n", + " 'BIH': 'Bosnia and Herzegovina',\n", + " 'MHL': 'Marshall Islands',\n", + " 'PLW': 'Palau',\n", + " 'PHL': 'Philippines',\n", + " 'TWN': 'Taiwan',\n", + " 'SGP': 'Singapore',\n", + " 'THA': 'Thailand',\n", + " 'VNM': 'Vietnam',\n", + " 'KOR': 'South Korea',\n", + " 'BRN': 'Brunei',\n", + " 'PRK': 'North Korea',\n", + " 'KHM': 'Cambodia',\n", + " 'CHN': 'China',\n", + " 'EST': 'Estonia',\n", + " 'FIN': 'Finland',\n", + " 'SWE': 'Sweden',\n", + " 'LTU': 'Lithuania',\n", + " 'NOR': 'Norway',\n", + " 'BEL': 'Belgium',\n", + " 'DEU': 'Germany',\n", + " 'LVA': 'Latvia',\n", + " 'HRV': 'Croatia',\n", + " 'ITA': 'Italy',\n", + " 'UKR': 'Ukraine',\n", + " 'ROU': 'Romania',\n", + " 'JPN': 'Japan',\n", + " 'IND': 'India',\n", + " 'PAK': 'Pakistan',\n", + " 'TKM': 'Turkmenistan',\n", + " 'AZE': 'Azerbaijan',\n", + " 'KAZ': 'Kazakhstan',\n", + " 'MMR': 'Myanmar',\n", + " 'POL': 'Poland',\n", + " 'BEN': 'Benin',\n", + " 'SVN': 'Slovenia',\n", + " 'MYS': 'Malaysia',\n", + " 'ATA': 'Antarctica',\n", + " 'TUV': 'Tuvalu',\n", + " 'FJI': 'Fiji',\n", + " 'FSM': 'Micronesia',\n", + " 'GNQ': 'Equatorial Guinea',\n", + " 'MDV': 'Maldives',\n", + " 'SOM': 'Somalia',\n", + " 'NRU': 'Nauru',\n", + " 'GAB': 'Gabon',\n", + " 'IDN': 'Indonesia',\n", + " 'DZA': 'Algeria',\n", + " 'GHA': 'Ghana',\n", + " 'RUS': 'Russia'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "def get_name(country):\n", + " return iso_country_mapping.get(country, None)\n", + "\n", + "# Apply the function to create the 'PARENT_ISO' column\n", + "eez_new['name_iso'] = eez_new['iso'].apply(get_name)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -568,7 +627,6 @@ " MRGID_TER2\n", " MRGID_SOV2\n", " ...\n", - " ISO_SOV2\n", " ISO_SOV3\n", " UN_SOV1\n", " UN_SOV2\n", @@ -577,7 +635,8 @@ " UN_TER2\n", " UN_TER3\n", " geometry\n", - " REGIONS\n", + " iso\n", + " name_iso\n", " \n", " \n", " \n", @@ -595,15 +654,15 @@ " 0.0\n", " ...\n", " NaN\n", - " NaN\n", " 840\n", " NaN\n", " NaN\n", " 16.0\n", " NaN\n", " NaN\n", - " POLYGON ((-16216412.543 -2157569.856, -1621650...\n", - " North America\n", + " POLYGON ((-166.64112 -17.55527, -166.64194 -17...\n", + " USA\n", + " United States\n", " \n", " \n", " 1\n", @@ -619,19 +678,19 @@ " 0.0\n", " ...\n", " NaN\n", - " NaN\n", " 826\n", " NaN\n", " NaN\n", " 654.0\n", " NaN\n", " NaN\n", - " POLYGON ((-1089355.142 -974062.004, -1089348.4...\n", - " Europe\n", + " POLYGON ((-10.93328 -7.88745, -10.93324 -7.889...\n", + " GBR\n", + " United Kingdom\n", " \n", " \n", "\n", - "

2 rows × 33 columns

\n", + "

2 rows × 34 columns

\n", "" ], "text/plain": [ @@ -643,90 +702,161 @@ "0 2204.0 American Samoa ASM United States 0.0 \n", "1 2208.0 Ascension SHN United Kingdom 0.0 \n", "\n", - " MRGID_SOV2 ... ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 \\\n", - "0 0.0 ... NaN NaN 840 NaN NaN 16.0 \n", - "1 0.0 ... NaN NaN 826 NaN NaN 654.0 \n", + " MRGID_SOV2 ... ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n", + "0 0.0 ... NaN 840 NaN NaN 16.0 NaN NaN \n", + "1 0.0 ... NaN 826 NaN NaN 654.0 NaN NaN \n", "\n", - " UN_TER2 UN_TER3 geometry \\\n", - "0 NaN NaN POLYGON ((-16216412.543 -2157569.856, -1621650... \n", - "1 NaN NaN POLYGON ((-1089355.142 -974062.004, -1089348.4... \n", + " geometry iso name_iso \n", + "0 POLYGON ((-166.64112 -17.55527, -166.64194 -17... USA United States \n", + "1 POLYGON ((-10.93328 -7.88745, -10.93324 -7.889... GBR United Kingdom \n", "\n", - " REGIONS \n", - "0 North America \n", - "1 Europe \n", - "\n", - "[2 rows x 33 columns]" + "[2 rows x 34 columns]" ] }, - "execution_count": 16, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n", - "eez.head(2)" + "eez_new.head(2)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ - "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")" + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "name_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + " name_to_region[country] = region['region_name']" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['North America', 'Europe', 'Asia & Pacific',\n", - " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n", - " dtype=object)" + "array(['NA', 'EU', 'AS', 'SA', 'AF', 'WA', 'AT'], dtype=object)" ] }, - "execution_count": 18, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "eez['REGIONS'].unique()" + "eez_new['region'] = eez_new['iso'].map(country_to_region)\n", + "eez_new['region'].unique()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 54, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[dissolve2] Removed 127,740 / 218,614 slivers using 0.033+ sqkm variable threshold\n", - "[dissolve2] Dissolved 281 features into 7 features\n", - "[explode] Exploded 7 features into 83 features\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n" - ] + "data": { + "text/plain": [ + "array(['North America', 'Europe', 'Asia & Pacific',\n", + " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antartica'],\n", + " dtype=object)" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Dissolve by relevant fields: REGIONS\n", - "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + "eez_new['region_name'] = eez_new['iso'].map(name_to_region)\n", + "eez_new['region_name'].unique()" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -750,177 +880,289 @@ " \n", " \n", " \n", - " REGIONS\n", - " geometry\n", + " location_id\n", + " location_name\n", + " total_marine_area\n", + " location_type\n", " \n", " \n", " \n", " \n", " 0\n", - " North America\n", - " POLYGON ((-16216412.543 -2157569.856, -1621650...\n", + " AGO\n", + " Angola\n", + " 498908.577009\n", + " country\n", " \n", " \n", " 1\n", - " North America\n", - " POLYGON ((-15875617.974 972834.674, -15887321....\n", + " ALB\n", + " Albania\n", + " 12177.287755\n", + " country\n", " \n", " \n", "\n", "" ], "text/plain": [ - " REGIONS geometry\n", - "0 North America POLYGON ((-16216412.543 -2157569.856, -1621650...\n", - "1 North America POLYGON ((-15875617.974 972834.674, -15887321...." - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n", - "regions.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "Name: World_Mollweide\n", - "Axis Info [cartesian]:\n", - "- [east]: Easting (metre)\n", - "- [north]: Northing (metre)\n", - "Area of Use:\n", - "- undefined\n", - "Coordinate Operation:\n", - "- name: unnamed\n", - "- method: Mollweide\n", - "Datum: World Geodetic System 1984\n", - "- Ellipsoid: WGS 84\n", - "- Prime Meridian: Greenwich" + " location_id location_name total_marine_area location_type\n", + "0 AGO Angola 498908.577009 country\n", + "1 ALB Albania 12177.287755 country" ] }, - "execution_count": 22, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "regions.crs" + "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "marine_areas['location_type'] = 'country'\n", + "marine_areas.head(2)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idlocation_nametotal_marine_arealocation_type
0AFAfrica1.495538e+07region
1ASAsia & Pacific5.269208e+07region
\n", + "
" + ], "text/plain": [ - "array(['North America', 'Europe', 'Asia & Pacific',\n", - " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n", - " dtype=object)" + " location_id location_name total_marine_area location_type\n", + "0 AF Africa 1.495538e+07 region\n", + "1 AS Asia & Pacific 5.269208e+07 region" ] }, - "execution_count": 23, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "regions['REGIONS'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_27590/1686611470.py:1: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartic'\n" - ] - } - ], - "source": [ - "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate area of each region\n", - "regions['AREA_KM2']= regions.geometry.area/ 1000000" + "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "regions_areas['location_type'] = 'region'\n", + "regions_areas.head(2)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ - "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")" + "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n", + "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 63, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n", - "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idlocation_nametotal_marine_arealocation_type
0AGOAngola4.989086e+05country
1ALBAlbania1.217729e+04country
2AREUnited Arab Emirates5.821593e+04country
3ARGArgentina2.897629e+06country
4ATAAntarctica8.842860e+06country
...............
162NANorth America1.791826e+07region
163SALatin America & Caribbean2.107800e+07region
164WAWest Asia1.456969e+06region
165GLOBWorldwide3.610000e+08worldwide
166ABNJHigh Seas2.128814e+08country
\n", + "

167 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " location_id location_name total_marine_area location_type\n", + "0 AGO Angola 4.989086e+05 country\n", + "1 ALB Albania 1.217729e+04 country\n", + "2 ARE United Arab Emirates 5.821593e+04 country\n", + "3 ARG Argentina 2.897629e+06 country\n", + "4 ATA Antarctica 8.842860e+06 country\n", + ".. ... ... ... ...\n", + "162 NA North America 1.791826e+07 region\n", + "163 SA Latin America & Caribbean 2.107800e+07 region\n", + "164 WA West Asia 1.456969e+06 region\n", + "165 GLOB Worldwide 3.610000e+08 worldwide\n", + "166 ABNJ High Seas 2.128814e+08 country\n", + "\n", + "[167 rows x 4 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Reproject to 4626\n", - "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp" + "# concat gl_df and hs_df to marine_areas\n", + "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n", + "marine_areas2\n" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['North America', 'Europe', 'Asia & Pacific',\n", - " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antarctica'],\n", - " dtype=object)" + "array(['country', 'region', 'worldwide'], dtype=object)" ] }, - "execution_count": 32, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "regions['REGIONS'].unique()" + "marine_areas2['location_type'].unique()" ] } ], diff --git a/data/notebooks/location_areas.ipynb b/data/notebooks/location_areas.ipynb new file mode 100644 index 00000000..c9c933d7 --- /dev/null +++ b/data/notebooks/location_areas.ipynb @@ -0,0 +1,574 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create locations table" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", + "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" that has the field \"ISO_SOV1\" for all rows except those in which ISO_SOV2 and ISO_SOV3 are not null. In such cases concatenate ISO_SOV1, ISO_SOV2 and ISO_SOV3\n", + "eez['iso'] = eez['ISO_SOV1']\n", + "eez.loc[eez['ISO_SOV2'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2']\n", + "eez.loc[eez['ISO_SOV3'].notnull(), 'iso'] = eez['ISO_SOV1'] + \";\" + eez['ISO_SOV2'] + \";\" + eez['ISO_SOV3']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(337, 33)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a mask for rows with multiple values in 'iso_code'\n", + "mask = eez['iso'].str.contains(';', na=False)\n", + "\n", + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "split_rows = eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "\n", + "eez_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "iso_country_mapping = {\n", + " 'USA': 'United States',\n", + " 'GBR': 'United Kingdom',\n", + " 'NZL': 'New Zealand',\n", + " 'FRA': 'France',\n", + " 'WSM': 'Samoa',\n", + " 'TON': 'Tonga',\n", + " 'CHL': 'Chile',\n", + " 'URY': 'Uruguay',\n", + " 'PER': 'Peru',\n", + " 'BRA': 'Brazil',\n", + " 'KIR': 'Kiribati',\n", + " 'ARG': 'Argentina',\n", + " 'AUS': 'Australia',\n", + " 'COM': 'Comoros',\n", + " 'MDG': 'Madagascar',\n", + " 'ZAF': 'South Africa',\n", + " 'MUS': 'Mauritius',\n", + " 'VUT': 'Vanuatu',\n", + " 'NAM': 'Namibia',\n", + " 'TLS': 'Timor-Leste',\n", + " 'COG': 'Republic of the Congo',\n", + " 'AGO': 'Angola',\n", + " 'MOZ': 'Mozambique',\n", + " 'KEN': 'Kenya',\n", + " 'PNG': 'Papua New Guinea',\n", + " 'TZA': 'Tanzania',\n", + " 'SLB': 'Solomon Islands',\n", + " 'SYC': 'Seychelles',\n", + " 'COD': 'Democratic Republic of the Congo',\n", + " 'ATG': 'Antigua and Barbuda',\n", + " 'NLD': 'Netherlands',\n", + " 'PRT': 'Portugal',\n", + " 'BHS': 'The Bahamas',\n", + " 'BRB': 'Barbados',\n", + " 'MEX': 'Mexico',\n", + " 'CPV': 'Cape Verde',\n", + " 'ESP': 'Spain',\n", + " 'PAN': 'Panama',\n", + " 'CRI': 'Costa Rica',\n", + " 'DMA': 'Dominica',\n", + " 'DOM': 'Dominican Republic',\n", + " 'GTM': 'Guatemala',\n", + " 'DNK': 'Denmark',\n", + " 'GMB': 'Gambia',\n", + " 'GIB': 'Gibraltar',\n", + " 'GRD': 'Grenada',\n", + " 'SLE': 'Sierra Leone',\n", + " 'ISL': 'Iceland',\n", + " 'JAM': 'Jamaica',\n", + " 'MRT': 'Mauritania',\n", + " 'HTI': 'Haiti',\n", + " 'KNA': 'Saint Kitts and Nevis',\n", + " 'LCA': 'Saint Lucia',\n", + " 'VCT': 'Saint Vincent and the Grenadines',\n", + " 'TTO': 'Trinidad and Tobago',\n", + " 'SLV': 'El Salvador',\n", + " 'BLZ': 'Belize',\n", + " 'CUB': 'Cuba',\n", + " 'SEN': 'Senegal',\n", + " 'VEN': 'Venezuela',\n", + " 'CAN': 'Canada',\n", + " 'NIC': 'Nicaragua',\n", + " 'GUY': 'Guyana',\n", + " 'COL': 'Colombia',\n", + " 'IRL': 'Ireland',\n", + " 'GNB': 'Guinea-Bissau',\n", + " 'GIN': 'Guinea',\n", + " 'CIV': 'Ivory Coast',\n", + " 'LBR': 'Liberia',\n", + " 'HND': 'Honduras',\n", + " 'ECU': 'Ecuador',\n", + " 'ESH': 'Western Sahara',\n", + " 'SUR': 'Suriname',\n", + " 'MAR': 'Morocco',\n", + " 'ARE': 'United Arab Emirates',\n", + " 'CYP': 'Cyprus',\n", + " 'ERI': 'Eritrea',\n", + " 'EGY': 'Egypt',\n", + " 'GEO': 'Georgia',\n", + " 'IRN': 'Iran',\n", + " 'LBN': 'Lebanon',\n", + " 'LBY': 'Libya',\n", + " 'MLT': 'Malta',\n", + " 'OMN': 'Oman',\n", + " 'SAU': 'Saudi Arabia',\n", + " 'LKA': 'Sri Lanka',\n", + " 'SDN': 'Sudan',\n", + " 'SYR': 'Syria',\n", + " 'TGO': 'Togo',\n", + " 'GRC': 'Greece',\n", + " 'TUR': 'Turkey',\n", + " 'MCO': 'Monaco',\n", + " 'TUN': 'Tunisia',\n", + " 'MNE': 'Montenegro',\n", + " 'ALB': 'Albania',\n", + " 'BGR': 'Bulgaria',\n", + " 'PSE': 'Palestine',\n", + " 'KWT': 'Kuwait',\n", + " 'IRQ': 'Iraq',\n", + " 'BHR': 'Bahrain',\n", + " 'QAT': 'Qatar',\n", + " 'YEM': 'Yemen',\n", + " 'ISR': 'Israel',\n", + " 'JOR': 'Jordan',\n", + " 'DJI': 'Djibouti',\n", + " 'BGD': 'Bangladesh',\n", + " 'NGA': 'Nigeria',\n", + " 'CMR': 'Cameroon',\n", + " 'STP': 'São Tomé and Príncipe',\n", + " 'BIH': 'Bosnia and Herzegovina',\n", + " 'MHL': 'Marshall Islands',\n", + " 'PLW': 'Palau',\n", + " 'PHL': 'Philippines',\n", + " 'TWN': 'Taiwan',\n", + " 'SGP': 'Singapore',\n", + " 'THA': 'Thailand',\n", + " 'VNM': 'Vietnam',\n", + " 'KOR': 'South Korea',\n", + " 'BRN': 'Brunei',\n", + " 'PRK': 'North Korea',\n", + " 'KHM': 'Cambodia',\n", + " 'CHN': 'China',\n", + " 'EST': 'Estonia',\n", + " 'FIN': 'Finland',\n", + " 'SWE': 'Sweden',\n", + " 'LTU': 'Lithuania',\n", + " 'NOR': 'Norway',\n", + " 'BEL': 'Belgium',\n", + " 'DEU': 'Germany',\n", + " 'LVA': 'Latvia',\n", + " 'HRV': 'Croatia',\n", + " 'ITA': 'Italy',\n", + " 'UKR': 'Ukraine',\n", + " 'ROU': 'Romania',\n", + " 'JPN': 'Japan',\n", + " 'IND': 'India',\n", + " 'PAK': 'Pakistan',\n", + " 'TKM': 'Turkmenistan',\n", + " 'AZE': 'Azerbaijan',\n", + " 'KAZ': 'Kazakhstan',\n", + " 'MMR': 'Myanmar',\n", + " 'POL': 'Poland',\n", + " 'BEN': 'Benin',\n", + " 'SVN': 'Slovenia',\n", + " 'MYS': 'Malaysia',\n", + " 'ATA': 'Antarctica',\n", + " 'TUV': 'Tuvalu',\n", + " 'FJI': 'Fiji',\n", + " 'FSM': 'Micronesia',\n", + " 'GNQ': 'Equatorial Guinea',\n", + " 'MDV': 'Maldives',\n", + " 'SOM': 'Somalia',\n", + " 'NRU': 'Nauru',\n", + " 'GAB': 'Gabon',\n", + " 'IDN': 'Indonesia',\n", + " 'DZA': 'Algeria',\n", + " 'GHA': 'Ghana',\n", + " 'RUS': 'Russia'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_name(country):\n", + " return iso_country_mapping.get(country, None)\n", + "\n", + "# Apply the function to create the 'PARENT_ISO' column\n", + "eez_new['name_iso'] = eez_new['iso'].apply(get_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "name_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + " name_to_region[country] = region['region_name']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "eez_new['region'] = eez_new['iso'].map(country_to_region)\n", + "eez_new['region_name'] = eez_new['iso'].map(name_to_region)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "marine_areas = eez_new.groupby(['iso', 'name_iso']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "marine_areas = marine_areas.rename(columns={'iso': 'location_id', 'name_iso':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "marine_areas['location_type'] = 'country'" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "regions_areas = eez_new.groupby(['region', 'region_name']).agg({'AREA_KM2': 'sum'}).reset_index()\n", + "regions_areas = regions_areas.rename(columns={'region': 'location_id', 'region_name':'location_name', 'AREA_KM2': 'total_marine_area'})\n", + "regions_areas['location_type'] = 'region'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "global_area = pd.DataFrame({'location_id': ['GLOB'], 'location_name': ['Worldwide'], 'total_marine_area': [361000000], 'location_type': ['worldwide']}) \n", + "hs_area = pd.DataFrame({'location_id': ['ABNJ'], 'location_name': ['High Seas'], 'total_marine_area': [hs['area_km2'].values[0]], 'location_type': ['country']})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# concat gl_df and hs_df to marine_areas\n", + "marine_areas2 = pd.concat([marine_areas, regions_areas, global_area, hs_area], ignore_index=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the table as csv\n", + "marine_areas2.to_csv(path_out + \"/tables/locations.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create region_locations table" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
region_idlocation_id
0ASAFG
1ASASM
2ASAUS
3ASBGD
4ASBRN
.........
244WAQAT
245WASAU
246WASYR
247WAYEM
248ATATA
\n", + "

249 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " region_id location_id\n", + "0 AS AFG\n", + "1 AS ASM\n", + "2 AS AUS\n", + "3 AS BGD\n", + "4 AS BRN\n", + ".. ... ...\n", + "244 WA QAT\n", + "245 WA SAU\n", + "246 WA SYR\n", + "247 WA YEM\n", + "248 AT ATA\n", + "\n", + "[249 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regions_df = pd.DataFrame([{'region_id': data['region_iso'], 'location_id': iso} for data in regions_data for iso in data['country_iso_3s']])\n", + "regions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "regions_df.to_csv(path_out + '/tables/region_locations.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb index 396cf9e3..85909d31 100644 --- a/data/notebooks/protectedseas.ipynb +++ b/data/notebooks/protectedseas.ipynb @@ -1700,133 +1700,6 @@ "source": [ "ps_coverage.to_csv(path_out + '/tables/fishing_protection_level.csv', index=False)" ] - }, - { - "cell_type": "code", - "execution_count": 185, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
region_idlocation_id
0ASAFG
1ASASM
2ASAUS
3ASBGD
4ASBRN
.........
246WASAU
247WASYR
248WAYEM
249ATATA
250HSABNJ
\n", - "

251 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " region_id location_id\n", - "0 AS AFG\n", - "1 AS ASM\n", - "2 AS AUS\n", - "3 AS BGD\n", - "4 AS BRN\n", - ".. ... ...\n", - "246 WA SAU\n", - "247 WA SYR\n", - "248 WA YEM\n", - "249 AT ATA\n", - "250 HS ABNJ\n", - "\n", - "[251 rows x 2 columns]" - ] - }, - "execution_count": 185, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regions_df = pd.DataFrame([{'region_id': data['region_iso'], 'location_id': iso} for data in regions_data for iso in data['country_iso_3s']])\n", - "regions_df" - ] - }, - { - "cell_type": "code", - "execution_count": 186, - "metadata": {}, - "outputs": [], - "source": [ - "regions_df.to_csv(path_out + '/tables/region_locations.csv', index=False)" - ] } ], "metadata": { diff --git a/data/notebooks/seamounts.ipynb b/data/notebooks/seamounts.ipynb new file mode 100644 index 00000000..d68b7bfc --- /dev/null +++ b/data/notebooks/seamounts.ipynb @@ -0,0 +1,1000 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Read required data\n", + "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n", + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", + "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep relevant fields in eez and hs and merge then in one dataframe\n", + "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", + "hs = hs[['geometry']]\n", + "hs['SOVEREIGN1'] = 'High Seas'\n", + "hs['ISO_SOV1'] = 'ABNJ'\n", + "eez_hs = eez.merge(hs, how='outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33461" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Join eez info to seamounts falling within eez polygons\n", + "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n", + "len(seamounts_eez)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "43" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_eez['ISO_SOV1'].isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop those not associated with an eez or hs\n", + "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" with the iso_sov codes\n", + "def concatenate_iso(row):\n", + " iso_list = [row['ISO_SOV1']]\n", + " if not pd.isna(row['ISO_SOV2']):\n", + " iso_list.append(row['ISO_SOV2'])\n", + " if not pd.isna(row['ISO_SOV3']):\n", + " iso_list.append(row['ISO_SOV3'])\n", + " return ';'.join(iso_list)\n", + "\n", + "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", + "split_rows = seamounts_eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['DNK', 'ABNJ', 'RUS', 'NOR', 'CAN', 'USA', 'FRA', 'ESP', 'JPN',\n", + " 'PRT', 'ITA', 'KOR', 'GRC', 'LBY', 'MLT', 'GBR', 'MAR', 'MEX',\n", + " 'BHS', 'CUB', 'DOM', 'OMN', 'PHL', 'HND', 'HTI', 'JAM', 'TWN',\n", + " 'ATG', 'NLD', 'CPV', 'MHL', 'COL', 'SEN', 'VEN', 'DMA', 'VNM',\n", + " 'CHN', 'IND', 'YEM', 'BRB', 'SOM', 'FSM', 'NIC', 'PLW', 'CRI',\n", + " 'MYS', 'BRN', 'KIR', 'IDN', 'PAN', 'MDV', 'BRA', 'ECU', 'LKA',\n", + " 'GNQ', 'PNG', 'NRU', 'MUS', 'PER', 'SYC', 'TUV', 'SLB', 'NZL',\n", + " 'AUS', 'FJI', 'MDG', 'COM', 'MOZ', 'WSM', 'VUT', 'TON', 'CHL',\n", + " 'ZAF', 'ARG', 'ATA', 'ISL', 'PRK', 'TUR', 'PAK', 'BLZ', 'MMR',\n", + " 'VCT', 'GNB', 'GIN', 'CIV', 'GHA', 'LBR', 'STP', 'TZA', 'AGO',\n", + " 'NAM', 'URY', 'ESH'], dtype=object)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_eez_new['iso'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyear
0ABNJ1.483098e+07seamounts2023
1AGO9.556242e+03seamounts2023
2ARG3.110730e+05seamounts2023
3ATA3.551629e+05seamounts2023
4ATG6.215895e+03seamounts2023
...............
88VNM4.421338e+04seamounts2023
89VUT1.199475e+05seamounts2023
90WSM4.117997e+04seamounts2023
91YEM6.294974e+04seamounts2023
92ZAF9.946306e+04seamounts2023
\n", + "

93 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year\n", + "0 ABNJ 1.483098e+07 seamounts 2023\n", + "1 AGO 9.556242e+03 seamounts 2023\n", + "2 ARG 3.110730e+05 seamounts 2023\n", + "3 ATA 3.551629e+05 seamounts 2023\n", + "4 ATG 6.215895e+03 seamounts 2023\n", + ".. ... ... ... ...\n", + "88 VNM 4.421338e+04 seamounts 2023\n", + "89 VUT 1.199475e+05 seamounts 2023\n", + "90 WSM 4.117997e+04 seamounts 2023\n", + "91 YEM 6.294974e+04 seamounts 2023\n", + "92 ZAF 9.946306e+04 seamounts 2023\n", + "\n", + "[93 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get area of seamounts per iso\n", + "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", + "seamounts_iso['habitat_name'] = 'seamounts'\n", + "seamounts_iso['year'] = 2023\n", + "seamounts_iso " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Join protection info to seamounts\n", + "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", + "seamounts_wdpa['protection'] = \"no\" \n", + "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove rows in which protection is \"no\"\n", + "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['CAN', 'ABNJ', 'FRA', 'FRA;ITA;MCO', 'JPN', 'USA', 'PRT', 'ESP',\n", + " 'BHS', 'MEX', 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM',\n", + " 'COL', 'PLW', 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR',\n", + " 'IDN', 'SYC', 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL',\n", + " 'ZAF', 'ARG', 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR'],\n", + " dtype=object)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_wdpa['PARENT_ISO'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['CAN', 'ABNJ', 'FRA', 'JPN', 'USA', 'PRT', 'ESP', 'BHS', 'MEX',\n", + " 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM', 'COL', 'PLW',\n", + " 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR', 'IDN', 'SYC',\n", + " 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL', 'ZAF', 'ARG',\n", + " 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR', 'MCO'],\n", + " dtype=object)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", + "split_rows = seamounts_wdpa[mask].copy()\n", + "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", + "split_rows = split_rows.explode('PARENT_ISO')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_wdpa[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "seamounts_wdpa_new['PARENT_ISO'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areahabitat_nameyear
0ABNJ226253.932283seamounts2023
1ARG38773.659962seamounts2023
2AUS250507.827932seamounts2023
3BHS9405.718473seamounts2023
4BRA89687.890132seamounts2023
5CAN66235.357502seamounts2023
6CHL239414.964764seamounts2023
7COK238289.821637seamounts2023
8COL30080.163652seamounts2023
9COM1584.809650seamounts2023
10CRI22592.506989seamounts2023
11CUB569.313304seamounts2023
12DOM14889.791781seamounts2023
13ECU16940.242843seamounts2023
14ESP8432.586895seamounts2023
15FJI1520.596802seamounts2023
16FRA238350.837708seamounts2023
17GBR528491.978612seamounts2023
18GNQ4138.403739seamounts2023
19GRC935.569528seamounts2023
20HND4665.192397seamounts2023
21IDN10445.380128seamounts2023
22ITA3753.574953seamounts2023
23JPN119622.849645seamounts2023
24KIR60231.516081seamounts2023
25MCO2123.222307seamounts2023
26MEX84702.255983seamounts2023
27MHL11824.490191seamounts2023
28NIU14703.179190seamounts2023
29NLD435.628871seamounts2023
30NOR2377.872096seamounts2023
31NZL168593.001600seamounts2023
32PAN24185.333200seamounts2023
33PER2043.936634seamounts2023
34PHL11700.480430seamounts2023
35PLW196940.861775seamounts2023
36PRT111736.861592seamounts2023
37SHN782.626658seamounts2023
38SYC46103.929891seamounts2023
39TUV1110.779352seamounts2023
40USA489352.853224seamounts2023
41VEN3221.184275seamounts2023
42YEM2487.428050seamounts2023
43ZAF42558.035678seamounts2023
\n", + "
" + ], + "text/plain": [ + " location_id protected_area habitat_name year\n", + "0 ABNJ 226253.932283 seamounts 2023\n", + "1 ARG 38773.659962 seamounts 2023\n", + "2 AUS 250507.827932 seamounts 2023\n", + "3 BHS 9405.718473 seamounts 2023\n", + "4 BRA 89687.890132 seamounts 2023\n", + "5 CAN 66235.357502 seamounts 2023\n", + "6 CHL 239414.964764 seamounts 2023\n", + "7 COK 238289.821637 seamounts 2023\n", + "8 COL 30080.163652 seamounts 2023\n", + "9 COM 1584.809650 seamounts 2023\n", + "10 CRI 22592.506989 seamounts 2023\n", + "11 CUB 569.313304 seamounts 2023\n", + "12 DOM 14889.791781 seamounts 2023\n", + "13 ECU 16940.242843 seamounts 2023\n", + "14 ESP 8432.586895 seamounts 2023\n", + "15 FJI 1520.596802 seamounts 2023\n", + "16 FRA 238350.837708 seamounts 2023\n", + "17 GBR 528491.978612 seamounts 2023\n", + "18 GNQ 4138.403739 seamounts 2023\n", + "19 GRC 935.569528 seamounts 2023\n", + "20 HND 4665.192397 seamounts 2023\n", + "21 IDN 10445.380128 seamounts 2023\n", + "22 ITA 3753.574953 seamounts 2023\n", + "23 JPN 119622.849645 seamounts 2023\n", + "24 KIR 60231.516081 seamounts 2023\n", + "25 MCO 2123.222307 seamounts 2023\n", + "26 MEX 84702.255983 seamounts 2023\n", + "27 MHL 11824.490191 seamounts 2023\n", + "28 NIU 14703.179190 seamounts 2023\n", + "29 NLD 435.628871 seamounts 2023\n", + "30 NOR 2377.872096 seamounts 2023\n", + "31 NZL 168593.001600 seamounts 2023\n", + "32 PAN 24185.333200 seamounts 2023\n", + "33 PER 2043.936634 seamounts 2023\n", + "34 PHL 11700.480430 seamounts 2023\n", + "35 PLW 196940.861775 seamounts 2023\n", + "36 PRT 111736.861592 seamounts 2023\n", + "37 SHN 782.626658 seamounts 2023\n", + "38 SYC 46103.929891 seamounts 2023\n", + "39 TUV 1110.779352 seamounts 2023\n", + "40 USA 489352.853224 seamounts 2023\n", + "41 VEN 3221.184275 seamounts 2023\n", + "42 YEM 2487.428050 seamounts 2023\n", + "43 ZAF 42558.035678 seamounts 2023" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", + "seamounts_protected['habitat_name'] = 'seamounts'\n", + "seamounts_protected['year'] = 2023\n", + "seamounts_protected " + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# join 'protected area' field in seamounts_protected to seamounts_iso based on location_id\n", + "seamounts_iso2 = seamounts_iso.merge(seamounts_protected[['location_id', 'protected_area']], left_on='location_id', right_on='location_id', how='left')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyearprotected_area
0ABNJ1.483098e+07seamounts2023226253.932283
1AGO9.556242e+03seamounts20230.000000
2ARG3.110730e+05seamounts202338773.659962
3ATA3.551629e+05seamounts20230.000000
4ATG6.215895e+03seamounts20230.000000
..................
88VNM4.421338e+04seamounts20230.000000
89VUT1.199475e+05seamounts20230.000000
90WSM4.117997e+04seamounts20230.000000
91YEM6.294974e+04seamounts20232487.428050
92ZAF9.946306e+04seamounts202342558.035678
\n", + "

93 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year protected_area\n", + "0 ABNJ 1.483098e+07 seamounts 2023 226253.932283\n", + "1 AGO 9.556242e+03 seamounts 2023 0.000000\n", + "2 ARG 3.110730e+05 seamounts 2023 38773.659962\n", + "3 ATA 3.551629e+05 seamounts 2023 0.000000\n", + "4 ATG 6.215895e+03 seamounts 2023 0.000000\n", + ".. ... ... ... ... ...\n", + "88 VNM 4.421338e+04 seamounts 2023 0.000000\n", + "89 VUT 1.199475e+05 seamounts 2023 0.000000\n", + "90 WSM 4.117997e+04 seamounts 2023 0.000000\n", + "91 YEM 6.294974e+04 seamounts 2023 2487.428050\n", + "92 ZAF 9.946306e+04 seamounts 2023 42558.035678\n", + "\n", + "[93 rows x 5 columns]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# set to 0 the protected_area values that are NaN\n", + "seamounts_iso2['protected_area'] = seamounts_iso2['protected_area'].fillna(0)\n", + "seamounts_iso2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seamounts_iso2.to_csv(path_out + \"habitat/seamounts.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 03ae8e8ebbceaf2ec066a0b063387c427bcf38b2 Mon Sep 17 00:00:00 2001 From: sofia Date: Fri, 13 Oct 2023 08:25:10 +0200 Subject: [PATCH 6/9] fixes in iso codes to use parent_iso --- data/notebooks/protectedseas.ipynb | 164 ++++++++++++++--------------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb index 85909d31..e5a3d879 100644 --- a/data/notebooks/protectedseas.ipynb +++ b/data/notebooks/protectedseas.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -92,7 +92,7 @@ "1 POLYGON ((-61.91090 17.57960, -61.91096 17.579... " ] }, - "execution_count": 51, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -130,7 +130,7 @@ " dtype='object')" ] }, - "execution_count": 52, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -152,7 +152,7 @@ "(21197, 53)" ] }, - "execution_count": 53, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -172,7 +172,7 @@ "(10879, 53)" ] }, - "execution_count": 54, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -195,7 +195,7 @@ "(10879, 56)" ] }, - "execution_count": 136, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -208,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -312,7 +312,7 @@ "4 5.0 47.933200 POLYGON ((-61.70064 17.66752, -61.70039 17.667... " ] }, - "execution_count": 137, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -325,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -361,7 +361,7 @@ " 'South Africa', 'USA; Haiti; Jamaica'], dtype=object)" ] }, - "execution_count": 138, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -374,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -394,7 +394,7 @@ " 'Belize': 'BLZ',\n", " 'Brazil': 'BRA',\n", " 'Bahamas': 'BHS',\n", - " 'British Virgin Islands': 'VGB',\n", + " 'British Virgin Islands': 'GBR',\n", " 'Canada': 'CAN',\n", " 'Chile': 'CHL',\n", " 'Cameroon': 'CMR',\n", @@ -421,13 +421,13 @@ " 'United Kingdom': 'GBR',\n", " 'Grenada': 'GRD',\n", " 'Ghana': 'GHA',\n", - " 'Gibraltar': 'GIB',\n", + " 'Gibraltar': 'GBR',\n", " 'Guinea': 'GIN',\n", " 'The Gambia': 'GMB',\n", " 'Guinea Bissau': 'GNB',\n", " 'Greece': 'GRC',\n", " 'Guatemala': 'GTM',\n", - " 'French Guyana': 'GUF',\n", + " 'French Guyana': 'FRA',\n", " 'Honduras': 'HND',\n", " 'Croatia': 'HRV',\n", " 'Indonesia': 'IDN',\n", @@ -443,7 +443,7 @@ " 'Kenya': 'KEN',\n", " 'Cambodia': 'KHM',\n", " 'South Korea': 'KOR',\n", - " 'Cayman Islands': 'CYM',\n", + " 'Cayman Islands': 'GBR',\n", " 'Lebanon': 'LBN',\n", " 'Liberia': 'LBR',\n", " 'Saint Lucia': 'LCA',\n", @@ -460,7 +460,7 @@ " 'Mauritania': 'MRT',\n", " 'Malaysia': 'MYS',\n", " 'Namibia': 'NAM',\n", - " 'New Caledonia': 'NCL',\n", + " 'New Caledonia': 'FRA',\n", " 'Niue': 'NIU',\n", " 'The Netherlands': 'NLD',\n", " 'Netherlands': 'NLD',\n", @@ -485,7 +485,7 @@ " 'Slovenia': 'SVN',\n", " 'Sweden': 'SWE',\n", " 'Seychelles': 'SYC',\n", - " 'Turks and Caicos Islands': 'TCA',\n", + " 'Turks and Caicos Islands': 'GBR',\n", " 'Thailand': 'THA',\n", " 'East Timor': 'TLS',\n", " 'Tonga': 'TON',\n", @@ -503,7 +503,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -516,7 +516,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -525,7 +525,7 @@ "(10879, 8)" ] }, - "execution_count": 141, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -594,7 +594,7 @@ "3372 1.642913 MULTIPOLYGON (((-61.59887 16.27728, -61.59861 ... None " ] }, - "execution_count": 143, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -605,14 +605,14 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_4067/3237173437.py:1: SettingWithCopyWarning: \n", + "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_2034/3237173437.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -626,7 +626,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -635,7 +635,7 @@ "0" ] }, - "execution_count": 145, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -646,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -721,7 +721,7 @@ "10598 POLYGON ((-75.00384 18.20744, -75.01645 18.207... USA;HTI;JAM " ] }, - "execution_count": 146, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -732,7 +732,7 @@ }, { "cell_type": "code", - "execution_count": 167, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -741,7 +741,7 @@ "(10883, 8)" ] }, - "execution_count": 167, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -766,7 +766,7 @@ }, { "cell_type": "code", - "execution_count": 168, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -811,7 +811,7 @@ "Index: []" ] }, - "execution_count": 168, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -822,7 +822,7 @@ }, { "cell_type": "code", - "execution_count": 169, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -911,7 +911,7 @@ "10882 POLYGON ((-75.00384 18.20744, -75.01645 18.207... JAM " ] }, - "execution_count": 169, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -922,7 +922,7 @@ }, { "cell_type": "code", - "execution_count": 170, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1000,7 +1000,7 @@ "1 ATG Less " ] }, - "execution_count": 170, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1022,7 +1022,7 @@ }, { "cell_type": "code", - "execution_count": 171, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -1103,7 +1103,7 @@ " ]\n", " },\n", " {\n", - " 'region_iso': 'HS', # this region is not in the Protected Planet database\n", + " 'region_iso': 'ABNJ', # this region is not in the Protected Planet database\n", " 'region_name': 'Areas Beyond National Jurisdiction',\n", " 'country_iso_3s': [\n", " \"ABNJ\"\n", @@ -1120,7 +1120,7 @@ }, { "cell_type": "code", - "execution_count": 172, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1129,7 +1129,7 @@ "(10883, 10)" ] }, - "execution_count": 172, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1141,7 +1141,7 @@ }, { "cell_type": "code", - "execution_count": 173, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1222,7 +1222,7 @@ "1 ATG Less SA " ] }, - "execution_count": 173, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1233,7 +1233,7 @@ }, { "cell_type": "code", - "execution_count": 174, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -1249,7 +1249,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1357,7 +1357,7 @@ "9 AUS Highly 723824.463100" ] }, - "execution_count": 175, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1370,7 +1370,7 @@ }, { "cell_type": "code", - "execution_count": 177, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1422,37 +1422,37 @@ " 3\n", " AS\n", " Highly\n", - " 8.916216e+05\n", + " 8.846479e+05\n", " \n", " \n", " 4\n", " AS\n", " Less\n", - " 2.950564e+06\n", + " 1.659106e+06\n", " \n", " \n", " 5\n", " AS\n", " Moderately\n", - " 2.856941e+05\n", + " 2.825264e+05\n", " \n", " \n", " 6\n", " EU\n", " Highly\n", - " 2.671565e+06\n", + " 2.679297e+06\n", " \n", " \n", " 7\n", " EU\n", " Less\n", - " 2.468697e+06\n", + " 3.761438e+06\n", " \n", " \n", " 8\n", " EU\n", " Moderately\n", - " 1.881583e+06\n", + " 1.884751e+06\n", " \n", " \n", " 9\n", @@ -1469,16 +1469,16 @@ "0 AF Highly 3.518155e+04\n", "1 AF Less 2.862643e+05\n", "2 AF Moderately 4.253144e+04\n", - "3 AS Highly 8.916216e+05\n", - "4 AS Less 2.950564e+06\n", - "5 AS Moderately 2.856941e+05\n", - "6 EU Highly 2.671565e+06\n", - "7 EU Less 2.468697e+06\n", - "8 EU Moderately 1.881583e+06\n", + "3 AS Highly 8.846479e+05\n", + "4 AS Less 1.659106e+06\n", + "5 AS Moderately 2.825264e+05\n", + "6 EU Highly 2.679297e+06\n", + "7 EU Less 3.761438e+06\n", + "8 EU Moderately 1.884751e+06\n", "9 NA Highly 3.961031e+06" ] }, - "execution_count": 177, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1491,7 +1491,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1550,7 +1550,7 @@ "2 Moderately 3.177137e+06 GLOB" ] }, - "execution_count": 182, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1564,7 +1564,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -1631,38 +1631,38 @@ " ...\n", " \n", " \n", - " 227\n", + " 216\n", " WA\n", " Highly\n", " 1.024296e+04\n", " \n", " \n", - " 228\n", + " 217\n", " WA\n", " Less\n", " 2.129971e+04\n", " \n", " \n", - " 229\n", + " 218\n", " GLOB\n", " Highly\n", " 8.180599e+06\n", " \n", " \n", - " 230\n", + " 219\n", " GLOB\n", " Less\n", " 9.942402e+06\n", " \n", " \n", - " 231\n", + " 220\n", " GLOB\n", " Moderately\n", " 3.177137e+06\n", " \n", " \n", "\n", - "

232 rows × 3 columns

\n", + "

221 rows × 3 columns

\n", "" ], "text/plain": [ @@ -1673,16 +1673,16 @@ "3 ARG Highly 5.958615e+03\n", "4 ARG Less 3.841962e+04\n", ".. ... ... ...\n", - "227 WA Highly 1.024296e+04\n", - "228 WA Less 2.129971e+04\n", - "229 GLOB Highly 8.180599e+06\n", - "230 GLOB Less 9.942402e+06\n", - "231 GLOB Moderately 3.177137e+06\n", + "216 WA Highly 1.024296e+04\n", + "217 WA Less 2.129971e+04\n", + "218 GLOB Highly 8.180599e+06\n", + "219 GLOB Less 9.942402e+06\n", + "220 GLOB Moderately 3.177137e+06\n", "\n", - "[232 rows x 3 columns]" + "[221 rows x 3 columns]" ] }, - "execution_count": 183, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1694,7 +1694,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ From 60d43c7861f260f4092e5ac0bb9dc6be4b2b9b6c Mon Sep 17 00:00:00 2001 From: sofia Date: Fri, 13 Oct 2023 09:09:55 +0200 Subject: [PATCH 7/9] remove duplicated region ABNJ --- data/notebooks/wdpa_coverage.ipynb | 564 +++++++++++++++++++++++------ 1 file changed, 458 insertions(+), 106 deletions(-) diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb index 12fd1337..462aec59 100644 --- a/data/notebooks/wdpa_coverage.ipynb +++ b/data/notebooks/wdpa_coverage.ipynb @@ -575,6 +575,248 @@ "final_df = final_df[['location_id', 'year', 'protection_type', 'cumsum_area']]" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idyearprotection_typecumsum_area
0ABNJ2000MPA+OECM5.941747e+05
130ABNJ2001MPA+OECM5.941747e+05
262ABNJ2002MPA+OECM5.941747e+05
397ABNJ2003MPA+OECM5.941747e+05
533ABNJ2004MPA+OECM5.941747e+05
669ABNJ2005MPA+OECM5.941747e+05
808ABNJ2006MPA+OECM5.941747e+05
949ABNJ2007MPA+OECM5.941747e+05
1090ABNJ2008MPA+OECM5.941747e+05
1233ABNJ2009MPA+OECM5.941747e+05
1377ABNJ2010MPA+OECM9.532081e+05
1525ABNJ2011MPA+OECM8.568056e+05
1675ABNJ2012MPA+OECM1.034105e+06
1825ABNJ2013MPA+OECM1.034105e+06
1976ABNJ2014MPA+OECM1.034105e+06
2127ABNJ2015MPA+OECM1.034105e+06
2278ABNJ2016MPA+OECM1.034105e+06
2429ABNJ2017MPA+OECM2.811452e+06
2580ABNJ2018MPA+OECM2.811452e+06
2731ABNJ2019MPA+OECM2.811452e+06
2882ABNJ2020MPA+OECM2.811452e+06
3034ABNJ2021MPA+OECM2.811452e+06
3186ABNJ2022MPA+OECM2.811452e+06
3338ABNJ2023MPA+OECM2.811452e+06
\n", + "
" + ], + "text/plain": [ + " location_id year protection_type cumsum_area\n", + "0 ABNJ 2000 MPA+OECM 5.941747e+05\n", + "130 ABNJ 2001 MPA+OECM 5.941747e+05\n", + "262 ABNJ 2002 MPA+OECM 5.941747e+05\n", + "397 ABNJ 2003 MPA+OECM 5.941747e+05\n", + "533 ABNJ 2004 MPA+OECM 5.941747e+05\n", + "669 ABNJ 2005 MPA+OECM 5.941747e+05\n", + "808 ABNJ 2006 MPA+OECM 5.941747e+05\n", + "949 ABNJ 2007 MPA+OECM 5.941747e+05\n", + "1090 ABNJ 2008 MPA+OECM 5.941747e+05\n", + "1233 ABNJ 2009 MPA+OECM 5.941747e+05\n", + "1377 ABNJ 2010 MPA+OECM 9.532081e+05\n", + "1525 ABNJ 2011 MPA+OECM 8.568056e+05\n", + "1675 ABNJ 2012 MPA+OECM 1.034105e+06\n", + "1825 ABNJ 2013 MPA+OECM 1.034105e+06\n", + "1976 ABNJ 2014 MPA+OECM 1.034105e+06\n", + "2127 ABNJ 2015 MPA+OECM 1.034105e+06\n", + "2278 ABNJ 2016 MPA+OECM 1.034105e+06\n", + "2429 ABNJ 2017 MPA+OECM 2.811452e+06\n", + "2580 ABNJ 2018 MPA+OECM 2.811452e+06\n", + "2731 ABNJ 2019 MPA+OECM 2.811452e+06\n", + "2882 ABNJ 2020 MPA+OECM 2.811452e+06\n", + "3034 ABNJ 2021 MPA+OECM 2.811452e+06\n", + "3186 ABNJ 2022 MPA+OECM 2.811452e+06\n", + "3338 ABNJ 2023 MPA+OECM 2.811452e+06" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df[final_df['location_id'] == 'ABNJ']" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -584,7 +826,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -665,7 +907,7 @@ " ]\n", " },\n", " {\n", - " 'region_iso': 'HS', # this region is not in the Protected Planet database\n", + " 'region_iso': 'ABNJ', # this region is not in the Protected Planet database\n", " 'region_name': 'Areas Beyond National Jurisdiction',\n", " 'country_iso_3s': [\n", " \"ABNJ\"\n", @@ -682,7 +924,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -715,38 +957,38 @@ " \n", " \n", " 0\n", - " AF\n", + " ABNJ\n", " 2000\n", " MPA+OECM\n", - " 94507.122820\n", + " 594174.659985\n", " \n", " \n", " 1\n", - " AF\n", + " ABNJ\n", " 2001\n", " MPA+OECM\n", - " 94807.303100\n", + " 594174.659985\n", " \n", " \n", " 2\n", - " AF\n", + " ABNJ\n", " 2002\n", " MPA+OECM\n", - " 102859.393938\n", + " 594174.659985\n", " \n", " \n", " 3\n", - " AF\n", + " ABNJ\n", " 2003\n", " MPA+OECM\n", - " 111143.352991\n", + " 594174.659985\n", " \n", " \n", " 4\n", - " AF\n", + " ABNJ\n", " 2004\n", " MPA+OECM\n", - " 119137.635862\n", + " 594174.659985\n", " \n", " \n", " ...\n", @@ -797,11 +1039,11 @@ ], "text/plain": [ " location_id year protection_type cumsum_area\n", - "0 AF 2000 MPA+OECM 94507.122820\n", - "1 AF 2001 MPA+OECM 94807.303100\n", - "2 AF 2002 MPA+OECM 102859.393938\n", - "3 AF 2003 MPA+OECM 111143.352991\n", - "4 AF 2004 MPA+OECM 119137.635862\n", + "0 ABNJ 2000 MPA+OECM 594174.659985\n", + "1 ABNJ 2001 MPA+OECM 594174.659985\n", + "2 ABNJ 2002 MPA+OECM 594174.659985\n", + "3 ABNJ 2003 MPA+OECM 594174.659985\n", + "4 ABNJ 2004 MPA+OECM 594174.659985\n", ".. ... ... ... ...\n", "211 WA 2019 MPA+OECM 30618.254664\n", "212 WA 2020 MPA+OECM 30624.636536\n", @@ -812,7 +1054,7 @@ "[216 rows x 4 columns]" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -828,7 +1070,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -860,118 +1102,228 @@ " \n", " \n", " \n", - " 0\n", - " AF\n", + " 120\n", + " GL\n", " 2000\n", " MPA+OECM\n", - " 94507.122820\n", + " 3.193363e+06\n", " \n", " \n", - " 1\n", - " AF\n", + " 121\n", + " GL\n", " 2001\n", " MPA+OECM\n", - " 94807.303100\n", + " 3.569706e+06\n", " \n", " \n", - " 2\n", - " AF\n", + " 122\n", + " GL\n", " 2002\n", " MPA+OECM\n", - " 102859.393938\n", + " 3.695628e+06\n", " \n", " \n", - " 3\n", - " AF\n", + " 123\n", + " GL\n", " 2003\n", " MPA+OECM\n", - " 111143.352991\n", + " 3.748312e+06\n", " \n", " \n", - " 4\n", - " AF\n", + " 124\n", + " GL\n", " 2004\n", " MPA+OECM\n", - " 119137.635862\n", + " 3.866803e+06\n", " \n", " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", + " 125\n", + " GL\n", + " 2005\n", + " MPA+OECM\n", + " 3.980030e+06\n", " \n", " \n", - " 211\n", - " WA\n", + " 126\n", + " GL\n", + " 2006\n", + " MPA+OECM\n", + " 7.559205e+06\n", + " \n", + " \n", + " 127\n", + " GL\n", + " 2007\n", + " MPA+OECM\n", + " 8.741698e+06\n", + " \n", + " \n", + " 128\n", + " GL\n", + " 2008\n", + " MPA+OECM\n", + " 8.825878e+06\n", + " \n", + " \n", + " 129\n", + " GL\n", + " 2009\n", + " MPA+OECM\n", + " 1.060580e+07\n", + " \n", + " \n", + " 130\n", + " GL\n", + " 2010\n", + " MPA+OECM\n", + " 1.202293e+07\n", + " \n", + " \n", + " 131\n", + " GL\n", + " 2011\n", + " MPA+OECM\n", + " 1.208784e+07\n", + " \n", + " \n", + " 132\n", + " GL\n", + " 2012\n", + " MPA+OECM\n", + " 1.395678e+07\n", + " \n", + " \n", + " 133\n", + " GL\n", + " 2013\n", + " MPA+OECM\n", + " 1.423613e+07\n", + " \n", + " \n", + " 134\n", + " GL\n", + " 2014\n", + " MPA+OECM\n", + " 1.566334e+07\n", + " \n", + " \n", + " 135\n", + " GL\n", + " 2015\n", + " MPA+OECM\n", + " 1.634694e+07\n", + " \n", + " \n", + " 136\n", + " GL\n", + " 2016\n", + " MPA+OECM\n", + " 1.823543e+07\n", + " \n", + " \n", + " 137\n", + " GL\n", + " 2017\n", + " MPA+OECM\n", + " 2.270050e+07\n", + " \n", + " \n", + " 138\n", + " GL\n", + " 2018\n", + " MPA+OECM\n", + " 2.712299e+07\n", + " \n", + " \n", + " 139\n", + " GL\n", " 2019\n", " MPA+OECM\n", - " 30618.254664\n", + " 2.832749e+07\n", " \n", " \n", - " 212\n", - " WA\n", + " 140\n", + " GL\n", " 2020\n", " MPA+OECM\n", - " 30624.636536\n", + " 2.982679e+07\n", " \n", " \n", - " 213\n", - " WA\n", + " 141\n", + " GL\n", " 2021\n", " MPA+OECM\n", - " 30624.636536\n", + " 2.994130e+07\n", " \n", " \n", - " 214\n", - " WA\n", + " 142\n", + " GL\n", " 2022\n", " MPA+OECM\n", - " 31779.597984\n", + " 3.011280e+07\n", " \n", " \n", - " 215\n", - " WA\n", + " 143\n", + " GL\n", " 2023\n", " MPA+OECM\n", - " 31779.597984\n", + " 3.011285e+07\n", " \n", " \n", "\n", - "

192 rows × 4 columns

\n", "" ], "text/plain": [ - " location_id year protection_type cumsum_area\n", - "0 AF 2000 MPA+OECM 94507.122820\n", - "1 AF 2001 MPA+OECM 94807.303100\n", - "2 AF 2002 MPA+OECM 102859.393938\n", - "3 AF 2003 MPA+OECM 111143.352991\n", - "4 AF 2004 MPA+OECM 119137.635862\n", - ".. ... ... ... ...\n", - "211 WA 2019 MPA+OECM 30618.254664\n", - "212 WA 2020 MPA+OECM 30624.636536\n", - "213 WA 2021 MPA+OECM 30624.636536\n", - "214 WA 2022 MPA+OECM 31779.597984\n", - "215 WA 2023 MPA+OECM 31779.597984\n", - "\n", - "[192 rows x 4 columns]" + " location_id year protection_type cumsum_area\n", + "120 GL 2000 MPA+OECM 3.193363e+06\n", + "121 GL 2001 MPA+OECM 3.569706e+06\n", + "122 GL 2002 MPA+OECM 3.695628e+06\n", + "123 GL 2003 MPA+OECM 3.748312e+06\n", + "124 GL 2004 MPA+OECM 3.866803e+06\n", + "125 GL 2005 MPA+OECM 3.980030e+06\n", + "126 GL 2006 MPA+OECM 7.559205e+06\n", + "127 GL 2007 MPA+OECM 8.741698e+06\n", + "128 GL 2008 MPA+OECM 8.825878e+06\n", + "129 GL 2009 MPA+OECM 1.060580e+07\n", + "130 GL 2010 MPA+OECM 1.202293e+07\n", + "131 GL 2011 MPA+OECM 1.208784e+07\n", + "132 GL 2012 MPA+OECM 1.395678e+07\n", + "133 GL 2013 MPA+OECM 1.423613e+07\n", + "134 GL 2014 MPA+OECM 1.566334e+07\n", + "135 GL 2015 MPA+OECM 1.634694e+07\n", + "136 GL 2016 MPA+OECM 1.823543e+07\n", + "137 GL 2017 MPA+OECM 2.270050e+07\n", + "138 GL 2018 MPA+OECM 2.712299e+07\n", + "139 GL 2019 MPA+OECM 2.832749e+07\n", + "140 GL 2020 MPA+OECM 2.982679e+07\n", + "141 GL 2021 MPA+OECM 2.994130e+07\n", + "142 GL 2022 MPA+OECM 3.011280e+07\n", + "143 GL 2023 MPA+OECM 3.011285e+07" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "regions[regions['location_id'] == 'GL']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], "source": [ "# Remove rows for region GL (Global)\n", - "regions = regions[regions['location_id'] != 'GL']\n", - "regions" + "regions2 = regions[regions['location_id'] != 'GL']\n", + "regions2 = regions2[regions2['location_id'] != 'ABNJ']" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -994,10 +1346,10 @@ " 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n", " 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n", " 'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n", - " 'AF', 'AS', 'AT', 'EU', 'HS', 'NA', 'SA', 'WA'], dtype=object)" + " 'AF', 'AS', 'AT', 'EU', 'GL', 'NA', 'SA', 'WA'], dtype=object)" ] }, - "execution_count": 11, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1009,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1047,7 +1399,7 @@ " 2000\n", " MPA+OECM\n", " 594174.659985\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", " 1\n", @@ -1055,7 +1407,7 @@ " 2000\n", " MPA+OECM\n", " 0.415240\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", " 2\n", @@ -1063,7 +1415,7 @@ " 2000\n", " MPA+OECM\n", " 103.048347\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", " 3\n", @@ -1071,7 +1423,7 @@ " 2000\n", " MPA+OECM\n", " 78.516519\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", " 4\n", @@ -1079,7 +1431,7 @@ " 2000\n", " MPA+OECM\n", " 6155.668078\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", " ...\n", @@ -1090,68 +1442,68 @@ " ...\n", " \n", " \n", - " 3701\n", + " 3725\n", " WA\n", " 2019\n", " MPA+OECM\n", " 30618.254664\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", - " 3702\n", + " 3726\n", " WA\n", " 2020\n", " MPA+OECM\n", " 30624.636536\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", - " 3703\n", + " 3727\n", " WA\n", " 2021\n", " MPA+OECM\n", " 30624.636536\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", - " 3704\n", + " 3728\n", " WA\n", " 2022\n", " MPA+OECM\n", " 31779.597984\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", - " 3705\n", + " 3729\n", " WA\n", " 2023\n", " MPA+OECM\n", " 31779.597984\n", - " 2023-10-10\n", + " 2023-10-13\n", " \n", " \n", "\n", - "

3706 rows × 5 columns

\n", + "

3730 rows × 5 columns

\n", "" ], "text/plain": [ " location_id year protection_type cumsum_area last_updated\n", - "0 ABNJ 2000 MPA+OECM 594174.659985 2023-10-10\n", - "1 AGO 2000 MPA+OECM 0.415240 2023-10-10\n", - "2 ALB 2000 MPA+OECM 103.048347 2023-10-10\n", - "3 ARE 2000 MPA+OECM 78.516519 2023-10-10\n", - "4 ARG 2000 MPA+OECM 6155.668078 2023-10-10\n", + "0 ABNJ 2000 MPA+OECM 594174.659985 2023-10-13\n", + "1 AGO 2000 MPA+OECM 0.415240 2023-10-13\n", + "2 ALB 2000 MPA+OECM 103.048347 2023-10-13\n", + "3 ARE 2000 MPA+OECM 78.516519 2023-10-13\n", + "4 ARG 2000 MPA+OECM 6155.668078 2023-10-13\n", "... ... ... ... ... ...\n", - "3701 WA 2019 MPA+OECM 30618.254664 2023-10-10\n", - "3702 WA 2020 MPA+OECM 30624.636536 2023-10-10\n", - "3703 WA 2021 MPA+OECM 30624.636536 2023-10-10\n", - "3704 WA 2022 MPA+OECM 31779.597984 2023-10-10\n", - "3705 WA 2023 MPA+OECM 31779.597984 2023-10-10\n", + "3725 WA 2019 MPA+OECM 30618.254664 2023-10-13\n", + "3726 WA 2020 MPA+OECM 30624.636536 2023-10-13\n", + "3727 WA 2021 MPA+OECM 30624.636536 2023-10-13\n", + "3728 WA 2022 MPA+OECM 31779.597984 2023-10-13\n", + "3729 WA 2023 MPA+OECM 31779.597984 2023-10-13\n", "\n", - "[3706 rows x 5 columns]" + "[3730 rows x 5 columns]" ] }, - "execution_count": 12, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1166,7 +1518,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ From cb79389c859188dd0a3c54c6ce13ba2e93ad2b2f Mon Sep 17 00:00:00 2001 From: sofia Date: Fri, 13 Oct 2023 11:46:49 +0200 Subject: [PATCH 8/9] new notebook to process all habitats --- data/notebooks/habitats.ipynb | 2126 ++++++++++++++++++++++++++++++++ data/notebooks/seamounts.ipynb | 1000 --------------- 2 files changed, 2126 insertions(+), 1000 deletions(-) create mode 100644 data/notebooks/habitats.ipynb delete mode 100644 data/notebooks/seamounts.ipynb diff --git a/data/notebooks/habitats.ipynb b/data/notebooks/habitats.ipynb new file mode 100644 index 00000000..043a5ad6 --- /dev/null +++ b/data/notebooks/habitats.ipynb @@ -0,0 +1,2126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting openpyxl\n", + " Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.0/250.0 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hCollecting et-xmlfile (from openpyxl)\n", + " Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: et-xmlfile, openpyxl\n", + "Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2\n" + ] + } + ], + "source": [ + "!pip install openpyxl" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "import openpyxl" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "cold = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/coldwatercorals.csv\")\n", + "salt = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/saltmarshes.csv\")\n", + "sea = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/seagrasses.csv\")\n", + "warm = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/warmwatercorals.csv\")\n", + "glob = pd.read_excel(path_in + \"Ocean+HabitatsDownload_Global/global-stats.xlsx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "cold2 = cold[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", + "salt2 = salt[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", + "sea2 = sea[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", + "warm2 = warm[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove rows with '-' in 'protected_area' or 'total_area'\n", + "cold2 = cold2[~cold2['protected_area'].str.contains('-') & ~cold2['total_area'].str.contains('-')]\n", + "salt2 = salt2[~salt2['protected_area'].str.contains('-') & ~salt2['total_area'].str.contains('-')]\n", + "sea2 = sea2[~sea2['protected_area'].str.contains('-') & ~sea2['total_area'].str.contains('-')]\n", + "warm2 = warm2[~warm2['protected_area'].str.contains('-') & ~warm2['total_area'].str.contains('-')]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Bring the wdpa file to get the iso3 and parent_iso equivalences\n", + "wdpa = gpd.read_file(path_out + \"wdpa/merged_mpa.shp\")\n", + "\n", + "# Filter out rows with multiple values in either 'ISO3' or 'PARENT_ISO'\n", + "wdpa = wdpa[~wdpa['ISO3'].str.contains(';') & ~wdpa['PARENT_ISO'].str.contains(';')]\n", + "\n", + "# Extract unique ISO3-PARENT_ISO pairs\n", + "unique_pairs = wdpa[['ISO3', 'PARENT_ISO']].drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a mapping dictionary for ISO3-PARENT_ISO pairs and modify the 'location_id' column in the habitats dataframes\n", + "mapping_dict = dict(zip(unique_pairs['ISO3'], unique_pairs['PARENT_ISO']))\n", + "cold2['location_id'] = cold2['location_id'].map(mapping_dict)\n", + "salt2['location_id'] = salt2['location_id'].map(mapping_dict)\n", + "sea2['location_id'] = sea2['location_id'].map(mapping_dict)\n", + "warm2['location_id'] = warm2['location_id'].map(mapping_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "# Group by 'location_id' and calculate the sum of 'protected_area' and 'total_area'\n", + "cold2_grouped = cold2.groupby('location_id').sum().reset_index()\n", + "salt2_grouped = salt2.groupby('location_id').sum().reset_index()\n", + "sea2_grouped = sea2.groupby('location_id').sum().reset_index()\n", + "warm2_grouped = warm2.groupby('location_id').sum().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "# Add the 'habitat_name' column\n", + "cold2_grouped['habitat_name'] = 'cold-water corals'\n", + "salt2_grouped['habitat_name'] = 'saltmarshes'\n", + "sea2_grouped['habitat_name'] = 'seagrasses'\n", + "warm2_grouped['habitat_name'] = 'warm-water corals'" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293731874.982214cold-water corals2023
1AGO0.0000003.395671cold-water corals2023
\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 421.629373 1874.982214 cold-water corals 2023\n", + "1 AGO 0.000000 3.395671 cold-water corals 2023" + ] + }, + "execution_count": 198, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the dataframes\n", + "habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped])\n", + "habitats['year'] = 2023\n", + "habitats.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0cold-water corals4214.89720315057.016684GLOB2023
1saltmarshes111540.045205217798.398466GLOB2023
2seagrasses71543.667168295004.516919GLOB2023
3warm-water corals62074.768550147100.573092GLOB2023
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 cold-water corals 4214.897203 15057.016684 GLOB 2023\n", + "1 saltmarshes 111540.045205 217798.398466 GLOB 2023\n", + "2 seagrasses 71543.667168 295004.516919 GLOB 2023\n", + "3 warm-water corals 62074.768550 147100.573092 GLOB 2023" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global stats for habitats\n", + "habitats_global = habitats.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "habitats_global['location_id'] = 'GLOB'\n", + "habitats_global['year'] = 2023\n", + "habitats_global" + ] + }, + { + "cell_type": "code", + "execution_count": 200, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293731874.982214cold-water corals2023
1AGO0.0000003.395671cold-water corals2023
2ALB0.0000005.986479cold-water corals2023
3ARG6.98422661.826344cold-water corals2023
4ATG0.0000000.997747cold-water corals2023
..................
81ZAF1.3988131.398813warm-water corals2023
0GLOB4214.89720315057.016684cold-water corals2023
1GLOB111540.045205217798.398466saltmarshes2023
2GLOB71543.667168295004.516919seagrasses2023
3GLOB62074.768550147100.573092warm-water corals2023
\n", + "

374 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 421.629373 1874.982214 cold-water corals 2023\n", + "1 AGO 0.000000 3.395671 cold-water corals 2023\n", + "2 ALB 0.000000 5.986479 cold-water corals 2023\n", + "3 ARG 6.984226 61.826344 cold-water corals 2023\n", + "4 ATG 0.000000 0.997747 cold-water corals 2023\n", + ".. ... ... ... ... ...\n", + "81 ZAF 1.398813 1.398813 warm-water corals 2023\n", + "0 GLOB 4214.897203 15057.016684 cold-water corals 2023\n", + "1 GLOB 111540.045205 217798.398466 saltmarshes 2023\n", + "2 GLOB 71543.667168 295004.516919 seagrasses 2023\n", + "3 GLOB 62074.768550 147100.573092 warm-water corals 2023\n", + "\n", + "[374 rows x 5 columns]" + ] + }, + "execution_count": 200, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the global stats to the habitats dataframe\n", + "habitats = pd.concat([habitats, habitats_global])\n", + "habitats" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " \n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']" + ] + }, + { + "cell_type": "code", + "execution_count": 202, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFcold-water corals37.761626381.9932342023
1AFsaltmarshes6688.70287919845.9150002023
2AFseagrasses6319.09949161939.4849042023
3AFwarm-water corals6591.34008315216.3939472023
4AScold-water corals433.7771001733.4484522023
5ASsaltmarshes11965.69391044696.3651492023
6ASseagrasses29085.739962123207.6283442023
7ASwarm-water corals41327.715018100106.4659482023
8EUcold-water corals2657.6458747253.0542712023
9EUsaltmarshes11397.00359818425.7284612023
10EUseagrasses9767.76058116539.5896322023
11EUwarm-water corals4357.9310189459.6238252023
12NAcold-water corals429.3518592384.3164842023
13NAsaltmarshes57209.60317687048.1644942023
14NAseagrasses8800.52079415860.8997572023
15NAwarm-water corals3652.4866284545.0577452023
16SAcold-water corals234.7313701416.2513232023
17SAsaltmarshes22969.81590635983.3927442023
18SAseagrasses16517.09766745847.4594122023
19SAwarm-water corals5597.36684512869.8012312023
20WAcold-water corals0.00000012.9707052023
21WAsaltmarshes1309.22573611798.8326192023
22WAseagrasses1053.44867325273.7274312023
23WAwarm-water corals547.9289574903.2303952023
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF cold-water corals 37.761626 381.993234 2023\n", + "1 AF saltmarshes 6688.702879 19845.915000 2023\n", + "2 AF seagrasses 6319.099491 61939.484904 2023\n", + "3 AF warm-water corals 6591.340083 15216.393947 2023\n", + "4 AS cold-water corals 433.777100 1733.448452 2023\n", + "5 AS saltmarshes 11965.693910 44696.365149 2023\n", + "6 AS seagrasses 29085.739962 123207.628344 2023\n", + "7 AS warm-water corals 41327.715018 100106.465948 2023\n", + "8 EU cold-water corals 2657.645874 7253.054271 2023\n", + "9 EU saltmarshes 11397.003598 18425.728461 2023\n", + "10 EU seagrasses 9767.760581 16539.589632 2023\n", + "11 EU warm-water corals 4357.931018 9459.623825 2023\n", + "12 NA cold-water corals 429.351859 2384.316484 2023\n", + "13 NA saltmarshes 57209.603176 87048.164494 2023\n", + "14 NA seagrasses 8800.520794 15860.899757 2023\n", + "15 NA warm-water corals 3652.486628 4545.057745 2023\n", + "16 SA cold-water corals 234.731370 1416.251323 2023\n", + "17 SA saltmarshes 22969.815906 35983.392744 2023\n", + "18 SA seagrasses 16517.097667 45847.459412 2023\n", + "19 SA warm-water corals 5597.366845 12869.801231 2023\n", + "20 WA cold-water corals 0.000000 12.970705 2023\n", + "21 WA saltmarshes 1309.225736 11798.832619 2023\n", + "22 WA seagrasses 1053.448673 25273.727431 2023\n", + "23 WA warm-water corals 547.928957 4903.230395 2023" + ] + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitats_regions = habitats.copy()\n", + "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "habitats_regions['year'] = 2023\n", + "habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "habitats_regions\n" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate region statistics to the habitats dataframe\n", + "habitats = pd.concat([habitats, habitats_regions])" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ABNJ', 'AGO', 'ALB', 'ARG', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n", + " 'BRB', 'CAN', 'CHL', 'CHN', 'CIV', 'COK', 'COL', 'CPV', 'CRI',\n", + " 'CUB', 'CYP', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'ERI', 'ESP',\n", + " 'FJI', 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC',\n", + " 'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL',\n", + " 'ISL', 'ITA', 'JAM', 'JPN', 'KEN', 'KIR', 'KNA', 'LBR', 'LCA',\n", + " 'LKA', 'MAR', 'MDG', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE', 'MOZ',\n", + " 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR', 'NZL',\n", + " 'OMN', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRT', 'RUS', 'SAU',\n", + " 'SEN', 'SHN', 'SJM', 'STP', 'SUR', 'SWE', 'SYC', 'THA', 'TLS',\n", + " 'TON', 'TTO', 'TUN', 'TUV', 'UMI', 'URY', 'USA', 'VCT', 'VEN',\n", + " 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ARE', 'AZE', 'BEL', 'BGR',\n", + " 'BHR', 'DEU', 'DJI', 'EGY', 'EST', 'FIN', 'GEO', 'GMB', 'IRN',\n", + " 'KHM', 'KOR', 'KWT', 'LBY', 'LTU', 'LVA', 'PAK', 'POL', 'QAT',\n", + " 'ROU', 'SDN', 'SVN', 'TUR', 'TZA', 'UKR', 'COM', 'ISR', 'JOR',\n", + " 'MCO', 'MDV', 'SGP', 'SLB', 'SLE', 'BGD', 'BRN', 'NIU', 'GLOB',\n", + " 'AF', 'AS', 'EU', 'NA', 'SA', 'WA'], dtype=object)" + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitats['location_id'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [], + "source": [ + "habitats.to_csv(path_out + \"habitats/ocean+.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process seamounts from [UN WCMC](https://data.unep-wcmc.org/datasets/41)" + ] + }, + { + "cell_type": "code", + "execution_count": 213, + "metadata": {}, + "outputs": [], + "source": [ + "# Read required data\n", + "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n", + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", + "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" + ] + }, + { + "cell_type": "code", + "execution_count": 214, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep relevant fields in eez and hs and merge then in one dataframe\n", + "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", + "hs = hs[['geometry']]\n", + "hs['SOVEREIGN1'] = 'High Seas'\n", + "hs['ISO_SOV1'] = 'ABNJ'\n", + "eez_hs = eez.merge(hs, how='outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": {}, + "outputs": [], + "source": [ + "# Join eez info to seamounts falling within eez polygons\n", + "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")" + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop those not associated with an eez or hs\n", + "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" with the iso_sov codes\n", + "def concatenate_iso(row):\n", + " iso_list = [row['ISO_SOV1']]\n", + " if not pd.isna(row['ISO_SOV2']):\n", + " iso_list.append(row['ISO_SOV2'])\n", + " if not pd.isna(row['ISO_SOV3']):\n", + " iso_list.append(row['ISO_SOV3'])\n", + " return ';'.join(iso_list)\n", + "\n", + "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", + "split_rows = seamounts_eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 219, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyear
0ABNJ1.483098e+07seamounts2011
1AGO9.556242e+03seamounts2011
2ARG3.110730e+05seamounts2011
3ATA3.551629e+05seamounts2011
4ATG6.215895e+03seamounts2011
...............
88VNM4.421338e+04seamounts2011
89VUT1.199475e+05seamounts2011
90WSM4.117997e+04seamounts2011
91YEM6.294974e+04seamounts2011
92ZAF9.946306e+04seamounts2011
\n", + "

93 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year\n", + "0 ABNJ 1.483098e+07 seamounts 2011\n", + "1 AGO 9.556242e+03 seamounts 2011\n", + "2 ARG 3.110730e+05 seamounts 2011\n", + "3 ATA 3.551629e+05 seamounts 2011\n", + "4 ATG 6.215895e+03 seamounts 2011\n", + ".. ... ... ... ...\n", + "88 VNM 4.421338e+04 seamounts 2011\n", + "89 VUT 1.199475e+05 seamounts 2011\n", + "90 WSM 4.117997e+04 seamounts 2011\n", + "91 YEM 6.294974e+04 seamounts 2011\n", + "92 ZAF 9.946306e+04 seamounts 2011\n", + "\n", + "[93 rows x 4 columns]" + ] + }, + "execution_count": 219, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get area of seamounts per iso\n", + "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", + "seamounts_iso['habitat_name'] = 'seamounts'\n", + "seamounts_iso['year'] = 2011\n", + "seamounts_iso " + ] + }, + { + "cell_type": "code", + "execution_count": 220, + "metadata": {}, + "outputs": [], + "source": [ + "# Join protection info to seamounts\n", + "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", + "seamounts_wdpa['protection'] = \"no\" \n", + "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 221, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove rows in which protection is \"no\"\n", + "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['CAN', 'ABNJ', 'FRA', 'JPN', 'USA', 'PRT', 'ESP', 'BHS', 'MEX',\n", + " 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM', 'COL', 'PLW',\n", + " 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR', 'IDN', 'SYC',\n", + " 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL', 'ZAF', 'ARG',\n", + " 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR', 'MCO'],\n", + " dtype=object)" + ] + }, + "execution_count": 222, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", + "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", + "split_rows = seamounts_wdpa[mask].copy()\n", + "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", + "split_rows = split_rows.explode('PARENT_ISO')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_wdpa[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "seamounts_wdpa_new['PARENT_ISO'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 223, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_area
0ABNJ226253.932283
1ARG38773.659962
\n", + "
" + ], + "text/plain": [ + " location_id protected_area\n", + "0 ABNJ 226253.932283\n", + "1 ARG 38773.659962" + ] + }, + "execution_count": 223, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", + "seamounts_protected.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 224, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyearprotected_area
0ABNJ1.483098e+07seamounts2011226253.932283
1AGO9.556242e+03seamounts2011NaN
2ARG3.110730e+05seamounts201138773.659962
3ATA3.551629e+05seamounts2011NaN
4ATG6.215895e+03seamounts2011NaN
\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year protected_area\n", + "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", + "1 AGO 9.556242e+03 seamounts 2011 NaN\n", + "2 ARG 3.110730e+05 seamounts 2011 38773.659962\n", + "3 ATA 3.551629e+05 seamounts 2011 NaN\n", + "4 ATG 6.215895e+03 seamounts 2011 NaN" + ] + }, + "execution_count": 224, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# join 'protected area' field in seamounts_protected to seamounts_iso based on location_id\n", + "seamounts_iso2 = seamounts_iso.merge(seamounts_protected, left_on='location_id', right_on='location_id', how='left')\n", + "seamounts_iso2.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 225, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areahabitat_nameyearprotected_area
0ABNJ1.483098e+07seamounts2011226253.932283
1AGO9.556242e+03seamounts20110.000000
2ARG3.110730e+05seamounts201138773.659962
3ATA3.551629e+05seamounts20110.000000
4ATG6.215895e+03seamounts20110.000000
\n", + "
" + ], + "text/plain": [ + " location_id total_area habitat_name year protected_area\n", + "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", + "1 AGO 9.556242e+03 seamounts 2011 0.000000\n", + "2 ARG 3.110730e+05 seamounts 2011 38773.659962\n", + "3 ATA 3.551629e+05 seamounts 2011 0.000000\n", + "4 ATG 6.215895e+03 seamounts 2011 0.000000" + ] + }, + "execution_count": 225, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# set to 0 the protected_area values that are NaN\n", + "seamounts_iso2['protected_area'] = seamounts_iso2['protected_area'].fillna(0)\n", + "seamounts_iso2.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 226, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0seamounts3.186900e+062.729113e+07GLOB2011
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 seamounts 3.186900e+06 2.729113e+07 GLOB 2011" + ] + }, + "execution_count": 226, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global stats for seamounts\n", + "seamounts_global = seamounts_iso2.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "seamounts_global['location_id'] = 'GLOB'\n", + "seamounts_global['year'] = 2011\n", + "seamounts_global" + ] + }, + { + "cell_type": "code", + "execution_count": 227, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate the global stats to the seamounts dataframe\n", + "seamounts_iso2 = pd.concat([seamounts_iso2, seamounts_global])" + ] + }, + { + "cell_type": "code", + "execution_count": 228, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFseamounts94385.1789586.162351e+052011
1ASseamounts832497.7839375.433433e+062011
2ATseamounts0.0000003.551629e+052011
3EUseamounts894514.9102552.641119e+062011
4NAseamounts555588.2107251.664794e+062011
5SAseamounts581172.1543891.655552e+062011
6WAseamounts2487.4280509.384765e+042011
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF seamounts 94385.178958 6.162351e+05 2011\n", + "1 AS seamounts 832497.783937 5.433433e+06 2011\n", + "2 AT seamounts 0.000000 3.551629e+05 2011\n", + "3 EU seamounts 894514.910255 2.641119e+06 2011\n", + "4 NA seamounts 555588.210725 1.664794e+06 2011\n", + "5 SA seamounts 581172.154389 1.655552e+06 2011\n", + "6 WA seamounts 2487.428050 9.384765e+04 2011" + ] + }, + "execution_count": 228, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_regions = seamounts_iso2.copy()\n", + "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "seamounts_regions['year'] = 2011\n", + "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "seamounts_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 229, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate region stats to seamounts_iso2\n", + "seamounts_iso2 = pd.concat([seamounts_iso2, seamounts_regions])" + ] + }, + { + "cell_type": "code", + "execution_count": 230, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ABNJ', 'AGO', 'ARG', 'ATA', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n", + " 'BRB', 'BRN', 'CAN', 'CHL', 'CHN', 'CIV', 'COL', 'COM', 'CPV',\n", + " 'CRI', 'CUB', 'DMA', 'DNK', 'DOM', 'ECU', 'ESH', 'ESP', 'FJI',\n", + " 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC', 'HND',\n", + " 'HTI', 'IDN', 'IND', 'ISL', 'ITA', 'JAM', 'JPN', 'KIR', 'KOR',\n", + " 'LBR', 'LBY', 'LKA', 'MAR', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT',\n", + " 'MMR', 'MOZ', 'MUS', 'MYS', 'NAM', 'NIC', 'NLD', 'NOR', 'NRU',\n", + " 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRK',\n", + " 'PRT', 'RUS', 'SEN', 'SLB', 'SOM', 'STP', 'SYC', 'TON', 'TUR',\n", + " 'TUV', 'TWN', 'TZA', 'URY', 'USA', 'VCT', 'VEN', 'VNM', 'VUT',\n", + " 'WSM', 'YEM', 'ZAF', 'GLOB', 'AF', 'AS', 'AT', 'EU', 'NA', 'SA',\n", + " 'WA'], dtype=object)" + ] + }, + "execution_count": 230, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_iso2['location_id'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 231, + "metadata": {}, + "outputs": [], + "source": [ + "seamounts_iso2.to_csv(path_out + \"habitats/seamounts.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process mangroves from GMW" + ] + }, + { + "cell_type": "code", + "execution_count": 232, + "metadata": {}, + "outputs": [], + "source": [ + "mangroves = pd.read_csv(path_out + \"habitats/mangroves.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0mangroves61287.20375147358.990971GLOB2020
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 mangroves 61287.20375 147358.990971 GLOB 2020" + ] + }, + "execution_count": 233, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global stats for mangroves\n", + "mangroves_global = mangroves.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "mangroves_global['location_id'] = 'GLOB'\n", + "mangroves_global['year'] = 2020\n", + "mangroves_global" + ] + }, + { + "cell_type": "code", + "execution_count": 234, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate the global stats to the mangroves dataframe\n", + "mangroves = pd.concat([mangroves, mangroves_global])" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFmangroves10006.9700029344.4043992020
1ASmangroves21378.7500074629.1944462020
2NAmangroves2055.400002329.1155052020
3POmangroves6.720006.7230182020
4SAmangroves27811.5337540875.9326662020
5WAmangroves27.83000173.6209382020
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF mangroves 10006.97000 29344.404399 2020\n", + "1 AS mangroves 21378.75000 74629.194446 2020\n", + "2 NA mangroves 2055.40000 2329.115505 2020\n", + "3 PO mangroves 6.72000 6.723018 2020\n", + "4 SA mangroves 27811.53375 40875.932666 2020\n", + "5 WA mangroves 27.83000 173.620938 2020" + ] + }, + "execution_count": 235, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mangroves_regions = mangroves.copy()\n", + "mangroves_regions['region'] = mangroves['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "mangroves_regions['year'] = 2020\n", + "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "mangroves_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate stats for regions with mangroves\n", + "mangroves = pd.concat([mangroves, mangroves_regions])" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['ZAF', 'YEM', 'WSM', 'WLF', 'VUT', 'VNM', 'VIR', 'VGB', 'VEN',\n", + " 'VCT', 'USA', 'TZA', 'TWN', 'TUV', 'TTO', 'TON', 'TLS', 'THA',\n", + " 'TGO', 'TCA', 'SYC', 'SXM', 'SUR', 'STP', 'SOM', 'SLV', 'SLE',\n", + " 'SLB', 'SGP', 'SEN', 'SDN', 'SAU', 'QAT', 'PYF', 'PRI', 'PNG',\n", + " 'PLW', 'PHL', 'PER', 'PAN', 'PAK', 'OMN', 'NZL', 'NIC', 'NGA',\n", + " 'NCL', 'MYT', 'MYS', 'MUS', 'MTQ', 'MRT', 'MOZ', 'MMR', 'MHL',\n", + " 'MEX', 'MDV', 'MDG', 'MAF', 'LKA', 'LCA', 'LBR', 'KNA', 'KIR',\n", + " 'KHM', 'KEN', 'JPN', 'JAM', 'IRN', 'IND', 'IDN', 'HTI', 'HND',\n", + " 'GUY', 'GUM', 'GUF', 'GTM', 'GRD', 'GNQ', 'GNB', 'GMB', 'GLP',\n", + " 'GIN', 'GHA', 'GAB', 'FSM', 'FJI', 'ERI', 'EGY', 'ECU', 'DOM',\n", + " 'DMA', 'DJI', 'CYM', 'CUW', 'CUB', 'CRI', 'COM', 'COL', 'COK',\n", + " 'COG', 'COD', 'CMR', 'CIV', 'CHN', 'BRN', 'BRB', 'BRA', 'BMU',\n", + " 'BLZ', 'BHS', 'BHR', 'BGD', 'BES', 'BEN', 'AUS', 'ATG', 'ATF',\n", + " 'ASM', 'ARE', 'AIA', 'AGO', 'ABW', 'GLOB', 'AF', 'AS', 'NA', 'PO',\n", + " 'SA', 'WA'], dtype=object)" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mangroves['location_id'].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Concatenate all habitats" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293731874.982214cold-water corals2023
1AGO0.0000003.395671cold-water corals2023
2ALB0.0000005.986479cold-water corals2023
3ARG6.98422661.826344cold-water corals2023
4ATG0.0000000.997747cold-water corals2023
..................
1AS21378.75000074629.194446mangroves2020
2NA2055.4000002329.115505mangroves2020
3PO6.7200006.723018mangroves2020
4SA27811.53375040875.932666mangroves2020
5WA27.830000173.620938mangroves2020
\n", + "

628 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 421.629373 1874.982214 cold-water corals 2023\n", + "1 AGO 0.000000 3.395671 cold-water corals 2023\n", + "2 ALB 0.000000 5.986479 cold-water corals 2023\n", + "3 ARG 6.984226 61.826344 cold-water corals 2023\n", + "4 ATG 0.000000 0.997747 cold-water corals 2023\n", + ".. ... ... ... ... ...\n", + "1 AS 21378.750000 74629.194446 mangroves 2020\n", + "2 NA 2055.400000 2329.115505 mangroves 2020\n", + "3 PO 6.720000 6.723018 mangroves 2020\n", + "4 SA 27811.533750 40875.932666 mangroves 2020\n", + "5 WA 27.830000 173.620938 mangroves 2020\n", + "\n", + "[628 rows x 5 columns]" + ] + }, + "execution_count": 238, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the dataframes\n", + "habitats_all = pd.concat([habitats, seamounts_iso2, mangroves])\n", + "habitats_all" + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "metadata": {}, + "outputs": [], + "source": [ + "habitats_all.to_csv(path_out + \"habitats/habitats.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/seamounts.ipynb b/data/notebooks/seamounts.ipynb deleted file mode 100644 index d68b7bfc..00000000 --- a/data/notebooks/seamounts.ipynb +++ /dev/null @@ -1,1000 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import geopandas as gpd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Read required data\n", - "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n", - "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", - "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", - "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Keep relevant fields in eez and hs and merge then in one dataframe\n", - "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", - "hs = hs[['geometry']]\n", - "hs['SOVEREIGN1'] = 'High Seas'\n", - "hs['ISO_SOV1'] = 'ABNJ'\n", - "eez_hs = eez.merge(hs, how='outer')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "33461" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Join eez info to seamounts falling within eez polygons\n", - "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n", - "len(seamounts_eez)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "43" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_eez['ISO_SOV1'].isna().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Drop those not associated with an eez or hs\n", - "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Create new column \"iso\" with the iso_sov codes\n", - "def concatenate_iso(row):\n", - " iso_list = [row['ISO_SOV1']]\n", - " if not pd.isna(row['ISO_SOV2']):\n", - " iso_list.append(row['ISO_SOV2'])\n", - " if not pd.isna(row['ISO_SOV3']):\n", - " iso_list.append(row['ISO_SOV3'])\n", - " return ';'.join(iso_list)\n", - "\n", - "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", - "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", - "split_rows = seamounts_eez[mask].copy()\n", - "split_rows['iso'] = split_rows['iso'].str.split(';')\n", - "split_rows = split_rows.explode('iso')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_eez[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['DNK', 'ABNJ', 'RUS', 'NOR', 'CAN', 'USA', 'FRA', 'ESP', 'JPN',\n", - " 'PRT', 'ITA', 'KOR', 'GRC', 'LBY', 'MLT', 'GBR', 'MAR', 'MEX',\n", - " 'BHS', 'CUB', 'DOM', 'OMN', 'PHL', 'HND', 'HTI', 'JAM', 'TWN',\n", - " 'ATG', 'NLD', 'CPV', 'MHL', 'COL', 'SEN', 'VEN', 'DMA', 'VNM',\n", - " 'CHN', 'IND', 'YEM', 'BRB', 'SOM', 'FSM', 'NIC', 'PLW', 'CRI',\n", - " 'MYS', 'BRN', 'KIR', 'IDN', 'PAN', 'MDV', 'BRA', 'ECU', 'LKA',\n", - " 'GNQ', 'PNG', 'NRU', 'MUS', 'PER', 'SYC', 'TUV', 'SLB', 'NZL',\n", - " 'AUS', 'FJI', 'MDG', 'COM', 'MOZ', 'WSM', 'VUT', 'TON', 'CHL',\n", - " 'ZAF', 'ARG', 'ATA', 'ISL', 'PRK', 'TUR', 'PAK', 'BLZ', 'MMR',\n", - " 'VCT', 'GNB', 'GIN', 'CIV', 'GHA', 'LBR', 'STP', 'TZA', 'AGO',\n", - " 'NAM', 'URY', 'ESH'], dtype=object)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_eez_new['iso'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idtotal_areahabitat_nameyear
0ABNJ1.483098e+07seamounts2023
1AGO9.556242e+03seamounts2023
2ARG3.110730e+05seamounts2023
3ATA3.551629e+05seamounts2023
4ATG6.215895e+03seamounts2023
...............
88VNM4.421338e+04seamounts2023
89VUT1.199475e+05seamounts2023
90WSM4.117997e+04seamounts2023
91YEM6.294974e+04seamounts2023
92ZAF9.946306e+04seamounts2023
\n", - "

93 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " location_id total_area habitat_name year\n", - "0 ABNJ 1.483098e+07 seamounts 2023\n", - "1 AGO 9.556242e+03 seamounts 2023\n", - "2 ARG 3.110730e+05 seamounts 2023\n", - "3 ATA 3.551629e+05 seamounts 2023\n", - "4 ATG 6.215895e+03 seamounts 2023\n", - ".. ... ... ... ...\n", - "88 VNM 4.421338e+04 seamounts 2023\n", - "89 VUT 1.199475e+05 seamounts 2023\n", - "90 WSM 4.117997e+04 seamounts 2023\n", - "91 YEM 6.294974e+04 seamounts 2023\n", - "92 ZAF 9.946306e+04 seamounts 2023\n", - "\n", - "[93 rows x 4 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get area of seamounts per iso\n", - "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", - "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", - "seamounts_iso['habitat_name'] = 'seamounts'\n", - "seamounts_iso['year'] = 2023\n", - "seamounts_iso " - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "# Join protection info to seamounts\n", - "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", - "seamounts_wdpa['protection'] = \"no\" \n", - "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove rows in which protection is \"no\"\n", - "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['CAN', 'ABNJ', 'FRA', 'FRA;ITA;MCO', 'JPN', 'USA', 'PRT', 'ESP',\n", - " 'BHS', 'MEX', 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM',\n", - " 'COL', 'PLW', 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR',\n", - " 'IDN', 'SYC', 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL',\n", - " 'ZAF', 'ARG', 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR'],\n", - " dtype=object)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_wdpa['PARENT_ISO'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['CAN', 'ABNJ', 'FRA', 'JPN', 'USA', 'PRT', 'ESP', 'BHS', 'MEX',\n", - " 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM', 'COL', 'PLW',\n", - " 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR', 'IDN', 'SYC',\n", - " 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL', 'ZAF', 'ARG',\n", - " 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR', 'MCO'],\n", - " dtype=object)" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", - "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", - "split_rows = seamounts_wdpa[mask].copy()\n", - "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", - "split_rows = split_rows.explode('PARENT_ISO')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_wdpa[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", - "seamounts_wdpa_new['PARENT_ISO'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areahabitat_nameyear
0ABNJ226253.932283seamounts2023
1ARG38773.659962seamounts2023
2AUS250507.827932seamounts2023
3BHS9405.718473seamounts2023
4BRA89687.890132seamounts2023
5CAN66235.357502seamounts2023
6CHL239414.964764seamounts2023
7COK238289.821637seamounts2023
8COL30080.163652seamounts2023
9COM1584.809650seamounts2023
10CRI22592.506989seamounts2023
11CUB569.313304seamounts2023
12DOM14889.791781seamounts2023
13ECU16940.242843seamounts2023
14ESP8432.586895seamounts2023
15FJI1520.596802seamounts2023
16FRA238350.837708seamounts2023
17GBR528491.978612seamounts2023
18GNQ4138.403739seamounts2023
19GRC935.569528seamounts2023
20HND4665.192397seamounts2023
21IDN10445.380128seamounts2023
22ITA3753.574953seamounts2023
23JPN119622.849645seamounts2023
24KIR60231.516081seamounts2023
25MCO2123.222307seamounts2023
26MEX84702.255983seamounts2023
27MHL11824.490191seamounts2023
28NIU14703.179190seamounts2023
29NLD435.628871seamounts2023
30NOR2377.872096seamounts2023
31NZL168593.001600seamounts2023
32PAN24185.333200seamounts2023
33PER2043.936634seamounts2023
34PHL11700.480430seamounts2023
35PLW196940.861775seamounts2023
36PRT111736.861592seamounts2023
37SHN782.626658seamounts2023
38SYC46103.929891seamounts2023
39TUV1110.779352seamounts2023
40USA489352.853224seamounts2023
41VEN3221.184275seamounts2023
42YEM2487.428050seamounts2023
43ZAF42558.035678seamounts2023
\n", - "
" - ], - "text/plain": [ - " location_id protected_area habitat_name year\n", - "0 ABNJ 226253.932283 seamounts 2023\n", - "1 ARG 38773.659962 seamounts 2023\n", - "2 AUS 250507.827932 seamounts 2023\n", - "3 BHS 9405.718473 seamounts 2023\n", - "4 BRA 89687.890132 seamounts 2023\n", - "5 CAN 66235.357502 seamounts 2023\n", - "6 CHL 239414.964764 seamounts 2023\n", - "7 COK 238289.821637 seamounts 2023\n", - "8 COL 30080.163652 seamounts 2023\n", - "9 COM 1584.809650 seamounts 2023\n", - "10 CRI 22592.506989 seamounts 2023\n", - "11 CUB 569.313304 seamounts 2023\n", - "12 DOM 14889.791781 seamounts 2023\n", - "13 ECU 16940.242843 seamounts 2023\n", - "14 ESP 8432.586895 seamounts 2023\n", - "15 FJI 1520.596802 seamounts 2023\n", - "16 FRA 238350.837708 seamounts 2023\n", - "17 GBR 528491.978612 seamounts 2023\n", - "18 GNQ 4138.403739 seamounts 2023\n", - "19 GRC 935.569528 seamounts 2023\n", - "20 HND 4665.192397 seamounts 2023\n", - "21 IDN 10445.380128 seamounts 2023\n", - "22 ITA 3753.574953 seamounts 2023\n", - "23 JPN 119622.849645 seamounts 2023\n", - "24 KIR 60231.516081 seamounts 2023\n", - "25 MCO 2123.222307 seamounts 2023\n", - "26 MEX 84702.255983 seamounts 2023\n", - "27 MHL 11824.490191 seamounts 2023\n", - "28 NIU 14703.179190 seamounts 2023\n", - "29 NLD 435.628871 seamounts 2023\n", - "30 NOR 2377.872096 seamounts 2023\n", - "31 NZL 168593.001600 seamounts 2023\n", - "32 PAN 24185.333200 seamounts 2023\n", - "33 PER 2043.936634 seamounts 2023\n", - "34 PHL 11700.480430 seamounts 2023\n", - "35 PLW 196940.861775 seamounts 2023\n", - "36 PRT 111736.861592 seamounts 2023\n", - "37 SHN 782.626658 seamounts 2023\n", - "38 SYC 46103.929891 seamounts 2023\n", - "39 TUV 1110.779352 seamounts 2023\n", - "40 USA 489352.853224 seamounts 2023\n", - "41 VEN 3221.184275 seamounts 2023\n", - "42 YEM 2487.428050 seamounts 2023\n", - "43 ZAF 42558.035678 seamounts 2023" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", - "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", - "seamounts_protected['habitat_name'] = 'seamounts'\n", - "seamounts_protected['year'] = 2023\n", - "seamounts_protected " - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "# join 'protected area' field in seamounts_protected to seamounts_iso based on location_id\n", - "seamounts_iso2 = seamounts_iso.merge(seamounts_protected[['location_id', 'protected_area']], left_on='location_id', right_on='location_id', how='left')" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idtotal_areahabitat_nameyearprotected_area
0ABNJ1.483098e+07seamounts2023226253.932283
1AGO9.556242e+03seamounts20230.000000
2ARG3.110730e+05seamounts202338773.659962
3ATA3.551629e+05seamounts20230.000000
4ATG6.215895e+03seamounts20230.000000
..................
88VNM4.421338e+04seamounts20230.000000
89VUT1.199475e+05seamounts20230.000000
90WSM4.117997e+04seamounts20230.000000
91YEM6.294974e+04seamounts20232487.428050
92ZAF9.946306e+04seamounts202342558.035678
\n", - "

93 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " location_id total_area habitat_name year protected_area\n", - "0 ABNJ 1.483098e+07 seamounts 2023 226253.932283\n", - "1 AGO 9.556242e+03 seamounts 2023 0.000000\n", - "2 ARG 3.110730e+05 seamounts 2023 38773.659962\n", - "3 ATA 3.551629e+05 seamounts 2023 0.000000\n", - "4 ATG 6.215895e+03 seamounts 2023 0.000000\n", - ".. ... ... ... ... ...\n", - "88 VNM 4.421338e+04 seamounts 2023 0.000000\n", - "89 VUT 1.199475e+05 seamounts 2023 0.000000\n", - "90 WSM 4.117997e+04 seamounts 2023 0.000000\n", - "91 YEM 6.294974e+04 seamounts 2023 2487.428050\n", - "92 ZAF 9.946306e+04 seamounts 2023 42558.035678\n", - "\n", - "[93 rows x 5 columns]" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# set to 0 the protected_area values that are NaN\n", - "seamounts_iso2['protected_area'] = seamounts_iso2['protected_area'].fillna(0)\n", - "seamounts_iso2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "seamounts_iso2.to_csv(path_out + \"habitat/seamounts.csv\", index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "skytruth", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 578f453848a3559e4539b5e9fcfef580ee73f583 Mon Sep 17 00:00:00 2001 From: sofia Date: Thu, 19 Oct 2023 10:19:12 +0200 Subject: [PATCH 9/9] data cleaning and tables creation --- data/notebooks/MPAtlas_table.ipynb | 717 ------------- data/notebooks/habitats.ipynb | 1013 ++++++++---------- data/notebooks/layers.ipynb | 122 ++- data/notebooks/mpas_table.ipynb | 143 +++ data/notebooks/mpatlas_stats.ipynb | 349 +++++++ data/notebooks/protectedseas.ipynb | 1562 ++++------------------------ data/notebooks/wdpa_coverage.ipynb | 790 +++----------- 7 files changed, 1418 insertions(+), 3278 deletions(-) delete mode 100644 data/notebooks/MPAtlas_table.ipynb create mode 100644 data/notebooks/mpas_table.ipynb create mode 100644 data/notebooks/mpatlas_stats.ipynb diff --git a/data/notebooks/MPAtlas_table.ipynb b/data/notebooks/MPAtlas_table.ipynb deleted file mode 100644 index 40a2613b..00000000 --- a/data/notebooks/MPAtlas_table.ipynb +++ /dev/null @@ -1,717 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set up" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed/mpatlas\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### MPAtlas" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Read data\n", - "mpatlas = gpd.read_file(path_in + \"/MPAtlas_largest100.shp\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['OBJECTID', 'WDPAID', 'WDPA_PID', 'NAME', 'English_De', 'PARENT_ISO',\n", - " 'ISO3', 'MPA_Marine', 'mpa_id', 'Zone_Marin', 'IUCN_Cat', 'Stage_of_E',\n", - " 'Distant_MP', 'Level_of_P', 'Most_Impac', 'Descrip_Im', 'Vertically',\n", - " 'SHAPE_Leng', 'SHAPE_Area', 'geometry'],\n", - " dtype='object')" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mpatlas.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Incompatible', 'Highly', 'TBD', 'Fully', 'Lightly', 'Unknown'],\n", - " dtype=object)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mpatlas.Level_of_P.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OBJECTIDWDPAIDWDPA_PIDNAMEEnglish_DePARENT_ISOISO3MPA_Marinempa_idZone_Marin...Stage_of_EDistant_MPLevel_of_PMost_ImpacDescrip_ImVerticallySHAPE_LengSHAPE_AreageometryP_LEVEL
01.0478053.0478053Hikurangi DeepBenthic Protection AreaNZLNZL54022.1525854022.1...ImplementedNaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...X12.3329525.833001POLYGON ((-175.00000 -42.16661, -175.00000 -42...Less Protected / Unknown
12.0555512062.0555512062KermadecBenthic Protection AreaNZLNZL619146.05428458540.5...ImplementedNaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...NaN25.62935242.963159POLYGON ((-174.02370 -29.22191, -174.02370 -29...Less Protected / Unknown
\n", - "

2 rows × 21 columns

\n", - "
" - ], - "text/plain": [ - " OBJECTID WDPAID WDPA_PID NAME English_De \\\n", - "0 1.0 478053.0 478053 Hikurangi Deep Benthic Protection Area \n", - "1 2.0 555512062.0 555512062 Kermadec Benthic Protection Area \n", - "\n", - " PARENT_ISO ISO3 MPA_Marine mpa_id Zone_Marin ... Stage_of_E Distant_MP \\\n", - "0 NZL NZL 54022.1 5258 54022.1 ... Implemented NaN \n", - "1 NZL NZL 619146.0 5428 458540.5 ... Implemented NaN \n", - "\n", - " Level_of_P Most_Impac \\\n", - "0 Incompatible Mining, Fishing \n", - "1 Incompatible Mining, Fishing \n", - "\n", - " Descrip_Im Vertically SHAPE_Leng \\\n", - "0 Benthic protections only. Deep sea mining allo... X 12.332952 \n", - "1 Benthic protections only. Deep sea mining allo... NaN 25.629352 \n", - "\n", - " SHAPE_Area geometry \\\n", - "0 5.833001 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n", - "1 42.963159 POLYGON ((-174.02370 -29.22191, -174.02370 -29... \n", - "\n", - " P_LEVEL \n", - "0 Less Protected / Unknown \n", - "1 Less Protected / Unknown \n", - "\n", - "[2 rows x 21 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create new column with protection level reclassified\n", - "def map_protection_level(value):\n", - " if value in [\"Fully\", \"Highly\"]:\n", - " return \"Fully / Highly Protected\"\n", - " else:\n", - " return \"Less Protected / Unknown\"\n", - "\n", - "# Create a new column based on column1\n", - "mpatlas['P_LEVEL'] = mpatlas['Level_of_P'].apply(map_protection_level)\n", - "mpatlas.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", - "regions_data = [\n", - " {\n", - " 'region_iso': 'AS',\n", - " 'region_name': 'Asia & Pacific',\n", - " 'country_iso_3s': [\n", - " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", - " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", - " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", - " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AF',\n", - " 'region_name': 'Africa',\n", - " 'country_iso_3s': [\n", - " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", - " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", - " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", - " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'EU',\n", - " 'region_name': 'Europe',\n", - " 'country_iso_3s': [\n", - " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", - " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", - " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", - " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", - " \"UZB\", \"VAT\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'SA',\n", - " 'region_name': 'Latin America & Caribbean',\n", - " 'country_iso_3s': [\n", - " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", - " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", - " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", - " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'PO',\n", - " 'region_name': 'Polar',\n", - " 'country_iso_3s': [\n", - " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'NA',\n", - " 'region_name': 'North America',\n", - " 'country_iso_3s': [\n", - " \"CAN\", \"SPM\", \"USA\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'GL',\n", - " 'region_name': 'Global',\n", - " 'country_iso_3s': []\n", - " },\n", - " {\n", - " 'region_iso': 'WA',\n", - " 'region_name': 'West Asia',\n", - " 'country_iso_3s': [\n", - " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", - " ]\n", - " }\n", - "]\n", - "\n", - "# Convert the region data to a dictionary that maps each country to its region name\n", - "country_to_region = {}\n", - "for region in regions_data:\n", - " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_name']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OBJECTIDWDPAIDWDPA_PIDNAMEEnglish_DePARENT_ISOISO3MPA_Marinempa_idZone_Marin...Distant_MPLevel_of_PMost_ImpacDescrip_ImVerticallySHAPE_LengSHAPE_AreageometryP_LEVELREGIONS
01.0478053.0478053Hikurangi DeepBenthic Protection AreaNZLNZL54022.1525854022.1...NaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...X12.3329525.833001POLYGON ((-175.00000 -42.16661, -175.00000 -42...Less Protected / UnknownAsia & Pacific
12.0555512062.0555512062KermadecBenthic Protection AreaNZLNZL619146.05428458540.5...NaNIncompatibleMining, FishingBenthic protections only. Deep sea mining allo...NaN25.62935242.963159POLYGON ((-174.02370 -29.22191, -174.02370 -29...Less Protected / UnknownAsia & Pacific
\n", - "

2 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " OBJECTID WDPAID WDPA_PID NAME English_De \\\n", - "0 1.0 478053.0 478053 Hikurangi Deep Benthic Protection Area \n", - "1 2.0 555512062.0 555512062 Kermadec Benthic Protection Area \n", - "\n", - " PARENT_ISO ISO3 MPA_Marine mpa_id Zone_Marin ... Distant_MP \\\n", - "0 NZL NZL 54022.1 5258 54022.1 ... NaN \n", - "1 NZL NZL 619146.0 5428 458540.5 ... NaN \n", - "\n", - " Level_of_P Most_Impac \\\n", - "0 Incompatible Mining, Fishing \n", - "1 Incompatible Mining, Fishing \n", - "\n", - " Descrip_Im Vertically SHAPE_Leng \\\n", - "0 Benthic protections only. Deep sea mining allo... X 12.332952 \n", - "1 Benthic protections only. Deep sea mining allo... NaN 25.629352 \n", - "\n", - " SHAPE_Area geometry \\\n", - "0 5.833001 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n", - "1 42.963159 POLYGON ((-174.02370 -29.22191, -174.02370 -29... \n", - "\n", - " P_LEVEL REGIONS \n", - "0 Less Protected / Unknown Asia & Pacific \n", - "1 Less Protected / Unknown Asia & Pacific \n", - "\n", - "[2 rows x 22 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mpatlas['REGIONS'] = mpatlas['ISO3'].map(country_to_region)\n", - "mpatlas.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDNAMEAREA_MPATLASDESIG_ENGESTABLISHMENTIMPACTP_LEVELPARENT_ISOISO3REGIONSgeometry
0478053.0478053Hikurangi Deep54022.1Benthic Protection AreaImplementedMining, FishingLess Protected / UnknownNZLNZLAsia & PacificPOLYGON ((-175.00000 -42.16661, -175.00000 -42...
1555512062.0555512062Kermadec458540.5Benthic Protection AreaImplementedMining, FishingLess Protected / UnknownNZLNZLAsia & PacificPOLYGON ((-174.02370 -29.22191, -174.02370 -29...
\n", - "
" - ], - "text/plain": [ - " WDPAID WDPA_PID NAME AREA_MPATLAS \\\n", - "0 478053.0 478053 Hikurangi Deep 54022.1 \n", - "1 555512062.0 555512062 Kermadec 458540.5 \n", - "\n", - " DESIG_ENG ESTABLISHMENT IMPACT \\\n", - "0 Benthic Protection Area Implemented Mining, Fishing \n", - "1 Benthic Protection Area Implemented Mining, Fishing \n", - "\n", - " P_LEVEL PARENT_ISO ISO3 REGIONS \\\n", - "0 Less Protected / Unknown NZL NZL Asia & Pacific \n", - "1 Less Protected / Unknown NZL NZL Asia & Pacific \n", - "\n", - " geometry \n", - "0 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n", - "1 POLYGON ((-174.02370 -29.22191, -174.02370 -29... " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Rename columns and keep only relevant ones. \n", - "# Note: We keep \"Zone_Marine\" (area of the geometry), instead of \"MPA_Marine\" (as MPAs can be divided in smaller pieces according to their protection levels)\n", - "\n", - "mpatlas = mpatlas.rename(columns={'English_De': 'DESIG_ENG', 'Zone_Marin': 'AREA_MPATLAS', 'Stage_of_E': 'ESTABLISHMENT', 'Most_Impac': 'IMPACT' }) \n", - "mpatlas2 = mpatlas[['WDPAID', 'WDPA_PID', 'NAME', 'AREA_MPATLAS', 'DESIG_ENG', 'ESTABLISHMENT', 'IMPACT', 'P_LEVEL', 'PARENT_ISO', 'ISO3','REGIONS', 'geometry']]\n", - "mpatlas2.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_25742/67511564.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", - " mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")\n" - ] - } - ], - "source": [ - "mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDNAMEAREA_MPATLDESIG_ENGESTABLISHMIMPACTP_LEVELPARENT_ISOISO3REGIONSgeometry
0478053.0478053Hikurangi Deep54022.1Benthic Protection AreaImplementedMining, FishingLess Protected / UnknownNZLNZLAsia & PacificPOLYGON ((-175.00000 -42.16661, -175.00000 -42...
1555512062.0555512062Kermadec458540.5Benthic Protection AreaImplementedMining, FishingLess Protected / UnknownNZLNZLAsia & PacificPOLYGON ((-174.02370 -29.22191, -174.02370 -29...
\n", - "
" - ], - "text/plain": [ - " WDPAID WDPA_PID NAME AREA_MPATL \\\n", - "0 478053.0 478053 Hikurangi Deep 54022.1 \n", - "1 555512062.0 555512062 Kermadec 458540.5 \n", - "\n", - " DESIG_ENG ESTABLISHM IMPACT \\\n", - "0 Benthic Protection Area Implemented Mining, Fishing \n", - "1 Benthic Protection Area Implemented Mining, Fishing \n", - "\n", - " P_LEVEL PARENT_ISO ISO3 REGIONS \\\n", - "0 Less Protected / Unknown NZL NZL Asia & Pacific \n", - "1 Less Protected / Unknown NZL NZL Asia & Pacific \n", - "\n", - " geometry \n", - "0 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n", - "1 POLYGON ((-174.02370 -29.22191, -174.02370 -29... " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mpatlas = gpd.read_file(path_out + \"/mpatlas_table.shp\")\n", - "mpatlas.head(2)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "skytruth", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/data/notebooks/habitats.ipynb b/data/notebooks/habitats.ipynb index 043a5ad6..1185d367 100644 --- a/data/notebooks/habitats.ipynb +++ b/data/notebooks/habitats.ipynb @@ -32,18 +32,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import geopandas as gpd\n", "import pandas as pd\n", - "import openpyxl" + "import openpyxl\n", + "from datetime import datetime" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -60,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -73,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -85,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -98,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -114,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -128,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -141,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -154,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 198, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -189,16 +190,16 @@ " \n", " 0\n", " ABNJ\n", - " 421.629373\n", - " 1874.982214\n", + " 421.629372679904\n", + " 1874.98221422617\n", " cold-water corals\n", " 2023\n", " \n", " \n", " 1\n", " AGO\n", - " 0.000000\n", - " 3.395671\n", + " 0\n", + " 3.39567053773998\n", " cold-water corals\n", " 2023\n", " \n", @@ -207,12 +208,12 @@ "" ], "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 421.629373 1874.982214 cold-water corals 2023\n", - "1 AGO 0.000000 3.395671 cold-water corals 2023" + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n", + "1 AGO 0 3.39567053773998 cold-water corals 2023" ] }, - "execution_count": 198, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -220,13 +221,13 @@ "source": [ "# Concatenate the dataframes\n", "habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped])\n", - "habitats['year'] = 2023\n", + "habitats['year'] = datetime.now().year\n", "habitats.head(2)" ] }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -260,33 +261,33 @@ " \n", " \n", " 0\n", - " cold-water corals\n", - " 4214.897203\n", - " 15057.016684\n", + " saltmarsh\n", + " 111638.252564\n", + " 224435.075094\n", " GLOB\n", " 2023\n", " \n", " \n", " 1\n", - " saltmarshes\n", - " 111540.045205\n", - " 217798.398466\n", + " seagrass\n", + " 74787.449960\n", + " 314001.940600\n", " GLOB\n", " 2023\n", " \n", " \n", " 2\n", - " seagrasses\n", - " 71543.667168\n", - " 295004.516919\n", + " warmwater-corals\n", + " 63259.499130\n", + " 149886.974126\n", " GLOB\n", " 2023\n", " \n", " \n", - " 3\n", - " warm-water corals\n", - " 62074.768550\n", - " 147100.573092\n", + " 4\n", + " coldwater-corals\n", + " 4400.140842\n", + " 15336.975280\n", " GLOB\n", " 2023\n", " \n", @@ -295,29 +296,30 @@ "" ], "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 cold-water corals 4214.897203 15057.016684 GLOB 2023\n", - "1 saltmarshes 111540.045205 217798.398466 GLOB 2023\n", - "2 seagrasses 71543.667168 295004.516919 GLOB 2023\n", - "3 warm-water corals 62074.768550 147100.573092 GLOB 2023" + " habitat_name protected_area total_area location_id year\n", + "0 saltmarsh 111638.252564 224435.075094 GLOB 2023\n", + "1 seagrass 74787.449960 314001.940600 GLOB 2023\n", + "2 warmwater-corals 63259.499130 149886.974126 GLOB 2023\n", + "4 coldwater-corals 4400.140842 15336.975280 GLOB 2023" ] }, - "execution_count": 199, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Calculate global stats for habitats\n", - "habitats_global = habitats.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "habitats_global = glob[['habitat','protected_area', 'total_area']].rename(columns={'habitat': 'habitat_name'})\n", "habitats_global['location_id'] = 'GLOB'\n", - "habitats_global['year'] = 2023\n", + "habitats_global['year'] = datetime.now().year\n", + "habitats_global = habitats_global[habitats_global['habitat_name'] != 'mangroves'] # remove mangroves\n", "habitats_global" ] }, { "cell_type": "code", - "execution_count": 200, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -341,125 +343,88 @@ " \n", " \n", " \n", - " location_id\n", + " habitat_name\n", " protected_area\n", " total_area\n", - " habitat_name\n", + " location_id\n", " year\n", " \n", " \n", " \n", " \n", " 0\n", - " ABNJ\n", - " 421.629373\n", - " 1874.982214\n", - " cold-water corals\n", - " 2023\n", - " \n", - " \n", - " 1\n", - " AGO\n", - " 0.000000\n", - " 3.395671\n", - " cold-water corals\n", - " 2023\n", - " \n", - " \n", - " 2\n", - " ALB\n", - " 0.000000\n", - " 5.986479\n", - " cold-water corals\n", - " 2023\n", - " \n", - " \n", - " 3\n", - " ARG\n", - " 6.984226\n", - " 61.826344\n", - " cold-water corals\n", - " 2023\n", - " \n", - " \n", - " 4\n", - " ATG\n", - " 0.000000\n", - " 0.997747\n", - " cold-water corals\n", - " 2023\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 81\n", - " ZAF\n", - " 1.398813\n", - " 1.398813\n", - " warm-water corals\n", - " 2023\n", - " \n", - " \n", - " 0\n", + " saltmarshes\n", + " 111638.252564\n", + " 224435.075094\n", " GLOB\n", - " 4214.897203\n", - " 15057.016684\n", - " cold-water corals\n", " 2023\n", " \n", " \n", " 1\n", + " seagrasses\n", + " 74787.449960\n", + " 314001.940600\n", " GLOB\n", - " 111540.045205\n", - " 217798.398466\n", - " saltmarshes\n", " 2023\n", " \n", " \n", " 2\n", + " warm-water corals\n", + " 63259.499130\n", + " 149886.974126\n", " GLOB\n", - " 71543.667168\n", - " 295004.516919\n", - " seagrasses\n", " 2023\n", " \n", " \n", - " 3\n", + " 4\n", + " cold-water corals\n", + " 4400.140842\n", + " 15336.975280\n", " GLOB\n", - " 62074.768550\n", - " 147100.573092\n", - " warm-water corals\n", " 2023\n", " \n", " \n", "\n", - "

374 rows × 5 columns

\n", "" ], "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 421.629373 1874.982214 cold-water corals 2023\n", - "1 AGO 0.000000 3.395671 cold-water corals 2023\n", - "2 ALB 0.000000 5.986479 cold-water corals 2023\n", - "3 ARG 6.984226 61.826344 cold-water corals 2023\n", - "4 ATG 0.000000 0.997747 cold-water corals 2023\n", - ".. ... ... ... ... ...\n", - "81 ZAF 1.398813 1.398813 warm-water corals 2023\n", - "0 GLOB 4214.897203 15057.016684 cold-water corals 2023\n", - "1 GLOB 111540.045205 217798.398466 saltmarshes 2023\n", - "2 GLOB 71543.667168 295004.516919 seagrasses 2023\n", - "3 GLOB 62074.768550 147100.573092 warm-water corals 2023\n", - "\n", - "[374 rows x 5 columns]" + " habitat_name protected_area total_area location_id year\n", + "0 saltmarshes 111638.252564 224435.075094 GLOB 2023\n", + "1 seagrasses 74787.449960 314001.940600 GLOB 2023\n", + "2 warm-water corals 63259.499130 149886.974126 GLOB 2023\n", + "4 cold-water corals 4400.140842 15336.975280 GLOB 2023" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Change the name of the habitats to match the ones in the habitats dataframe\n", + "habitat_name_mapping = {\n", + " 'saltmarsh': 'saltmarshes',\n", + " 'seagrass': 'seagrasses',\n", + " 'warmwater-corals': 'warm-water corals',\n", + " 'coldwater-corals': 'cold-water corals'\n", + "}\n", + "habitats_global['habitat_name'] = habitats_global['habitat_name'].replace(habitat_name_mapping)\n", + "habitats_global" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['cold-water corals', 'saltmarshes', 'seagrasses',\n", + " 'warm-water corals'], dtype=object)" ] }, - "execution_count": 200, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -467,12 +432,12 @@ "source": [ "# Concatenate the global stats to the habitats dataframe\n", "habitats = pd.concat([habitats, habitats_global])\n", - "habitats" + "habitats['habitat_name'].unique()" ] }, { "cell_type": "code", - "execution_count": 201, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -559,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 202, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -627,96 +592,96 @@ " 4\n", " AS\n", " cold-water corals\n", - " 433.777100\n", - " 1733.448452\n", + " 263.251498\n", + " 1332.225080\n", " 2023\n", " \n", " \n", " 5\n", " AS\n", " saltmarshes\n", - " 11965.693910\n", - " 44696.365149\n", + " 11721.439539\n", + " 39229.888860\n", " 2023\n", " \n", " \n", " 6\n", " AS\n", " seagrasses\n", - " 29085.739962\n", - " 123207.628344\n", + " 28942.705660\n", + " 72666.482052\n", " 2023\n", " \n", " \n", " 7\n", " AS\n", " warm-water corals\n", - " 41327.715018\n", - " 100106.465948\n", + " 13895.870659\n", + " 67363.486609\n", " 2023\n", " \n", " \n", " 8\n", " EU\n", " cold-water corals\n", - " 2657.645874\n", - " 7253.054271\n", + " 2183.050266\n", + " 6179.526427\n", " 2023\n", " \n", " \n", " 9\n", " EU\n", " saltmarshes\n", - " 11397.003598\n", - " 18425.728461\n", + " 7431.043710\n", + " 13274.326478\n", " 2023\n", " \n", " \n", " 10\n", " EU\n", " seagrasses\n", - " 9767.760581\n", - " 16539.589632\n", + " 5840.372925\n", + " 10391.189911\n", " 2023\n", " \n", " \n", " 11\n", " EU\n", " warm-water corals\n", - " 4357.931018\n", - " 9459.623825\n", + " 0.605763\n", + " 0.793357\n", " 2023\n", " \n", " \n", " 12\n", " NA\n", " cold-water corals\n", - " 429.351859\n", - " 2384.316484\n", + " 22.960099\n", + " 204.280433\n", " 2023\n", " \n", " \n", " 13\n", " NA\n", " saltmarshes\n", - " 57209.603176\n", - " 87048.164494\n", + " 51092.644683\n", + " 68200.081930\n", " 2023\n", " \n", " \n", " 14\n", " NA\n", " seagrasses\n", - " 8800.520794\n", - " 15860.899757\n", + " 70.012791\n", + " 301.909141\n", " 2023\n", " \n", " \n", " 15\n", " NA\n", " warm-water corals\n", - " 3652.486628\n", - " 4545.057745\n", + " 0.000000\n", + " 0.000000\n", " 2023\n", " \n", " \n", @@ -788,52 +753,57 @@ "" ], "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF cold-water corals 37.761626 381.993234 2023\n", - "1 AF saltmarshes 6688.702879 19845.915000 2023\n", - "2 AF seagrasses 6319.099491 61939.484904 2023\n", - "3 AF warm-water corals 6591.340083 15216.393947 2023\n", - "4 AS cold-water corals 433.777100 1733.448452 2023\n", - "5 AS saltmarshes 11965.693910 44696.365149 2023\n", - "6 AS seagrasses 29085.739962 123207.628344 2023\n", - "7 AS warm-water corals 41327.715018 100106.465948 2023\n", - "8 EU cold-water corals 2657.645874 7253.054271 2023\n", - "9 EU saltmarshes 11397.003598 18425.728461 2023\n", - "10 EU seagrasses 9767.760581 16539.589632 2023\n", - "11 EU warm-water corals 4357.931018 9459.623825 2023\n", - "12 NA cold-water corals 429.351859 2384.316484 2023\n", - "13 NA saltmarshes 57209.603176 87048.164494 2023\n", - "14 NA seagrasses 8800.520794 15860.899757 2023\n", - "15 NA warm-water corals 3652.486628 4545.057745 2023\n", - "16 SA cold-water corals 234.731370 1416.251323 2023\n", - "17 SA saltmarshes 22969.815906 35983.392744 2023\n", - "18 SA seagrasses 16517.097667 45847.459412 2023\n", - "19 SA warm-water corals 5597.366845 12869.801231 2023\n", - "20 WA cold-water corals 0.000000 12.970705 2023\n", - "21 WA saltmarshes 1309.225736 11798.832619 2023\n", - "22 WA seagrasses 1053.448673 25273.727431 2023\n", - "23 WA warm-water corals 547.928957 4903.230395 2023" + " location_id habitat_name protected_area total_area year\n", + "0 AF cold-water corals 37.761626 381.993234 2023\n", + "1 AF saltmarshes 6688.702879 19845.915000 2023\n", + "2 AF seagrasses 6319.099491 61939.484904 2023\n", + "3 AF warm-water corals 6591.340083 15216.393947 2023\n", + "4 AS cold-water corals 263.251498 1332.225080 2023\n", + "5 AS saltmarshes 11721.439539 39229.888860 2023\n", + "6 AS seagrasses 28942.705660 72666.482052 2023\n", + "7 AS warm-water corals 13895.870659 67363.486609 2023\n", + "8 EU cold-water corals 2183.050266 6179.526427 2023\n", + "9 EU saltmarshes 7431.043710 13274.326478 2023\n", + "10 EU seagrasses 5840.372925 10391.189911 2023\n", + "11 EU warm-water corals 0.605763 0.793357 2023\n", + "12 NA cold-water corals 22.960099 204.280433 2023\n", + "13 NA saltmarshes 51092.644683 68200.081930 2023\n", + "14 NA seagrasses 70.012791 301.909141 2023\n", + "15 NA warm-water corals 0.000000 0.000000 2023\n", + "16 SA cold-water corals 234.731370 1416.251323 2023\n", + "17 SA saltmarshes 22969.815906 35983.392744 2023\n", + "18 SA seagrasses 16517.097667 45847.459412 2023\n", + "19 SA warm-water corals 5597.366845 12869.801231 2023\n", + "20 WA cold-water corals 0.000000 12.970705 2023\n", + "21 WA saltmarshes 1309.225736 11798.832619 2023\n", + "22 WA seagrasses 1053.448673 25273.727431 2023\n", + "23 WA warm-water corals 547.928957 4903.230395 2023" ] }, - "execution_count": 202, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Add regions field\n", "habitats_regions = habitats.copy()\n", "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n", "\n", + "# Convert fields to numeric\n", + "habitats_regions['protected_area'] = pd.to_numeric(habitats_regions['protected_area'], errors='coerce')\n", + "habitats_regions['total_area'] = pd.to_numeric(habitats_regions['total_area'], errors='coerce')\n", + "\n", "# Calculate stats for each region\n", "habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "habitats_regions['year'] = 2023\n", + "habitats_regions['year'] = datetime.now().year\n", "habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", "habitats_regions\n" ] }, { "cell_type": "code", - "execution_count": 204, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -843,7 +813,7 @@ }, { "cell_type": "code", - "execution_count": 205, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -868,7 +838,7 @@ " 'AF', 'AS', 'EU', 'NA', 'SA', 'WA'], dtype=object)" ] }, - "execution_count": 205, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -879,7 +849,7 @@ }, { "cell_type": "code", - "execution_count": 206, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -895,7 +865,7 @@ }, { "cell_type": "code", - "execution_count": 213, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -908,7 +878,7 @@ }, { "cell_type": "code", - "execution_count": 214, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -922,27 +892,19 @@ }, { "cell_type": "code", - "execution_count": 215, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "# Join eez info to seamounts falling within eez polygons\n", - "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")" - ] - }, - { - "cell_type": "code", - "execution_count": 216, - "metadata": {}, - "outputs": [], - "source": [ + "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n", "# Drop those not associated with an eez or hs\n", "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" ] }, { "cell_type": "code", - "execution_count": 217, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -960,26 +922,28 @@ }, { "cell_type": "code", - "execution_count": 218, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ - "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", - "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", - "split_rows = seamounts_eez[mask].copy()\n", - "split_rows['iso'] = split_rows['iso'].str.split(';')\n", - "split_rows = split_rows.explode('iso')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_eez[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" + "# Check which seamounts are protectec\n", + "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", + "seamounts_wdpa['protection'] = \"no\" \n", + "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n", + "# Remove rows in which protection is \"no\"\n", + "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global stats" ] }, { "cell_type": "code", - "execution_count": 219, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -1003,187 +967,103 @@ " \n", " \n", " \n", - " location_id\n", - " total_area\n", " habitat_name\n", + " total_area\n", + " location_id\n", " year\n", " \n", " \n", " \n", " \n", " 0\n", - " ABNJ\n", - " 1.483098e+07\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 1\n", - " AGO\n", - " 9.556242e+03\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 2\n", - " ARG\n", - " 3.110730e+05\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 3\n", - " ATA\n", - " 3.551629e+05\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 4\n", - " ATG\n", - " 6.215895e+03\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 88\n", - " VNM\n", - " 4.421338e+04\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 89\n", - " VUT\n", - " 1.199475e+05\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 90\n", - " WSM\n", - " 4.117997e+04\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 91\n", - " YEM\n", - " 6.294974e+04\n", - " seamounts\n", - " 2011\n", - " \n", - " \n", - " 92\n", - " ZAF\n", - " 9.946306e+04\n", " seamounts\n", + " 2.690810e+07\n", + " GLOB\n", " 2011\n", " \n", " \n", "\n", - "

93 rows × 4 columns

\n", "" ], "text/plain": [ - " location_id total_area habitat_name year\n", - "0 ABNJ 1.483098e+07 seamounts 2011\n", - "1 AGO 9.556242e+03 seamounts 2011\n", - "2 ARG 3.110730e+05 seamounts 2011\n", - "3 ATA 3.551629e+05 seamounts 2011\n", - "4 ATG 6.215895e+03 seamounts 2011\n", - ".. ... ... ... ...\n", - "88 VNM 4.421338e+04 seamounts 2011\n", - "89 VUT 1.199475e+05 seamounts 2011\n", - "90 WSM 4.117997e+04 seamounts 2011\n", - "91 YEM 6.294974e+04 seamounts 2011\n", - "92 ZAF 9.946306e+04 seamounts 2011\n", - "\n", - "[93 rows x 4 columns]" + " habitat_name total_area location_id year\n", + "0 seamounts 2.690810e+07 GLOB 2011" ] }, - "execution_count": 219, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Get area of seamounts per iso\n", - "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", - "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", - "seamounts_iso['habitat_name'] = 'seamounts'\n", - "seamounts_iso['year'] = 2011\n", - "seamounts_iso " - ] - }, - { - "cell_type": "code", - "execution_count": 220, - "metadata": {}, - "outputs": [], - "source": [ - "# Join protection info to seamounts\n", - "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", - "seamounts_wdpa['protection'] = \"no\" \n", - "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 221, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove rows in which protection is \"no\"\n", - "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" + "# Calculate global area of seamounts\n", + "seamounts_eez['habitat_name'] = 'seamounts'\n", + "seamounts_global = seamounts_eez.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n", + "seamounts_global['location_id'] = 'GLOB'\n", + "seamounts_global['year'] = 2011\n", + "seamounts_global" ] }, { "cell_type": "code", - "execution_count": 222, + "execution_count": 66, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "array(['CAN', 'ABNJ', 'FRA', 'JPN', 'USA', 'PRT', 'ESP', 'BHS', 'MEX',\n", - " 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM', 'COL', 'PLW',\n", - " 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR', 'IDN', 'SYC',\n", - " 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL', 'ZAF', 'ARG',\n", - " 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR', 'MCO'],\n", - " dtype=object)" - ] - }, - "execution_count": 222, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Split the 'iso_code' values and create separate rows only for rows with multiple values\n", - "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", - "split_rows = seamounts_wdpa[mask].copy()\n", - "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", - "split_rows = split_rows.explode('PARENT_ISO')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_wdpa[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", - "seamounts_wdpa_new['PARENT_ISO'].unique()" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_area
0seamounts3.438552e+06
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area\n", + "0 seamounts 3.438552e+06" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global area of seamounts protected\n", + "seamounts_wdpa['habitat_name'] = 'seamounts'\n", + "seamounts_wdpa_global = seamounts_wdpa.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'protected_area'})\n", + "seamounts_wdpa_global" ] }, { "cell_type": "code", - "execution_count": 223, + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -1207,45 +1087,71 @@ " \n", " \n", " \n", + " habitat_name\n", + " total_area\n", " location_id\n", + " year\n", " protected_area\n", " \n", " \n", " \n", " \n", " 0\n", - " ABNJ\n", - " 226253.932283\n", - " \n", - " \n", - " 1\n", - " ARG\n", - " 38773.659962\n", + " seamounts\n", + " 2.690810e+07\n", + " GLOB\n", + " 2011\n", + " 3.438552e+06\n", " \n", " \n", "\n", "" ], "text/plain": [ - " location_id protected_area\n", - "0 ABNJ 226253.932283\n", - "1 ARG 38773.659962" + " habitat_name total_area location_id year protected_area\n", + "0 seamounts 2.690810e+07 GLOB 2011 3.438552e+06" ] }, - "execution_count": 223, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", - "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", - "seamounts_protected.head(2)" + "# Bring 'protected_area' field from seamouts_wdpa_global to seamounts_global\n", + "seamounts_global = seamounts_global.merge(seamounts_wdpa_global[['habitat_name', 'protected_area']], how='left', on='habitat_name')\n", + "seamounts_global" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country stats" ] }, { "cell_type": "code", - "execution_count": 224, + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso_code' values and create separate rows only for those with multiple values\n", + "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", + "split_rows = seamounts_eez[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_eez[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -1273,7 +1179,6 @@ " total_area\n", " habitat_name\n", " year\n", - " protected_area\n", " \n", " \n", " \n", @@ -1283,7 +1188,6 @@ " 1.483098e+07\n", " seamounts\n", " 2011\n", - " 226253.932283\n", " \n", " \n", " 1\n", @@ -1291,7 +1195,6 @@ " 9.556242e+03\n", " seamounts\n", " 2011\n", - " NaN\n", " \n", " \n", " 2\n", @@ -1299,7 +1202,6 @@ " 3.110730e+05\n", " seamounts\n", " 2011\n", - " 38773.659962\n", " \n", " \n", " 3\n", @@ -1307,7 +1209,6 @@ " 3.551629e+05\n", " seamounts\n", " 2011\n", - " NaN\n", " \n", " \n", " 4\n", @@ -1315,35 +1216,107 @@ " 6.215895e+03\n", " seamounts\n", " 2011\n", - " NaN\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 88\n", + " VNM\n", + " 4.421338e+04\n", + " seamounts\n", + " 2011\n", + " \n", + " \n", + " 89\n", + " VUT\n", + " 1.199475e+05\n", + " seamounts\n", + " 2011\n", + " \n", + " \n", + " 90\n", + " WSM\n", + " 4.117997e+04\n", + " seamounts\n", + " 2011\n", + " \n", + " \n", + " 91\n", + " YEM\n", + " 6.294974e+04\n", + " seamounts\n", + " 2011\n", + " \n", + " \n", + " 92\n", + " ZAF\n", + " 9.946306e+04\n", + " seamounts\n", + " 2011\n", " \n", " \n", "\n", + "

93 rows × 4 columns

\n", "" ], "text/plain": [ - " location_id total_area habitat_name year protected_area\n", - "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", - "1 AGO 9.556242e+03 seamounts 2011 NaN\n", - "2 ARG 3.110730e+05 seamounts 2011 38773.659962\n", - "3 ATA 3.551629e+05 seamounts 2011 NaN\n", - "4 ATG 6.215895e+03 seamounts 2011 NaN" + " location_id total_area habitat_name year\n", + "0 ABNJ 1.483098e+07 seamounts 2011\n", + "1 AGO 9.556242e+03 seamounts 2011\n", + "2 ARG 3.110730e+05 seamounts 2011\n", + "3 ATA 3.551629e+05 seamounts 2011\n", + "4 ATG 6.215895e+03 seamounts 2011\n", + ".. ... ... ... ...\n", + "88 VNM 4.421338e+04 seamounts 2011\n", + "89 VUT 1.199475e+05 seamounts 2011\n", + "90 WSM 4.117997e+04 seamounts 2011\n", + "91 YEM 6.294974e+04 seamounts 2011\n", + "92 ZAF 9.946306e+04 seamounts 2011\n", + "\n", + "[93 rows x 4 columns]" ] }, - "execution_count": 224, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# join 'protected area' field in seamounts_protected to seamounts_iso based on location_id\n", - "seamounts_iso2 = seamounts_iso.merge(seamounts_protected, left_on='location_id', right_on='location_id', how='left')\n", - "seamounts_iso2.head(5)" + "# Get area of seamounts per iso\n", + "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", + "seamounts_iso['habitat_name'] = 'seamounts'\n", + "seamounts_iso['year'] = 2011\n", + "seamounts_iso " + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso_code' values in seamounts_wdpa and create separate rows only for those with multiple values\n", + "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", + "split_rows = seamounts_wdpa[mask].copy()\n", + "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", + "split_rows = split_rows.explode('PARENT_ISO')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_wdpa[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" ] }, { "cell_type": "code", - "execution_count": 225, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -1368,9 +1341,6 @@ " \n", " \n", " location_id\n", - " total_area\n", - " habitat_name\n", - " year\n", " protected_area\n", " \n", " \n", @@ -1378,70 +1348,38 @@ " \n", " 0\n", " ABNJ\n", - " 1.483098e+07\n", - " seamounts\n", - " 2011\n", " 226253.932283\n", " \n", " \n", " 1\n", - " AGO\n", - " 9.556242e+03\n", - " seamounts\n", - " 2011\n", - " 0.000000\n", - " \n", - " \n", - " 2\n", " ARG\n", - " 3.110730e+05\n", - " seamounts\n", - " 2011\n", " 38773.659962\n", " \n", - " \n", - " 3\n", - " ATA\n", - " 3.551629e+05\n", - " seamounts\n", - " 2011\n", - " 0.000000\n", - " \n", - " \n", - " 4\n", - " ATG\n", - " 6.215895e+03\n", - " seamounts\n", - " 2011\n", - " 0.000000\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " location_id total_area habitat_name year protected_area\n", - "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", - "1 AGO 9.556242e+03 seamounts 2011 0.000000\n", - "2 ARG 3.110730e+05 seamounts 2011 38773.659962\n", - "3 ATA 3.551629e+05 seamounts 2011 0.000000\n", - "4 ATG 6.215895e+03 seamounts 2011 0.000000" + " location_id protected_area\n", + "0 ABNJ 226253.932283\n", + "1 ARG 38773.659962" ] }, - "execution_count": 225, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# set to 0 the protected_area values that are NaN\n", - "seamounts_iso2['protected_area'] = seamounts_iso2['protected_area'].fillna(0)\n", - "seamounts_iso2.head(5)" + "# Calculate area protected per iso\n", + "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", + "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", + "seamounts_protected.head(2)" ] }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 73, "metadata": {}, "outputs": [ { @@ -1465,57 +1403,61 @@ " \n", " \n", " \n", - " habitat_name\n", - " protected_area\n", - " total_area\n", " location_id\n", + " total_area\n", + " habitat_name\n", " year\n", + " protected_area\n", " \n", " \n", " \n", " \n", " 0\n", + " ABNJ\n", + " 1.483098e+07\n", " seamounts\n", - " 3.186900e+06\n", - " 2.729113e+07\n", - " GLOB\n", " 2011\n", + " 226253.932283\n", + " \n", + " \n", + " 1\n", + " AGO\n", + " 9.556242e+03\n", + " seamounts\n", + " 2011\n", + " NaN\n", " \n", " \n", "\n", "" ], "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 seamounts 3.186900e+06 2.729113e+07 GLOB 2011" + " location_id total_area habitat_name year protected_area\n", + "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", + "1 AGO 9.556242e+03 seamounts 2011 NaN" ] }, - "execution_count": 226, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Calculate global stats for seamounts\n", - "seamounts_global = seamounts_iso2.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "seamounts_global['location_id'] = 'GLOB'\n", - "seamounts_global['year'] = 2011\n", - "seamounts_global" + "# Join seamounts_iso and seamounts_protected\n", + "seamounts_iso = seamounts_iso.merge(seamounts_protected, how='left', on='location_id')\n", + "seamounts_iso.head(2)" ] }, { - "cell_type": "code", - "execution_count": 227, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Concatenate the global stats to the seamounts dataframe\n", - "seamounts_iso2 = pd.concat([seamounts_iso2, seamounts_global])" + "### Regions stats" ] }, { "cell_type": "code", - "execution_count": 228, + "execution_count": 74, "metadata": {}, "outputs": [ { @@ -1618,13 +1560,13 @@ "6 WA seamounts 2487.428050 9.384765e+04 2011" ] }, - "execution_count": 228, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "seamounts_regions = seamounts_iso2.copy()\n", + "seamounts_regions = seamounts_iso.copy()\n", "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n", "\n", "# Calculate stats for each region\n", @@ -1636,52 +1578,21 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ - "# Concatenate region stats to seamounts_iso2\n", - "seamounts_iso2 = pd.concat([seamounts_iso2, seamounts_regions])" - ] - }, - { - "cell_type": "code", - "execution_count": 230, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ABNJ', 'AGO', 'ARG', 'ATA', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n", - " 'BRB', 'BRN', 'CAN', 'CHL', 'CHN', 'CIV', 'COL', 'COM', 'CPV',\n", - " 'CRI', 'CUB', 'DMA', 'DNK', 'DOM', 'ECU', 'ESH', 'ESP', 'FJI',\n", - " 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC', 'HND',\n", - " 'HTI', 'IDN', 'IND', 'ISL', 'ITA', 'JAM', 'JPN', 'KIR', 'KOR',\n", - " 'LBR', 'LBY', 'LKA', 'MAR', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT',\n", - " 'MMR', 'MOZ', 'MUS', 'MYS', 'NAM', 'NIC', 'NLD', 'NOR', 'NRU',\n", - " 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRK',\n", - " 'PRT', 'RUS', 'SEN', 'SLB', 'SOM', 'STP', 'SYC', 'TON', 'TUR',\n", - " 'TUV', 'TWN', 'TZA', 'URY', 'USA', 'VCT', 'VEN', 'VNM', 'VUT',\n", - " 'WSM', 'YEM', 'ZAF', 'GLOB', 'AF', 'AS', 'AT', 'EU', 'NA', 'SA',\n", - " 'WA'], dtype=object)" - ] - }, - "execution_count": 230, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_iso2['location_id'].unique()" + "# Concatenate region and global stats to seamounts_iso2\n", + "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])" ] }, { "cell_type": "code", - "execution_count": 231, + "execution_count": 77, "metadata": {}, "outputs": [], "source": [ - "seamounts_iso2.to_csv(path_out + \"habitats/seamounts.csv\", index=False)" + "seamounts_all.to_csv(path_out + \"habitats/seamounts.csv\", index=False)" ] }, { @@ -1693,7 +1604,7 @@ }, { "cell_type": "code", - "execution_count": 232, + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ @@ -1702,7 +1613,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -1751,7 +1662,7 @@ "0 mangroves 61287.20375 147358.990971 GLOB 2020" ] }, - "execution_count": 233, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } @@ -1766,7 +1677,7 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": 81, "metadata": {}, "outputs": [], "source": [ @@ -1776,7 +1687,7 @@ }, { "cell_type": "code", - "execution_count": 235, + "execution_count": 82, "metadata": {}, "outputs": [ { @@ -1870,7 +1781,7 @@ "5 WA mangroves 27.83000 173.620938 2020" ] }, - "execution_count": 235, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -1888,7 +1799,7 @@ }, { "cell_type": "code", - "execution_count": 236, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ @@ -1896,40 +1807,6 @@ "mangroves = pd.concat([mangroves, mangroves_regions])" ] }, - { - "cell_type": "code", - "execution_count": 237, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ZAF', 'YEM', 'WSM', 'WLF', 'VUT', 'VNM', 'VIR', 'VGB', 'VEN',\n", - " 'VCT', 'USA', 'TZA', 'TWN', 'TUV', 'TTO', 'TON', 'TLS', 'THA',\n", - " 'TGO', 'TCA', 'SYC', 'SXM', 'SUR', 'STP', 'SOM', 'SLV', 'SLE',\n", - " 'SLB', 'SGP', 'SEN', 'SDN', 'SAU', 'QAT', 'PYF', 'PRI', 'PNG',\n", - " 'PLW', 'PHL', 'PER', 'PAN', 'PAK', 'OMN', 'NZL', 'NIC', 'NGA',\n", - " 'NCL', 'MYT', 'MYS', 'MUS', 'MTQ', 'MRT', 'MOZ', 'MMR', 'MHL',\n", - " 'MEX', 'MDV', 'MDG', 'MAF', 'LKA', 'LCA', 'LBR', 'KNA', 'KIR',\n", - " 'KHM', 'KEN', 'JPN', 'JAM', 'IRN', 'IND', 'IDN', 'HTI', 'HND',\n", - " 'GUY', 'GUM', 'GUF', 'GTM', 'GRD', 'GNQ', 'GNB', 'GMB', 'GLP',\n", - " 'GIN', 'GHA', 'GAB', 'FSM', 'FJI', 'ERI', 'EGY', 'ECU', 'DOM',\n", - " 'DMA', 'DJI', 'CYM', 'CUW', 'CUB', 'CRI', 'COM', 'COL', 'COK',\n", - " 'COG', 'COD', 'CMR', 'CIV', 'CHN', 'BRN', 'BRB', 'BRA', 'BMU',\n", - " 'BLZ', 'BHS', 'BHR', 'BGD', 'BES', 'BEN', 'AUS', 'ATG', 'ATF',\n", - " 'ASM', 'ARE', 'AIA', 'AGO', 'ABW', 'GLOB', 'AF', 'AS', 'NA', 'PO',\n", - " 'SA', 'WA'], dtype=object)" - ] - }, - "execution_count": 237, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mangroves['location_id'].unique()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1939,7 +1816,7 @@ }, { "cell_type": "code", - "execution_count": 238, + "execution_count": 85, "metadata": {}, "outputs": [ { @@ -1974,40 +1851,40 @@ " \n", " 0\n", " ABNJ\n", - " 421.629373\n", - " 1874.982214\n", + " 421.629372679904\n", + " 1874.98221422617\n", " cold-water corals\n", " 2023\n", " \n", " \n", " 1\n", " AGO\n", - " 0.000000\n", - " 3.395671\n", + " 0\n", + " 3.39567053773998\n", " cold-water corals\n", " 2023\n", " \n", " \n", " 2\n", " ALB\n", - " 0.000000\n", - " 5.986479\n", + " 0\n", + " 5.98647948252716\n", " cold-water corals\n", " 2023\n", " \n", " \n", " 3\n", " ARG\n", - " 6.984226\n", - " 61.826344\n", + " 6.98422602063557\n", + " 61.8263440651753\n", " cold-water corals\n", " 2023\n", " \n", " \n", " 4\n", " ATG\n", - " 0.000000\n", - " 0.997747\n", + " 0\n", + " 0.997746538545076\n", " cold-water corals\n", " 2023\n", " \n", @@ -2022,7 +1899,7 @@ " \n", " 1\n", " AS\n", - " 21378.750000\n", + " 21378.75\n", " 74629.194446\n", " mangroves\n", " 2020\n", @@ -2030,7 +1907,7 @@ " \n", " 2\n", " NA\n", - " 2055.400000\n", + " 2055.4\n", " 2329.115505\n", " mangroves\n", " 2020\n", @@ -2038,7 +1915,7 @@ " \n", " 3\n", " PO\n", - " 6.720000\n", + " 6.72\n", " 6.723018\n", " mangroves\n", " 2020\n", @@ -2046,7 +1923,7 @@ " \n", " 4\n", " SA\n", - " 27811.533750\n", + " 27811.53375\n", " 40875.932666\n", " mangroves\n", " 2020\n", @@ -2054,7 +1931,7 @@ " \n", " 5\n", " WA\n", - " 27.830000\n", + " 27.83\n", " 173.620938\n", " mangroves\n", " 2020\n", @@ -2065,40 +1942,40 @@ "" ], "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 421.629373 1874.982214 cold-water corals 2023\n", - "1 AGO 0.000000 3.395671 cold-water corals 2023\n", - "2 ALB 0.000000 5.986479 cold-water corals 2023\n", - "3 ARG 6.984226 61.826344 cold-water corals 2023\n", - "4 ATG 0.000000 0.997747 cold-water corals 2023\n", - ".. ... ... ... ... ...\n", - "1 AS 21378.750000 74629.194446 mangroves 2020\n", - "2 NA 2055.400000 2329.115505 mangroves 2020\n", - "3 PO 6.720000 6.723018 mangroves 2020\n", - "4 SA 27811.533750 40875.932666 mangroves 2020\n", - "5 WA 27.830000 173.620938 mangroves 2020\n", + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n", + "1 AGO 0 3.39567053773998 cold-water corals 2023\n", + "2 ALB 0 5.98647948252716 cold-water corals 2023\n", + "3 ARG 6.98422602063557 61.8263440651753 cold-water corals 2023\n", + "4 ATG 0 0.997746538545076 cold-water corals 2023\n", + ".. ... ... ... ... ...\n", + "1 AS 21378.75 74629.194446 mangroves 2020\n", + "2 NA 2055.4 2329.115505 mangroves 2020\n", + "3 PO 6.72 6.723018 mangroves 2020\n", + "4 SA 27811.53375 40875.932666 mangroves 2020\n", + "5 WA 27.83 173.620938 mangroves 2020\n", "\n", "[628 rows x 5 columns]" ] }, - "execution_count": 238, + "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Concatenate the dataframes\n", - "habitats_all = pd.concat([habitats, seamounts_iso2, mangroves])\n", + "habitats_all = pd.concat([habitats, seamounts_all, mangroves])\n", "habitats_all" ] }, { "cell_type": "code", - "execution_count": 239, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ - "habitats_all.to_csv(path_out + \"habitats/habitats.csv\", index=False)" + "habitats_all.to_csv(path_out + \"tables/habitats2.csv\", index=False)" ] } ], diff --git a/data/notebooks/layers.ipynb b/data/notebooks/layers.ipynb index 27bbc7be..232b762e 100644 --- a/data/notebooks/layers.ipynb +++ b/data/notebooks/layers.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -1164,6 +1164,124 @@ "source": [ "marine_areas2['location_type'].unique()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean WDPA dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/Users/sofia/Documents/Repos/skytruth-30x30/data/notebooks/layers.ipynb Cell 42\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m# Read WDPA data\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m poly1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39;49mread_file(path_in \u001b[39m+\u001b[39;49m \u001b[39m\"\u001b[39;49m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 3\u001b[0m point1 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m poly2 \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mread_file(path_in \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:281\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 279\u001b[0m path_or_bytes \u001b[39m=\u001b[39m filename\n\u001b[0;32m--> 281\u001b[0m \u001b[39mreturn\u001b[39;00m _read_file_fiona(\n\u001b[1;32m 282\u001b[0m path_or_bytes, from_bytes, bbox\u001b[39m=\u001b[39;49mbbox, mask\u001b[39m=\u001b[39;49mmask, rows\u001b[39m=\u001b[39;49mrows, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 283\u001b[0m )\n\u001b[1;32m 285\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 286\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39munknown engine \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mengine\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/io/file.py:379\u001b[0m, in \u001b[0;36m_read_file_fiona\u001b[0;34m(path_or_bytes, from_bytes, bbox, mask, rows, where, **kwargs)\u001b[0m\n\u001b[1;32m 375\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(\n\u001b[1;32m 376\u001b[0m [record[\u001b[39m\"\u001b[39m\u001b[39mproperties\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mfor\u001b[39;00m record \u001b[39min\u001b[39;00m f_filt], columns\u001b[39m=\u001b[39mcolumns\n\u001b[1;32m 377\u001b[0m )\n\u001b[1;32m 378\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 379\u001b[0m df \u001b[39m=\u001b[39m GeoDataFrame\u001b[39m.\u001b[39;49mfrom_features(\n\u001b[1;32m 380\u001b[0m f_filt, crs\u001b[39m=\u001b[39;49mcrs, columns\u001b[39m=\u001b[39;49mcolumns \u001b[39m+\u001b[39;49m [\u001b[39m\"\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n\u001b[1;32m 381\u001b[0m )\n\u001b[1;32m 382\u001b[0m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m datetime_fields:\n\u001b[1;32m 383\u001b[0m as_dt \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mto_datetime(df[k], errors\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/geopandas/geodataframe.py:635\u001b[0m, in \u001b[0;36mGeoDataFrame.from_features\u001b[0;34m(cls, features, crs, columns)\u001b[0m\n\u001b[1;32m 632\u001b[0m features_lst \u001b[39m=\u001b[39m features\n\u001b[1;32m 634\u001b[0m rows \u001b[39m=\u001b[39m []\n\u001b[0;32m--> 635\u001b[0m \u001b[39mfor\u001b[39;00m feature \u001b[39min\u001b[39;00m features_lst:\n\u001b[1;32m 636\u001b[0m \u001b[39m# load geometry\u001b[39;00m\n\u001b[1;32m 637\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(feature, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[1;32m 638\u001b[0m feature \u001b[39m=\u001b[39m feature\u001b[39m.\u001b[39m__geo_interface__\n", + "File \u001b[0;32mfiona/ogrext.pyx:1739\u001b[0m, in \u001b[0;36mfiona.ogrext.Iterator.__next__\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/ogrext.pyx:389\u001b[0m, in \u001b[0;36mfiona.ogrext.FeatureBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:193\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build_from_feature\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:249\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:169\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildMultiPolygon\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:243\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:157\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildPolygon\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:152\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder._buildParts\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mfiona/_geometry.pyx:259\u001b[0m, in \u001b[0;36mfiona._geometry.GeomBuilder.build\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/mambaforge/envs/skytruth/lib/python3.11/site-packages/fiona/model.py:201\u001b[0m, in \u001b[0;36mGeometry.from_dict\u001b[0;34m(cls, ob, **kwargs)\u001b[0m\n\u001b[1;32m 196\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_delegate \u001b[39m=\u001b[39m _Geometry(\n\u001b[1;32m 197\u001b[0m coordinates\u001b[39m=\u001b[39mcoordinates, \u001b[39mtype\u001b[39m\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m, geometries\u001b[39m=\u001b[39mgeometries\n\u001b[1;32m 198\u001b[0m )\n\u001b[1;32m 199\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mdata)\n\u001b[0;32m--> 201\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 202\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfrom_dict\u001b[39m(\u001b[39mcls\u001b[39m, ob\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 203\u001b[0m \u001b[39mif\u001b[39;00m ob \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 204\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mdict\u001b[39m(\u001b[39mgetattr\u001b[39m(ob, \u001b[39m\"\u001b[39m\u001b[39m__geo_interface__\u001b[39m\u001b[39m\"\u001b[39m, ob))\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Read WDPA data\n", + "poly1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n", + "point1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n", + "poly2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n", + "point2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n", + "poly3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n", + "point3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n", + "dataframes = [poly1, point1, poly2, point2, poly3, point3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Convert points to polygons and merge all wdpa in one dataset**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18613" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Calculate radius based on REP_AREA\n", + "def calculate_radius(rep_area):\n", + " return (rep_area / 3.14159265358979323846) ** 0.5\n", + "\n", + "# Iterate through the list and process the desired dataframes\n", + "for idx in [1, 3, 5]:\n", + " # Get the dataframe at the specified index\n", + " gdf = dataframes[idx]\n", + "\n", + " # Reproject in Mollweide\n", + " gdf = gdf.to_crs('ESRI:54009')\n", + "\n", + " # Transform the reported area from square kilometers to square meters\n", + " gdf['REP_AREA_m'] = gdf['REP_AREA'] * 1000000\n", + "\n", + " # Create the \"radius\" column by applying the calculate_radius function to the \"REP_AREA\" column\n", + " gdf['radius'] = gdf['REP_AREA_m'].apply(calculate_radius)\n", + "\n", + " # Create buffers around the points using the \"radius\" column\n", + " gdf_buffered = gdf.copy()\n", + " gdf_buffered['geometry'] = gdf.apply(lambda row: row.geometry.buffer(row['radius']), axis=1)\n", + "\n", + " # Reproject back to WGS84\n", + " gdf_buffered = gdf_buffered.to_crs('EPSG:4326')\n", + "\n", + " # Remove rows with invalid geometries\n", + " gdf_buffered = gdf_buffered[gdf_buffered['geometry'].is_valid]\n", + " \n", + " # Update the original dataframe with the buffered data\n", + " dataframes[idx] = gdf_buffered\n", + "\n", + "# Merge all dataframes\n", + "merged_mpa_all = pd.concat(dataframes)\n", + "len(merged_mpa_all)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the wdpa dataframe as a shapefile\n", + "merged_mpa_all.to_file(path_out + \"/wdpa/merged_wdpa_all.shp\")" + ] } ], "metadata": { diff --git a/data/notebooks/mpas_table.ipynb b/data/notebooks/mpas_table.ipynb new file mode 100644 index 00000000..b846ab4c --- /dev/null +++ b/data/notebooks/mpas_table.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read relevant datasets: MPAtlas, WDPA, and ProtectedSeas" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Read mpatlas data\n", + "mpatlas = gpd.read_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\")\n", + "mpatlas = mpatlas.drop_duplicates(subset=['wdpa_id', 'designation','location_id','establishment_stage', 'protection_level','year'], keep='first')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ps = gpd.read_file(path_out + \"/protectedseas/protectedseas.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "wdpa = gpd.read_file(path_out + \"/wdpa/merged_wdpa_all.shp\")\n", + "wdpa = wdpa[['WDPA_PID', 'NAME','PA_DEF', 'GIS_M_AREA','PARENT_ISO']].rename(columns={'WDPA_PID': 'wdpa_id', 'NAME': 'name', 'PA_DEF':'protection_type', 'GIS_M_AREA': 'area', 'PARENT_ISO': 'location_id'})\n", + "wdpa['protection_type'] = wdpa['protection_type'].astype(int).replace({1: 'mpa', 0: 'oecm'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combine information from different tables" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Add protected_level info from mpatlas and protectedseas to wdpa df\n", + "table_prot = wdpa.merge(mpatlas[['wdpa_id','area_km2','protection_level']], on='wdpa_id', how='left').rename(columns={'area_km2':'area_mpatlas','protection_level': 'mpatlas_prot_lvl'})\n", + "table_prot = table_prot.merge(ps[['wdpa_id','FPS_cat', 'total_area']], on='wdpa_id', how='left').rename(columns={'FPS_cat': 'fpl', 'total_area': 'area_ps'})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "table_prot['area'] = table_prot['area_mpatlas'].combine_first(table_prot['area_ps']).combine_first(table_prot['area'])\n", + "table_prot = table_prot.drop(columns=['area_mpatlas', 'area_ps'])\n", + "table_prot = table_prot.drop(columns={'name', 'protection_type'})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Add establishment info to wdpa df\n", + "table_est = wdpa.merge(mpatlas[['wdpa_id','establishment_stage', 'year']], on='wdpa_id', how='left')\n", + "table_est = table_est.drop(columns={'area', 'location_id'})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Save tables as csv\n", + "table_prot.to_csv(path_out + \"/tables/mpas_table.csv\", index=False)\n", + "table_est.to_csv(path_out + \"/tables/mpas_table_establishment.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/mpatlas_stats.ipynb b/data/notebooks/mpatlas_stats.ipynb new file mode 100644 index 00000000..afdf6408 --- /dev/null +++ b/data/notebooks/mpatlas_stats.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw\"\n", + "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read and prepare data" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# Read data from MPAtlas\n", + "mpatlas = gpd.read_file(path_in + \"/mpatlas_assess_zone.geojson\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Fill missing wdpa_pid with the wdpa_id\n", + "mpatlas['wdpa_pid'] = mpatlas['wdpa_pid'].fillna(mpatlas['wdpa_id'])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column with protection level reclassified\n", + "def map_protection_level(value):\n", + " if value in [\"full\", \"high\"]:\n", + " return \"fully or highly protected\"\n", + " else:\n", + " return \"less protected or unknown\"\n", + "\n", + "# Create a new column based on column1\n", + "mpatlas['protection_level'] = mpatlas['protection_mpaguide_level'].apply(map_protection_level)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# replace proposed/committed with proposed or committed\n", + "mpatlas['establishment_stage'] = mpatlas['establishment_stage'].replace(['proposed/committed'], 'proposed or committed')" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Take only year from 'proposed_date', 'designated_date', 'implemented_date'\n", + "mpatlas['proposed_date'] = mpatlas['proposed_date'].str[:4].astype('Int64')\n", + "mpatlas['designated_date'] = mpatlas['designated_date'].str[:4].astype('Int64')\n", + "mpatlas['implemented_date'] = mpatlas['implemented_date'].str[:4].astype('Int64')\n", + "\n", + "# Create column 'year' with the most recent year from 'proposed_date', 'designated_date', 'implemented_date'\n", + "mpatlas['year'] = mpatlas[['proposed_date', 'designated_date', 'implemented_date']].max(axis=1)\n", + "\n", + "# Convert year to int to be able to save it later (Int64 not allowed)\n", + "mpatlas['year'].fillna(0, inplace=True)\n", + "mpatlas['year'] = mpatlas['year'].astype(int)\n", + "mpatlas['year'] = mpatlas['year'].replace(0, pd.NaT)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area in km2\n", + "mpatlas.to_crs('ESRI:54009', inplace=True)\n", + "mpatlas['area_km2'] = mpatlas['geometry'].area / 10**6\n", + "mpatlas.to_crs('EPSG:4326', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep relevant columns \n", + "mpatlas2 = mpatlas[['wdpa_pid', 'name', 'designation', 'sovereign', 'area_km2', 'establishment_stage', 'protection_level', 'year', 'geometry']].rename(columns={'sovereign': 'location_id', 'wdpa_pid': 'wdpa_id'})\n", + "\n", + "# Save as geojson (to keep full names)\n", + "mpatlas2.to_file(path_out + \"/mpatlas/mpatlas_assess_zone_cleaned.geojson\", driver='GeoJSON')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# For those with multiple countries, split them\n", + "mpatlas_iso = mpatlas2.copy()\n", + "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(';')\n", + "mpatlas_iso = mpatlas_iso.explode('location_id')\n", + "mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(':')\n", + "mpatlas_iso = mpatlas_iso.explode('location_id')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global stats" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate global area per protection level\n", + "prot_global = mpatlas2.groupby('protection_level').agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "prot_global['location_id'] = 'GLOB'\n", + "prot_global['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate global area per establishment stage\n", + "stage_global = mpatlas2.groupby(['establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "stage_global['location_id'] = 'GLOB'\n", + "stage_global['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country stats" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "prot_iso = mpatlas_iso.groupby(['location_id', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "prot_iso['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "stage_iso = mpatlas_iso.groupby(['location_id', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area'})\n", + "stage_iso['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Region stats" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", + "\n", + "# Add region column to mpatlas_iso\n", + "mpatlas_iso['regions'] = mpatlas_iso['location_id'].map(country_to_region)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area per protection level per region\n", + "prot_region = mpatlas_iso.groupby(['regions', 'protection_level']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n", + "prot_region['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate area per establishment stage per region\n", + "stage_region = mpatlas_iso.groupby(['regions', 'establishment_stage']).agg({'area_km2': 'sum'}).reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})\n", + "stage_region['last_updated'] = datetime.now().year" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate all dataframes for protection stats and establishment stage stats\n", + "prot = pd.concat([prot_iso, prot_global, prot_region], ignore_index=True)\n", + "stage = pd.concat([stage_iso, stage_global, stage_region], ignore_index=True)\n", + "prot.to_csv(path_out + \"/tables/mpatlas_protection_level.csv\", index=False)\n", + "stage.to_csv(path_out + \"/tables/mpatlas_establishment_stage.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "skytruth", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/protectedseas.ipynb b/data/notebooks/protectedseas.ipynb index e5a3d879..54ae3293 100644 --- a/data/notebooks/protectedseas.ipynb +++ b/data/notebooks/protectedseas.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -36,348 +36,63 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SITE_IDSITE_NAMEgeometry
0AIAG1Antigua & Barbuda EEZ (0-200NM)POLYGON ((-58.87762 19.58008, -58.82167 19.502...
1AIAG10Low Bay SanctuaryPOLYGON ((-61.91090 17.57960, -61.91096 17.579...
\n", - "
" - ], - "text/plain": [ - " SITE_ID SITE_NAME \\\n", - "0 AIAG1 Antigua & Barbuda EEZ (0-200NM) \n", - "1 AIAG10 Low Bay Sanctuary \n", - "\n", - " geometry \n", - "0 POLYGON ((-58.87762 19.58008, -58.82167 19.502... \n", - "1 POLYGON ((-61.91090 17.57960, -61.91096 17.579... " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Import shp containing geometries\n", - "ps = gpd.read_file(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp\")\n", - "ps.head(2)" + "ps = gpd.read_file(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['site_id', 'site_name', 'url', 'country', 'state', 'managing_authority',\n", - " 'designation', 'purpose', 'restrictions', 'allowed', 'season',\n", - " 'effective_from', 'effective_to', 'report_violations', 'latest_updates',\n", - " 'protection_focus', 'species_of_concern',\n", - " 'removal_of_marine_life_is_prohibited', 'entry_prohibited',\n", - " 'speed_restricted', 'discharge_prohibited', 'diving_prohibited',\n", - " 'removal_of_historic_artifacts_prohibited', 'stopping_prohibited',\n", - " 'anchoring_prohibited', 'landing_prohibited', 'dragging_prohibited',\n", - " 'dredging_prohibited', 'industrial_or_mineral_exploration_prohibited',\n", - " 'construction_prohibited', 'drilling_prohibited',\n", - " 'overflight_or_drones_prohibited', 'tribal', 'bottom_trawl',\n", - " 'gillnetting', 'hook_n_line', 'trolling', 'nets', 'traps_n_pots',\n", - " 'spear_fishing', 'longlining', 'misc_gear', 'recreational_restrictions',\n", - " 'commercial_restrictions', 'wdpa_id', 'iucn_cat', 'year_est',\n", - " 'gov_level', 'inshore_only', 'coastline_match', 'boundary_source',\n", - " 'modification_level', 'total_area'],\n", - " dtype='object')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Import csv containing information\n", - "protectedseas = pd.read_csv(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv\")\n", - "protectedseas.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(21197, 53)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "protectedseas.shape" + "protectedseas = pd.read_csv(path_in + \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(10879, 53)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Keep only rows in which wdpa_id is not null and it's different than 0\n", "protectedseas = protectedseas[protectedseas['wdpa_id'].notna()]\n", - "protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']\n", - "protectedseas.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(10879, 56)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Join csv with shapefile and keep only wdpa geometries\n", - "ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')\n", - "ps_gdf.shape" + "protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_idsite_namecountrywdpa_idFPStotal_areageometry
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...
2AIAG13Palastar Reef SanctuaryAntigua and Barbuda5555871955.022.754514POLYGON ((-61.73350 17.52440, -61.78360 17.497...
3AIAG14Palaster Reef Marine National ParkAntigua and Barbuda21.03.207554POLYGON ((-61.74275 17.51737, -61.77440 17.523...
4AIAG16Two Foot Bay SanctuaryAntigua and Barbuda5555871985.047.933200POLYGON ((-61.70064 17.66752, -61.70039 17.667...
\n", - "
" - ], - "text/plain": [ - " site_id site_name country wdpa_id \\\n", - "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", - "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", - "2 AIAG13 Palastar Reef Sanctuary Antigua and Barbuda 555587195 \n", - "3 AIAG14 Palaster Reef Marine National Park Antigua and Barbuda 2 \n", - "4 AIAG16 Two Foot Bay Sanctuary Antigua and Barbuda 555587198 \n", - "\n", - " FPS total_area geometry \n", - "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", - "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", - "2 5.0 22.754514 POLYGON ((-61.73350 17.52440, -61.78360 17.497... \n", - "3 1.0 3.207554 POLYGON ((-61.74275 17.51737, -61.77440 17.523... \n", - "4 5.0 47.933200 POLYGON ((-61.70064 17.66752, -61.70039 17.667... " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]\n", - "ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})\n", - "ps_gdf.head(5)" + "# Join csv with shapefile and keep only wdpa geometries\n", + "ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Antigua and Barbuda', 'USA', 'Albania', 'Netherlands Antilles',\n", - " 'United Arab Emirates', 'Argentina', 'France', 'Australia',\n", - " 'Barbados', 'Belgium', 'Bangladesh', 'Bulgaria', 'Belize',\n", - " 'Brazil', 'Bahamas', 'British Virgin Islands', 'Canada', 'Chile',\n", - " 'Cameroon', 'Colombia', 'Comoros', 'Costa Rica', 'Cuba', 'Cyprus',\n", - " 'Germany', 'Djbouti', 'Dominica', 'Denmark', 'Dominican Republic',\n", - " 'Algeria', 'Ecuador', 'Egypt', 'Spain', 'Estonia', 'Finland',\n", - " 'France, Italy, Monaco', 'French Antilles', nan, 'Gabon',\n", - " 'United Kingdom', 'Grenada', 'Ghana', 'Gibraltar', 'Guinea',\n", - " 'The Gambia', 'Guinea Bissau', 'Greece', 'Guatemala',\n", - " 'French Guyana', 'Honduras', 'Croatia', 'Indonesia', 'Indonesia ',\n", - " 'India', 'Ireland', 'Iceland', 'Israel', 'Italy', 'Jamaica',\n", - " 'Jordan', 'Japan', 'Kenya', 'Cambodia', 'South Korea',\n", - " 'Cayman Islands', 'Lebanon', 'Liberia', 'Saint Lucia', 'Sri Lanka',\n", - " 'Lithuania', 'Latvia', 'Morocco', 'Monaco', 'Madagascar',\n", - " 'Republic of Maldives', 'Malta', 'Myanmar', 'Mozambique',\n", - " 'Mauritania', 'Malaysia', 'Namibia', 'New Caledonia', 'Niue',\n", - " 'The Netherlands', 'Netherlands', 'Norway', 'New Zealand',\n", - " 'Panama', 'British Overseas Territory - Pitcairn', 'Peru',\n", - " 'Philippines', 'Republic of Palau', 'Poland', 'Portugal', 'Qatar',\n", - " 'Russia', 'Senegal',\n", - " 'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland',\n", - " 'Solomon Islands', 'El Salvador', 'São Tomé and Príncipe',\n", - " 'Suriname', 'Slovenia', 'Sweden', 'Seychelles',\n", - " 'Turks and Caicos Islands', 'Thailand', 'East Timor', 'Tonga',\n", - " 'Trinidad and Tobago', 'Tunisia', 'Tanzania', 'Uruguay',\n", - " 'Saint Vincent and the Grenadines', 'Vietnam', 'Yemen',\n", - " 'South Africa', 'USA; Haiti; Jamaica'], dtype=object)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Save txt file with unique names for countries\n", - "countries = ps_gdf['country'].unique()\n", - "countries" + "# Keep only columns of interest\n", + "ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]\n", + "ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ + "# ProtectedSeas only provides country names, not country codes. We need to add country codes to the dataframe\n", "country_iso_dict = {\n", " 'Antigua and Barbuda': 'ATG',\n", " 'USA': 'USA',\n", @@ -503,10 +218,11 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ + "# Add country code to the dataframe\n", "def get_parent_iso(country):\n", " return country_iso_dict.get(country, None)\n", "\n", @@ -516,27 +232,17 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 12, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(10879, 8)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ps_gdf.shape" + "# There is one row with no parent_iso so let's give it the corresponding country code\n", + "ps_gdf.loc[ps_gdf['parent_iso'].isna(), 'parent_iso'] = 'FRA'" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -568,184 +274,110 @@ " total_area\n", " geometry\n", " parent_iso\n", + " FPS_cat\n", " \n", " \n", " \n", " \n", - " 3372\n", - " AIFRCAR91\n", - " pointe Pasquereau - pointe Saint-Vaast\n", - " NaN\n", - " 392107\n", - " 2.0\n", - " 1.642913\n", - " MULTIPOLYGON (((-61.59887 16.27728, -61.59861 ...\n", - " None\n", + " 0\n", + " AIAG10\n", + " Low Bay Sanctuary\n", + " Antigua and Barbuda\n", + " 555587197\n", + " 5.0\n", + " 48.321285\n", + " POLYGON ((-61.91090 17.57960, -61.91096 17.579...\n", + " ATG\n", + " highly\n", + " \n", + " \n", + " 1\n", + " AIAG11\n", + " Nelson's Dockyard National Park\n", + " Antigua and Barbuda\n", + " 555587192\n", + " 1.0\n", + " 40.705369\n", + " POLYGON ((-61.75807 17.03541, -61.73745 17.021...\n", + " ATG\n", + " less\n", " \n", " \n", "\n", "" ], "text/plain": [ - " site_id site_name country wdpa_id FPS \\\n", - "3372 AIFRCAR91 pointe Pasquereau - pointe Saint-Vaast NaN 392107 2.0 \n", + " site_id site_name country wdpa_id \\\n", + "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", + "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", + "\n", + " FPS total_area geometry \\\n", + "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", + "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", "\n", - " total_area geometry parent_iso \n", - "3372 1.642913 MULTIPOLYGON (((-61.59887 16.27728, -61.59861 ... None " + " parent_iso FPS_cat \n", + "0 ATG highly \n", + "1 ATG less " ] }, - "execution_count": 21, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ps_gdf[ps_gdf['parent_iso'].isna()]" + "# Reclassify FPS values\n", + "fps_classes = {\n", + " 1: 'less',\n", + " 2: 'less',\n", + " 3: 'moderately',\n", + " 4: 'highly',\n", + " 5: 'highly'\n", + "}\n", + "\n", + "# Create a new column 'FPS_cat' based on the mapping\n", + "ps_gdf['FPS_cat'] = ps_gdf['FPS'].apply(lambda x: fps_classes.get(x, None))\n", + "ps_gdf.head(2)\n" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_2034/3237173437.py:1: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " ps_gdf['parent_iso'][ps_gdf['parent_iso'].isna()] = 'FRA'\n" - ] - } - ], + "outputs": [], "source": [ - "ps_gdf['parent_iso'][ps_gdf['parent_iso'].isna()] = 'FRA'" + "ps_gdf.to_file(path_out + \"protectedseas/protectedseas.shp\")" ] }, { - "cell_type": "code", - "execution_count": 23, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "len(ps_gdf['country'][ps_gdf['parent_iso'].isna()])" + "### Global stats" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "global_area = ps_gdf.groupby(['FPS_cat'], as_index=False)['total_area'].sum().rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", + "global_area['location_id'] = 'GLOB'" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
3246AIFRA3Pelagos / Pelagos Sanctuary For The Conservati...France, Italy, Monaco3650151.087830.341648POLYGON ((8.76729 44.42728, 8.77066 44.42690, ...FRA;ITA;MCO
10598NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...USA;HTI;JAM
\n", - "
" - ], - "text/plain": [ - " site_id site_name \\\n", - "3246 AIFRA3 Pelagos / Pelagos Sanctuary For The Conservati... \n", - "10598 NWR184 Navassa Island National Wildlife Refuge \n", - "\n", - " country wdpa_id FPS total_area \\\n", - "3246 France, Italy, Monaco 365015 1.0 87830.341648 \n", - "10598 USA; Haiti; Jamaica 555608120 5.0 1473.656765 \n", - "\n", - " geometry parent_iso \n", - "3246 POLYGON ((8.76729 44.42728, 8.77066 44.42690, ... FRA;ITA;MCO \n", - "10598 POLYGON ((-75.00384 18.20744, -75.01645 18.207... USA;HTI;JAM " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "ps_gdf[ps_gdf['parent_iso'].str.contains(';')]" + "### Country stats" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 17, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(10883, 8)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Create a mask for rows with multiple values in 'iso_code'\n", "mask = ps_gdf['parent_iso'].str.contains(';', na=False)\n", @@ -759,942 +391,130 @@ "single_value_rows = ps_gdf[~mask]\n", "\n", "# Concatenate the exploded rows with the single value rows\n", - "ps_new = pd.concat([single_value_rows, split_rows], ignore_index=True)\n", + "ps_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", + " 'region_name': 'Antartica',\n", + " 'country_iso_3s': [\n", + " \"ATA\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']\n", "\n", - "ps_new.shape" + "# Create a new column 'region' based on the mapping\n", + "ps_iso['region'] = ps_iso['parent_iso'].map(country_to_region)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 20, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
\n", - "
" - ], - "text/plain": [ - "Empty GeoDataFrame\n", - "Columns: [site_id, site_name, country, wdpa_id, FPS, total_area, geometry, parent_iso]\n", - "Index: []" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ps_new[ps_new['parent_iso'].str.contains(';')]" + "country_area = ps_iso.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()\n", + "country_area = country_area.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})" ] }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_iso
10880NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...USA
10881NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...HTI
10882NWR184Navassa Island National Wildlife RefugeUSA; Haiti; Jamaica5556081205.01473.656765POLYGON ((-75.00384 18.20744, -75.01645 18.207...JAM
\n", - "
" - ], - "text/plain": [ - " site_id site_name country \\\n", - "10880 NWR184 Navassa Island National Wildlife Refuge USA; Haiti; Jamaica \n", - "10881 NWR184 Navassa Island National Wildlife Refuge USA; Haiti; Jamaica \n", - "10882 NWR184 Navassa Island National Wildlife Refuge USA; Haiti; Jamaica \n", - "\n", - " wdpa_id FPS total_area \\\n", - "10880 555608120 5.0 1473.656765 \n", - "10881 555608120 5.0 1473.656765 \n", - "10882 555608120 5.0 1473.656765 \n", - "\n", - " geometry parent_iso \n", - "10880 POLYGON ((-75.00384 18.20744, -75.01645 18.207... USA \n", - "10881 POLYGON ((-75.00384 18.20744, -75.01645 18.207... HTI \n", - "10882 POLYGON ((-75.00384 18.20744, -75.01645 18.207... JAM " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ps_new[ps_new['country']=='USA; Haiti; Jamaica']" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_isoFPS_cat
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...ATGHighly
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...ATGLess
\n", - "
" - ], - "text/plain": [ - " site_id site_name country wdpa_id \\\n", - "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", - "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", - "\n", - " FPS total_area geometry \\\n", - "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", - "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", - "\n", - " parent_iso FPS_cat \n", - "0 ATG Highly \n", - "1 ATG Less " - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reclassify FPS values\n", - "fps_classes = {\n", - " 1: 'Less',\n", - " 2: 'Less',\n", - " 3: 'Moderately',\n", - " 4: 'Highly',\n", - " 5: 'Highly'\n", - "}\n", - "\n", - "# Create a new column 'FPS_cat' based on the mapping\n", - "ps_new['FPS_cat'] = ps_new['FPS'].apply(lambda x: fps_classes.get(x, None))\n", - "ps_new.head(2)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", - "regions_data = [\n", - " {\n", - " 'region_iso': 'AS',\n", - " 'region_name': 'Asia & Pacific',\n", - " 'country_iso_3s': [\n", - " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", - " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", - " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", - " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AF',\n", - " 'region_name': 'Africa',\n", - " 'country_iso_3s': [\n", - " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", - " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", - " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", - " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'EU',\n", - " 'region_name': 'Europe',\n", - " 'country_iso_3s': [\n", - " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", - " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", - " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", - " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", - " \"UZB\", \"VAT\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'SA',\n", - " 'region_name': 'Latin America & Caribbean',\n", - " 'country_iso_3s': [\n", - " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", - " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", - " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", - " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'PO',\n", - " 'region_name': 'Polar',\n", - " 'country_iso_3s': [\n", - " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'NA',\n", - " 'region_name': 'North America',\n", - " 'country_iso_3s': [\n", - " \"CAN\", \"SPM\", \"USA\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'GL',\n", - " 'region_name': 'Global',\n", - " 'country_iso_3s': ['GLOB']\n", - " },\n", - " {\n", - " 'region_iso': 'WA',\n", - " 'region_name': 'West Asia',\n", - " 'country_iso_3s': [\n", - " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", - " 'region_name': 'Antartica',\n", - " 'country_iso_3s': [\n", - " \"ATA\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'ABNJ', # this region is not in the Protected Planet database\n", - " 'region_name': 'Areas Beyond National Jurisdiction',\n", - " 'country_iso_3s': [\n", - " \"ABNJ\"\n", - " ]\n", - " }\n", - "]\n", - "\n", - "# Convert the region data to a dictionary that maps each country to its region name\n", - "country_to_region = {}\n", - "for region in regions_data:\n", - " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_iso']" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(10883, 10)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ps_new['region'] = ps_new['parent_iso'].map(country_to_region)\n", - "ps_new.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_idsite_namecountrywdpa_idFPStotal_areageometryparent_isoFPS_catregion
0AIAG10Low Bay SanctuaryAntigua and Barbuda5555871975.048.321285POLYGON ((-61.91090 17.57960, -61.91096 17.579...ATGHighlySA
1AIAG11Nelson's Dockyard National ParkAntigua and Barbuda5555871921.040.705369POLYGON ((-61.75807 17.03541, -61.73745 17.021...ATGLessSA
\n", - "
" - ], - "text/plain": [ - " site_id site_name country wdpa_id \\\n", - "0 AIAG10 Low Bay Sanctuary Antigua and Barbuda 555587197 \n", - "1 AIAG11 Nelson's Dockyard National Park Antigua and Barbuda 555587192 \n", - "\n", - " FPS total_area geometry \\\n", - "0 5.0 48.321285 POLYGON ((-61.91090 17.57960, -61.91096 17.579... \n", - "1 1.0 40.705369 POLYGON ((-61.75807 17.03541, -61.73745 17.021... \n", - "\n", - " parent_iso FPS_cat region \n", - "0 ATG Highly SA \n", - "1 ATG Less SA " - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ps_new.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "ps_new.to_file(path_out + \"protectedseas/protectedseas.shp\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Calculate coverage" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idfishing_protection_levelarea
0ALBLess197.296039
1ALBModerately344.028483
2AREHighly6962.497530
3ARGHighly5958.615237
4ARGLess38419.623899
5ARGModerately7546.694893
6ATGHighly168.263212
7ATGLess45.247978
8ATGModerately21.163152
9AUSHighly723824.463100
\n", - "
" - ], - "text/plain": [ - " location_id fishing_protection_level area\n", - "0 ALB Less 197.296039\n", - "1 ALB Moderately 344.028483\n", - "2 ARE Highly 6962.497530\n", - "3 ARG Highly 5958.615237\n", - "4 ARG Less 38419.623899\n", - "5 ARG Moderately 7546.694893\n", - "6 ATG Highly 168.263212\n", - "7 ATG Less 45.247978\n", - "8 ATG Moderately 21.163152\n", - "9 AUS Highly 723824.463100" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ps_country = ps_new.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()\n", - "ps_country = ps_country.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", - "ps_country.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idfishing_protection_levelarea
0AFHighly3.518155e+04
1AFLess2.862643e+05
2AFModerately4.253144e+04
3ASHighly8.846479e+05
4ASLess1.659106e+06
5ASModerately2.825264e+05
6EUHighly2.679297e+06
7EULess3.761438e+06
8EUModerately1.884751e+06
9NAHighly3.961031e+06
\n", - "
" - ], - "text/plain": [ - " location_id fishing_protection_level area\n", - "0 AF Highly 3.518155e+04\n", - "1 AF Less 2.862643e+05\n", - "2 AF Moderately 4.253144e+04\n", - "3 AS Highly 8.846479e+05\n", - "4 AS Less 1.659106e+06\n", - "5 AS Moderately 2.825264e+05\n", - "6 EU Highly 2.679297e+06\n", - "7 EU Less 3.761438e+06\n", - "8 EU Moderately 1.884751e+06\n", - "9 NA Highly 3.961031e+06" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ps_region = ps_new.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()\n", - "ps_region = ps_region.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", - "ps_region.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
fishing_protection_levelarealocation_id
0Highly8.180599e+06GLOB
1Less9.942402e+06GLOB
2Moderately3.177137e+06GLOB
\n", - "
" - ], - "text/plain": [ - " fishing_protection_level area location_id\n", - "0 Highly 8.180599e+06 GLOB\n", - "1 Less 9.942402e+06 GLOB\n", - "2 Moderately 3.177137e+06 GLOB" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ps_global = ps_new.groupby(['FPS_cat'], as_index=False)['total_area'].sum()\n", - "ps_global['location_id'] = 'GLOB'\n", - "ps_global = ps_global.rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})\n", - "ps_global" + "region_area = ps_iso.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()\n", + "region_area = region_area.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 22, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idfishing_protection_levelarea
0ALBLess1.972960e+02
1ALBModerately3.440285e+02
2AREHighly6.962498e+03
3ARGHighly5.958615e+03
4ARGLess3.841962e+04
............
216WAHighly1.024296e+04
217WALess2.129971e+04
218GLOBHighly8.180599e+06
219GLOBLess9.942402e+06
220GLOBModerately3.177137e+06
\n", - "

221 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " location_id fishing_protection_level area\n", - "0 ALB Less 1.972960e+02\n", - "1 ALB Moderately 3.440285e+02\n", - "2 ARE Highly 6.962498e+03\n", - "3 ARG Highly 5.958615e+03\n", - "4 ARG Less 3.841962e+04\n", - ".. ... ... ...\n", - "216 WA Highly 1.024296e+04\n", - "217 WA Less 2.129971e+04\n", - "218 GLOB Highly 8.180599e+06\n", - "219 GLOB Less 9.942402e+06\n", - "220 GLOB Moderately 3.177137e+06\n", - "\n", - "[221 rows x 3 columns]" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ps_coverage = pd.concat([ps_country, ps_region, ps_global], ignore_index=True)\n", - "ps_coverage" + "ps_coverage = pd.concat([country_area, region_area, global_area], ignore_index=True)" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb index 462aec59..921a2d53 100644 --- a/data/notebooks/wdpa_coverage.ipynb +++ b/data/notebooks/wdpa_coverage.ipynb @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -71,9 +71,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6033\n", + "172\n", + "6033\n", + "172\n", + "6033\n", + "171\n" + ] + } + ], "source": [ "print(len(poly1))\n", "print(len(point1))\n", @@ -95,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -114,15 +127,27 @@ " df = df[(df['REP_AREA'] != 0)]\n", " \n", " # Update the original dataframes in the list\n", - " dataframes[i] = df\n", - "\n" + " dataframes[i] = df" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5999\n", + "157\n", + "6018\n", + "123\n", + "6014\n", + "135\n" + ] + } + ], "source": [ "print(len(dataframes[0]))\n", "print(len(dataframes[1]))\n", @@ -141,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -181,14 +206,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\" and those that are OECM" + "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All gdf have the same crs: EPSG:4326\n" + ] + } + ], "source": [ "# Check that all of them have the same crs\n", "first_crs = dataframes[0].crs\n", @@ -199,6 +232,28 @@ " print(\"gdf have different crs\")" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18445" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge dataframes\n", + "merged_mpa = pd.concat(dataframes)\n", + "len(merged_mpa)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -211,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -503,12 +558,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Countries per PARENT_ISO**" + "### Global and country stats" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -518,315 +573,55 @@ "# Create an empty list to store the results\n", "results_list = []\n", "\n", + "# Create a DataFrame to store the global coverage\n", + "global_coverage = pd.DataFrame(columns=['year', 'protection_type', 'location_id', 'cumsum_area'])\n", + "\n", "for year in years_range:\n", " filename = f'protected_dissolved_{year}.shp'\n", " file_path = os.path.join(folder_path, filename)\n", - " \n", + "\n", " if os.path.exists(file_path):\n", " gdf = gpd.read_file(file_path)\n", - " \n", + "\n", + " # Calculate global coverage for each year and protection type\n", + " global_area = gdf['AREA'].sum()\n", + " global_row = pd.DataFrame({'year': [year], 'protection_type': ['MPA+OECM'], 'location_id': ['GLOB'], 'cumsum_area': [global_area]})\n", + " global_coverage = pd.concat([global_coverage, global_row], ignore_index=True)\n", + "\n", " # Split rows with multiple ISO codes into separate rows\n", " processed_df = gdf.copy()\n", " processed_df['PARENT_ISO'] = processed_df['PARENT_ISO'].str.split(';')\n", " processed_df = processed_df.explode('PARENT_ISO')\n", - " \n", + "\n", " # Group by 'PARENT_ISO' and aggregate area\n", - " grouped = processed_df.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n", - " \n", + " iso_area = processed_df.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n", + "\n", " # Create columns to match BE table\n", - " grouped['year'] = year\n", - " grouped['protection_type'] = 'MPA+OECM'\n", - " grouped.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n", - " \n", + " iso_area['year'] = year\n", + " iso_area['protection_type'] = 'MPA+OECM'\n", + " iso_area.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n", + "\n", " # Append the result to the list\n", - " results_list.append(grouped)\n", + " results_list.append(iso_area)\n", "\n", "# Concatenate the list of results into a single DataFrame and convert area to sq.km\n", "final_df = pd.concat(results_list, ignore_index=True)\n", - "final_df['cumsum_area'] =final_df['cumsum_area']/1000000" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Global**" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate global per year and append it to df\n", - "glob_df = final_df.groupby(['year', 'protection_type'])['cumsum_area'].sum().reset_index()\n", - "glob_df['location_id'] = 'GLOB'\n", + "final_df['cumsum_area'] = final_df['cumsum_area'] / 1000000\n", "\n", - "final_df = pd.concat([final_df, glob_df], ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "final_df = final_df[['location_id', 'year', 'protection_type', 'cumsum_area']]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idyearprotection_typecumsum_area
0ABNJ2000MPA+OECM5.941747e+05
130ABNJ2001MPA+OECM5.941747e+05
262ABNJ2002MPA+OECM5.941747e+05
397ABNJ2003MPA+OECM5.941747e+05
533ABNJ2004MPA+OECM5.941747e+05
669ABNJ2005MPA+OECM5.941747e+05
808ABNJ2006MPA+OECM5.941747e+05
949ABNJ2007MPA+OECM5.941747e+05
1090ABNJ2008MPA+OECM5.941747e+05
1233ABNJ2009MPA+OECM5.941747e+05
1377ABNJ2010MPA+OECM9.532081e+05
1525ABNJ2011MPA+OECM8.568056e+05
1675ABNJ2012MPA+OECM1.034105e+06
1825ABNJ2013MPA+OECM1.034105e+06
1976ABNJ2014MPA+OECM1.034105e+06
2127ABNJ2015MPA+OECM1.034105e+06
2278ABNJ2016MPA+OECM1.034105e+06
2429ABNJ2017MPA+OECM2.811452e+06
2580ABNJ2018MPA+OECM2.811452e+06
2731ABNJ2019MPA+OECM2.811452e+06
2882ABNJ2020MPA+OECM2.811452e+06
3034ABNJ2021MPA+OECM2.811452e+06
3186ABNJ2022MPA+OECM2.811452e+06
3338ABNJ2023MPA+OECM2.811452e+06
\n", - "
" - ], - "text/plain": [ - " location_id year protection_type cumsum_area\n", - "0 ABNJ 2000 MPA+OECM 5.941747e+05\n", - "130 ABNJ 2001 MPA+OECM 5.941747e+05\n", - "262 ABNJ 2002 MPA+OECM 5.941747e+05\n", - "397 ABNJ 2003 MPA+OECM 5.941747e+05\n", - "533 ABNJ 2004 MPA+OECM 5.941747e+05\n", - "669 ABNJ 2005 MPA+OECM 5.941747e+05\n", - "808 ABNJ 2006 MPA+OECM 5.941747e+05\n", - "949 ABNJ 2007 MPA+OECM 5.941747e+05\n", - "1090 ABNJ 2008 MPA+OECM 5.941747e+05\n", - "1233 ABNJ 2009 MPA+OECM 5.941747e+05\n", - "1377 ABNJ 2010 MPA+OECM 9.532081e+05\n", - "1525 ABNJ 2011 MPA+OECM 8.568056e+05\n", - "1675 ABNJ 2012 MPA+OECM 1.034105e+06\n", - "1825 ABNJ 2013 MPA+OECM 1.034105e+06\n", - "1976 ABNJ 2014 MPA+OECM 1.034105e+06\n", - "2127 ABNJ 2015 MPA+OECM 1.034105e+06\n", - "2278 ABNJ 2016 MPA+OECM 1.034105e+06\n", - "2429 ABNJ 2017 MPA+OECM 2.811452e+06\n", - "2580 ABNJ 2018 MPA+OECM 2.811452e+06\n", - "2731 ABNJ 2019 MPA+OECM 2.811452e+06\n", - "2882 ABNJ 2020 MPA+OECM 2.811452e+06\n", - "3034 ABNJ 2021 MPA+OECM 2.811452e+06\n", - "3186 ABNJ 2022 MPA+OECM 2.811452e+06\n", - "3338 ABNJ 2023 MPA+OECM 2.811452e+06" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df[final_df['location_id'] == 'ABNJ']" + "# Append global coverage to the final_df\n", + "final_df = pd.concat([final_df, global_coverage], ignore_index=True)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Regions**" + "### Regional stats" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -888,11 +683,6 @@ " ]\n", " },\n", " {\n", - " 'region_iso': 'GL',\n", - " 'region_name': 'Global',\n", - " 'country_iso_3s': ['GLOB']\n", - " },\n", - " {\n", " 'region_iso': 'WA',\n", " 'region_name': 'West Asia',\n", " 'country_iso_3s': [\n", @@ -905,13 +695,6 @@ " 'country_iso_3s': [\n", " \"ATA\"\n", " ]\n", - " },\n", - " {\n", - " 'region_iso': 'ABNJ', # this region is not in the Protected Planet database\n", - " 'region_name': 'Areas Beyond National Jurisdiction',\n", - " 'country_iso_3s': [\n", - " \"ABNJ\"\n", - " ]\n", " }\n", "]\n", "\n", @@ -924,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -957,38 +740,38 @@ " \n", " \n", " 0\n", - " ABNJ\n", + " AF\n", " 2000\n", " MPA+OECM\n", - " 594174.659985\n", + " 94507.122820\n", " \n", " \n", " 1\n", - " ABNJ\n", + " AF\n", " 2001\n", " MPA+OECM\n", - " 594174.659985\n", + " 94807.303100\n", " \n", " \n", " 2\n", - " ABNJ\n", + " AF\n", " 2002\n", " MPA+OECM\n", - " 594174.659985\n", + " 102859.393938\n", " \n", " \n", " 3\n", - " ABNJ\n", + " AF\n", " 2003\n", " MPA+OECM\n", - " 594174.659985\n", + " 111143.352991\n", " \n", " \n", " 4\n", - " ABNJ\n", + " AF\n", " 2004\n", " MPA+OECM\n", - " 594174.659985\n", + " 119137.635862\n", " \n", " \n", " ...\n", @@ -998,35 +781,35 @@ " ...\n", " \n", " \n", - " 211\n", + " 163\n", " WA\n", " 2019\n", " MPA+OECM\n", " 30618.254664\n", " \n", " \n", - " 212\n", + " 164\n", " WA\n", " 2020\n", " MPA+OECM\n", " 30624.636536\n", " \n", " \n", - " 213\n", + " 165\n", " WA\n", " 2021\n", " MPA+OECM\n", " 30624.636536\n", " \n", " \n", - " 214\n", + " 166\n", " WA\n", " 2022\n", " MPA+OECM\n", " 31779.597984\n", " \n", " \n", - " 215\n", + " 167\n", " WA\n", " 2023\n", " MPA+OECM\n", @@ -1034,27 +817,27 @@ " \n", " \n", "\n", - "

216 rows × 4 columns

\n", + "

168 rows × 4 columns

\n", "" ], "text/plain": [ " location_id year protection_type cumsum_area\n", - "0 ABNJ 2000 MPA+OECM 594174.659985\n", - "1 ABNJ 2001 MPA+OECM 594174.659985\n", - "2 ABNJ 2002 MPA+OECM 594174.659985\n", - "3 ABNJ 2003 MPA+OECM 594174.659985\n", - "4 ABNJ 2004 MPA+OECM 594174.659985\n", + "0 AF 2000 MPA+OECM 94507.122820\n", + "1 AF 2001 MPA+OECM 94807.303100\n", + "2 AF 2002 MPA+OECM 102859.393938\n", + "3 AF 2003 MPA+OECM 111143.352991\n", + "4 AF 2004 MPA+OECM 119137.635862\n", ".. ... ... ... ...\n", - "211 WA 2019 MPA+OECM 30618.254664\n", - "212 WA 2020 MPA+OECM 30624.636536\n", - "213 WA 2021 MPA+OECM 30624.636536\n", - "214 WA 2022 MPA+OECM 31779.597984\n", - "215 WA 2023 MPA+OECM 31779.597984\n", + "163 WA 2019 MPA+OECM 30618.254664\n", + "164 WA 2020 MPA+OECM 30624.636536\n", + "165 WA 2021 MPA+OECM 30624.636536\n", + "166 WA 2022 MPA+OECM 31779.597984\n", + "167 WA 2023 MPA+OECM 31779.597984\n", "\n", - "[216 rows x 4 columns]" + "[168 rows x 4 columns]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1065,7 +848,7 @@ "\n", "# group by region and year to get sum of cumsum_area\n", "regions = regions.groupby(['location_id', 'year', 'protection_type'])['cumsum_area'].sum().reset_index()\n", - "regions\n" + "regions" ] }, { @@ -1075,230 +858,8 @@ "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idyearprotection_typecumsum_area
120GL2000MPA+OECM3.193363e+06
121GL2001MPA+OECM3.569706e+06
122GL2002MPA+OECM3.695628e+06
123GL2003MPA+OECM3.748312e+06
124GL2004MPA+OECM3.866803e+06
125GL2005MPA+OECM3.980030e+06
126GL2006MPA+OECM7.559205e+06
127GL2007MPA+OECM8.741698e+06
128GL2008MPA+OECM8.825878e+06
129GL2009MPA+OECM1.060580e+07
130GL2010MPA+OECM1.202293e+07
131GL2011MPA+OECM1.208784e+07
132GL2012MPA+OECM1.395678e+07
133GL2013MPA+OECM1.423613e+07
134GL2014MPA+OECM1.566334e+07
135GL2015MPA+OECM1.634694e+07
136GL2016MPA+OECM1.823543e+07
137GL2017MPA+OECM2.270050e+07
138GL2018MPA+OECM2.712299e+07
139GL2019MPA+OECM2.832749e+07
140GL2020MPA+OECM2.982679e+07
141GL2021MPA+OECM2.994130e+07
142GL2022MPA+OECM3.011280e+07
143GL2023MPA+OECM3.011285e+07
\n", - "
" - ], "text/plain": [ - " location_id year protection_type cumsum_area\n", - "120 GL 2000 MPA+OECM 3.193363e+06\n", - "121 GL 2001 MPA+OECM 3.569706e+06\n", - "122 GL 2002 MPA+OECM 3.695628e+06\n", - "123 GL 2003 MPA+OECM 3.748312e+06\n", - "124 GL 2004 MPA+OECM 3.866803e+06\n", - "125 GL 2005 MPA+OECM 3.980030e+06\n", - "126 GL 2006 MPA+OECM 7.559205e+06\n", - "127 GL 2007 MPA+OECM 8.741698e+06\n", - "128 GL 2008 MPA+OECM 8.825878e+06\n", - "129 GL 2009 MPA+OECM 1.060580e+07\n", - "130 GL 2010 MPA+OECM 1.202293e+07\n", - "131 GL 2011 MPA+OECM 1.208784e+07\n", - "132 GL 2012 MPA+OECM 1.395678e+07\n", - "133 GL 2013 MPA+OECM 1.423613e+07\n", - "134 GL 2014 MPA+OECM 1.566334e+07\n", - "135 GL 2015 MPA+OECM 1.634694e+07\n", - "136 GL 2016 MPA+OECM 1.823543e+07\n", - "137 GL 2017 MPA+OECM 2.270050e+07\n", - "138 GL 2018 MPA+OECM 2.712299e+07\n", - "139 GL 2019 MPA+OECM 2.832749e+07\n", - "140 GL 2020 MPA+OECM 2.982679e+07\n", - "141 GL 2021 MPA+OECM 2.994130e+07\n", - "142 GL 2022 MPA+OECM 3.011280e+07\n", - "143 GL 2023 MPA+OECM 3.011285e+07" + "array(['AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)" ] }, "execution_count": 12, @@ -1307,24 +868,13 @@ } ], "source": [ - "regions[regions['location_id'] == 'GL']" + "regions['location_id'].unique()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, - "outputs": [], - "source": [ - "# Remove rows for region GL (Global)\n", - "regions2 = regions[regions['location_id'] != 'GL']\n", - "regions2 = regions2[regions2['location_id'] != 'ABNJ']" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, "outputs": [ { "data": { @@ -1346,10 +896,10 @@ " 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'IOT', 'GAB', 'IND', 'SGP',\n", " 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP', 'SHN', 'YEM', 'URY',\n", " 'CMR', 'COM', 'KWT', 'SJM', 'GUY', 'UMI', 'HTI', 'JOR', 'GLOB',\n", - " 'AF', 'AS', 'AT', 'EU', 'GL', 'NA', 'SA', 'WA'], dtype=object)" + " 'AF', 'AS', 'AT', 'EU', 'NA', 'SA', 'WA'], dtype=object)" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1361,7 +911,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1386,9 +936,9 @@ " \n", " \n", " location_id\n", + " cumsum_area\n", " year\n", " protection_type\n", - " cumsum_area\n", " last_updated\n", " \n", " \n", @@ -1396,42 +946,42 @@ " \n", " 0\n", " ABNJ\n", + " 594174.659985\n", " 2000\n", " MPA+OECM\n", - " 594174.659985\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", " 1\n", " AGO\n", + " 0.415240\n", " 2000\n", " MPA+OECM\n", - " 0.415240\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", " 2\n", " ALB\n", + " 103.048347\n", " 2000\n", " MPA+OECM\n", - " 103.048347\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", " 3\n", " ARE\n", + " 78.516519\n", " 2000\n", " MPA+OECM\n", - " 78.516519\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", " 4\n", " ARG\n", + " 6155.668078\n", " 2000\n", " MPA+OECM\n", - " 6155.668078\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", " ...\n", @@ -1442,68 +992,68 @@ " ...\n", " \n", " \n", - " 3725\n", + " 3677\n", " WA\n", + " 30618.254664\n", " 2019\n", " MPA+OECM\n", - " 30618.254664\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", - " 3726\n", + " 3678\n", " WA\n", + " 30624.636536\n", " 2020\n", " MPA+OECM\n", - " 30624.636536\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", - " 3727\n", + " 3679\n", " WA\n", + " 30624.636536\n", " 2021\n", " MPA+OECM\n", - " 30624.636536\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", - " 3728\n", + " 3680\n", " WA\n", + " 31779.597984\n", " 2022\n", " MPA+OECM\n", - " 31779.597984\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", - " 3729\n", + " 3681\n", " WA\n", + " 31779.597984\n", " 2023\n", " MPA+OECM\n", - " 31779.597984\n", - " 2023-10-13\n", + " 2023-10-18\n", " \n", " \n", "\n", - "

3730 rows × 5 columns

\n", + "

3682 rows × 5 columns

\n", "" ], "text/plain": [ - " location_id year protection_type cumsum_area last_updated\n", - "0 ABNJ 2000 MPA+OECM 594174.659985 2023-10-13\n", - "1 AGO 2000 MPA+OECM 0.415240 2023-10-13\n", - "2 ALB 2000 MPA+OECM 103.048347 2023-10-13\n", - "3 ARE 2000 MPA+OECM 78.516519 2023-10-13\n", - "4 ARG 2000 MPA+OECM 6155.668078 2023-10-13\n", - "... ... ... ... ... ...\n", - "3725 WA 2019 MPA+OECM 30618.254664 2023-10-13\n", - "3726 WA 2020 MPA+OECM 30624.636536 2023-10-13\n", - "3727 WA 2021 MPA+OECM 30624.636536 2023-10-13\n", - "3728 WA 2022 MPA+OECM 31779.597984 2023-10-13\n", - "3729 WA 2023 MPA+OECM 31779.597984 2023-10-13\n", + " location_id cumsum_area year protection_type last_updated\n", + "0 ABNJ 594174.659985 2000 MPA+OECM 2023-10-18\n", + "1 AGO 0.415240 2000 MPA+OECM 2023-10-18\n", + "2 ALB 103.048347 2000 MPA+OECM 2023-10-18\n", + "3 ARE 78.516519 2000 MPA+OECM 2023-10-18\n", + "4 ARG 6155.668078 2000 MPA+OECM 2023-10-18\n", + "... ... ... ... ... ...\n", + "3677 WA 30618.254664 2019 MPA+OECM 2023-10-18\n", + "3678 WA 30624.636536 2020 MPA+OECM 2023-10-18\n", + "3679 WA 30624.636536 2021 MPA+OECM 2023-10-18\n", + "3680 WA 31779.597984 2022 MPA+OECM 2023-10-18\n", + "3681 WA 31779.597984 2023 MPA+OECM 2023-10-18\n", "\n", - "[3730 rows x 5 columns]" + "[3682 rows x 5 columns]" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1518,7 +1068,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [