From b887749a9a12c28eae05c4263b07a70d3864fede Mon Sep 17 00:00:00 2001 From: Alicia Date: Wed, 4 Sep 2024 09:07:10 +0200 Subject: [PATCH 01/16] added environment to layer --- .../sync/admin-role.strapi-super-admin.json | 953 ++++++++++-------- ...ation_content_types##api##layer.layer.json | 33 +- .../api/layer/content-types/layer/schema.json | 5 + cms/types/generated/contentTypes.d.ts | 5 + 4 files changed, 557 insertions(+), 439 deletions(-) diff --git a/cms/config/sync/admin-role.strapi-super-admin.json b/cms/config/sync/admin-role.strapi-super-admin.json index 13dda1df..79de5fdb 100644 --- a/cms/config/sync/admin-role.strapi-super-admin.json +++ b/cms/config/sync/admin-role.strapi-super-admin.json @@ -5,6 +5,7 @@ "permissions": [ { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::contact-detail.contact-detail", "properties": { "fields": [ @@ -16,41 +17,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::contact-detail.contact-detail", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::contact-detail.contact-detail", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::contact-detail.contact-detail", "properties": { "fields": [ @@ -62,15 +66,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": 
"api::contact-detail.contact-detail", "properties": { "fields": [ @@ -82,15 +87,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::data-info.data-info", "properties": { "fields": [ @@ -100,28 +106,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::data-info.data-info", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::data-info.data-info", "properties": { "fields": [ @@ -131,15 +139,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::data-info.data-info", "properties": { "fields": [ @@ -149,15 +158,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::data-source.data-source", "properties": { "fields": [ @@ -167,28 +177,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::data-source.data-source", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": 
"api::data-source.data-source", "properties": { "fields": [ @@ -198,15 +210,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::data-source.data-source", "properties": { "fields": [ @@ -216,15 +229,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::data-tool-ecosystem.data-tool-ecosystem", "properties": { "fields": [ @@ -232,41 +246,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::data-tool-ecosystem.data-tool-ecosystem", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::data-tool-ecosystem.data-tool-ecosystem", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::data-tool-ecosystem.data-tool-ecosystem", "properties": { "fields": [ @@ -274,15 +291,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::data-tool-ecosystem.data-tool-ecosystem", "properties": { "fields": [ @@ -290,15 +308,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": 
"plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::data-tool-language.data-tool-language", "properties": { "fields": [ @@ -308,41 +327,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::data-tool-language.data-tool-language", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::data-tool-language.data-tool-language", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::data-tool-language.data-tool-language", "properties": { "fields": [ @@ -352,15 +374,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::data-tool-language.data-tool-language", "properties": { "fields": [ @@ -370,15 +393,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::data-tool-resource-type.data-tool-resource-type", "properties": { "fields": [ @@ -386,41 +410,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::data-tool-resource-type.data-tool-resource-type", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, 
- "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::data-tool-resource-type.data-tool-resource-type", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::data-tool-resource-type.data-tool-resource-type", "properties": { "fields": [ @@ -428,15 +455,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::data-tool-resource-type.data-tool-resource-type", "properties": { "fields": [ @@ -444,15 +472,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::data-tool.data-tool", "properties": { "fields": [ @@ -466,41 +495,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::data-tool.data-tool", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::data-tool.data-tool", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::data-tool.data-tool", "properties": { "fields": [ @@ -514,15 +546,16 @@ ], "locales": [ "en", + "fr", "es", - 
"fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::data-tool.data-tool", "properties": { "fields": [ @@ -536,15 +569,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::dataset-group.dataset-group", "properties": { "fields": [ @@ -554,28 +588,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::dataset-group.dataset-group", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::dataset-group.dataset-group", "properties": { "fields": [ @@ -585,15 +621,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::dataset-group.dataset-group", "properties": { "fields": [ @@ -603,15 +640,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::dataset.dataset", "properties": { "fields": [ @@ -622,41 +660,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::dataset.dataset", "properties": { "locales": [ "en", + "fr", "es", - "fr" + 
"es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::dataset.dataset", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::dataset.dataset", "properties": { "fields": [ @@ -667,15 +708,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::dataset.dataset", "properties": { "fields": [ @@ -686,15 +728,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::environment.environment", "properties": { "fields": [ @@ -703,28 +746,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::environment.environment", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::environment.environment", "properties": { "fields": [ @@ -733,15 +778,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::environment.environment", "properties": { "fields": [ @@ -750,15 +796,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": 
[], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::fishing-protection-level-stat.fishing-protection-level-stat", "properties": { "fields": [ @@ -768,18 +815,18 @@ "pct" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::fishing-protection-level-stat.fishing-protection-level-stat", "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::fishing-protection-level-stat.fishing-protection-level-stat", "properties": { "fields": [ @@ -789,11 +836,11 @@ "pct" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::fishing-protection-level-stat.fishing-protection-level-stat", "properties": { "fields": [ @@ -803,11 +850,11 @@ "pct" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::fishing-protection-level.fishing-protection-level", "properties": { "fields": [ @@ -817,28 +864,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::fishing-protection-level.fishing-protection-level", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::fishing-protection-level.fishing-protection-level", "properties": { "fields": [ @@ -848,15 +897,16 @@ ], "locales": [ "en", + "fr", "es", 
- "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::fishing-protection-level.fishing-protection-level", "properties": { "fields": [ @@ -866,15 +916,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::habitat-stat.habitat-stat", "properties": { "fields": [ @@ -886,18 +937,18 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::habitat-stat.habitat-stat", "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::habitat-stat.habitat-stat", "properties": { "fields": [ @@ -909,11 +960,11 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::habitat-stat.habitat-stat", "properties": { "fields": [ @@ -925,11 +976,11 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::habitat.habitat", "properties": { "fields": [ @@ -939,28 +990,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::habitat.habitat", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + 
"actionParameters": {}, "subject": "api::habitat.habitat", "properties": { "fields": [ @@ -970,15 +1023,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::habitat.habitat", "properties": { "fields": [ @@ -988,15 +1042,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::layer.layer", "properties": { "fields": [ @@ -1017,45 +1072,49 @@ "legend_config.items.color", "legend_config.items.value", "legend_config.items.description", - "default" + "default", + "environment" ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::layer.layer", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::layer.layer", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::layer.layer", "properties": { "fields": [ @@ -1076,19 +1135,21 @@ "legend_config.items.color", "legend_config.items.value", "legend_config.items.description", - "default" + "default", + "environment" ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::layer.layer", "properties": { "fields": [ @@ 
-1109,19 +1170,21 @@ "legend_config.items.color", "legend_config.items.value", "legend_config.items.description", - "default" + "default", + "environment" ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::location.location", "properties": { "fields": [ @@ -1140,28 +1203,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::location.location", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::location.location", "properties": { "fields": [ @@ -1180,15 +1245,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::location.location", "properties": { "fields": [ @@ -1207,15 +1273,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::mpa-iucn-category.mpa-iucn-category", "properties": { "fields": [ @@ -1225,28 +1292,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::mpa-iucn-category.mpa-iucn-category", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": 
"plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::mpa-iucn-category.mpa-iucn-category", "properties": { "fields": [ @@ -1256,15 +1325,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::mpa-iucn-category.mpa-iucn-category", "properties": { "fields": [ @@ -1274,15 +1344,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::mpa.mpa", "properties": { "fields": [ @@ -1303,18 +1374,18 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::mpa.mpa", "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::mpa.mpa", "properties": { "fields": [ @@ -1335,11 +1406,11 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::mpa.mpa", "properties": { "fields": [ @@ -1360,11 +1431,11 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage-stat.mpaa-establishment-stage-stat", "properties": { "fields": [ @@ -1375,18 +1446,18 @@ "area" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage-stat.mpaa-establishment-stage-stat", "properties": 
{}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage-stat.mpaa-establishment-stage-stat", "properties": { "fields": [ @@ -1397,11 +1468,11 @@ "area" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage-stat.mpaa-establishment-stage-stat", "properties": { "fields": [ @@ -1412,11 +1483,11 @@ "area" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage.mpaa-establishment-stage", "properties": { "fields": [ @@ -1426,28 +1497,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage.mpaa-establishment-stage", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage.mpaa-establishment-stage", "properties": { "fields": [ @@ -1457,15 +1530,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::mpaa-establishment-stage.mpaa-establishment-stage", "properties": { "fields": [ @@ -1475,15 +1549,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": 
"api::mpaa-protection-level-stat.mpaa-protection-level-stat", "properties": { "fields": [ @@ -1492,18 +1567,18 @@ "area" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::mpaa-protection-level-stat.mpaa-protection-level-stat", "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::mpaa-protection-level-stat.mpaa-protection-level-stat", "properties": { "fields": [ @@ -1512,11 +1587,11 @@ "area" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::mpaa-protection-level-stat.mpaa-protection-level-stat", "properties": { "fields": [ @@ -1525,11 +1600,11 @@ "area" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::mpaa-protection-level.mpaa-protection-level", "properties": { "fields": [ @@ -1539,28 +1614,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::mpaa-protection-level.mpaa-protection-level", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::mpaa-protection-level.mpaa-protection-level", "properties": { "fields": [ @@ -1570,15 +1647,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, 
"subject": "api::mpaa-protection-level.mpaa-protection-level", "properties": { "fields": [ @@ -1588,15 +1666,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::protection-coverage-stat.protection-coverage-stat", "properties": { "fields": [ @@ -1609,18 +1688,18 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::protection-coverage-stat.protection-coverage-stat", "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::protection-coverage-stat.protection-coverage-stat", "properties": { "fields": [ @@ -1633,11 +1712,11 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::protection-coverage-stat.protection-coverage-stat", "properties": { "fields": [ @@ -1650,11 +1729,11 @@ "environment" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::protection-status.protection-status", "properties": { "fields": [ @@ -1664,28 +1743,30 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::protection-status.protection-status", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, 
"subject": "api::protection-status.protection-status", "properties": { "fields": [ @@ -1695,15 +1776,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::protection-status.protection-status", "properties": { "fields": [ @@ -1713,15 +1795,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "api::static-indicator.static-indicator", "properties": { "fields": [ @@ -1732,41 +1815,44 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "api::static-indicator.static-indicator", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.publish", + "actionParameters": {}, "subject": "api::static-indicator.static-indicator", "properties": { "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "api::static-indicator.static-indicator", "properties": { "fields": [ @@ -1777,15 +1863,16 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "api::static-indicator.static-indicator", "properties": { "fields": [ @@ -1796,232 +1883,233 @@ ], "locales": [ "en", + "fr", "es", - "fr" + "es" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] 
}, { "action": "admin::api-tokens.access", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::api-tokens.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::api-tokens.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::api-tokens.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::api-tokens.regenerate", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::api-tokens.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::marketplace.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::project-settings.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::project-settings.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::roles.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::roles.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::roles.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + 
"conditions": [] }, { "action": "admin::roles.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::transfer.tokens.access", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::transfer.tokens.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::transfer.tokens.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::transfer.tokens.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::transfer.tokens.regenerate", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::transfer.tokens.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::users.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::users.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::users.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::users.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::webhooks.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - 
"actionParameters": {} + "conditions": [] }, { "action": "admin::webhooks.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::webhooks.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "admin::webhooks.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::config-sync.menu-link", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::config-sync.settings.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.collection-types.configure-view", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.components.configure-layout", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.create", + "actionParameters": {}, "subject": "plugin::users-permissions.user", "properties": { "fields": [ @@ -2036,18 +2124,18 @@ "role" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.delete", + "actionParameters": {}, "subject": "plugin::users-permissions.user", "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.explorer.read", + "actionParameters": {}, "subject": "plugin::users-permissions.user", "properties": { "fields": [ @@ -2062,11 +2150,11 @@ "role" ] }, - "conditions": [], - "actionParameters": {} + 
"conditions": [] }, { "action": "plugin::content-manager.explorer.update", + "actionParameters": {}, "subject": "plugin::users-permissions.user", "properties": { "fields": [ @@ -2081,218 +2169,217 @@ "role" ] }, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-manager.single-types.configure-view", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::content-type-builder.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::documentation.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::documentation.settings.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::documentation.settings.regenerate", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::documentation.settings.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::email.settings.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::i18n.locale.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::i18n.locale.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::i18n.locale.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - 
"actionParameters": {} + "conditions": [] }, { "action": "plugin::i18n.locale.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::import-export-entries.export", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::import-export-entries.import", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.assets.copy-link", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.assets.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.assets.download", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.assets.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.configure-view", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::upload.settings.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.advanced-settings.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": 
"plugin::users-permissions.advanced-settings.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.email-templates.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.email-templates.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.providers.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.providers.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.roles.create", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.roles.delete", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.roles.read", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] }, { "action": "plugin::users-permissions.roles.update", + "actionParameters": {}, "subject": null, "properties": {}, - "conditions": [], - "actionParameters": {} + "conditions": [] } ] } \ No newline at end of file diff --git a/cms/config/sync/core-store.plugin_content_manager_configuration_content_types##api##layer.layer.json b/cms/config/sync/core-store.plugin_content_manager_configuration_content_types##api##layer.layer.json index 9295fa2e..8ccd3d5b 100644 --- 
a/cms/config/sync/core-store.plugin_content_manager_configuration_content_types##api##layer.layer.json +++ b/cms/config/sync/core-store.plugin_content_manager_configuration_content_types##api##layer.layer.json @@ -147,6 +147,21 @@ "sortable": true } }, + "environment": { + "edit": { + "label": "environment", + "description": "", + "placeholder": "", + "visible": true, + "editable": true, + "mainField": "name" + }, + "list": { + "label": "environment", + "searchable": true, + "sortable": true + } + }, "createdAt": { "edit": { "label": "createdAt", @@ -207,6 +222,12 @@ } }, "layouts": { + "list": [ + "id", + "title", + "type", + "metadata" + ], "edit": [ [ { @@ -257,13 +278,13 @@ "name": "metadata", "size": 12 } + ], + [ + { + "name": "environment", + "size": 6 + } ] - ], - "list": [ - "id", - "title", - "type", - "metadata" ] } }, diff --git a/cms/src/api/layer/content-types/layer/schema.json b/cms/src/api/layer/content-types/layer/schema.json index eb15b6ae..04343adb 100644 --- a/cms/src/api/layer/content-types/layer/schema.json +++ b/cms/src/api/layer/content-types/layer/schema.json @@ -98,6 +98,11 @@ }, "type": "boolean", "default": false + }, + "environment": { + "type": "relation", + "relation": "oneToOne", + "target": "api::environment.environment" } } } diff --git a/cms/types/generated/contentTypes.d.ts b/cms/types/generated/contentTypes.d.ts index 95b2f566..a9a5be7c 100644 --- a/cms/types/generated/contentTypes.d.ts +++ b/cms/types/generated/contentTypes.d.ts @@ -1577,6 +1577,11 @@ export interface ApiLayerLayer extends Schema.CollectionType { }; }> & Attribute.DefaultTo; + environment: Attribute.Relation< + 'api::layer.layer', + 'oneToOne', + 'api::environment.environment' + >; createdAt: Attribute.DateTime; updatedAt: Attribute.DateTime; publishedAt: Attribute.DateTime; From 306882313ea87a08eac7ef042622543d994a7f60 Mon Sep 17 00:00:00 2001 From: Alicia Date: Tue, 10 Sep 2024 09:50:13 +0200 Subject: [PATCH 02/16] updated pipe for wdpa terrestrial --- 
data/environment.yml | 1 + data/notebooks/pipes_mock/intermediate.ipynb | 213 +++ data/notebooks/pipes_mock/tiles.ipynb | 1614 ++++++++++++++++-- data/src/helpers/utils.py | 4 +- data/src/pipelines/processors.py | 7 +- 5 files changed, 1649 insertions(+), 190 deletions(-) diff --git a/data/environment.yml b/data/environment.yml index 5cadde21..5c01c5b8 100644 --- a/data/environment.yml +++ b/data/environment.yml @@ -38,6 +38,7 @@ dependencies: - pandera-geopandas=>0.18.0 - libgdal-arrow-parquet=>0.1.0 - openpyxl=>3.1.0 + - pyarrow - pip: - jupyterlab-code-formatter==2.2.1 - jupyter_collaboration diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 9a3efac2..761d1673 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -50,6 +50,7 @@ " set_location_iso,\n", " set_fps_classes,\n", " filter_by_methodology,\n", + " filter_by_terrestrial,\n", " transform_points,\n", " clean_geometries,\n", ")" @@ -835,6 +836,206 @@ "rm_tree(output_path) if output_path.exists() else None" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mpas protected planet intermediate terrestrial" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "force_clean = True\n", + "step = \"preprocess\"\n", + "pipe = \"mpa-terrestrial\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "mpa_url = \"https://www.protectedplanet.net/downloads\"\n", + "mpa_body = {\n", + " \"domain\": \"general\",\n", + " \"format\": \"shp\",\n", + " \"token\": \"wdpa\",\n", + " \"id\": 76011,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = 
working_folder.get_temp_file_path(step)\n", + "\n", + "output_path = working_folder.get_processed_step_path(step)\n", + "output_file = working_folder.get_step_fmt_file_path(step, \"gpkg\")\n", + "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", + "remote_path = working_folder.get_remote_path(step)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'id': 'wdpa-shp', 'title': 'WDPA_Sep2024_Public_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_Sep2024_Public_shp.zip', 'hasFailed': False, 'token': 'wdpa'}\n" + ] + } + ], + "source": [ + "# download data\n", + "r = requests.post(url=mpa_url, data=mpa_body)\n", + "r.raise_for_status()\n", + "\n", + "download_url = r.json().get(\"url\")\n", + "input_file_name = f'{r.json().get(\"title\")}.zip'\n", + "print(r.json())\n", + "\n", + "input_file = downloadFile(\n", + " url=download_url,\n", + " output_path=input_path,\n", + " overwrite=force_clean,\n", + " file=input_file_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# unzip file twice due how data is provisioned by protected planet\n", + "shutil.unpack_archive(\n", + " input_file,\n", + " temp_working_path,\n", + " \"zip\",\n", + ")\n", + "\n", + "for file in temp_working_path.glob(\"*.zip\"):\n", + " shutil.unpack_archive(file, temp_working_path.joinpath(file.stem), \"zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# load data & Transform it\n", + "unziped_folders = []\n", + "for file in temp_working_path.glob(\"*/*.shp\"):\n", + " df = (\n", + " gpd.read_file(file)\n", + " .pipe(filter_by_methodology)\n", + " .pipe(filter_by_terrestrial)\n", + " .pipe(transform_points)\n", + " .pipe(clean_geometries)\n", + " )\n", + " unziped_folders.append(df)\n", + "\n", + "# 
merge datasets\n", + "gdf = gpd.GeoDataFrame(\n", + " pd.concat(unziped_folders, ignore_index=True),\n", + " crs=unziped_folders[0].crs,\n", + ")\n", + "\n", + "gdf.drop(\n", + " columns=list(\n", + " set(gdf.columns)\n", + " - set(\n", + " [\n", + " \"geometry\",\n", + " \"WDPAID\",\n", + " \"WDPA_PID\",\n", + " \"PA_DEF\",\n", + " \"NAME\",\n", + " \"PARENT_ISO\",\n", + " \"DESIG_ENG\",\n", + " \"IUCN_CAT\",\n", + " \"STATUS\",\n", + " \"STATUS_YR\",\n", + " \"GIS_AREA\",\n", + " \"MARINE\",\n", + " ]\n", + " )\n", + " ),\n", + " inplace=True,\n", + ")\n", + "gdf[\"WDPAID\"] = pd.to_numeric(gdf[\"WDPAID\"], downcast=\"integer\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# save data & zip it\n", + "gdf.to_file(\n", + " filename=output_file,\n", + " driver=\"GPKG\",\n", + " layer=\"name\",\n", + " encoding=\"utf-8\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." 
+ ] + } + ], + "source": [ + "# LOAD\n", + "## load zipped file to GCS\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# clean unzipped files\n", + "rm_tree(temp_working_path) if temp_working_path.exists() else None\n", + "rm_tree(output_path) if output_path.exists() else None" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1169,6 +1370,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/notebooks/pipes_mock/tiles.ipynb b/data/notebooks/pipes_mock/tiles.ipynb index 96a7a6c4..328d9f0d 100644 --- a/data/notebooks/pipes_mock/tiles.ipynb +++ b/data/notebooks/pipes_mock/tiles.ipynb @@ -1,239 +1,1492 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "%load_ext autoreload\n", - "%autoreload 2" + "from pathlib import Path\n", + "import sys\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "\n", + "scripts_dir = Path(\"../..\").joinpath(\"src\")\n", + "if scripts_dir not in sys.path:\n", + " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", + "from helpers.mapshaper import Mapshaper\n", + "from helpers.tippcanoe import mbtileGeneration\n", + "from helpers.mapbox_uploader import uploadToMapbox\n", + "from helpers.settings import 
get_settings\n", + "from helpers.file_handler import FileConventionHandler\n", + "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", + "\n", + "from data_commons.loader import load_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mysettings = get_settings()\n", + "prev_step = \"preprocess\"\n", + "current_step = \"tiles\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### EEZs: Exclusive Economic Zones " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/mambauser/data/eez/processed/eez_preprocess.zip\n", + "/home/mambauser/data/eez/processed/preprocess\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Allocating 8 GB of heap memory\n", + "[o] Wrote /home/mambauser/data/eez/processed/tiles/eez_tiles.json\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/mambauser/data/eez/processed/tiles/eez_v11.mbtiles')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"eez\"\n", + "collection_name = f\"{pipe}_v11\"\n", + "\n", + "eez_dir = FileConventionHandler(pipe)\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(eez_dir, prev_step, mysettings)\n", + "\n", + "# simplify the geometries\n", + "Mapshaper(8).input([eez_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", + " fields=\",\".join([\"GEONAME\", \"POL_TYPE\", \"ISO_SOV1\", \"ISO_SOV2\", \"ISO_SOV3\"])\n", + ").output(\n", + " eez_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", + ").execute()\n", + "mbtileGeneration(\n", + " eez_dir.get_step_fmt_file_path(current_step, \"json\"),\n", + " 
eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uploadToMapbox(\n", + " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", + " collection_name,\n", + " mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### EEZs: wdpa Regions" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "collection_name = \"regions\"\n", + "\n", + "# load the EEZ file & the regions file\n", + "eez_data = gpd.read_file(eez_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", + "regions_df = pd.DataFrame(\n", + " [\n", + " {\"region_id\": data[\"region_iso\"], \"location_id\": iso}\n", + " for data in load_regions().get(\"data\", [])\n", + " for iso in data[\"country_iso_3s\"]\n", + " ]\n", + ")\n", + "\n", + "# merge the two files\n", + "gpd.GeoDataFrame(\n", + " pd.merge(\n", + " eez_data,\n", + " regions_df,\n", + " how=\"left\",\n", + " left_on=\"ISO_SOV1\",\n", + " right_on=\"location_id\",\n", + " sort=True,\n", + " copy=True,\n", + " ),\n", + " crs=eez_data.crs,\n", + ").to_file(\n", + " filename=eez_dir.get_processed_step_path(prev_step)\n", + " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", + " .as_posix(),\n", + " driver=\"ESRI Shapefile\",\n", + ")\n", + "\n", + "# dissolve by region_id\n", + "\n", + "Mapshaper(16).input(\n", + " [\n", + " eez_dir.get_processed_step_path(prev_step)\n", + " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", + " .as_posix()\n", + " ]\n", + ").dissolve2(fields=\"region_id\").output(\n", + " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(),\n", + " force=True,\n", + " format=\"geojson\",\n", + ").execute()\n", + "\n", + "# 
generate the mbtiles\n", + "mbtileGeneration(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "upload: ../../data/eez/processed/tiles/regions.mbtiles to s3://tilestream-tilesets-production/97/_pending/ojc7oxn5cpu10yo0o9tsl1xlc/skytruth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|██████████| 100/100 [03:00<00:00, 1.81s/it]\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uploadToMapbox(\n", + " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", + " collection_name,\n", + " mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### MPAs: Marine Protected Areas from WDPA" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/mambauser/data/mpa/processed/mpa_preprocess.zip\n", + "/home/mambauser/data/mpa/processed/preprocess\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Allocating 16 GB of heap memory\n", + "[clean] Retained 19,184 of 19,184 features\n", + "[simplify] Repaired 277,407 intersections; 443,678 intersections could not be repaired\n", + "[clean] Retained 19,184 of 19,184 features\n", + "[o] Wrote /home/mambauser/data/mpa/processed/tiles/mpa_tiles.json\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/mambauser/data/mpa/processed/tiles/mpa_tiles.mbtiles')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"pipe = \"mpa\"\n", + "collection_name = \"mpas_wdpa\"\n", + "\n", + "source_dir = FileConventionHandler(pipe)\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(source_dir, prev_step, mysettings)\n", + "\n", + "# simplify the geometries\n", + "Mapshaper(16).input([source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", + " fields=\"WDPAID,NAME,PA_DEF,GIS_M_AREA,PARENT_ISO\"\n", + ").clean(allow_overlaps=True, rewind=True).simplify(\"dp 10% keep-shapes planar\").clean(\n", + " allow_overlaps=True\n", + ").output(\n", + " source_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", + ").execute()\n", + "\n", + "# generate the mbtiles\n", + "mbtileGeneration(source_dir.get_step_fmt_file_path(current_step, \"json\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "upload: ../../data/mpa/processed/tiles/mpa_tiles.mbtiles to s3://tilestream-tilesets-production/de/_pending/yvng0dxxxru12eq9ye80350mc/skytruth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|██████████| 100/100 [02:34<00:00, 1.54s/it]\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uploadToMapbox(\n", + " source_dir.get_step_fmt_file_path(current_step, \"mbtiles\"),\n", + " collection_name,\n", + " mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PAs: Terrestrial Protected Areas from WDPA" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def split_n_parts(gdf: gpd.GeoDataFrame, folder: Path, n:int) -> None:\n", + " \n", + " for i in range(n):\n", 
+ " path = folder.joinpath(f\"part{i}.shp\")\n", + " gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver=\"ESRI Shapefile\")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "from pipelines.utils import background\n", + "from pipelines.processors import repair_geometry\n", + "\n", + "from tqdm.asyncio import tqdm\n", + "\n", + "\n", + "@background\n", + "def simplify(geometry, pbar, tlrc=0.0001) -> gpd.GeoDataFrame:\n", + " try:\n", + " return repair_geometry(geometry.simplify(tlrc))\n", + " except Exception as e:\n", + " print(e)\n", + " return geometry\n", + " finally:\n", + " pbar.update(1)\n", + "\n", + "\n", + "async def simplify_async(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " with tqdm(total=gdf.shape[0]) as pbar:\n", + " gdf[\"geometry\"] = await asyncio.gather(\n", + " *(simplify(val, pbar) for val in gdf[\"geometry\"])\n", + " )\n", + " return gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "collection_name = \"pas_wdpa\"\n", + "\n", + "source_dir = FileConventionHandler(pipe)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/mambauser/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.gpkg\n", + "/home/mambauser/data/mpa-terrestrial/processed/preprocess\n" + ] + } + ], + "source": [ + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(source_dir, prev_step, mysettings, \"gpkg\")\n", + "# split the file in two parts\n", + "file = gpd.read_file(source_dir.get_step_fmt_file_path(prev_step, \"gpkg\").as_posix())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 
7482/292261 [00:13<00:40, 7027.40it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 4%|▍ | 12182/292261 [00:14<00:57, 4912.45it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 9%|▉ | 25865/292261 [00:17<03:58, 1115.08it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 9%|▉ | 27219/292261 [00:18<10:50, 407.49it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 18%|█▊ | 53181/292261 [00:26<04:40, 852.40it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 27%|██▋ | 77894/292261 [00:33<01:35, 2247.11it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 28%|██▊ | 80952/292261 [00:34<01:00, 3471.37it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 28%|██▊ | 81812/292261 [00:35<04:25, 793.04it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 28%|██▊ | 82393/292261 [00:35<04:41, 744.80it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 29%|██▉ | 84436/292261 [00:36<03:49, 904.50it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 33%|███▎ | 96159/292261 [00:56<06:27, 506.25it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 34%|███▍ | 98927/292261 [00:57<01:41, 1907.31it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 34%|███▍ | 100063/292261 [01:01<16:25, 194.94it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 35%|███▌ | 102491/292261 [01:06<09:26, 335.12it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 35%|███▌ | 102932/292261 [01:08<07:54, 398.64it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 37%|███▋ | 107337/292261 [01:15<07:45, 397.14it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 54%|█████▍ | 158758/292261 [01:42<03:06, 716.16it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 56%|█████▌ | 164344/292261 [01:44<01:39, 1289.68it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 66%|██████▋ | 193651/292261 [01:53<01:19, 1237.86it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 
68%|██████▊ | 199046/292261 [01:54<01:19, 1171.99it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 70%|██████▉ | 203524/292261 [01:56<03:13, 457.51it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 74%|███████▍ | 217703/292261 [02:01<01:14, 1003.62it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 80%|███████▉ | 232945/292261 [02:10<02:24, 410.84it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 81%|████████ | 235500/292261 [02:14<04:28, 211.70it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 84%|████████▍ | 246516/292261 [02:19<00:28, 1606.01it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 87%|████████▋ | 255262/292261 [02:22<00:26, 1403.03it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 88%|████████▊ | 257138/292261 [02:22<00:16, 2145.00it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 89%|████████▉ | 261390/292261 [02:25<01:42, 301.68it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + " 91%|█████████▏| 267318/292261 [02:28<00:18, 1335.53it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 292261/292261 [03:01<00:00, 1611.07it/s]\n" + ] + } + ], + "source": [ + "test = await simplify_async(file)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ - "from pathlib import Path\n", - "import sys\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "\n", - "scripts_dir = Path(\"../..\").joinpath(\"src\")\n", - "if scripts_dir not in sys.path:\n", - " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", - "from helpers.mapshaper import Mapshaper\n", - "from helpers.tippcanoe import mbtileGeneration\n", - "from helpers.mapbox_uploader import uploadToMapbox\n", - "from helpers.settings import get_settings\n", - "from helpers.file_handler import FileConventionHandler\n", - "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", - "\n", - "from data_commons.loader import load_regions" + "test.to_file(source_dir.get_step_fmt_file_path(current_step, \"shp\").as_posix(), driver=\"ESRI Shapefile\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "mysettings = get_settings()\n", - "prev_step = \"preprocess\"\n", - "current_step = \"tiles\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### EEZs: Exclusive Economic Zones " + "i_folder = source_dir.get_processed_step_path(prev_step).joinpath(\"parts\")\n", + "i_folder.mkdir(exist_ok=True, parents=True)\n", + "folders = split_n_parts(test, i_folder, 100)\n", + "del test\n", + "del file" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ - "/home/mambauser/data/eez/processed/eez_preprocess.zip\n", - "/home/mambauser/data/eez/processed/preprocess\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Allocating 8 GB of heap memory\n", - "[o] Wrote /home/mambauser/data/eez/processed/tiles/eez_tiles.json\n" + "mapshaper-xl 32gb -i /home/mambauser/data/mpa-terrestrial/processed/preprocess/parts/*.shp -filter-fields fields=WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO -clean allow-overlaps rewind -simplify dp 10% keep-shapes planar -clean allow-overlaps -o /home/mambauser/data/mpa-terrestrial/processed/tiles force format=geojson\n" ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/mambauser/data/eez/processed/tiles/eez_v11.mbtiles')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "pipe = \"eez\"\n", - "collection_name = f\"{pipe}_v11\"\n", - "\n", - "eez_dir = FileConventionHandler(pipe)\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(eez_dir, prev_step, mysettings)\n", - "\n", - "# simplify the geometries\n", - "Mapshaper(8).input([eez_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", - " fields=\",\".join([\"GEONAME\", \"POL_TYPE\", \"ISO_SOV1\", \"ISO_SOV2\", \"ISO_SOV3\"])\n", + "Mapshaper(32).input(\n", + " [f\"{i_folder}/*.shp\"],\n", + ").filter_fields(\n", + " fields=\"WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO\"\n", + ").clean(allow_overlaps=True, rewind=True).simplify(\"dp 10% keep-shapes planar\").clean(\n", + " allow_overlaps=True\n", ").output(\n", - " eez_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", - ").execute()\n", - "mbtileGeneration(\n", - " eez_dir.get_step_fmt_file_path(current_step, \"json\"),\n", - " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", - ")" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "uploadToMapbox(\n", - " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", - " collection_name,\n", - " mysettings.MAPBOX_USER,\n", - " mysettings.MAPBOX_TOKEN,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### EEZs: wdpa Regions" + " source_dir.get_processed_step_path(current_step), force=True, format=\"geojson\"\n", + ").debug()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Allocating 32 GB of heap memory\n", + "[o] RFC 7946 warning: non-WGS84 GeoJSON output.\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json\n" + ] + } + ], "source": [ - "collection_name = \"regions\"\n", - "\n", - "# load the EEZ file & the regions file\n", - "eez_data = gpd.read_file(eez_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", - "regions_df = pd.DataFrame(\n", - " [\n", - " {\"region_id\": data[\"region_iso\"], \"location_id\": iso}\n", - " for data in load_regions().get(\"data\", [])\n", - " for iso in data[\"country_iso_3s\"]\n", - " ]\n", - ")\n", - "\n", - "# merge the two files\n", - "gpd.GeoDataFrame(\n", - " pd.merge(\n", - " eez_data,\n", - " regions_df,\n", - " how=\"left\",\n", - " left_on=\"ISO_SOV1\",\n", - " right_on=\"location_id\",\n", - " sort=True,\n", - " copy=True,\n", - " ),\n", - " crs=eez_data.crs,\n", - ").to_file(\n", - " filename=eez_dir.get_processed_step_path(prev_step)\n", - " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", - " .as_posix(),\n", - " driver=\"ESRI Shapefile\",\n", - ")\n", - "\n", - "# dissolve by region_id\n", - "\n", - "Mapshaper(16).input(\n", - " [\n", - " eez_dir.get_processed_step_path(prev_step)\n", - " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", - " .as_posix()\n", - " 
]\n", - ").dissolve2(fields=\"region_id\").output(\n", - " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(),\n", - " force=True,\n", - " format=\"geojson\",\n", - ").execute()\n", - "\n", - "# generate the mbtiles\n", - "mbtileGeneration(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"))" + "!mapshaper-xl 32gb -i /home/mambauser/data/mpa-terrestrial/processed/tiles/part*.json combine-files -merge-layers -o /home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json force format=geojson" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "upload: ../../data/eez/processed/tiles/regions.mbtiles to s3://tilestream-tilesets-production/97/_pending/ojc7oxn5cpu10yo0o9tsl1xlc/skytruth\n" + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 14,231 intersections; 128 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part0.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,300 intersections; 253 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part1.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 464 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part10.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 434 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part11.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,655 intersections; 48 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part12.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 615 intersections; 3 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part13.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 878 intersections; 154 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part14.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,265 intersections; 330 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part15.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 773 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part16.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,791 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part17.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,202 intersections; 28 intersections 
could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part18.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 817 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part19.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 659 intersections; 48 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part2.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 397 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part20.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,638 intersections; 4 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part21.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 70,976 intersections; 1,029 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part22.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,149 intersections; 20 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part23.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] 
Repaired 4,472 intersections; 11 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part24.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,483 intersections; 52 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part25.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 769 intersections; 18 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part26.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,186 intersections; 358 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part27.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,071 intersections; 31 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part28.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,563 intersections; 24 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part29.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,343 intersections; 16 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part3.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 37,179 intersections; 7,576 intersections could not be repaired\n", + "[clean] Retained 2,921 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part30.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 12,639 intersections; 2,386 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part31.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 13,114 intersections; 119 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part32.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 291 intersections; 2 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part33.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 5,448 intersections; 36 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part34.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 8,936 intersections; 124 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part35.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + 
"[simplify] Repaired 1,785 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part36.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 548 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part37.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,049 intersections; 48 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part38.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,724 intersections; 75 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part39.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 6,848 intersections; 1 intersection could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part4.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,576 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part40.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,263 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part41.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", 
+ "[simplify] Repaired 3,539 intersections; 44 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part42.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,252 intersections; 20 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part43.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 172 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part44.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 267 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part45.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 540 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part46.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,570 intersections; 24 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part47.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 942 intersections; 32 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part48.json\n", + 
"Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,262 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part49.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 952 intersections; 15 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part5.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 697 intersections; 16 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part50.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 537 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part51.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,066 intersections; 32 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part52.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 3,586 intersections; 16 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part53.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,821 intersections; 17 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + 
"[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part54.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,213 intersections; 16 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part55.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,465 intersections; 75 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part56.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,644 intersections; 120 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part57.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 981 intersections; 100 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part58.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 975 intersections; 4 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part59.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,009 intersections; 51 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part6.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + 
"[simplify] Repaired 2,397 intersections; 200 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part60.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,494 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part61.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,872 intersections; 110 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part62.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,130 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part63.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 82 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part64.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 857 intersections; 40 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part65.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,611 intersections; 131 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part66.json\n", + "Allocating 32 GB of heap memory\n", + 
"[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,892 intersections; 27 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part67.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 473 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part68.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 3,528 intersections; 16 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part69.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,456 intersections; 80 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part7.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,041 intersections; 68 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part70.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 681 intersections; 16 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part71.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,515 intersections; 12 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part72.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,047 intersections; 88 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part73.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 5,277 intersections; 83 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part74.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 9,639 intersections; 88 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part75.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 9,942 intersections; 56 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part76.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,505 intersections; 8 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part77.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,200 intersections; 64 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part78.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] 
Repaired 70,952 intersections; 380 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part79.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 662 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part8.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,913 intersections; 88 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part80.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,438 intersections; 245 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part81.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,637 intersections; 104 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part82.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,825 intersections; 106 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part83.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 3,118 intersections; 68 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part84.json\n", + 
"Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,886 intersections; 7 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part85.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 585 intersections; 40 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part86.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 387 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part87.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 964 intersections; 379 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part88.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 6,027 intersections; 528 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part89.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,028 intersections; 3 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part9.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 3,929 intersections; 209 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + 
"[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part90.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,370 intersections; 32 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part91.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 15,988 intersections; 148 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part92.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,955 intersections; 48 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part93.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,643 intersections; 68 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part94.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,855 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part95.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 6,680 intersections; 388 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part96.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,730 intersections; 
24 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part97.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 765 intersections; 100 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part98.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 33 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part99.json\n" ] - }, + } + ], + "source": [ + "!mapshaper-xl 32gb -i /home/mambauser/data/mpa-terrestrial/processed/preprocess/parts/*.shp -filter-fields fields=WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO -clean allow-overlaps rewind -simplify dp 30% keep-shapes planar -clean allow-overlaps -o /home/mambauser/data/mpa-terrestrial/processed/tiles force combine-layers format=geojson\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 100%|██████████| 100/100 [03:00<00:00, 1.81s/it]\n" + "For layer 0, using name \"mpaterrestrialtiles\"\n", + "/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json:24380: Found ] at top level\n", + "/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json:12366: Reached EOF without all containers being closed\n", + "In JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", + "292259 features, 138020430 bytes of geometry, 6110864 bytes of separate metadata, 14481131 bytes of string pool\n", + "Choosing a maxzoom of -z5 for features about 13778 feet (4200 meters) apart\n", + "Choosing a maxzoom of -z8 for resolution of about 1485 feet (452 meters) 
within features\n", + "tile 0/0/0 size is 845483 with detail 12, >500000 \n", + "tile 1/0/0 size is 528833 with detail 12, >500000 \n", + "tile 1/1/0 size is 972990 with detail 12, >500000 \n", + "tile 2/1/1 size is 614654 with detail 12, >500000 \n", + "tile 2/2/1 size is 1540068 with detail 12, >500000 \n", + "tile 2/2/1 size is 806964 with detail 11, >500000 \n", + "tile 3/4/2 size is 2272518 with detail 12, >500000 \n", + "tile 3/4/2 size is 1238297 with detail 11, >500000 \n", + "tile 3/4/2 size is 619274 with detail 10, >500000 \n", + "tile 4/4/5 size is 515510 with detail 12, >500000 \n", + "tile 4/9/4 size is 558716 with detail 12, >500000 \n", + "tile 4/9/5 size is 537453 with detail 12, >500000 \n", + "tile 4/8/4 size is 764105 with detail 12, >500000 \n", + "tile 4/8/5 size is 2167008 with detail 12, >500000 \n", + "tile 4/8/5 size is 1253048 with detail 11, >500000 \n", + "tile 4/8/5 size is 686870 with detail 10, >500000 \n", + "tile 5/18/9 size is 506492 with detail 12, >500000 \n", + "tile 5/17/9 size is 676608 with detail 12, >500000 \n", + "tile 5/16/11 size is 820961 with detail 12, >500000 \n", + "tile 5/17/10 size is 815791 with detail 12, >500000 \n", + "tile 5/17/11 size is 644684 with detail 12, >500000 \n", + "tile 5/16/10 size is 1459552 with detail 12, >500000 \n", + "tile 5/16/10 size is 859224 with detail 11, >500000 \n", + "tile 6/34/21 size is 540332 with detail 12, >500000 \n", + "tile 6/33/22 size is 790092 with detail 12, >500000 \n", + "tile 6/33/21 size is 1462170 with detail 12, >500000 \n", + "tile 6/33/21 size is 932291 with detail 11, >500000 \n", + "tile 6/33/21 size is 549289 with detail 10, >500000 \n", + "tile 7/66/42 size is 790837 with detail 12, >500000 \n", + "tile 7/66/42 size is 548367 with detail 11, >500000 \n", + " 99.9% 8/80/87 \n" ] }, { "data": { "text/plain": [ - "True" + "PosixPath('/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.mbtiles')" ] }, - "execution_count": 34, + 
"execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "uploadToMapbox(\n", - " eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", - " collection_name,\n", - " mysettings.MAPBOX_USER,\n", - " mysettings.MAPBOX_TOKEN,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### MPAs: Marine Protected Areas from WDPA" + "mbtileGeneration(Path(\"/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json\"))" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -261,26 +1514,26 @@ "PosixPath('/home/mambauser/data/mpa/processed/tiles/mpa_tiles.mbtiles')" ] }, - "execution_count": 9, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "pipe = \"mpa\"\n", - "collection_name = \"mpas_wdpa\"\n", + "pipe = \"mpa-terrestrial\"\n", + "collection_name = \"pas_wdpa\"\n", "\n", "source_dir = FileConventionHandler(pipe)\n", "\n", "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(source_dir, prev_step, mysettings)\n", - "\n", + "download_and_unzip_if_needed(source_dir, prev_step, mysettings, \"gpkg\")\n", + "# split the file in two parts\n", + "file = gpd.read_file(source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", + "i_folder = source_dir.get_step_path(prev_step)\n", + "folders = split_2_parts(file, i_folder)\n", "# simplify the geometries\n", - "Mapshaper(16).input([source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", - " fields=\"WDPAID,NAME,PA_DEF,GIS_M_AREA,PARENT_ISO\"\n", - ").clean(allow_overlaps=True, rewind=True).simplify(\"dp 10% keep-shapes planar\").clean(\n", - " allow_overlaps=True\n", - ").output(\n", + "Mapshaper(16).input(folders).filter_fields(fields=\"WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO\").clean(\n", + " 
allow_overlaps=True, rewind=True\n", + ").simplify(\"dp 10% keep-shapes planar\").clean(allow_overlaps=True).merge_layers().output(\n", " source_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", ").execute()\n", "\n", @@ -290,7 +1543,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -313,9 +1566,8 @@ "True" ] }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ @@ -629,18 +1881,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/src/helpers/utils.py b/data/src/helpers/utils.py index fe674629..a9e06b27 100644 --- a/data/src/helpers/utils.py +++ b/data/src/helpers/utils.py @@ -119,10 +119,10 @@ def make_archive(source: Path, destination: Path) -> None: def download_and_unzip_if_needed( - file_handler: FileConventionHandler, prev_step: STEPS, mysettings: Settings + file_handler: FileConventionHandler, prev_step: STEPS, mysettings: Settings, fmt: str = "zip" ): - zip_path = file_handler.get_step_fmt_file_path(prev_step, "zip", parent=True) + zip_path = file_handler.get_step_fmt_file_path(prev_step, fmt, parent=True) unzzipped_path = file_handler.get_processed_step_path(prev_step) diff --git a/data/src/pipelines/processors.py b/data/src/pipelines/processors.py index ac766cbf..31cc37bf 100644 --- a/data/src/pipelines/processors.py +++ b/data/src/pipelines/processors.py @@ -5,6 +5,7 @@ import numpy as np from shapely.geometry import Polygon from shapely.ops import unary_union +from shapely.validation import make_valid import json import asyncio @@ -25,6 +26,10 @@ 
def filter_by_methodology(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return df[mask].reset_index(drop=True) +def filter_by_terrestrial(gdf): + return gdf[gdf["MARINE"].astype(int) != 2].reset_index(drop=True) + + def filter_by_exluding_propossed_mpas(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: mask = df["STATUS"] != "Proposed" return df[mask].reset_index(drop=True) @@ -243,7 +248,7 @@ def repair_geometry(geom): if not geom: return Polygon() elif not geom.is_valid: - geom = collection_to_multipolygon(geom.buffer(0.0).make_valid()) + geom = collection_to_multipolygon(make_valid(geom.buffer(0.0))) elif geom.geom_type == "GeometryCollection": geom = collection_to_multipolygon(geom) return geom From 9e6584d3c3819a5632d63da7913b2bfc2433736a Mon Sep 17 00:00:00 2001 From: sofia Date: Thu, 12 Sep 2024 09:23:20 +0200 Subject: [PATCH 03/16] gadm processing in intermediate and tiles --- data/notebooks/pipes_mock/intermediate.ipynb | 828 ++++++++++++++++++- data/notebooks/pipes_mock/tiles.ipynb | 601 ++++---------- data/src/data_commons/data/iso_map.json | 43 +- 3 files changed, 1024 insertions(+), 448 deletions(-) diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 761d1673..00713648 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -11,9 +11,18 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 234, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -21,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 262, "metadata": {}, "outputs": [], "source": [ @@ -46,6 +55,7 @@ " protection_level,\n", " status,\n", " create_year,\n", + " calculate_area,\n", " get_mpas,\n", " set_location_iso,\n", " set_fps_classes,\n", @@ -58,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 182, "metadata": {}, "outputs": [], "source": [ @@ -74,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -86,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -145,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -161,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -170,7 +180,7 @@ "PosixPath('/home/mambauser/data/eez/raw/World_High_Seas_v1_20200826.zip')" ] }, - "execution_count": 8, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -191,15 +201,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/mambauser/data/eez/raw/temp_preprocess/World_EEZ_v11_20191118\n", - "/home/mambauser/data/eez/raw/temp_preprocess/World_High_Seas_v1_20200826\n" + "/home/mambauser/data/eez/raw/temp_preprocess/World_High_Seas_v1_20200826\n", + "/home/mambauser/data/eez/raw/temp_preprocess/World_EEZ_v11_20191118\n" ] } ], @@ -213,7 +223,7 @@ " if unziped_folder.exists() and force_clean:\n", " rm_tree(unziped_folder)\n", "\n", - " shutil.unpack_archive(path, unziped_folder.parent if idx == 0 else unziped_folder)\n", + " 
shutil.unpack_archive(path, unziped_folder)\n", "\n", " files = [gpd.read_file(file) for file in unziped_folder.rglob(\"*.shp\") if \"boundaries\" not in file.stem]\n", " unziped_folders.append(\n", @@ -223,14 +233,33 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GeoDataFrame 0 has 1 rows and 6 columns\n", + "GeoDataFrame 1 has 281 rows and 32 columns\n" + ] + } + ], + "source": [ + "for idx, gdf in enumerate(unziped_folders):\n", + " print(f\"GeoDataFrame {idx} has {len(gdf)} rows and {len(gdf.columns)} columns\")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "# Transform data\n", "## set the same structure for both datasets updating the high seas one\n", - "unziped_folders[1] = (\n", - " unziped_folders[1]\n", + "unziped_folders[0] = (\n", + " unziped_folders[0]\n", " .rename(\n", " columns={\"name\": \"GEONAME\", \"area_km2\": \"AREA_KM2\", \"mrgid\": \"MRGID\"},\n", " )\n", @@ -265,9 +294,18 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 75, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Value 212881389 of field AREA_KM2 of feature 0 not successfully written. 
Possibly due to too larger number with respect to field width\n", + " ogr_write(\n" + ] + } + ], "source": [ "# save data\n", "gpd.GeoDataFrame(\n", @@ -281,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ @@ -307,6 +345,728 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Countries gadm intermediate" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "# Pipe params\n", + "force_clean = True\n", + "step = \"preprocess\"\n", + "pipe = \"gadm\"" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [], + "source": [ + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = working_folder.get_temp_file_path(step)\n", + "\n", + "output_path = working_folder.get_processed_step_path(step)\n", + "output_file = working_folder.get_step_fmt_file_path(step, \"shp\")\n", + "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", + "remote_path = working_folder.get_remote_path(step)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "gadm_url = \"https://geodata.ucdavis.edu/gadm/gadm4.1/gadm_410-levels.zip\"\n", + "gadm_file_name = \"gadm_410-levels.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "# Download data\n", + "input_file = downloadFile(\n", + " gadm_url,\n", + " input_path,\n", + " overwrite=force_clean,\n", + " file=gadm_file_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing: /home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels\n", + "Removed existing folder: 
/home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels\n", + "Unpacked /home/mambauser/data/gadm/raw/gadm_410-levels.zip to /home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels\n" + ] + } + ], + "source": [ + "# Check if there is a zip file in the input_path\n", + "zip_file = next(input_path.glob(\"*.zip\"), None)\n", + "if zip_file:\n", + " unziped_folder = temp_working_path.joinpath(zip_file.stem)\n", + " print(f\"Processing: {unziped_folder}\")\n", + "\n", + " if unziped_folder.exists() and force_clean:\n", + " shutil.rmtree(unziped_folder)\n", + " print(f\"Removed existing folder: {unziped_folder}\")\n", + "\n", + " # Unpack the archive\n", + " shutil.unpack_archive(zip_file, unziped_folder)\n", + " print(f\"Unpacked {zip_file} to {unziped_folder}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 304, + "metadata": {}, + "outputs": [], + "source": [ + "# Add columns for translated names\n", + "data = [\n", + " {\"GID_0\": \"AFG\", \"COUNTRY_ES\": \"Afganistán\", \"COUNTRY_FR\": \"Afghanistan\"},\n", + " {\"GID_0\": \"XAD\", \"COUNTRY_ES\": \"Akrotiri y Dhekelia\", \"COUNTRY_FR\": \"Akrotiri et Dhekelia\"},\n", + " {\"GID_0\": \"ALB\", \"COUNTRY_ES\": \"Albania\", \"COUNTRY_FR\": \"Albanie\"},\n", + " {\"GID_0\": \"DZA\", \"COUNTRY_ES\": \"Argelia\", \"COUNTRY_FR\": \"Algérie\"},\n", + " {\"GID_0\": \"ASM\", \"COUNTRY_ES\": \"Samoa Americana\", \"COUNTRY_FR\": \"Samoa américaines\"},\n", + " {\"GID_0\": \"AND\", \"COUNTRY_ES\": \"Andorra\", \"COUNTRY_FR\": \"Andorre\"},\n", + " {\"GID_0\": \"AGO\", \"COUNTRY_ES\": \"Angola\", \"COUNTRY_FR\": \"Angola\"},\n", + " {\"GID_0\": \"AIA\", \"COUNTRY_ES\": \"Anguila\", \"COUNTRY_FR\": \"Anguilla\"},\n", + " {\"GID_0\": \"ATA\", \"COUNTRY_ES\": \"Antártida\", \"COUNTRY_FR\": \"Antarctique\"},\n", + " {\"GID_0\": \"ATG\", \"COUNTRY_ES\": \"Antigua y Barbuda\", \"COUNTRY_FR\": \"Antigua-et-Barbuda\"},\n", + " {\"GID_0\": \"ARG\", \"COUNTRY_ES\": \"Argentina\", \"COUNTRY_FR\": 
\"Argentine\"},\n", + " {\"GID_0\": \"ARM\", \"COUNTRY_ES\": \"Armenia\", \"COUNTRY_FR\": \"Arménie\"},\n", + " {\"GID_0\": \"ABW\", \"COUNTRY_ES\": \"Aruba\", \"COUNTRY_FR\": \"Aruba\"},\n", + " {\"GID_0\": \"AUS\", \"COUNTRY_ES\": \"Australia\", \"COUNTRY_FR\": \"Australie\"},\n", + " {\"GID_0\": \"AUT\", \"COUNTRY_ES\": \"Austria\", \"COUNTRY_FR\": \"Autriche\"},\n", + " {\"GID_0\": \"AZE\", \"COUNTRY_ES\": \"Azerbaiyán\", \"COUNTRY_FR\": \"Azerbaïdjan\"},\n", + " {\"GID_0\": \"BHS\", \"COUNTRY_ES\": \"Bahamas\", \"COUNTRY_FR\": \"Bahamas\"},\n", + " {\"GID_0\": \"BHR\", \"COUNTRY_ES\": \"Baréin\", \"COUNTRY_FR\": \"Bahreïn\"},\n", + " {\"GID_0\": \"BGD\", \"COUNTRY_ES\": \"Bangladés\", \"COUNTRY_FR\": \"Bangladesh\"},\n", + " {\"GID_0\": \"BRB\", \"COUNTRY_ES\": \"Barbados\", \"COUNTRY_FR\": \"Barbade\"},\n", + " {\"GID_0\": \"BLR\", \"COUNTRY_ES\": \"Bielorrusia\", \"COUNTRY_FR\": \"Biélorussie\"},\n", + " {\"GID_0\": \"BEL\", \"COUNTRY_ES\": \"Bélgica\", \"COUNTRY_FR\": \"Belgique\"},\n", + " {\"GID_0\": \"BLZ\", \"COUNTRY_ES\": \"Belice\", \"COUNTRY_FR\": \"Belize\"},\n", + " {\"GID_0\": \"BEN\", \"COUNTRY_ES\": \"Benín\", \"COUNTRY_FR\": \"Bénin\"},\n", + " {\"GID_0\": \"BMU\", \"COUNTRY_ES\": \"Bermudas\", \"COUNTRY_FR\": \"Bermudes\"},\n", + " {\"GID_0\": \"BTN\", \"COUNTRY_ES\": \"Bután\", \"COUNTRY_FR\": \"Bhoutan\"},\n", + " {\"GID_0\": \"BOL\", \"COUNTRY_ES\": \"Bolivia\", \"COUNTRY_FR\": \"Bolivie\"},\n", + " {\"GID_0\": \"BES\", \"COUNTRY_ES\": \"Bonaire, San Eustaquio y Saba\", \"COUNTRY_FR\": \"Bonaire, Saint-Eustache et Saba\"},\n", + " {\"GID_0\": \"BIH\", \"COUNTRY_ES\": \"Bosnia y Herzegovina\", \"COUNTRY_FR\": \"Bosnie-Herzégovine\"},\n", + " {\"GID_0\": \"BWA\", \"COUNTRY_ES\": \"Botsuana\", \"COUNTRY_FR\": \"Botswana\"},\n", + " {\"GID_0\": \"BVT\", \"COUNTRY_ES\": \"Isla Bouvet\", \"COUNTRY_FR\": \"Île Bouvet\"},\n", + " {\"GID_0\": \"BRA\", \"COUNTRY_ES\": \"Brasil\", \"COUNTRY_FR\": \"Brésil\"},\n", + " {\"GID_0\": \"IOT\", 
\"COUNTRY_ES\": \"Territorio Británico del Océano Índico\", \"COUNTRY_FR\": \"Territoire britannique de l'océan Indien\"},\n", + " {\"GID_0\": \"VGB\", \"COUNTRY_ES\": \"Islas Vírgenes Británicas\", \"COUNTRY_FR\": \"Îles Vierges britanniques\"},\n", + " {\"GID_0\": \"BRN\", \"COUNTRY_ES\": \"Brunéi\", \"COUNTRY_FR\": \"Brunei\"},\n", + " {\"GID_0\": \"BGR\", \"COUNTRY_ES\": \"Bulgaria\", \"COUNTRY_FR\": \"Bulgarie\"},\n", + " {\"GID_0\": \"BFA\", \"COUNTRY_ES\": \"Burkina Faso\", \"COUNTRY_FR\": \"Burkina Faso\"},\n", + " {\"GID_0\": \"BDI\", \"COUNTRY_ES\": \"Burundi\", \"COUNTRY_FR\": \"Burundi\"},\n", + " {\"GID_0\": \"CPV\", \"COUNTRY_ES\": \"Cabo Verde\", \"COUNTRY_FR\": \"Cap-Vert\"},\n", + " {\"GID_0\": \"KHM\", \"COUNTRY_ES\": \"Camboya\", \"COUNTRY_FR\": \"Cambodge\"},\n", + " {\"GID_0\": \"CMR\", \"COUNTRY_ES\": \"Camerún\", \"COUNTRY_FR\": \"Cameroun\"},\n", + " {\"GID_0\": \"CAN\", \"COUNTRY_ES\": \"Canadá\", \"COUNTRY_FR\": \"Canada\"},\n", + " {\"GID_0\": \"XCA\", \"COUNTRY_ES\": \"Mar Caspio\", \"COUNTRY_FR\": \"Mer Caspienne\"},\n", + " {\"GID_0\": \"CYM\", \"COUNTRY_ES\": \"Islas Caimán\", \"COUNTRY_FR\": \"Îles Caïmans\"},\n", + " {\"GID_0\": \"CAF\", \"COUNTRY_ES\": \"República Centroafricana\", \"COUNTRY_FR\": \"République centrafricaine\"},\n", + " {\"GID_0\": \"TCD\", \"COUNTRY_ES\": \"Chad\", \"COUNTRY_FR\": \"Tchad\"},\n", + " {\"GID_0\": \"CHL\", \"COUNTRY_ES\": \"Chile\", \"COUNTRY_FR\": \"Chili\"},\n", + " {\"GID_0\": \"CHN\", \"COUNTRY_ES\": \"China\", \"COUNTRY_FR\": \"Chine\"},\n", + " {\"GID_0\": \"CXR\", \"COUNTRY_ES\": \"Isla de Navidad\", \"COUNTRY_FR\": \"Île Christmas\"},\n", + " {\"GID_0\": \"XCL\", \"COUNTRY_ES\": \"Isla Clipperton\", \"COUNTRY_FR\": \"Île Clipperton\"},\n", + " {\"GID_0\": \"CCK\", \"COUNTRY_ES\": \"Islas Cocos\", \"COUNTRY_FR\": \"Îles Cocos\"},\n", + " {\"GID_0\": \"COL\", \"COUNTRY_ES\": \"Colombia\", \"COUNTRY_FR\": \"Colombie\"},\n", + " {\"GID_0\": \"COM\", \"COUNTRY_ES\": \"Comoras\", \"COUNTRY_FR\": 
\"Comores\"},\n", + " {\"GID_0\": \"COK\", \"COUNTRY_ES\": \"Islas Cook\", \"COUNTRY_FR\": \"Îles Cook\"},\n", + " {\"GID_0\": \"CRI\", \"COUNTRY_ES\": \"Costa Rica\", \"COUNTRY_FR\": \"Costa Rica\"},\n", + " {\"GID_0\": \"HRV\", \"COUNTRY_ES\": \"Croacia\", \"COUNTRY_FR\": \"Croatie\"},\n", + " {\"GID_0\": \"CUB\", \"COUNTRY_ES\": \"Cuba\", \"COUNTRY_FR\": \"Cuba\"},\n", + " {\"GID_0\": \"CUW\", \"COUNTRY_ES\": \"Curazao\", \"COUNTRY_FR\": \"Curaçao\"},\n", + " {\"GID_0\": \"CYP\", \"COUNTRY_ES\": \"Chipre\", \"COUNTRY_FR\": \"Chypre\"},\n", + " {\"GID_0\": \"CZE\", \"COUNTRY_ES\": \"Chequia\", \"COUNTRY_FR\": \"Tchéquie\"},\n", + " {\"GID_0\": \"CIV\", \"COUNTRY_ES\": \"Costa de Marfil\", \"COUNTRY_FR\": \"Côte d'Ivoire\"},\n", + " {\"GID_0\": \"COD\", \"COUNTRY_ES\": \"República Democrática del Congo\", \"COUNTRY_FR\": \"République démocratique du Congo\"},\n", + " {\"GID_0\": \"DNK\", \"COUNTRY_ES\": \"Dinamarca\", \"COUNTRY_FR\": \"Danemark\"},\n", + " {\"GID_0\": \"DJI\", \"COUNTRY_ES\": \"Yibuti\", \"COUNTRY_FR\": \"Djibouti\"},\n", + " {\"GID_0\": \"DMA\", \"COUNTRY_ES\": \"Dominica\", \"COUNTRY_FR\": \"Dominique\"},\n", + " {\"GID_0\": \"DOM\", \"COUNTRY_ES\": \"República Dominicana\", \"COUNTRY_FR\": \"République dominicaine\"},\n", + " {\"GID_0\": \"ECU\", \"COUNTRY_ES\": \"Ecuador\", \"COUNTRY_FR\": \"Équateur\"},\n", + " {\"GID_0\": \"EGY\", \"COUNTRY_ES\": \"Egipto\", \"COUNTRY_FR\": \"Égypte\"},\n", + " {\"GID_0\": \"SLV\", \"COUNTRY_ES\": \"El Salvador\", \"COUNTRY_FR\": \"Salvador\"},\n", + " {\"GID_0\": \"GNQ\", \"COUNTRY_ES\": \"Guinea Ecuatorial\", \"COUNTRY_FR\": \"Guinée équatoriale\"},\n", + " {\"GID_0\": \"ERI\", \"COUNTRY_ES\": \"Eritrea\", \"COUNTRY_FR\": \"Érythrée\"},\n", + " {\"GID_0\": \"EST\", \"COUNTRY_ES\": \"Estonia\", \"COUNTRY_FR\": \"Estonie\"},\n", + " {\"GID_0\": \"ETH\", \"COUNTRY_ES\": \"Etiopía\", \"COUNTRY_FR\": \"Éthiopie\"},\n", + " {\"GID_0\": \"FLK\", \"COUNTRY_ES\": \"Islas Malvinas\", \"COUNTRY_FR\": \"Îles 
Malouines\"},\n", + " {\"GID_0\": \"FRO\", \"COUNTRY_ES\": \"Islas Feroe\", \"COUNTRY_FR\": \"Îles Féroé\"},\n", + " {\"GID_0\": \"FJI\", \"COUNTRY_ES\": \"Fiyi\", \"COUNTRY_FR\": \"Fidji\"},\n", + " {\"GID_0\": \"FIN\", \"COUNTRY_ES\": \"Finlandia\", \"COUNTRY_FR\": \"Finlande\"},\n", + " {\"GID_0\": \"FRA\", \"COUNTRY_ES\": \"Francia\", \"COUNTRY_FR\": \"France\"},\n", + " {\"GID_0\": \"GUF\", \"COUNTRY_ES\": \"Guayana Francesa\", \"COUNTRY_FR\": \"Guyane française\"},\n", + " {\"GID_0\": \"PYF\", \"COUNTRY_ES\": \"Polinesia Francesa\", \"COUNTRY_FR\": \"Polynésie française\"},\n", + " {\"GID_0\": \"ATF\", \"COUNTRY_ES\": \"Territorios Australes Franceses\", \"COUNTRY_FR\": \"Terres australes françaises\"},\n", + " {\"GID_0\": \"GAB\", \"COUNTRY_ES\": \"Gabón\", \"COUNTRY_FR\": \"Gabon\"},\n", + " {\"GID_0\": \"GMB\", \"COUNTRY_ES\": \"Gambia\", \"COUNTRY_FR\": \"Gambie\"},\n", + " {\"GID_0\": \"GEO\", \"COUNTRY_ES\": \"Georgia\", \"COUNTRY_FR\": \"Géorgie\"},\n", + " {\"GID_0\": \"DEU\", \"COUNTRY_ES\": \"Alemania\", \"COUNTRY_FR\": \"Allemagne\"},\n", + " {\"GID_0\": \"GHA\", \"COUNTRY_ES\": \"Ghana\", \"COUNTRY_FR\": \"Ghana\"},\n", + " {\"GID_0\": \"GIB\", \"COUNTRY_ES\": \"Gibraltar\", \"COUNTRY_FR\": \"Gibraltar\"},\n", + " {\"GID_0\": \"GRC\", \"COUNTRY_ES\": \"Grecia\", \"COUNTRY_FR\": \"Grèce\"},\n", + " {\"GID_0\": \"GRL\", \"COUNTRY_ES\": \"Groenlandia\", \"COUNTRY_FR\": \"Groenland\"},\n", + " {\"GID_0\": \"GRD\", \"COUNTRY_ES\": \"Granada\", \"COUNTRY_FR\": \"Grenade\"},\n", + " {\"GID_0\": \"GLP\", \"COUNTRY_ES\": \"Guadalupe\", \"COUNTRY_FR\": \"Guadeloupe\"},\n", + " {\"GID_0\": \"GUM\", \"COUNTRY_ES\": \"Guam\", \"COUNTRY_FR\": \"Guam\"},\n", + " {\"GID_0\": \"GTM\", \"COUNTRY_ES\": \"Guatemala\", \"COUNTRY_FR\": \"Guatemala\"},\n", + " {\"GID_0\": \"GGY\", \"COUNTRY_ES\": \"Guernesey\", \"COUNTRY_FR\": \"Guernesey\"},\n", + " {\"GID_0\": \"GIN\", \"COUNTRY_ES\": \"Guinea\", \"COUNTRY_FR\": \"Guinée\"},\n", + " {\"GID_0\": \"GNB\", 
\"COUNTRY_ES\": \"Guinea-Bisáu\", \"COUNTRY_FR\": \"Guinée-Bissau\"},\n", + " {\"GID_0\": \"GUY\", \"COUNTRY_ES\": \"Guyana\", \"COUNTRY_FR\": \"Guyana\"},\n", + " {\"GID_0\": \"HTI\", \"COUNTRY_ES\": \"Haití\", \"COUNTRY_FR\": \"Haïti\"},\n", + " {\"GID_0\": \"HMD\", \"COUNTRY_ES\": \"Isla Heard y McDonald\", \"COUNTRY_FR\": \"Île Heard et îles McDonald\"}, \n", + " {\"GID_0\": \"HND\", \"COUNTRY_ES\": \"Honduras\", \"COUNTRY_FR\": \"Honduras\"},\n", + " {\"GID_0\": \"HUN\", \"COUNTRY_ES\": \"Hungría\", \"COUNTRY_FR\": \"Hongrie\"},\n", + " {\"GID_0\": \"ISL\", \"COUNTRY_ES\": \"Islandia\", \"COUNTRY_FR\": \"Islande\"},\n", + " {\"GID_0\": \"IND\", \"COUNTRY_ES\": \"India\", \"COUNTRY_FR\": \"Inde\"},\n", + " {\"GID_0\": \"IDN\", \"COUNTRY_ES\": \"Indonesia\", \"COUNTRY_FR\": \"Indonésie\"},\n", + " {\"GID_0\": \"IRN\", \"COUNTRY_ES\": \"Irán\", \"COUNTRY_FR\": \"Iran\"},\n", + " {\"GID_0\": \"IRQ\", \"COUNTRY_ES\": \"Irak\", \"COUNTRY_FR\": \"Irak\"},\n", + " {\"GID_0\": \"IRL\", \"COUNTRY_ES\": \"Irlanda\", \"COUNTRY_FR\": \"Irlande\"},\n", + " {\"GID_0\": \"IMN\", \"COUNTRY_ES\": \"Isla de Man\", \"COUNTRY_FR\": \"Île de Man\"},\n", + " {\"GID_0\": \"ISR\", \"COUNTRY_ES\": \"Israel\", \"COUNTRY_FR\": \"Israël\"},\n", + " {\"GID_0\": \"ITA\", \"COUNTRY_ES\": \"Italia\", \"COUNTRY_FR\": \"Italie\"},\n", + " {\"GID_0\": \"JAM\", \"COUNTRY_ES\": \"Jamaica\", \"COUNTRY_FR\": \"Jamaïque\"},\n", + " {\"GID_0\": \"JPN\", \"COUNTRY_ES\": \"Japón\", \"COUNTRY_FR\": \"Japon\"},\n", + " {\"GID_0\": \"JEY\", \"COUNTRY_ES\": \"Jersey\", \"COUNTRY_FR\": \"Jersey\"},\n", + " {\"GID_0\": \"JOR\", \"COUNTRY_ES\": \"Jordania\", \"COUNTRY_FR\": \"Jordanie\"},\n", + " {\"GID_0\": \"KAZ\", \"COUNTRY_ES\": \"Kazajistán\", \"COUNTRY_FR\": \"Kazakhstan\"},\n", + " {\"GID_0\": \"KEN\", \"COUNTRY_ES\": \"Kenia\", \"COUNTRY_FR\": \"Kenya\"},\n", + " {\"GID_0\": \"KIR\", \"COUNTRY_ES\": \"Kiribati\", \"COUNTRY_FR\": \"Kiribati\"},\n", + " {\"GID_0\": \"XKO\", \"COUNTRY_ES\": \"Kosovo\", 
\"COUNTRY_FR\": \"Kosovo\"},\n", + " {\"GID_0\": \"KWT\", \"COUNTRY_ES\": \"Kuwait\", \"COUNTRY_FR\": \"Koweït\"},\n", + " {\"GID_0\": \"KGZ\", \"COUNTRY_ES\": \"Kirguistán\", \"COUNTRY_FR\": \"Kirghizistan\"},\n", + " {\"GID_0\": \"LAO\", \"COUNTRY_ES\": \"Laos\", \"COUNTRY_FR\": \"Laos\"},\n", + " {\"GID_0\": \"LVA\", \"COUNTRY_ES\": \"Letonia\", \"COUNTRY_FR\": \"Lettonie\"},\n", + " {\"GID_0\": \"LBN\", \"COUNTRY_ES\": \"Líbano\", \"COUNTRY_FR\": \"Liban\"},\n", + " {\"GID_0\": \"LSO\", \"COUNTRY_ES\": \"Lesoto\", \"COUNTRY_FR\": \"Lesotho\"},\n", + " {\"GID_0\": \"LBR\", \"COUNTRY_ES\": \"Liberia\", \"COUNTRY_FR\": \"Liberia\"},\n", + " {\"GID_0\": \"LBY\", \"COUNTRY_ES\": \"Libia\", \"COUNTRY_FR\": \"Libye\"},\n", + " {\"GID_0\": \"LIE\", \"COUNTRY_ES\": \"Liechtenstein\", \"COUNTRY_FR\": \"Liechtenstein\"},\n", + " {\"GID_0\": \"LTU\", \"COUNTRY_ES\": \"Lituania\", \"COUNTRY_FR\": \"Lituanie\"},\n", + " {\"GID_0\": \"LUX\", \"COUNTRY_ES\": \"Luxemburgo\", \"COUNTRY_FR\": \"Luxembourg\"},\n", + " {\"GID_0\": \"MDG\", \"COUNTRY_ES\": \"Madagascar\", \"COUNTRY_FR\": \"Madagascar\"},\n", + " {\"GID_0\": \"MWI\", \"COUNTRY_ES\": \"Malaui\", \"COUNTRY_FR\": \"Malawi\"},\n", + " {\"GID_0\": \"MYS\", \"COUNTRY_ES\": \"Malasia\", \"COUNTRY_FR\": \"Malaisie\"},\n", + " {\"GID_0\": \"MDV\", \"COUNTRY_ES\": \"Maldivas\", \"COUNTRY_FR\": \"Maldives\"},\n", + " {\"GID_0\": \"MLI\", \"COUNTRY_ES\": \"Malí\", \"COUNTRY_FR\": \"Mali\"},\n", + " {\"GID_0\": \"MLT\", \"COUNTRY_ES\": \"Malta\", \"COUNTRY_FR\": \"Malte\"},\n", + " {\"GID_0\": \"MHL\", \"COUNTRY_ES\": \"Islas Marshall\", \"COUNTRY_FR\": \"Îles Marshall\"},\n", + " {\"GID_0\": \"MTQ\", \"COUNTRY_ES\": \"Martinica\", \"COUNTRY_FR\": \"Martinique\"},\n", + " {\"GID_0\": \"MRT\", \"COUNTRY_ES\": \"Mauritania\", \"COUNTRY_FR\": \"Mauritanie\"},\n", + " {\"GID_0\": \"MUS\", \"COUNTRY_ES\": \"Mauricio\", \"COUNTRY_FR\": \"Maurice\"},\n", + " {\"GID_0\": \"MYT\", \"COUNTRY_ES\": \"Mayotte\", \"COUNTRY_FR\": 
\"Mayotte\"},\n", + " {\"GID_0\": \"FSM\", \"COUNTRY_ES\": \"Micronesia\", \"COUNTRY_FR\": \"Micronésie\"},\n", + " {\"GID_0\": \"MDA\", \"COUNTRY_ES\": \"Moldavia\", \"COUNTRY_FR\": \"Moldavie\"},\n", + " {\"GID_0\": \"MCO\", \"COUNTRY_ES\": \"Mónaco\", \"COUNTRY_FR\": \"Monaco\"},\n", + " {\"GID_0\": \"MNG\", \"COUNTRY_ES\": \"Mongolia\", \"COUNTRY_FR\": \"Mongolie\"},\n", + " {\"GID_0\": \"MNE\", \"COUNTRY_ES\": \"Montenegro\", \"COUNTRY_FR\": \"Monténégro\"},\n", + " {\"GID_0\": \"MSR\", \"COUNTRY_ES\": \"Montserrat\", \"COUNTRY_FR\": \"Montserrat\"},\n", + " {\"GID_0\": \"MAR\", \"COUNTRY_ES\": \"Marruecos\", \"COUNTRY_FR\": \"Maroc\"},\n", + " {\"GID_0\": \"MOZ\", \"COUNTRY_ES\": \"Mozambique\", \"COUNTRY_FR\": \"Mozambique\"},\n", + " {\"GID_0\": \"MMR\", \"COUNTRY_ES\": \"Myanmar\", \"COUNTRY_FR\": \"Myanmar\"},\n", + " {\"GID_0\": \"MEX\", \"COUNTRY_ES\": \"México\", \"COUNTRY_FR\": \"Mexique\"},\n", + " {\"GID_0\": \"NAM\", \"COUNTRY_ES\": \"Namibia\", \"COUNTRY_FR\": \"Namibie\"},\n", + " {\"GID_0\": \"NRU\", \"COUNTRY_ES\": \"Nauru\", \"COUNTRY_FR\": \"Nauru\"},\n", + " {\"GID_0\": \"NPL\", \"COUNTRY_ES\": \"Nepal\", \"COUNTRY_FR\": \"Népal\"},\n", + " {\"GID_0\": \"NLD\", \"COUNTRY_ES\": \"Países Bajos\", \"COUNTRY_FR\": \"Pays-Bas\"},\n", + " {\"GID_0\": \"NCL\", \"COUNTRY_ES\": \"Nueva Caledonia\", \"COUNTRY_FR\": \"Nouvelle-Calédonie\"},\n", + " {\"GID_0\": \"NZL\", \"COUNTRY_ES\": \"Nueva Zelanda\", \"COUNTRY_FR\": \"Nouvelle-Zélande\"},\n", + " {\"GID_0\": \"NIC\", \"COUNTRY_ES\": \"Nicaragua\", \"COUNTRY_FR\": \"Nicaragua\"},\n", + " {\"GID_0\": \"NER\", \"COUNTRY_ES\": \"Níger\", \"COUNTRY_FR\": \"Niger\"},\n", + " {\"GID_0\": \"NGA\", \"COUNTRY_ES\": \"Nigeria\", \"COUNTRY_FR\": \"Nigéria\"},\n", + " {\"GID_0\": \"NIU\", \"COUNTRY_ES\": \"Niue\", \"COUNTRY_FR\": \"Niue\"},\n", + " {\"GID_0\": \"NFK\", \"COUNTRY_ES\": \"Isla Norfolk\", \"COUNTRY_FR\": \"Île Norfolk\"},\n", + " {\"GID_0\": \"PRK\", \"COUNTRY_ES\": \"Corea del Norte\", 
\"COUNTRY_FR\": \"Corée du Nord\"},\n", + " {\"GID_0\": \"MKD\", \"COUNTRY_ES\": \"Macedonia del Norte\", \"COUNTRY_FR\": \"Macédoine du Nord\"},\n", + " {\"GID_0\": \"ZNC\", \"COUNTRY_ES\": \"Chipre del Norte\", \"COUNTRY_FR\": \"Chypre du Nord\"},\n", + " {\"GID_0\": \"MNP\", \"COUNTRY_ES\": \"Islas Marianas del Norte\", \"COUNTRY_FR\": \"Îles Mariannes du Nord\"},\n", + " {\"GID_0\": \"NOR\", \"COUNTRY_ES\": \"Noruega\", \"COUNTRY_FR\": \"Norvège\"},\n", + " {\"GID_0\": \"OMN\", \"COUNTRY_ES\": \"Omán\", \"COUNTRY_FR\": \"Oman\"},\n", + " {\"GID_0\": \"PAK\", \"COUNTRY_ES\": \"Pakistán\", \"COUNTRY_FR\": \"Pakistan\"},\n", + " {\"GID_0\": \"PLW\", \"COUNTRY_ES\": \"Palaos\", \"COUNTRY_FR\": \"Palaos\"},\n", + " {\"GID_0\": \"PSE\", \"COUNTRY_ES\": \"Palestina\", \"COUNTRY_FR\": \"Palestine\"},\n", + " {\"GID_0\": \"PAN\", \"COUNTRY_ES\": \"Panamá\", \"COUNTRY_FR\": \"Panama\"},\n", + " {\"GID_0\": \"PNG\", \"COUNTRY_ES\": \"Papúa Nueva Guinea\", \"COUNTRY_FR\": \"Papouasie-Nouvelle-Guinée\"},\n", + " {\"GID_0\": \"XPI\", \"COUNTRY_ES\": \"Islas Paracelso\", \"COUNTRY_FR\": \"Îles Paracels\"},\n", + " {\"GID_0\": \"PRY\", \"COUNTRY_ES\": \"Paraguay\", \"COUNTRY_FR\": \"Paraguay\"},\n", + " {\"GID_0\": \"PER\", \"COUNTRY_ES\": \"Perú\", \"COUNTRY_FR\": \"Pérou\"},\n", + " {\"GID_0\": \"PHL\", \"COUNTRY_ES\": \"Filipinas\", \"COUNTRY_FR\": \"Philippines\"},\n", + " {\"GID_0\": \"PCN\", \"COUNTRY_ES\": \"Islas Pitcairn\", \"COUNTRY_FR\": \"Îles Pitcairn\"},\n", + " {\"GID_0\": \"POL\", \"COUNTRY_ES\": \"Polonia\", \"COUNTRY_FR\": \"Pologne\"},\n", + " {\"GID_0\": \"PRT\", \"COUNTRY_ES\": \"Portugal\", \"COUNTRY_FR\": \"Portugal\"},\n", + " {\"GID_0\": \"PRI\", \"COUNTRY_ES\": \"Puerto Rico\", \"COUNTRY_FR\": \"Porto Rico\"},\n", + " {\"GID_0\": \"QAT\", \"COUNTRY_ES\": \"Catar\", \"COUNTRY_FR\": \"Qatar\"},\n", + " {\"GID_0\": \"COG\", \"COUNTRY_ES\": \"República del Congo\", \"COUNTRY_FR\": \"République du Congo\"},\n", + " {\"GID_0\": \"ROU\", \"COUNTRY_ES\": 
\"Rumania\", \"COUNTRY_FR\": \"Roumanie\"},\n", + " {\"GID_0\": \"RUS\", \"COUNTRY_ES\": \"Rusia\", \"COUNTRY_FR\": \"Russie\"},\n", + " {\"GID_0\": \"RWA\", \"COUNTRY_ES\": \"Ruanda\", \"COUNTRY_FR\": \"Rwanda\"},\n", + " {\"GID_0\": \"REU\", \"COUNTRY_ES\": \"Reunión\", \"COUNTRY_FR\": \"La Réunion\"},\n", + " {\"GID_0\": \"SHN\", \"COUNTRY_ES\": \"Santa Elena, Ascensión y Tristán de Acuña\", \"COUNTRY_FR\": \"Sainte-Hélène, Ascension et Tristan da Cunha\"},\n", + " {\"GID_0\": \"KNA\", \"COUNTRY_ES\": \"San Cristóbal y Nieves\", \"COUNTRY_FR\": \"Saint-Christophe-et-Niévès\"},\n", + " {\"GID_0\": \"LCA\", \"COUNTRY_ES\": \"Santa Lucía\", \"COUNTRY_FR\": \"Sainte-Lucie\"},\n", + " {\"GID_0\": \"SPM\", \"COUNTRY_ES\": \"San Pedro y Miquelón\", \"COUNTRY_FR\": \"Saint-Pierre-et-Miquelon\"},\n", + " {\"GID_0\": \"VCT\", \"COUNTRY_ES\": \"San Vicente y las Granadinas\", \"COUNTRY_FR\": \"Saint-Vincent-et-les-Grenadines\"},\n", + " {\"GID_0\": \"BLM\", \"COUNTRY_ES\": \"San Bartolomé\", \"COUNTRY_FR\": \"Saint-Barthélemy\"},\n", + " {\"GID_0\": \"MAF\", \"COUNTRY_ES\": \"San Martín\", \"COUNTRY_FR\": \"Saint-Martin\"},\n", + " {\"GID_0\": \"WSM\", \"COUNTRY_ES\": \"Samoa\", \"COUNTRY_FR\": \"Samoa\"},\n", + " {\"GID_0\": \"SMR\", \"COUNTRY_ES\": \"San Marino\", \"COUNTRY_FR\": \"Saint-Marin\"},\n", + " {\"GID_0\": \"SAU\", \"COUNTRY_ES\": \"Arabia Saudita\", \"COUNTRY_FR\": \"Arabie Saoudite\"},\n", + " {\"GID_0\": \"SEN\", \"COUNTRY_ES\": \"Senegal\", \"COUNTRY_FR\": \"Sénégal\"},\n", + " {\"GID_0\": \"SRB\", \"COUNTRY_ES\": \"Serbia\", \"COUNTRY_FR\": \"Serbie\"},\n", + " {\"GID_0\": \"SYC\", \"COUNTRY_ES\": \"Seychelles\", \"COUNTRY_FR\": \"Seychelles\"},\n", + " {\"GID_0\": \"SLE\", \"COUNTRY_ES\": \"Sierra Leona\", \"COUNTRY_FR\": \"Sierra Leone\"},\n", + " {\"GID_0\": \"SGP\", \"COUNTRY_ES\": \"Singapur\", \"COUNTRY_FR\": \"Singapour\"},\n", + " {\"GID_0\": \"SXM\", \"COUNTRY_ES\": \"Sint Maarten\", \"COUNTRY_FR\": \"Saint-Martin\"},\n", + " {\"GID_0\": \"SVK\", 
\"COUNTRY_ES\": \"Eslovaquia\", \"COUNTRY_FR\": \"Slovaquie\"},\n", + " {\"GID_0\": \"SVN\", \"COUNTRY_ES\": \"Eslovenia\", \"COUNTRY_FR\": \"Slovénie\"},\n", + " {\"GID_0\": \"SLB\", \"COUNTRY_ES\": \"Islas Salomón\", \"COUNTRY_FR\": \"Îles Salomon\"},\n", + " {\"GID_0\": \"SOM\", \"COUNTRY_ES\": \"Somalia\", \"COUNTRY_FR\": \"Somalie\"},\n", + " {\"GID_0\": \"ZAF\", \"COUNTRY_ES\": \"Sudáfrica\", \"COUNTRY_FR\": \"Afrique du Sud\"},\n", + " {\"GID_0\": \"SGS\", \"COUNTRY_ES\": \"Georgia del Sur y las Islas Sandwich del Sur\", \"COUNTRY_FR\": \"Géorgie du Sud et îles Sandwich du Sud\"},\n", + " {\"GID_0\": \"KOR\", \"COUNTRY_ES\": \"Corea del Sur\", \"COUNTRY_FR\": \"Corée du Sud\"},\n", + " {\"GID_0\": \"SSD\", \"COUNTRY_ES\": \"Sudán del Sur\", \"COUNTRY_FR\": \"Soudan du Sud\"},\n", + " {\"GID_0\": \"ESP\", \"COUNTRY_ES\": \"España\", \"COUNTRY_FR\": \"Espagne\"},\n", + " {\"GID_0\": \"XSP\", \"COUNTRY_ES\": \"Islas Spratly\", \"COUNTRY_FR\": \"Îles Spratleys\"},\n", + " {\"GID_0\": \"LKA\", \"COUNTRY_ES\": \"Sri Lanka\", \"COUNTRY_FR\": \"Sri Lanka\"},\n", + " {\"GID_0\": \"SDN\", \"COUNTRY_ES\": \"Sudán\", \"COUNTRY_FR\": \"Soudan\"},\n", + " {\"GID_0\": \"SUR\", \"COUNTRY_ES\": \"Surinam\", \"COUNTRY_FR\": \"Suriname\"},\n", + " {\"GID_0\": \"SJM\", \"COUNTRY_ES\": \"Svalbard y Jan Mayen\", \"COUNTRY_FR\": \"Svalbard et Jan Mayen\"},\n", + " {\"GID_0\": \"SWZ\", \"COUNTRY_ES\": \"Suazilandia\", \"COUNTRY_FR\": \"Swaziland\"},\n", + " {\"GID_0\": \"SWE\", \"COUNTRY_ES\": \"Suecia\", \"COUNTRY_FR\": \"Suède\"},\n", + " {\"GID_0\": \"CHE\", \"COUNTRY_ES\": \"Suiza\", \"COUNTRY_FR\": \"Suisse\"},\n", + " {\"GID_0\": \"SYR\", \"COUNTRY_ES\": \"Siria\", \"COUNTRY_FR\": \"Syrie\"},\n", + " {\"GID_0\": \"STP\", \"COUNTRY_ES\": \"Santo Tomé y Príncipe\", \"COUNTRY_FR\": \"São Tomé-et-Principe\"},\n", + " {\"GID_0\": \"TWN\", \"COUNTRY_ES\": \"Taiwán\", \"COUNTRY_FR\": \"Taïwan\"},\n", + " {\"GID_0\": \"TJK\", \"COUNTRY_ES\": \"Tayikistán\", \"COUNTRY_FR\": 
\"Tadjikistan\"},\n", + " {\"GID_0\": \"TZA\", \"COUNTRY_ES\": \"Tanzania\", \"COUNTRY_FR\": \"Tanzanie\"},\n", + " {\"GID_0\": \"THA\", \"COUNTRY_ES\": \"Tailandia\", \"COUNTRY_FR\": \"Thaïlande\"},\n", + " {\"GID_0\": \"TLS\", \"COUNTRY_ES\": \"Timor-Leste\", \"COUNTRY_FR\": \"Timor-Leste\"},\n", + " {\"GID_0\": \"TGO\", \"COUNTRY_ES\": \"Togo\", \"COUNTRY_FR\": \"Togo\"},\n", + " {\"GID_0\": \"TKL\", \"COUNTRY_ES\": \"Tokelau\", \"COUNTRY_FR\": \"Tokelau\"},\n", + " {\"GID_0\": \"TON\", \"COUNTRY_ES\": \"Tonga\", \"COUNTRY_FR\": \"Tonga\"},\n", + " {\"GID_0\": \"TTO\", \"COUNTRY_ES\": \"Trinidad y Tobago\", \"COUNTRY_FR\": \"Trinité-et-Tobago\"},\n", + " {\"GID_0\": \"TUN\", \"COUNTRY_ES\": \"Túnez\", \"COUNTRY_FR\": \"Tunisie\"},\n", + " {\"GID_0\": \"TUR\", \"COUNTRY_ES\": \"Turquía\", \"COUNTRY_FR\": \"Turquie\"},\n", + " {\"GID_0\": \"TKM\", \"COUNTRY_ES\": \"Turkmenistán\", \"COUNTRY_FR\": \"Turkménistan\"},\n", + " {\"GID_0\": \"TCA\", \"COUNTRY_ES\": \"Islas Turcas y Caicos\", \"COUNTRY_FR\": \"Îles Turques-et-Caïques\"},\n", + " {\"GID_0\": \"TUV\", \"COUNTRY_ES\": \"Tuvalu\", \"COUNTRY_FR\": \"Tuvalu\"},\n", + " {\"GID_0\": \"UGA\", \"COUNTRY_ES\": \"Uganda\", \"COUNTRY_FR\": \"Ouganda\"},\n", + " {\"GID_0\": \"UKR\", \"COUNTRY_ES\": \"Ucrania\", \"COUNTRY_FR\": \"Ukraine\"},\n", + " {\"GID_0\": \"ARE\", \"COUNTRY_ES\": \"Emiratos Árabes Unidos\", \"COUNTRY_FR\": \"Émirats Arabes Unis\"},\n", + " {\"GID_0\": \"GBR\", \"COUNTRY_ES\": \"Reino Unido\", \"COUNTRY_FR\": \"Royaume-Uni\"},\n", + " {\"GID_0\": \"USA\", \"COUNTRY_ES\": \"Estados Unidos\", \"COUNTRY_FR\": \"États-Unis\"},\n", + " {\"GID_0\": \"UMI\", \"COUNTRY_ES\": \"Islas Ultramarinas Menores de los Estados Unidos\", \"COUNTRY_FR\": \"Îles mineures éloignées des États-Unis\"},\n", + " {\"GID_0\": \"URY\", \"COUNTRY_ES\": \"Uruguay\", \"COUNTRY_FR\": \"Uruguay\"},\n", + " {\"GID_0\": \"UZB\", \"COUNTRY_ES\": \"Uzbekistán\", \"COUNTRY_FR\": \"Ouzbékistan\"},\n", + " {\"GID_0\": \"VUT\", 
\"COUNTRY_ES\": \"Vanuatu\", \"COUNTRY_FR\": \"Vanuatu\"},\n", + " {\"GID_0\": \"VAT\", \"COUNTRY_ES\": \"Ciudad del Vaticano\", \"COUNTRY_FR\": \"Cité du Vatican\"},\n", + " {\"GID_0\": \"VEN\", \"COUNTRY_ES\": \"Venezuela\", \"COUNTRY_FR\": \"Venezuela\"},\n", + " {\"GID_0\": \"VNM\", \"COUNTRY_ES\": \"Vietnam\", \"COUNTRY_FR\": \"Vietnam\"},\n", + " {\"GID_0\": \"VIR\", \"COUNTRY_ES\": \"Islas Vírgenes de los Estados Unidos\", \"COUNTRY_FR\": \"Îles Vierges des États-Unis\"},\n", + " {\"GID_0\": \"WLF\", \"COUNTRY_ES\": \"Wallis y Futuna\", \"COUNTRY_FR\": \"Wallis-et-Futuna\"},\n", + " {\"GID_0\": \"ESH\", \"COUNTRY_ES\": \"Sahara Occidental\", \"COUNTRY_FR\": \"Sahara occidental\"},\n", + " {\"GID_0\": \"YEM\", \"COUNTRY_ES\": \"Yemen\", \"COUNTRY_FR\": \"Yémen\"},\n", + " {\"GID_0\": \"ZMB\", \"COUNTRY_ES\": \"Zambia\", \"COUNTRY_FR\": \"Zambie\"},\n", + " {\"GID_0\": \"ZWE\", \"COUNTRY_ES\": \"Zimbabue\", \"COUNTRY_FR\": \"Zimbabwe\"},\n", + " {\"GID_0\": \"ALA\", \"COUNTRY_ES\": \"Islas Åland\", \"COUNTRY_FR\": \"Îles Åland\"}]\n", + "\n", + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found GeoPackage: /home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels/gadm_410-levels.gpkg\n", + "Selected layer: ADM_0\n" + ] + } + ], + "source": [ + "# Select data adm_0, dissolve and save as shp\n", + "geopackage_file = next(unziped_folder.rglob(\"*.gpkg\"), None)\n", + "\n", + "if geopackage_file:\n", + " print(f\"Found GeoPackage: {geopackage_file}\")\n", + "\n", + " # Specify the layer to read\n", + " layer_name = \"ADM_0\"\n", + " gdf = gpd.read_file(geopackage_file, layer=layer_name)\n", + " print(f\"Selected layer: {layer_name}\") \n", + " \n", + "else:\n", + " print(\"No GeoPackage file found in the unzipped folder.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 287, + "metadata": {}, + "outputs": [], 
+ "source": [ + "dependency_to_parent = {\n", + " \"ABW\": (\"NLD\", \"Netherlands\"),\n", + " \"AIA\": (\"GBR\", \"United Kingdom\"),\n", + " \"ALA\": (\"FIN\", \"Finland\"),\n", + " \"ASM\": (\"USA\", \"United States\"),\n", + " \"ATF\": (\"FRA\", \"France\"),\n", + " \"BES\": (\"NLD\", \"Netherlands\"),\n", + " \"BLM\": (\"FRA\", \"France\"),\n", + " \"BMU\": (\"GBR\", \"United Kingdom\"),\n", + " \"BVT\": (\"NOR\", \"Norway\"),\n", + " \"CCK\": (\"AUS\", \"Australia\"),\n", + " \"COK\": (\"NZL\", \"New Zealand\"),\n", + " \"CUW\": (\"NLD\", \"Netherlands\"),\n", + " \"CXR\": (\"AUS\", \"Australia\"),\n", + " \"CYM\": (\"GBR\", \"United Kingdom\"),\n", + " \"FLK\": (\"GBR\", \"United Kingdom\"),\n", + " \"FRO\": (\"DNK\", \"Denmark\"),\n", + " \"GGY\": (\"GBR\", \"United Kingdom\"),\n", + " \"GLP\": (\"FRA\", \"France\"),\n", + " \"GRL\": (\"DNK\", \"Denmark\"),\n", + " \"GUF\": (\"FRA\", \"France\"),\n", + " \"GUM\": (\"USA\", \"United States\"),\n", + " \"HMD\": (\"AUS\", \"Australia\"),\n", + " \"IMN\": (\"GBR\", \"United Kingdom\"),\n", + " \"IOT\": (\"GBR\", \"United Kingdom\"),\n", + " \"JEY\": (\"GBR\", \"United Kingdom\"),\n", + " \"KGZ\": (\"KGZ\", \"Kyrgyzstan\"),\n", + " \"LAO\": (\"LAO\", \"Laos\"),\n", + " \"LIE\": (\"LIE\", \"Liechtenstein\"),\n", + " \"LSO\": (\"LSO\", \"Lesotho\"),\n", + " \"LUX\": (\"LUX\", \"Luxembourg\"),\n", + " \"MAF\": (\"FRA\", \"France\"),\n", + " \"MDA\": (\"MDA\", \"Moldova\"),\n", + " \"MKD\": (\"MKD\", \"North Macedonia\"),\n", + " \"MLI\": (\"MLI\", \"Mali\"),\n", + " \"MNG\": (\"MNG\", \"Mongolia\"),\n", + " \"MNP\": (\"USA\", \"United States\"),\n", + " \"MSR\": (\"GBR\", \"United Kingdom\"),\n", + " \"MTQ\": (\"FRA\", \"France\"),\n", + " \"MWI\": (\"MWI\", \"Malawi\"),\n", + " \"MYT\": (\"FRA\", \"France\"),\n", + " \"NCL\": (\"FRA\", \"France\"),\n", + " \"NER\": (\"NER\", \"Niger\"),\n", + " \"NFK\": (\"AUS\", \"Australia\"),\n", + " \"NIU\": (\"NZL\", \"New Zealand\"),\n", + " \"NPL\": (\"NPL\", 
\"Nepal\"),\n", + " \"PCN\": (\"GBR\", \"United Kingdom\"),\n", + " \"PRI\": (\"USA\", \"United States\"),\n", + " \"PRY\": (\"PRY\", \"Paraguay\"),\n", + " \"PYF\": (\"FRA\", \"France\"),\n", + " \"REU\": (\"FRA\", \"France\"),\n", + " \"RWA\": (\"RWA\", \"Rwanda\"),\n", + " \"SGS\": (\"GBR\", \"United Kingdom\"),\n", + " \"SHN\": (\"GBR\", \"United Kingdom\"),\n", + " \"SJM\": (\"NOR\", \"Norway\"),\n", + " \"SMR\": (\"SMR\", \"San Marino\"),\n", + " \"SPM\": (\"FRA\", \"France\"),\n", + " \"SRB\": (\"SRB\", \"Serbia\"),\n", + " \"SSD\": (\"SSD\", \"South Sudan\"),\n", + " \"SVK\": (\"SVK\", \"Slovakia\"),\n", + " \"SWZ\": (\"SWZ\", \"Eswatini\"),\n", + " \"SXM\": (\"NLD\", \"Netherlands\"),\n", + " \"TCA\": (\"GBR\", \"United Kingdom\"),\n", + " \"TCD\": (\"TCD\", \"Chad\"),\n", + " \"TJK\": (\"TJK\", \"Tajikistan\"),\n", + " \"TKL\": (\"NZL\", \"New Zealand\"),\n", + " \"UGA\": (\"UGA\", \"Uganda\"),\n", + " \"UMI\": (\"USA\", \"United States\"),\n", + " \"UZB\": (\"UZB\", \"Uzbekistan\"),\n", + " \"VAT\": (\"VAT\", \"Vatican City\"),\n", + " \"VGB\": (\"GBR\", \"United Kingdom\"),\n", + " \"VIR\": (\"USA\", \"United States\"),\n", + " \"WLF\": (\"FRA\", \"France\"),\n", + " \"ZMB\": (\"ZMB\", \"Zambia\"),\n", + " \"ZWE\": (\"ZWE\", \"Zimbabwe\"),\n", + "}\n", + "\n", + "def update_gid_0_and_country(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Updates the GID_0 and COUNTRY values in the GeoDataFrame for dependent territories \n", + " with the GID_0 and COUNTRY of their sovereign parent countries.\n", + "\n", + " Parameters:\n", + " gdf (gpd.GeoDataFrame): The input GeoDataFrame with 'GID_0' and 'COUNTRY' columns.\n", + "\n", + " Returns:\n", + " gpd.GeoDataFrame: The GeoDataFrame with updated 'GID_0' and 'COUNTRY' values for dependent territories.\n", + " \"\"\"\n", + " # Map GID_0 to the updated values\n", + " gdf['GID_0'] = gdf['GID_0'].map(lambda x: dependency_to_parent.get(x, (x, x))[0])\n", + " \n", + " # Update COUNTRY based on the 
updated GID_0\n", + " gdf['COUNTRY'] = gdf['GID_0'].map(lambda x: {v[0]: v[1] for k, v in dependency_to_parent.items()}.get(x, gdf['COUNTRY'].loc[gdf['GID_0'] == x].values[0]))\n", + "\n", + " return gdf\n", + "\n", + "# Assuming your GeoDataFrame is called `gdf`\n", + "gdf_updated = update_gid_0_and_country(gdf)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dissolve by country\n", + "gdf_updated = gdf_updated.dissolve(by='COUNTRY').reset_index()\n", + "\n", + "# Calculate area\n", + "gdf_updated = gdf_updated.pipe(calculate_area)" + ] + }, + { + "cell_type": "code", + "execution_count": 309, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
COUNTRYgeometryGID_0area_km2COUNTRY_ESCOUNTRY_FR
0AfghanistanMULTIPOLYGON (((63.61425 29.46993, 63.60868 29...AFG644050.28AfganistánAfghanistan
1Akrotiri and DhekeliaMULTIPOLYGON (((33.00764 34.62583, 33.00708 34...XAD233.64Akrotiri y DhekeliaAkrotiri et Dhekelia
2AlbaniaMULTIPOLYGON (((19.27804 40.50524, 19.28189 40...ALB28689.62AlbaniaAlbanie
3AlgeriaMULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7...DZA2311455.23ArgeliaAlgérie
4AndorraPOLYGON ((1.61725 42.62406, 1.63334 42.62553, ...AND450.35AndorraAndorre
.....................
201VietnamMULTIPOLYGON (((103.46895 9.25602, 103.46736 9...VNM330364.87VietnamVietnam
202Western SaharaMULTIPOLYGON (((-16.83569 22.15403, -16.83597 ...ESH267892.77Sahara OccidentalSahara occidental
203YemenMULTIPOLYGON (((42.1618 15.03042, 42.16236 15....YEM453741.18YemenYémen
204ZambiaPOLYGON ((25.87834 -17.97218, 25.87034 -17.970...ZMB753990.33ZambiaZambie
205ZimbabwePOLYGON ((32.70425 -18.96022, 32.70537 -18.965...ZWE391234.88ZimbabueZimbabwe
\n", + "

206 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " COUNTRY geometry \\\n", + "0 Afghanistan MULTIPOLYGON (((63.61425 29.46993, 63.60868 29... \n", + "1 Akrotiri and Dhekelia MULTIPOLYGON (((33.00764 34.62583, 33.00708 34... \n", + "2 Albania MULTIPOLYGON (((19.27804 40.50524, 19.28189 40... \n", + "3 Algeria MULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7... \n", + "4 Andorra POLYGON ((1.61725 42.62406, 1.63334 42.62553, ... \n", + ".. ... ... \n", + "201 Vietnam MULTIPOLYGON (((103.46895 9.25602, 103.46736 9... \n", + "202 Western Sahara MULTIPOLYGON (((-16.83569 22.15403, -16.83597 ... \n", + "203 Yemen MULTIPOLYGON (((42.1618 15.03042, 42.16236 15.... \n", + "204 Zambia POLYGON ((25.87834 -17.97218, 25.87034 -17.970... \n", + "205 Zimbabwe POLYGON ((32.70425 -18.96022, 32.70537 -18.965... \n", + "\n", + " GID_0 area_km2 COUNTRY_ES COUNTRY_FR \n", + "0 AFG 644050.28 Afganistán Afghanistan \n", + "1 XAD 233.64 Akrotiri y Dhekelia Akrotiri et Dhekelia \n", + "2 ALB 28689.62 Albania Albanie \n", + "3 DZA 2311455.23 Argelia Algérie \n", + "4 AND 450.35 Andorra Andorre \n", + ".. ... ... ... ... 
\n", + "201 VNM 330364.87 Vietnam Vietnam \n", + "202 ESH 267892.77 Sahara Occidental Sahara occidental \n", + "203 YEM 453741.18 Yemen Yémen \n", + "204 ZMB 753990.33 Zambia Zambie \n", + "205 ZWE 391234.88 Zimbabue Zimbabwe \n", + "\n", + "[206 rows x 6 columns]" + ] + }, + "execution_count": 309, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add translations\n", + "gdf_updated = gdf_updated.merge(df, on='GID_0')\n", + "gdf_updated" + ] + }, + { + "cell_type": "code", + "execution_count": 312, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the file\n", + "gdf_updated.to_file(output_file.as_posix(), driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 313, + "metadata": {}, + "outputs": [], + "source": [ + "# zip data\n", + "make_archive(output_path, zipped_output_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 314, + "metadata": {}, + "outputs": [], + "source": [ + "# load zipped file to GCS\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=zipped_output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -871,7 +1631,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -887,7 +1647,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -917,7 +1677,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -934,7 +1694,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -983,7 +1743,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -996,6 +1756,26 @@ ")" ] }, + { + "cell_type": 
"code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('/home/mambauser/data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.gpkg')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output_file" + ] + }, { "cell_type": "code", "execution_count": 12, diff --git a/data/notebooks/pipes_mock/tiles.ipynb b/data/notebooks/pipes_mock/tiles.ipynb index 328d9f0d..5f17b8d8 100644 --- a/data/notebooks/pipes_mock/tiles.ipynb +++ b/data/notebooks/pipes_mock/tiles.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -130,9 +139,21 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'eez_dir' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m collection_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregions\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# load the EEZ file & the regions file\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m eez_data \u001b[38;5;241m=\u001b[39m gpd\u001b[38;5;241m.\u001b[39mread_file(\u001b[43meez_dir\u001b[49m\u001b[38;5;241m.\u001b[39mget_step_fmt_file_path(prev_step, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshp\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mas_posix())\n\u001b[1;32m 5\u001b[0m regions_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 6\u001b[0m [\n\u001b[1;32m 7\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregion_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregion_iso\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlocation_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: iso}\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m ]\n\u001b[1;32m 11\u001b[0m )\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# merge the two files\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 'eez_dir' is not defined" + ] + } + ], "source": [ "collection_name = \"regions\"\n", "\n", @@ -222,6 +243,116 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Countries - gadm" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/mambauser/data/gadm/processed/gadm_preprocess.zip\n", + "/home/mambauser/data/gadm/processed/preprocess\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Allocating 64 GB of heap memory\n", + "[o] Wrote /home/mambauser/data/gadm/processed/tiles/gadm_tiles.json\n", + "For layer 0, using name \"gadm_tiles\"\n", + "/home/mambauser/data/gadm/processed/tiles/gadm_tiles.json:12: Found ] at top level\n", + "/home/mambauser/data/gadm/processed/tiles/gadm_tiles.json:10: Reached EOF without all containers being closed\n", + "In JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", + "206 features, 181286774 bytes of geometry, 3347 bytes of separate metadata, 9901 bytes of string pool\n", + "Choosing a maxzoom of -z0 for features about 2772178 feet (844960 meters) apart\n", + "Choosing a maxzoom of 
-z10 for resolution of about 317 feet (96 meters) within features\n", + " 99.9% 10/128/306 \n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/mambauser/data/gadm/processed/tiles/gadm_simplified.mbtiles')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"gadm\"\n", + "collection_name = f\"{pipe}_simplified\"\n", + "\n", + "gadm_dir = FileConventionHandler(pipe)\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(gadm_dir, prev_step, mysettings)\n", + "\n", + "# simplify the geometries\n", + "Mapshaper(64).input([gadm_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", + " fields=\",\".join([\"GID_0\", \"COUNTRY\", \"COUNTRY_ES\", \"COUNTRY_FR\", 'area_km2'])\n", + ").output(\n", + " gadm_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", + ").execute()\n", + "\n", + "# Generate the mbtiles\n", + "mbtileGeneration(\n", + " gadm_dir.get_step_fmt_file_path(current_step, \"json\"),\n", + " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", + " True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "upload: ../../data/gadm/processed/tiles/gadm_simplified.mbtiles to s3://tilestream-tilesets-production/69/_pending/6o7ulrkerlm1xi20i3scyy0mc/skytruth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|██████████| 100/100 [03:43<00:00, 2.23s/it]\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uploadToMapbox(\n", + " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", + " collection_name,\n", + " 
mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": { @@ -336,23 +467,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ - "\n", "def split_n_parts(gdf: gpd.GeoDataFrame, folder: Path, n:int) -> None:\n", " \n", " for i in range(n):\n", " path = folder.joinpath(f\"part{i}.shp\")\n", - " gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver=\"ESRI Shapefile\")\n", - "\n", - "\n" + " gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver=\"ESRI Shapefile\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -384,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -396,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -409,428 +537,23 @@ } ], "source": [ - "# Download the EEZ file && unzip it\n", + "# Download the file && unzip it\n", "download_and_unzip_if_needed(source_dir, prev_step, mysettings, \"gpkg\")\n", + "\n", "# split the file in two parts\n", "file = gpd.read_file(source_dir.get_step_fmt_file_path(prev_step, \"gpkg\").as_posix())" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 3%|▎ | 7482/292261 [00:13<00:40, 7027.40it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 4%|▍ | 12182/292261 [00:14<00:57, 4912.45it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▉ | 25865/292261 [00:17<03:58, 1115.08it/s]" - ] - }, - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▉ | 27219/292261 [00:18<10:50, 407.49it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 18%|█▊ | 53181/292261 [00:26<04:40, 852.40it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 27%|██▋ | 77894/292261 [00:33<01:35, 2247.11it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 28%|██▊ | 80952/292261 [00:34<01:00, 3471.37it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 28%|██▊ | 81812/292261 [00:35<04:25, 793.04it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 28%|██▊ | 82393/292261 [00:35<04:41, 744.80it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 29%|██▉ | 84436/292261 [00:36<03:49, 904.50it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 33%|███▎ | 96159/292261 [00:56<06:27, 506.25it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 34%|███▍ | 98927/292261 [00:57<01:41, 1907.31it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - " 34%|███▍ | 100063/292261 [01:01<16:25, 194.94it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 35%|███▌ | 102491/292261 [01:06<09:26, 335.12it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 35%|███▌ | 102932/292261 [01:08<07:54, 398.64it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 37%|███▋ | 107337/292261 [01:15<07:45, 397.14it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 54%|█████▍ | 158758/292261 [01:42<03:06, 716.16it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 56%|█████▌ | 164344/292261 [01:44<01:39, 1289.68it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 66%|██████▋ | 193651/292261 [01:53<01:19, 1237.86it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 68%|██████▊ | 199046/292261 [01:54<01:19, 1171.99it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 70%|██████▉ | 203524/292261 [01:56<03:13, 457.51it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 74%|███████▍ | 217703/292261 [02:01<01:14, 
1003.62it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 80%|███████▉ | 232945/292261 [02:10<02:24, 410.84it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 81%|████████ | 235500/292261 [02:14<04:28, 211.70it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 84%|████████▍ | 246516/292261 [02:19<00:28, 1606.01it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 87%|████████▋ | 255262/292261 [02:22<00:26, 1403.03it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 88%|████████▊ | 257138/292261 [02:22<00:16, 2145.00it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 89%|████████▉ | 261390/292261 [02:25<01:42, 301.68it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 91%|█████████▏| 267318/292261 [02:28<00:18, 1335.53it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 292261/292261 [03:01<00:00, 1611.07it/s]\n" + "100%|██████████| 292261/292261 [03:19<00:00, 1461.41it/s]\n" ] } ], @@ -840,11 +563,31 @@ }, { "cell_type": "code", - "execution_count": 36, + 
"execution_count": 46, "metadata": {}, "outputs": [], "source": [ - "test.to_file(source_dir.get_step_fmt_file_path(current_step, \"shp\").as_posix(), driver=\"ESRI Shapefile\")" + "test.to_file(source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix(), driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/mambauser/data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.shp'" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()" ] }, { @@ -1881,6 +1624,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/src/data_commons/data/iso_map.json b/data/src/data_commons/data/iso_map.json index 258af92f..37eb3868 100644 --- a/data/src/data_commons/data/iso_map.json +++ b/data/src/data_commons/data/iso_map.json @@ -166,5 +166,46 @@ "SA": "Latin America & Caribbean", "AT": "Antarctica", "PO": "Polar", - "WA": "West Asia" + "WA": "West Asia", + "AFG": "Afghanistan", + "AND": "Andorra", + "ARM": "Armenia", + "AUT": "Austria", + "BDI": "Burundi", + "BFA": "Burkina Faso", + "BLR": "Belarus", + "BOL": "Bolivia", + "BTN": "Bhutan", + "BWA": "Botswana", + "CAF": "Central African Republic", + "CHE": "Switzerland", + "CZE": "Czechia", + "ETH": "Ethiopia", + "HUN": "Hungary", + "KGZ": "Kyrgyzstan", + "LAO": "Laos", + "LIE": "Liechtenstein", + "LSO": "Lesotho", + "LUX": "Luxembourg", + "MDA": "Moldova", + "MKD": "North Macedonia", + "MLI": "Mali", + "MNG": "Mongolia", + "MWI": "Malawi", + "NER": "Niger", + "NPL": 
"Nepal", + "PRY": "Paraguay", + "RWA": "Rwanda", + "SMR": "San Marino", + "SRB": "Serbia", + "SSD": "South Sudan", + "SVK": "Slovakia", + "SWZ": "Eswatini", + "TCD": "Chad", + "TJK": "Tajikistan", + "UGA": "Uganda", + "UZB": "Uzbekistan", + "VAT": "Vatican City", + "ZMB": "Zambia", + "ZWE": "Zimbabwe" } From d5bdc342c269f75dc46b74c15a2ab71820e15c61 Mon Sep 17 00:00:00 2001 From: sofia Date: Thu, 12 Sep 2024 09:53:05 +0200 Subject: [PATCH 04/16] add coverage stats for terrestrial --- data/notebooks/pipes_mock/precalc.ipynb | 386 ++++++++++++++++++++++-- 1 file changed, 363 insertions(+), 23 deletions(-) diff --git a/data/notebooks/pipes_mock/precalc.ipynb b/data/notebooks/pipes_mock/precalc.ipynb index 14ba8db8..ce88b5a2 100644 --- a/data/notebooks/pipes_mock/precalc.ipynb +++ b/data/notebooks/pipes_mock/precalc.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -93,20 +93,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Strapi setup\n", "strapi = Strapi(url=mysettings.STRAPI_URL)\n", @@ -115,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -132,16 +121,133 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + "/home/mambauser/data/eez/processed/eez_preprocess.zip\n", + 
"/home/mambauser/data/eez/processed/preprocess\n" ] }, + { + "data": { + "text/plain": [ + "PosixPath('/home/mambauser/data/eez/processed/preprocess')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"eez\"\n", + "strapi_collection = \"\"\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"locations.json\")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "locations = (\n", + " gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", + " .pipe(add_envelope)\n", + " .pipe(add_location_iso)\n", + " .pipe(expand_multiple_locations)\n", + " .pipe(add_region_iso,'iso')\n", + " .pipe(calculate_eez_area)\n", + " .pipe(add_bbox)\n", + " .pipe(add_groups_and_members)\n", + " .pipe(add_location_name)\n", + " .rename(\n", + " columns={\n", + " \"iso\": \"code\",\n", + " \"AREA_KM2\": \"totalMarineArea\",\n", + " \"location_type\": \"type\",\n", + " }\n", + " )\n", + ")\n", + "\n", + "locations.drop(\n", + " columns=list(\n", + " set(locations.columns) - \n", + " set([\"code\", \"name\", \"totalMarineArea\", \"type\", \"groups\", \"bounds\", \"id\"])),\n", + " inplace=True,\n", + ")\n", + "\n", + "output_locations = {\n", + " \"version\": 2,\n", + " \"data\": {\n", + " \"api::location.location\": LocationSchema(pd.DataFrame(locations)).to_dict(\n", + " orient=\"index\"\n", + " )\n", + " },\n", + "}\n", + "with open(output_file, \"w\") as f:\n", + " json.dump(output_locations, f)\n", + "\n", + "del output_locations" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "##\n", + "(locations[['id', 'code']]\n", + " .to_csv(pipe_dir.get_processed_step_path(current_step)\n", + " 
.joinpath('locations_code.csv'), index=False))\n", + "\n", + "del locations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "strapi.importCollectionData(\n", + " strapi_collection,\n", + " output_file,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Locations \n", + "### eez + regions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", @@ -156,9 +262,8 @@ "PosixPath('/home/mambauser/data/eez/processed/preprocess')" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ @@ -174,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -220,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1519,6 +1624,241 @@ " mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", " )" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coverage stats - Tpas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", + "The steps are:\n", + "1. Load eez\n", + "2. Spatial inner Join the eez dataset with the Mpas one\n", + "3. Assign the location iso\n", + "4. dissolve by location iso and cummulative year\n", + "5. calculate the area for global regions and eez countries\n", + "6. prepare the data to be ingested in strapi\n", + "7. 
upload the data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/mambauser/data/gadm/processed/gadm_preprocess.zip\n", + "/home/mambauser/data/gadm/processed/preprocess\n", + "/home/mambauser/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", + "/home/mambauser/data/mpa-terrestrial/processed/preprocess\n" + ] + } + ], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "strapi_collection = \"\"\n", + "\n", + "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", + "pipe_dir_pas = FileConventionHandler(pipe)\n", + "output_file = pipe_dir_pas.get_processed_step_path(current_step).joinpath(\n", + " \"pa_landmask_strapi.csv\"\n", + ")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)\n", + "# Download the mpas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_pas, prev_step, mysettings)\n", + "\n", + "# Load the data\n", + "gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "pas = gpd.read_file(pipe_dir_pas.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 27%|██▋ | 69/254 [00:04<00:14, 12.62it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 77%|███████▋ | 195/254 [01:11<01:13, 1.24s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████▉| 253/254 [3:08:04<32:24, 1944.09s/it]" + ] + } + ], + "source": [ + "gadm_pas_data_join = await spatial_join(gadm, 
pas.pipe(filter_by_exluding_propossed_mpas))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyogrio._io:Created 17,697 records\n" + ] + } + ], + "source": [ + "# To get an idea of the spatial join results\n", + "eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3).to_file(\n", + " pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\"mpas_sjoin.shp\"), driver=\"ESRI Shapefile\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 14/14 [03:59<00:00, 17.14s/it]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████▉| 281/282 [00:19<00:02, 2.21s/it]" + ] + } + ], + "source": [ + "final_data = await process_mpa_data(\n", + " eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", + " range(2011, time.localtime().tm_year + 1),\n", + " [\"PA_DEF\", \"iso_3\"],\n", + " {\"protectedAreasCount\": \"sum\"},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "coverage = (\n", + " final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", + " .pipe(separate_parent_iso, \"iso_3\")\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .replace(\n", + " {\n", + " \"iso_3\": {\n", + " \"ATA\": \"ABNJ\",\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " \"GIB\": \"GBR\",\n", + " }\n", + " }\n", + " )\n", + " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\")\n", + " .pipe(coverage_stats)\n", + ")\n", + "\n", + "ProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output,\n", + " \"iso_3\",\n", + " 
{\"PA_DEF\": {\"0\": 2, \"1\": 1}},\n", + " {\"PARENT_NAME\": \"location\", \"PA_DEF\": \"protection_status\"},\n", + " [\"area\", \"iso_3\"],\n", + " )\n", + ").to_csv(\n", + " output_file,\n", + " index=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "strapi_collection = \"protection-coverage-stat\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "strapi.deleteCollectionData(strapi_collection, list(range(1, 2300)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "strapi.importCollectionData(\n", + " strapi_collection,\n", + " output_file,\n", + ")" + ] } ], "metadata": { From 5adffd60522577ee6e12d1be8582e773978786c0 Mon Sep 17 00:00:00 2001 From: Alicia Date: Fri, 13 Sep 2024 22:17:36 +0200 Subject: [PATCH 05/16] added processing improvements to terrestrial country summary coverage stats --- data/notebooks/pipes_mock/intermediate.ipynb | 23 +- data/notebooks/pipes_mock/precalc.ipynb | 566 ++++++++++++++----- data/notebooks/pipes_mock/tiles.ipynb | 462 +++++++++++++-- data/notebooks/test_bygrid.ipynb | 529 +++++++++++++++++ data/src/pipelines/processors.py | 50 +- 5 files changed, 1426 insertions(+), 204 deletions(-) create mode 100644 data/notebooks/test_bygrid.ipynb diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 00713648..5632d575 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -63,6 +63,7 @@ " filter_by_terrestrial,\n", " transform_points,\n", " clean_geometries,\n", + " simplify_async,\n", ")" ] }, @@ -1031,6 +1032,15 @@ "gdf_updated" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "final_gadm = await simplify_async(gdf_updated)" + ] + }, { "cell_type": "code", "execution_count": 312, @@ -1038,7 +1048,7 @@ "outputs": [], "source": [ "# Save the file\n", - "gdf_updated.to_file(output_file.as_posix(), driver=\"ESRI Shapefile\")" + "final_gadm.to_file(output_file.as_posix(), driver=\"ESRI Shapefile\")" ] }, { @@ -1741,6 +1751,15 @@ "gdf[\"WDPAID\"] = pd.to_numeric(gdf[\"WDPAID\"], downcast=\"integer\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "final_wdpa_terrestrial = await simplify_async(gdf)" + ] + }, { "cell_type": "code", "execution_count": 11, @@ -1748,7 +1767,7 @@ "outputs": [], "source": [ "# save data & zip it\n", - "gdf.to_file(\n", + "final_wdpa_terrestrial.to_file(\n", " filename=output_file,\n", " driver=\"GPKG\",\n", " layer=\"name\",\n", diff --git a/data/notebooks/pipes_mock/precalc.ipynb b/data/notebooks/pipes_mock/precalc.ipynb index ce88b5a2..d1fbef2e 100644 --- a/data/notebooks/pipes_mock/precalc.ipynb +++ b/data/notebooks/pipes_mock/precalc.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -72,6 +72,8 @@ " add_child_parent_relationship,\n", " columns_to_lower,\n", " extract_wdpaid_mpaatlas,\n", + " simplify_async,\n", + " process_tpa_data,\n", ")\n", "\n", "logging.basicConfig(level=logging.DEBUG)\n", @@ -82,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -104,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -235,129 +237,6 @@ ")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Locations \n", - "### eez + regions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "/home/mambauser/data/eez/processed/eez_preprocess.zip\n", - "/home/mambauser/data/eez/processed/preprocess\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/mambauser/data/eez/processed/preprocess')" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pipe = \"eez\"\n", - "strapi_collection = \"\"\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "\n", - "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"locations.json\")\n", - "\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "locations = (\n", - " gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", - " .pipe(add_envelope)\n", - " .pipe(add_location_iso)\n", - " .pipe(expand_multiple_locations)\n", - " .pipe(add_region_iso,'iso')\n", - " .pipe(calculate_eez_area)\n", - " .pipe(add_bbox)\n", - " .pipe(add_groups_and_members)\n", - " .pipe(add_location_name)\n", - " .rename(\n", - " columns={\n", - " \"iso\": \"code\",\n", - " \"AREA_KM2\": \"totalMarineArea\",\n", - " \"location_type\": \"type\",\n", - " }\n", - " )\n", - ")\n", - "\n", - "locations.drop(\n", - " columns=list(\n", - " set(locations.columns) - \n", - " set([\"code\", \"name\", \"totalMarineArea\", \"type\", \"groups\", \"bounds\", \"id\"])),\n", - " inplace=True,\n", - ")\n", - "\n", - "output_locations = {\n", - " \"version\": 2,\n", - " \"data\": {\n", - " \"api::location.location\": LocationSchema(pd.DataFrame(locations)).to_dict(\n", - " orient=\"index\"\n", - " )\n", - " },\n", - "}\n", - "with open(output_file, \"w\") as f:\n", - " json.dump(output_locations, f)\n", - "\n", - "del output_locations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "##\n", - "(locations[['id', 
'code']]\n", - " .to_csv(pipe_dir.get_processed_step_path(current_step)\n", - " .joinpath('locations_code.csv'), index=False))\n", - "\n", - "del locations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "strapi.importCollectionData(\n", - " strapi_collection,\n", - " output_file,\n", - ")" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1649,7 +1528,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -1685,14 +1564,52 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 206/206 [02:20<00:00, 1.47it/s]\n" + ] + } + ], + "source": [ + "gadm_simp = await simplify_async(gadm)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 27%|██▋ | 69/254 [00:04<00:14, 12.62it/s]" + "100%|██████████| 206/206 [7:47:52<00:00, 136.27s/it] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 16%|█▌ | 32/206 [00:04<00:32, 5.29it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 34%|███▍ | 71/206 [00:18<01:00, 2.24it/s]" ] }, { @@ -1706,7 +1623,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 77%|███████▋ | 195/254 [01:11<01:13, 1.24s/it]" + " 73%|███████▎ | 150/206 [03:14<07:39, 8.21s/it]" ] }, { @@ -1720,57 +1637,412 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████▉| 253/254 [3:08:04<32:24, 1944.09s/it]" + "100%|██████████| 206/206 [7:47:52<00:00, 5051.81s/it] " ] 
} ], "source": [ - "gadm_pas_data_join = await spatial_join(gadm, pas.pipe(filter_by_exluding_propossed_mpas))" + "gadm_pas_data_join = await spatial_join(gadm_simp, pas.pipe(filter_by_exluding_propossed_mpas))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOgeometryindex_rightCOUNTRYGID_0area_km2COUNTRY_ESCOUNTRY_FR
017131.0171311HamounProtected AreaV03022.952813Designated1968.0IRNPOLYGON ((61.19578 31.44834, 61.36976 31.42576...0.0AfghanistanAFG644050.28AfganistánAfghanistan
117160.0171601Hamun-e-Puzak, south endRamsar Site, Wetland of International ImportanceNot Reported0172.551965Designated1975.0IRNPOLYGON ((61.75037 31.33179, 61.72363 31.35944...0.0AfghanistanAFG644050.28AfganistánAfghanistan
21118.011181Dasht-i-NawarWaterfowl SanctuaryIV0375.359815Designated2020.0AFGPOLYGON ((67.76059 33.78497, 67.75989 33.78315...0.0AfghanistanAFG644050.28AfganistánAfghanistan
315133.0151331Kol-i-Hashmat KhanWaterfowl SanctuaryIV01.665554Designated2017.0AFGMULTIPOLYGON (((69.20214 34.49681, 69.2028 34....0.0AfghanistanAFG644050.28AfganistánAfghanistan
4555705308.05557053081Koh-e Baba (Shah Foladi)Protected LandscapeV0341.997539Designated2019.0AFGPOLYGON ((67.99935 34.66263, 67.9932 34.66009,...0.0AfghanistanAFG644050.28AfganistánAfghanistan
.........................................................
2893472531.025311Mana PoolsNational ParkII02134.271397Designated1975.0ZWEPOLYGON ((29.56479 -15.6758, 29.56611 -15.6757...0.0ZimbabweZWE391234.88ZimbabueZimbabwe
2893482526.025261SapiSafari AreaVI01200.644367Designated1975.0ZWEPOLYGON ((29.88011 -15.67272, 29.87637 -15.679...0.0ZimbabweZWE391234.88ZimbabueZimbabwe
28934962095.0620951ChiawaGame Management AreaVI02413.162703Designated1989.0ZMBMULTIPOLYGON (((29.10233 -15.86402, 29.09482 -...0.0ZimbabweZWE391234.88ZimbabueZimbabwe
2893507962.079621Lower ZambeziNational ParkII04161.873753Designated1983.0ZMBMULTIPOLYGON (((30.2016 -15.65147, 30.20137 -1...0.0ZimbabweZWE391234.88ZimbabueZimbabwe
289351303859.03038591RufunsaGame Management AreaVI03282.220906Designated1980.0ZMBPOLYGON ((30.41517 -15.61634, 30.41688 -15.616...0.0ZimbabweZWE391234.88ZimbabueZimbabwe
\n", + "

289352 rows × 18 columns

\n", + "
" + ], + "text/plain": [ + " WDPAID WDPA_PID PA_DEF NAME \\\n", + "0 17131.0 17131 1 Hamoun \n", + "1 17160.0 17160 1 Hamun-e-Puzak, south end \n", + "2 1118.0 1118 1 Dasht-i-Nawar \n", + "3 15133.0 15133 1 Kol-i-Hashmat Khan \n", + "4 555705308.0 555705308 1 Koh-e Baba (Shah Foladi) \n", + "... ... ... ... ... \n", + "289347 2531.0 2531 1 Mana Pools \n", + "289348 2526.0 2526 1 Sapi \n", + "289349 62095.0 62095 1 Chiawa \n", + "289350 7962.0 7962 1 Lower Zambezi \n", + "289351 303859.0 303859 1 Rufunsa \n", + "\n", + " DESIG_ENG IUCN_CAT MARINE \\\n", + "0 Protected Area V 0 \n", + "1 Ramsar Site, Wetland of International Importance Not Reported 0 \n", + "2 Waterfowl Sanctuary IV 0 \n", + "3 Waterfowl Sanctuary IV 0 \n", + "4 Protected Landscape V 0 \n", + "... ... ... ... \n", + "289347 National Park II 0 \n", + "289348 Safari Area VI 0 \n", + "289349 Game Management Area VI 0 \n", + "289350 National Park II 0 \n", + "289351 Game Management Area VI 0 \n", + "\n", + " GIS_AREA STATUS STATUS_YR PARENT_ISO \\\n", + "0 3022.952813 Designated 1968.0 IRN \n", + "1 172.551965 Designated 1975.0 IRN \n", + "2 375.359815 Designated 2020.0 AFG \n", + "3 1.665554 Designated 2017.0 AFG \n", + "4 341.997539 Designated 2019.0 AFG \n", + "... ... ... ... ... \n", + "289347 2134.271397 Designated 1975.0 ZWE \n", + "289348 1200.644367 Designated 1975.0 ZWE \n", + "289349 2413.162703 Designated 1989.0 ZMB \n", + "289350 4161.873753 Designated 1983.0 ZMB \n", + "289351 3282.220906 Designated 1980.0 ZMB \n", + "\n", + " geometry index_right \\\n", + "0 POLYGON ((61.19578 31.44834, 61.36976 31.42576... 0.0 \n", + "1 POLYGON ((61.75037 31.33179, 61.72363 31.35944... 0.0 \n", + "2 POLYGON ((67.76059 33.78497, 67.75989 33.78315... 0.0 \n", + "3 MULTIPOLYGON (((69.20214 34.49681, 69.2028 34.... 0.0 \n", + "4 POLYGON ((67.99935 34.66263, 67.9932 34.66009,... 0.0 \n", + "... ... ... \n", + "289347 POLYGON ((29.56479 -15.6758, 29.56611 -15.6757... 
0.0 \n", + "289348 POLYGON ((29.88011 -15.67272, 29.87637 -15.679... 0.0 \n", + "289349 MULTIPOLYGON (((29.10233 -15.86402, 29.09482 -... 0.0 \n", + "289350 MULTIPOLYGON (((30.2016 -15.65147, 30.20137 -1... 0.0 \n", + "289351 POLYGON ((30.41517 -15.61634, 30.41688 -15.616... 0.0 \n", + "\n", + " COUNTRY GID_0 area_km2 COUNTRY_ES COUNTRY_FR \n", + "0 Afghanistan AFG 644050.28 Afganistán Afghanistan \n", + "1 Afghanistan AFG 644050.28 Afganistán Afghanistan \n", + "2 Afghanistan AFG 644050.28 Afganistán Afghanistan \n", + "3 Afghanistan AFG 644050.28 Afganistán Afghanistan \n", + "4 Afghanistan AFG 644050.28 Afganistán Afghanistan \n", + "... ... ... ... ... ... \n", + "289347 Zimbabwe ZWE 391234.88 Zimbabue Zimbabwe \n", + "289348 Zimbabwe ZWE 391234.88 Zimbabue Zimbabwe \n", + "289349 Zimbabwe ZWE 391234.88 Zimbabue Zimbabwe \n", + "289350 Zimbabwe ZWE 391234.88 Zimbabue Zimbabwe \n", + "289351 Zimbabwe ZWE 391234.88 Zimbabue Zimbabwe \n", + "\n", + "[289352 rows x 18 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gadm_pas_data_join" + ] + }, + { + "cell_type": "code", + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:pyogrio._io:Created 17,697 records\n" + "INFO:pyogrio._io:Created 289,352 records\n" ] } ], "source": [ "# To get an idea of the spatial join results\n", - "eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3).to_file(\n", - " pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\"mpas_sjoin.shp\"), driver=\"ESRI Shapefile\"\n", + "gadm_pas_data_join.pipe(assign_iso3, False).to_file(\n", + " pipe_dir_pas.get_processed_step_path(current_step).joinpath(\"mpas_sjoin.shp\"),\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 
14/14 [03:59<00:00, 17.14s/it]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████▉| 281/282 [00:19<00:02, 2.21s/it]" + " 0%| | 0/14 [00:00 gpd.GeoDataFrame:\n", - " try:\n", - " return repair_geometry(geometry.simplify(tlrc))\n", - " except Exception as e:\n", - " print(e)\n", - " return geometry\n", - " finally:\n", - " pbar.update(1)\n", - "\n", - "\n", - "async def simplify_async(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", - " with tqdm(total=gdf.shape[0]) as pbar:\n", - " gdf[\"geometry\"] = await asyncio.gather(\n", - " *(simplify(val, pbar) for val in gdf[\"geometry\"])\n", - " )\n", - " return gdf" - ] - }, - { - "cell_type": "code", - "execution_count": 32, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -524,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -546,14 +509,407 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 292261/292261 [03:19<00:00, 1461.41it/s]\n" + " 2%|▏ | 6511/292261 [00:12<00:35, 8148.33it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 4%|▍ | 12019/292261 [00:13<01:04, 4363.45it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 9%|▉ | 26702/292261 [00:17<01:37, 2722.22it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 9%|▉ | 27206/292261 [00:17<04:30, 980.59it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 18%|█▊ | 53263/292261 
[00:25<06:31, 609.87it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 27%|██▋ | 77713/292261 [00:32<00:56, 3799.17it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 28%|██▊ | 81780/292261 [00:34<03:11, 1097.17it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 28%|██▊ | 82483/292261 [00:34<01:27, 2385.63it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 29%|██▉ | 84671/292261 [00:35<02:11, 1580.33it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 33%|███▎ | 96205/292261 [00:53<07:36, 429.82it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 34%|███▍ | 99156/292261 [00:55<04:11, 766.30it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 34%|███▍ | 100110/292261 [00:58<05:36, 571.85it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 35%|███▌ | 102473/292261 [01:02<12:39, 249.94it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 35%|███▌ | 102982/292261 [01:04<06:56, 454.71it/s]" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 37%|███▋ | 107131/292261 [01:11<04:11, 736.67it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 54%|█████▍ | 158622/292261 [01:38<02:26, 911.79it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 56%|█████▌ | 164270/292261 [01:40<01:53, 1125.52it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 66%|██████▋ | 193634/292261 [01:48<02:32, 647.47it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 68%|██████▊ | 198829/292261 [01:50<02:00, 777.87it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 70%|██████▉ | 203548/292261 [01:51<02:10, 682.27it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 75%|███████▍ | 217768/292261 [01:56<00:57, 1295.56it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 80%|███████▉ | 232966/292261 [02:05<02:51, 346.33it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 81%|████████ | 236337/292261 [02:09<00:42, 1311.65it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 84%|████████▍ | 246533/292261 [02:13<00:31, 1436.44it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 87%|████████▋ | 255326/292261 [02:16<00:39, 943.14it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 88%|████████▊ | 257145/292261 [02:16<00:19, 1804.46it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 89%|████████▉ | 261395/292261 [02:18<01:41, 304.37it/s] " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 91%|█████████▏| 267248/292261 [02:22<00:20, 1225.90it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 292261/292261 [02:57<00:00, 1642.99it/s]\n" ] } ], @@ -563,7 +919,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1428,7 +1784,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Protected seas" ] diff --git a/data/notebooks/test_bygrid.ipynb b/data/notebooks/test_bygrid.ipynb new file mode 100644 index 00000000..4a274e53 --- /dev/null +++ b/data/notebooks/test_bygrid.ipynb @@ -0,0 +1,529 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from typing import Tuple, List\n", + "import sys\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import asyncio\n", + "from tqdm.asyncio import tqdm\n", + "from itertools import product\n", + "from shapely.geometry import box\n", + "\n", + "\n", + "scripts_dir = Path(\"..\").joinpath(\"src\")\n", + "if scripts_dir not in sys.path:\n", + " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", + "\n", + "from pipelines.utils import background\n", + "from pipelines.processors import calculate_area, get_matches, repair_geometry, arrange_dimensions, clean_geometries, simplify_async" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "logging.basicConfig(level=logging.DEBUG)\n", + "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"fiona\").setLevel(logging.WARNING)\n", + "logger = logging.getLogger(\"notebook\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def split_by_year(\n", + " gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", + ") -> List[gpd.GeoDataFrame]:\n", + " \"\"\"Split data by year. 
relevant for MPA data.(coverage indicator)\"\"\"\n", + " prior_2010 = (\n", + " gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + " .dissolve(\n", + " by=[\"iso_3\"],\n", + " )\n", + " .assign(year=2010)\n", + " .reset_index()\n", + " )\n", + "\n", + " after_2010 = (\n", + " gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + " .rename(columns={\"STATUS_YR\": \"year\"})\n", + " )\n", + " return [prior_2010, after_2010]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", + " \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", + " minx, miny, maxx, maxy = bounds\n", + " x = np.arange(minx, maxx, cell_size)\n", + " y = np.arange(miny, maxy, cell_size)\n", + " polygons = [\n", + " {\n", + " \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", + " \"cell_id\": f\"{i}_{j}\",\n", + " }\n", + " for i, j in product(x, y)\n", + " ]\n", + " return gpd.GeoDataFrame(polygons)\n", + "\n", + "\n", + "def subdivide_grid(\n", + " grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", + ") -> List:\n", + " subdivided_elements = []\n", + " for grid_element in grid_gdf.geometry:\n", + " candidates = get_matches(grid_element, gdf)\n", + " density = len(candidates)\n", + " if density > max_complexity:\n", + " \n", + " subdivision_cellsize = max_cellsize / 2\n", + " # Subdivide the grid element recursively\n", + " subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", + " subdivided_elements.extend(\n", + " subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity)\n", + " )\n", + " elif density > 0:\n", + " subdivided_elements.append(grid_element)\n", + "\n", + " return subdivided_elements\n", + "\n", + "\n", + "def create_density_based_grid(\n", + " gdf: gpd.GeoDataFrame, max_cellsize: int 
= 10, max_complexity: int = 10000\n", + ") -> gpd.GeoDataFrame:\n", + " # Get the bounds of the GeoDataFrame\n", + " minx, miny, maxx, maxy = gdf.total_bounds\n", + "\n", + " # Create an initial grid\n", + " grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", + "\n", + " # Subdivide grid elements based on density and complexity\n", + " subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", + "\n", + " return gpd.GeoDataFrame(geometry=subdivided_elements)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: refactor this so old function mantains functionality for marine areas\n", + "\n", + "def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", + " result = []\n", + " gdf[\"already_processed\"] = False\n", + " for geometry in grid_gdf.geometry:\n", + " candidates = get_matches(geometry, gdf)\n", + " subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", + " gdf.loc[subset.index, \"already_processed\"] = True\n", + " if not subset.empty:\n", + " result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", + " return result\n", + "\n", + "\n", + "@background\n", + "def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", + " try:\n", + " bbox = df_large_chunk.total_bounds\n", + "\n", + " candidates = get_matches(box(*bbox), df_small.geometry)\n", + " if len(candidates) > 0:\n", + " subset = df_small.loc[candidates.index].clip(box(*bbox))\n", + "\n", + " result = (\n", + " df_large_chunk.sjoin(subset, how=\"inner\")\n", + " .clip(subset.geometry)\n", + " .reset_index(drop=True)\n", + " )\n", + " result.geometry = result.geometry.apply(repair_geometry)\n", + " else:\n", + " result = gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", + " return result\n", + " except Exception as e:\n", + " logging.error(e)\n", + " return gpd.GeoDataFrame()\n", + " finally:\n", + " pbar.update(1)\n", + 
"\n", + "\n", + "async def spatial_join(\n", + " geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"Create spatial join between two GeoDataFrames.\"\"\"\n", + " # we build the spatial index for the larger GeoDataFrame\n", + " smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", + "\n", + " logger.info(f\"Processing {len(larger_dim)} elements\")\n", + "\n", + " grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", + "\n", + " logger.info(f\"grid created with {len(grid)} cells\")\n", + "\n", + " list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", + "\n", + " logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", + "\n", + " with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", + " new_df = await asyncio.gather(\n", + " *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", + " )\n", + "\n", + " return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", + "\n", + "\n", + "@background\n", + "def spatial_dissolve_chunk(geometry, gdf, pbar):\n", + "\n", + " try:\n", + " candidates = get_matches(\n", + " geometry,\n", + " gdf.geometry,\n", + " )\n", + " subset = gdf.loc[candidates.index]\n", + "\n", + " result = pd.concat(\n", + " subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", + " ).copy()\n", + "\n", + " data_chunk = [\n", + " (\n", + " result[result[\"year\"] <= 2010]\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + " ]\n", + " for year in range(2011, 2025):\n", + " data_chunk.append(\n", + " result[result[\"year\"] <= year]\n", + " .dissolve(\n", + " by=[\"iso_3\"],\n", + " )\n", + " .assign(year=year)\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + "\n", + " return 
pd.concat(data_chunk, ignore_index=True)\n", + " except Exception as e:\n", + " logging.error(e)\n", + " return gpd.GeoDataFrame()\n", + " finally:\n", + " pbar.update(1)\n", + "\n", + "async def process_grid(gdf):\n", + " grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", + " with tqdm(total=grid_gdf.shape[0]) as pbar:\n", + " pbar = tqdm(total=len(grid_gdf), desc=\"Processing grid elements\")\n", + " result = await asyncio.gather(*[spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values])\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gadm = gpd.read_file(\"../data/gadm/processed/preprocess/gadm_preprocess.shp\").pipe(clean_geometries)\n", + "wdpa = gpd.read_file(\n", + " \"../data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.shp\"\n", + ").pipe(clean_geometries)\n", + "gadm.sindex\n", + "wdpa.sindex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wdpa_subset = wdpa[\n", + " ~(\n", + " (wdpa.bounds.minx < -181)\n", + " | (wdpa.bounds.miny < -91)\n", + " | (wdpa.bounds.maxx > 181)\n", + " | (wdpa.bounds.maxy > 91)\n", + " )\n", + "].reset_index(drop=True)\n", + "\n", + "gadm_sync = await simplify_async(gadm)\n", + "sjoin_gdf = await spatial_join(wdpa_subset, gadm_sync)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test that we have not produce duplicates\n", + "sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"GID_0\"], keep=False)].sort_values(\n", + " \"WDPA_PID\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = await process_grid(sjoin_gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + 
"result_oecms = (\n", + " sjoin_gdf.groupby([\"iso_3\", \"PA_DEF\"])\n", + " .agg({\"PA_DEF\": \"count\"})\n", + " .rename(columns={\"PA_DEF\": \"count\"})\n", + " .reset_index()\n", + " .pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\")\n", + " .fillna(0)\n", + " .reset_index()\n", + " .rename(columns={\"0\": \"oecm\", \"1\": \"pa\"})\n", + ")\n", + "# ).reset_index().pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\").reset_index(names=[\"PA_DEF\"], level=0, drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "result_oecms[\"oecm_perc\"] = result_oecms[\"oecm\"] / (result_oecms[\"oecm\"] + result_oecms[\"pa\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PA_DEFiso_3oecmpaoecm_perc
180USA0.050674.00.000000
161SWE0.030813.00.000000
44DEU0.023703.00.000000
55EST0.020579.00.000000
57FIN0.018427.00.000000
29CAN2.012566.00.000159
61GBR0.011712.00.000000
9AUS0.011154.00.000000
30CHE0.010632.00.000000
130NZL0.010205.00.000000
\n", + "
" + ], + "text/plain": [ + "PA_DEF iso_3 oecm pa oecm_perc\n", + "180 USA 0.0 50674.0 0.000000\n", + "161 SWE 0.0 30813.0 0.000000\n", + "44 DEU 0.0 23703.0 0.000000\n", + "55 EST 0.0 20579.0 0.000000\n", + "57 FIN 0.0 18427.0 0.000000\n", + "29 CAN 2.0 12566.0 0.000159\n", + "61 GBR 0.0 11712.0 0.000000\n", + "9 AUS 0.0 11154.0 0.000000\n", + "30 CHE 0.0 10632.0 0.000000\n", + "130 NZL 0.0 10205.0 0.000000" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_oecms.sort_values(\"pa\", ascending=False).head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "result_area = pd.concat(data)[['iso_3', 'year', 'area']].groupby(['iso_3', 'year']).sum().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [], + "source": [ + "result = result_area.merge(result_oecms, on=\"iso_3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'result' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Todo: this needs to be merged with the marine data and validated with the pandera model\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mresult\u001b[49m\n", + "\u001b[0;31mNameError\u001b[0m: name 'result' is not defined" + ] + } + ], + "source": [ + "# Todo: this needs to be merged with the marine data and validated with the pandera model\n", + "result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/src/pipelines/processors.py b/data/src/pipelines/processors.py index 31cc37bf..2d1db64e 100644 --- a/data/src/pipelines/processors.py +++ b/data/src/pipelines/processors.py @@ -162,10 +162,12 @@ def get_parent_iso(country): return df.assign(iso=df.country.apply(get_parent_iso)) -def assign_iso3(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame: +def assign_iso3( + df: pd.DataFrame | gpd.GeoDataFrame, marine: bool = True +) -> pd.DataFrame | gpd.GeoDataFrame: """Assign ISO3 code. specific for Mpa data""" - def set_iso3(row): + def set_iso3_marine(row): """relevant for MPA data.""" return ( row["PARENT_ISO"] @@ -177,7 +179,13 @@ def set_iso3(row): else row["iso"] ) - return df.assign(iso_3=df.apply(lambda row: set_iso3(row), axis=1)) + def set_iso3_terrestrial(row): + """relevant for MPA data.""" + return row["GID_0"] + + assign_func = set_iso3_marine if marine else set_iso3_terrestrial + + return df.assign(iso_3=df.apply(lambda row: assign_func(row), axis=1)) def add_location_iso( @@ -383,6 +391,23 @@ def spatial_dissolve_chunk(i, gdf, pbar, _by, _aggfunc): return result +@background +def simplify(geometry, pbar, tlrc=0.0001) -> gpd.GeoDataFrame: + try: + return repair_geometry(geometry.simplify(tlrc)) + except Exception as e: + print(e) + return geometry + finally: + pbar.update(1) + + +async def simplify_async(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + with tqdm(total=gdf.shape[0]) as pbar: + gdf["geometry"] = await asyncio.gather(*(simplify(val, pbar) for val in gdf["geometry"])) + return gdf + + ## Calculations @@ -565,6 +590,25 @@ async def process_mpa_data( ) +async def process_tpa_data( + gdf: gpd.GeoDataFrame, loop: list[int], by: list[str], aggfunc: dict +) -> 
pd.DataFrame: + """process protected planet data. relevant for acc coverage extent by year indicator.""" + # we split the data by =< year so we can acumulate the coverage + base = split_by_year(gdf) + + result_to_iter = pd.concat(base, ignore_index=True).copy() + + with tqdm(total=len(loop)) as pbar: # we create a progress bar + new_df = await asyncio.gather( + *(spatial_dissolve_chunk(year, result_to_iter, pbar, by, aggfunc) for year in loop) + ) + return pd.concat( + [base[0].pipe(calculate_area, "area", None).drop(columns=["geometry"]), *new_df], + ignore_index=True, + ) + + def process_mpaatlas_data(gdf: gpd.GeoDataFrame) -> pd.DataFrame: return ( gdf.dissolve(by=["protecti_1", "location_i"], aggfunc={"name": "count"}) From 0c1b8b5bf1d47fda92a00afbbdc7c07ac4bb2b35 Mon Sep 17 00:00:00 2001 From: sofia Date: Mon, 16 Sep 2024 15:40:29 +0200 Subject: [PATCH 06/16] add locations notebook to account for terrestrial and marine --- data/notebooks/pipes_mock/locations.ipynb | 368 ++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 data/notebooks/pipes_mock/locations.ipynb diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb new file mode 100644 index 00000000..07e7e131 --- /dev/null +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -0,0 +1,368 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "from pathlib import Path\n", + "import time\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import json\n", + "\n", + "scripts_dir = Path(\"../../\").joinpath(\"src\")\n", + "if scripts_dir not in sys.path:\n", + " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", + "\n", + "from helpers.strapi 
import Strapi\n", + "from helpers.settings import get_settings, Settings\n", + "from helpers.file_handler import FileConventionHandler\n", + "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", + "\n", + "from pipelines.output_schemas import (\n", + " FPLSchema,\n", + " ProtectionLevelSchema,\n", + " MPAsSchema,\n", + " HabitatsSchema,\n", + " LocationSchema,\n", + " ProtectedAreaExtentSchema,\n", + ")\n", + "from pipelines.processors import (\n", + " add_envelope,\n", + " add_location_iso,\n", + " expand_multiple_locations,\n", + " add_region_iso,\n", + " calculate_eez_area,\n", + " add_bbox,\n", + " add_groups_and_members,\n", + " add_location_name,\n", + " output,\n", + " clean_geometries,\n", + " filter_by_exluding_propossed_mpas,\n", + " spatial_join,\n", + " process_mpa_data,\n", + " assign_iso3,\n", + " calculate_global_area,\n", + " separate_parent_iso,\n", + " calculate_stats_cov,\n", + " coverage_stats,\n", + " mpaatlas_filter_stablishment,\n", + " process_mpaatlas_data,\n", + " calculate_stats,\n", + " fix_monaco,\n", + " batch_export,\n", + " calculate_area,\n", + " define_is_child,\n", + " set_child_id,\n", + " add_child_parent_relationship,\n", + " columns_to_lower,\n", + " extract_wdpaid_mpaatlas,\n", + ")\n", + "\n", + "logging.basicConfig(level=logging.DEBUG)\n", + "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"fiona\").setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mysettings = get_settings()\n", + "prev_step = \"preprocess\"\n", + "current_step = \"stats\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipe_eez = \"eez\"\n", + "pipe_eez_dir = FileConventionHandler(pipe_eez)\n", + "pipe_gadm = \"gadm\"\n", + "pipe_gadm_dir = FileConventionHandler(pipe_gadm)\n", + 
"\n", + "output_file = pipe_gadm_dir.get_processed_step_path(current_step).joinpath(\"locations.json\")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_eez_dir, prev_step, mysettings)\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_gadm_dir, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Union, List\n", + "import pandera as pa\n", + "from pandera.typing import Index, Series\n", + "from pandera.typing.geopandas import GeoDataFrame, GeoSeries\n", + "import pandas as pd\n", + "\n", + "class LocationSchemaAll(pa.DataFrameModel):\n", + " id: Index[int] = pa.Field(gt=0, coerce=True)\n", + " code: Series[str] = pa.Field(coerce=True)\n", + " name: Series[str] = pa.Field(coerce=True)\n", + " totalMarineArea: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", + " totalLandArea: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", + " type: Series[str] = pa.Field(\n", + " unique_values_eq=[\"country\", \"worldwide\", \"region\", \"highseas\"], coerce=True\n", + " )\n", + " groups: Series[List[int]] = pa.Field(coerce=True)\n", + " bounds: Series[List[float]] = pa.Field(coerce=True)\n", + "\n", + "def calculate_gadm_area(df: pd.DataFrame) -> pd.DataFrame:\n", + " glob = gpd.GeoDataFrame(\n", + " {\n", + " \"iso\": \"GLOB\",\n", + " \"AREA_KM2\": 134954835,\n", + " \"location_type\": \"worldwide\",\n", + " \"region\": np.nan,\n", + " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n", + " },\n", + " crs=\"EPSG:4326\",\n", + " )\n", + "\n", + " terrestrial_areas = (\n", + " df\n", + " .dissolve(by=[\"iso\", \"region\"], aggfunc={\"AREA_KM2\": \"sum\"})\n", + " .reset_index()\n", + " .assign(location_type=\"country\")\n", + " )\n", + " regions_areas = (\n", + " df\n", + " .dissolve(by=[\"region\"], aggfunc={\"AREA_KM2\": \"sum\"})\n", + " 
.reset_index()\n", + " .rename(columns={\"region\": \"iso\"})\n", + " .assign(location_type=\"region\")\n", + " )\n", + " result = (\n", + " pd.concat(\n", + " [\n", + " glob,\n", + " regions_areas,\n", + " terrestrial_areas,\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .dropna(subset=[\"iso\"])\n", + " .reset_index(drop=True)\n", + " )\n", + " result.index = result.index + 1\n", + " result.index.name = \"id\"\n", + "\n", + " return result.assign(id=result.index)\n", + "\n", + "def add_groups_and_members_land(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame:\n", + " return df.assign(\n", + " groups=lambda row: row[[\"region\", \"location_type\"]].apply(\n", + " lambda x: (np.where(df.iso == x[\"region\"])[0] + 2).tolist()\n", + " if x[\"location_type\"] == \"country\"\n", + " else [],\n", + " axis=1,\n", + " )\n", + " )\n", + "\n", + "def combine_bounds(marine_bounds, land_bounds):\n", + " # Check if marine bounds are valid\n", + " if isinstance(marine_bounds, list) and len(marine_bounds) == 4:\n", + " return marine_bounds\n", + " # If marine bounds are not valid, check land bounds\n", + " elif isinstance(land_bounds, list) and len(land_bounds) == 4:\n", + " return land_bounds\n", + " # If neither bounds are valid, return an empty list\n", + " else:\n", + " return []\n", + "\n", + "def combine_columns(df, col1, col2, new_col):\n", + " \"\"\"\n", + " Combine two columns in a DataFrame using combine_first and assign to a new column.\n", + "\n", + " Parameters:\n", + " df (pd.DataFrame): The DataFrame containing the columns to combine.\n", + " col1 (str): The name of the first column.\n", + " col2 (str): The name of the second column.\n", + " new_col (str): The name of the new column to assign the combined result.\n", + "\n", + " Returns:\n", + " pd.DataFrame: The DataFrame with the new combined column.\n", + " \"\"\"\n", + " df[new_col] = df[col1].combine_first(df[col2])\n", + " return df\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process EEZ data (marine data)\n", + "locations = (\n", + " gpd.read_file(pipe_eez_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", + " .pipe(add_envelope)\n", + " .pipe(add_location_iso)\n", + " .pipe(expand_multiple_locations)\n", + " .pipe(add_region_iso, 'iso')\n", + " .pipe(calculate_eez_area)\n", + " .pipe(add_bbox)\n", + " .pipe(add_groups_and_members)\n", + " .pipe(add_location_name)\n", + " .rename(\n", + " columns={\n", + " \"iso\": \"code\",\n", + " \"AREA_KM2\": \"totalMarineArea\",\n", + " \"location_type\": \"type\",\n", + " }\n", + " )\n", + ").reset_index(drop=True)\n", + "\n", + "locations.drop(\n", + " columns=list(\n", + " set(locations.columns) -\n", + " set([\"code\", \"name\", \"totalMarineArea\", \"type\", \"groups\", \"bounds\", \"id\"])\n", + " ),\n", + " inplace=True,\n", + ")\n", + "\n", + "\n", + "# Create a lookup dictionary for IDs from EEZ data\n", + "id_lookup = locations.set_index('code')['id'].to_dict()\n", + "\n", + "# Process GADM data (land data)\n", + "locations_land = (\n", + " gpd.read_file(pipe_gadm_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", + " .rename(columns={\"GID_0\": \"iso\", 'area_km2': 'AREA_KM2'})\n", + " .pipe(add_envelope)\n", + " .pipe(add_region_iso, 'iso')\n", + " .pipe(calculate_gadm_area)\n", + " .pipe(add_bbox)\n", + " .pipe(add_groups_and_members_land)\n", + " .pipe(add_location_name)\n", + " .rename(\n", + " columns={\n", + " \"iso\": \"code\",\n", + " \"AREA_KM2\": \"totalLandArea\",\n", + " \"location_type\": \"type\",\n", + " }\n", + " )\n", + ").reset_index(drop=True)\n", + "\n", + "# Apply the EEZ IDs to the GADM dataset\n", + "locations_land['id'] = locations_land['code'].map(id_lookup)\n", + "\n", + "# Identify the NaN values in the id column\n", + "nan_mask = locations_land['id'].isna()\n", + "\n", + "# Generate new IDs for any GADM rows without an EEZ match\n", + "new_ids = pd.Series(\n", + " 
range(max(id_lookup.values()) + 1, max(id_lookup.values()) + 1 + nan_mask.sum()),\n", + " index=locations_land[nan_mask].index\n", + ")\n", + "\n", + "# Assign the new IDs to the NaN values in the id column\n", + "locations_land['id'] = locations_land['id'].fillna(new_ids).astype(int)\n", + "\n", + "# Drop unnecessary columns in GADM data\n", + "locations_land.drop(\n", + " columns=list(\n", + " set(locations_land.columns) -\n", + " set([\"code\", \"name\", \"totalLandArea\", \"type\", \"groups\", \"bounds\", \"id\"])\n", + " ),\n", + " inplace=True,\n", + ")\n", + "\n", + "# Merge EEZ and GADM datasets\n", + "combined_locations = pd.merge(\n", + " locations, locations_land,\n", + " on=['code', 'id'],\n", + " suffixes=('_marine', '_land'),\n", + " how='outer' # Use 'outer' join to keep all records\n", + ")\n", + "\n", + "# Replace NaN values in TotalMarineArea and TotalLandArea with 0\n", + "combined_locations['totalMarineArea'] = combined_locations['totalMarineArea'].fillna(0)\n", + "combined_locations['totalLandArea'] = combined_locations['totalLandArea'].fillna(0)\n", + "combined_locations['id'] = combined_locations['id'].astype(int)\n", + "\n", + "# Combine bounding boxes from both datasets\n", + "combined_locations['bounds'] = combined_locations.apply(lambda row: combine_bounds(row['bounds_marine'], row['bounds_land']), axis=1)\n", + "\n", + "# Combine data from land and marine\n", + "combined_locations = combine_columns(combined_locations, 'type_marine', 'type_land', 'type')\n", + "combined_locations = combine_columns(combined_locations, 'groups_marine', 'groups_land', 'groups')\n", + "combined_locations = combine_columns(combined_locations, 'name_marine', 'name_land', 'name')\n", + "\n", + "# Drop unnecessary columns\n", + "combined_locations.drop(\n", + " columns=[col for col in combined_locations.columns if col.endswith('_marine') or col.endswith('_land')],\n", + " inplace=True\n", + ")\n", + "combined_locations = 
combined_locations.reset_index(drop=True)\n", + "\n", + "combined_locations['index'] = combined_locations['id']\n", + "combined_locations.set_index('index', inplace=True)\n", + "combined_locations.sort_index(inplace=True)\n", + "\n", + "# Step 8: Prepare final JSON output (stored in gadm folder)\n", + "output_locations_combined = {\n", + " \"version\": 2,\n", + " \"data\": {\n", + " \"api::location.location\": LocationSchemaAll(pd.DataFrame(combined_locations)).to_dict(\n", + " orient=\"index\"\n", + " )\n", + " },\n", + "}\n", + "\n", + "# Step 9: Write the output to a JSON file (stored in gadm folder)\n", + "with open(output_file, \"w\") as f:\n", + " json.dump(output_locations_combined, f)\n", + "\n", + "del output_locations_combined\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Create locations_code (stored in gadm folder)\n", + "(combined_locations[['id', 'code']]\n", + " .to_csv(pipe_gadm_dir.get_processed_step_path(current_step)\n", + " .joinpath('locations_code.csv'), index=False))\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 197878f1bef5de9f5b54755632aff004538305bb Mon Sep 17 00:00:00 2001 From: sofia Date: Thu, 26 Sep 2024 10:26:30 +0200 Subject: [PATCH 07/16] sofia changes for terrestrial --- data/notebooks/habitat2.ipynb | 323 +- data/notebooks/pipes_mock/intermediate.ipynb | 1886 ++++++--- data/notebooks/pipes_mock/locations.ipynb | 209 +- data/notebooks/pipes_mock/precalc_sofia.ipynb | 3719 +++++++++++++++++ 4 files changed, 5315 insertions(+), 822 deletions(-) create mode 100644 data/notebooks/pipes_mock/precalc_sofia.ipynb diff --git a/data/notebooks/habitat2.ipynb b/data/notebooks/habitat2.ipynb index f2768fac..8db4eaaa 100644 --- a/data/notebooks/habitat2.ipynb +++ b/data/notebooks/habitat2.ipynb @@ -29,19 +29,18 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, 
"metadata": {}, "outputs": [], "source": [ "import geopandas as gpd\n", "import pandas as pd\n", - "import openpyxl\n", "from datetime import datetime" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -107,7 +106,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Replace 'ATA' with 'ABNJ' in 'location_id' and make all fields numeric\n", + "# Replace 'ATA' with 'ABNJ' in 'location_id'\n", "cold2 = cold2.replace('ATA', 'ABNJ')\n", "salt2 = salt2.replace('ATA', 'ABNJ')\n", "sea2 = sea2.replace('ATA', 'ABNJ')\n", @@ -116,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -131,7 +130,7 @@ " dtype='object')" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -144,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -167,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -212,7 +211,7 @@ "120 UMI 9.38785685362166 9.38785685362166" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -223,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -236,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -276,7 +275,7 @@ "Index: []" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -287,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -301,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -314,7 +313,7 @@ }, { 
"cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -327,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -385,7 +384,7 @@ "1 AGO 0.000000 3.395671 cold-water corals 2024" ] }, - "execution_count": 19, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -406,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -482,7 +481,7 @@ "4 coldwater-corals 4400.140842 15336.975280 GLOB 2024" ] }, - "execution_count": 20, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -498,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -574,7 +573,7 @@ "4 cold-water corals 4400.140842 15336.975280 GLOB 2024" ] }, - "execution_count": 21, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -593,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -603,7 +602,7 @@ " 'warm-water corals'], dtype=object)" ] }, - "execution_count": 22, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -623,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -710,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -966,7 +965,7 @@ "23 WA warm-water corals 547.928957 4903.230395 2024" ] }, - "execution_count": 24, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -985,7 +984,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -995,7 +994,85 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 
23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241893.871282cold-water corals2024marine
1AGO0.0000003.395671cold-water corals2024marine
\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year \\\n", + "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", + "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", + "\n", + " environment \n", + "0 marine \n", + "1 marine " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add environment\n", + "habitats['environment'] = 'marine'\n", + "habitats.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -1011,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -1024,7 +1101,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1038,7 +1115,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -1049,7 +1126,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -1067,7 +1144,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -1082,7 +1159,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1120,7 +1197,7 @@ " DNK\n", " 982.028337\n", " no\n", - " POINT (2.7625 84.97974)\n", + " POINT (2.76250 84.97974)\n", " \n", " \n", " 1\n", @@ -1152,7 +1229,7 @@ " ABNJ\n", " 309.588492\n", " no\n", - " POINT (8.8875 84.91307)\n", + " POINT (8.88750 84.91307)\n", " \n", " \n", "\n", @@ -1160,14 +1237,14 @@ ], "text/plain": [ " PEAKID iso AREA2D protection geometry\n", - "0 26000 DNK 982.028337 no POINT (2.7625 84.97974)\n", + "0 26000 DNK 982.028337 no POINT (2.76250 84.97974)\n", "1 26157 ABNJ 348.473055 no POINT (9.14306 84.93529)\n", "2 26158 ABNJ 367.540380 no POINT (9.18333 
84.93807)\n", "3 26228 ABNJ 299.443636 no POINT (8.74861 84.90751)\n", - "4 26229 ABNJ 309.588492 no POINT (8.8875 84.91307)" + "4 26229 ABNJ 309.588492 no POINT (8.88750 84.91307)" ] }, - "execution_count": 63, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1180,7 +1257,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1222,7 +1299,7 @@ "Index: []" ] }, - "execution_count": 65, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1242,7 +1319,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1291,7 +1368,7 @@ "0 seamounts 2.690810e+07 3.426630e+06 GLOB 2011" ] }, - "execution_count": 67, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1324,7 +1401,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -1343,7 +1420,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -1356,7 +1433,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1498,7 +1575,7 @@ "[92 rows x 5 columns]" ] }, - "execution_count": 70, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1520,7 +1597,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -1614,7 +1691,7 @@ "5 WA seamounts 2.487428e+03 9.384765e+04 2011" ] }, - "execution_count": 72, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1632,7 +1709,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -1642,7 +1719,17 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 38, + "metadata": {}, + "outputs": 
[], + "source": [ + "# Add environment\n", + "seamounts_all['environment'] = 'marine'" + ] + }, + { + "cell_type": "code", + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -1658,7 +1745,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -1667,7 +1754,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1677,7 +1764,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -1691,7 +1778,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -1740,7 +1827,7 @@ "0 mangroves 61287.20375 147358.990971 GLOB 2020" ] }, - "execution_count": 53, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1755,7 +1842,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -1765,7 +1852,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -1859,7 +1946,7 @@ "5 WA mangroves 27.83000 173.620938 2020" ] }, - "execution_count": 55, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1877,7 +1964,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -1887,7 +1974,17 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# Add environment\n", + "mangroves_all['environment'] = 'marine'" + ] + }, + { + "cell_type": "code", + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -1904,7 +2001,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -1933,6 +2030,7 @@ " total_area\n", " 
habitat_name\n", " year\n", + " environment\n", " \n", " \n", " \n", @@ -1943,6 +2041,7 @@ " 1893.871282\n", " cold-water corals\n", " 2024\n", + " marine\n", " \n", " \n", " 1\n", @@ -1951,6 +2050,7 @@ " 3.395671\n", " cold-water corals\n", " 2024\n", + " marine\n", " \n", " \n", " 2\n", @@ -1959,6 +2059,7 @@ " 5.986479\n", " cold-water corals\n", " 2024\n", + " marine\n", " \n", " \n", " 3\n", @@ -1967,6 +2068,7 @@ " 61.826344\n", " cold-water corals\n", " 2024\n", + " marine\n", " \n", " \n", " 4\n", @@ -1975,6 +2077,7 @@ " 0.997747\n", " cold-water corals\n", " 2024\n", + " marine\n", " \n", " \n", " ...\n", @@ -1983,6 +2086,7 @@ " ...\n", " ...\n", " ...\n", + " ...\n", " \n", " \n", " 1\n", @@ -1991,6 +2095,7 @@ " 74292.673146\n", " mangroves\n", " 2020\n", + " marine\n", " \n", " \n", " 2\n", @@ -1999,6 +2104,7 @@ " 1246.189677\n", " mangroves\n", " 2020\n", + " marine\n", " \n", " \n", " 3\n", @@ -2007,6 +2113,7 @@ " 2415.418557\n", " mangroves\n", " 2020\n", + " marine\n", " \n", " \n", " 4\n", @@ -2015,6 +2122,7 @@ " 39893.444608\n", " mangroves\n", " 2020\n", + " marine\n", " \n", " \n", " 5\n", @@ -2023,30 +2131,44 @@ " 173.620938\n", " mangroves\n", " 2020\n", + " marine\n", " \n", " \n", "\n", - "

614 rows × 5 columns

\n", + "

614 rows × 6 columns

\n", "" ], "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 427.048524 1893.871282 cold-water corals 2024\n", - "1 AGO 0.000000 3.395671 cold-water corals 2024\n", - "2 ALB 0.000000 5.986479 cold-water corals 2024\n", - "3 ARG 6.984226 61.826344 cold-water corals 2024\n", - "4 ATG 0.000000 0.997747 cold-water corals 2024\n", - ".. ... ... ... ... ...\n", - "1 AS 21277.220000 74292.673146 mangroves 2020\n", - "2 EU 732.143750 1246.189677 mangroves 2020\n", - "3 NA 2097.740000 2415.418557 mangroves 2020\n", - "4 SA 27151.740000 39893.444608 mangroves 2020\n", - "5 WA 27.830000 173.620938 mangroves 2020\n", + " location_id protected_area total_area habitat_name year \\\n", + "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", + "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", + "2 ALB 0.000000 5.986479 cold-water corals 2024 \n", + "3 ARG 6.984226 61.826344 cold-water corals 2024 \n", + "4 ATG 0.000000 0.997747 cold-water corals 2024 \n", + ".. ... ... ... ... ... \n", + "1 AS 21277.220000 74292.673146 mangroves 2020 \n", + "2 EU 732.143750 1246.189677 mangroves 2020 \n", + "3 NA 2097.740000 2415.418557 mangroves 2020 \n", + "4 SA 27151.740000 39893.444608 mangroves 2020 \n", + "5 WA 27.830000 173.620938 mangroves 2020 \n", "\n", - "[614 rows x 5 columns]" + " environment \n", + "0 marine \n", + "1 marine \n", + "2 marine \n", + "3 marine \n", + "4 marine \n", + ".. ... 
\n", + "1 marine \n", + "2 marine \n", + "3 marine \n", + "4 marine \n", + "5 marine \n", + "\n", + "[614 rows x 6 columns]" ] }, - "execution_count": 75, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -2059,7 +2181,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -2088,6 +2210,7 @@ " total_area\n", " habitat_name\n", " year\n", + " environment\n", " \n", " \n", " \n", @@ -2098,6 +2221,7 @@ " 1.893871e+03\n", " cold-water corals\n", " 2024\n", + " marine\n", " \n", " \n", " 0\n", @@ -2106,6 +2230,7 @@ " 6.335727e+03\n", " seagrasses\n", " 2024\n", + " marine\n", " \n", " \n", " 0\n", @@ -2114,19 +2239,25 @@ " 1.518615e+07\n", " seamounts\n", " 2011\n", + " marine\n", " \n", " \n", "\n", "" ], "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 427.048524 1.893871e+03 cold-water corals 2024\n", - "0 ABNJ 0.000000 6.335727e+03 seagrasses 2024\n", - "0 ABNJ 308819.904730 1.518615e+07 seamounts 2011" + " location_id protected_area total_area habitat_name year \\\n", + "0 ABNJ 427.048524 1.893871e+03 cold-water corals 2024 \n", + "0 ABNJ 0.000000 6.335727e+03 seagrasses 2024 \n", + "0 ABNJ 308819.904730 1.518615e+07 seamounts 2011 \n", + "\n", + " environment \n", + "0 marine \n", + "0 marine \n", + "0 marine " ] }, - "execution_count": 81, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -2137,11 +2268,31 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ - "habitats_all.to_csv(path_out + \"tables/habitats5.csv\", index=False, encoding='utf-8', sep=',', decimal='.')" + "habitats_all.to_csv(path_out + \"tables/habitats6.csv\", index=False, encoding='utf-8', sep=',', decimal='.')" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"'../data/processed/tables/habitats6.csv'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path_out + \"tables/habitats6.csv\"" ] } ], @@ -2161,7 +2312,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 5632d575..acb5bd28 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -11,18 +11,9 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -30,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 262, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -40,8 +31,11 @@ "import geopandas as gpd\n", "import pandas as pd\n", "import requests\n", + "import dotenv \n", + "\n", + "dotenv.load_dotenv()\n", "\n", - "scripts_dir = Path(\"../..\").joinpath(\"src\")\n", + "scripts_dir = Path(\".\").joinpath(\"src\")\n", "import sys\n", "if scripts_dir not in sys.path:\n", " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", @@ -69,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -172,20 +166,9 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/home/mambauser/data/eez/raw/World_High_Seas_v1_20200826.zip')" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# 
Extract data\n", "## download files EEZ & High seas\n", @@ -202,18 +185,9 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/mambauser/data/eez/raw/temp_preprocess/World_High_Seas_v1_20200826\n", - "/home/mambauser/data/eez/raw/temp_preprocess/World_EEZ_v11_20191118\n" - ] - } - ], + "outputs": [], "source": [ "## unzip file if needed & load data\n", "unziped_folders = []\n", @@ -234,18 +208,9 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GeoDataFrame 0 has 1 rows and 6 columns\n", - "GeoDataFrame 1 has 281 rows and 32 columns\n" - ] - } - ], + "outputs": [], "source": [ "for idx, gdf in enumerate(unziped_folders):\n", " print(f\"GeoDataFrame {idx} has {len(gdf)} rows and {len(gdf.columns)} columns\")" @@ -295,18 +260,9 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Value 212881389 of field AREA_KM2 of feature 0 not successfully written. 
Possibly due to too larger number with respect to field width\n", - " ogr_write(\n" - ] - } - ], + "outputs": [], "source": [ "# save data\n", "gpd.GeoDataFrame(\n", @@ -355,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -367,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 263, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -383,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -393,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -408,16 +364,15 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Processing: /home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels\n", - "Removed existing folder: /home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels\n", - "Unpacked /home/mambauser/data/gadm/raw/gadm_410-levels.zip to /home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels\n" + "Processing: /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/temp_preprocess/gadm_410-levels\n", + "Unpacked /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/gadm_410-levels.zip to /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/temp_preprocess/gadm_410-levels\n" ] } ], @@ -439,280 +394,14 @@ }, { "cell_type": "code", - "execution_count": 304, - "metadata": {}, - "outputs": [], - "source": [ - "# Add columns for translated names\n", - "data = [\n", - " {\"GID_0\": \"AFG\", \"COUNTRY_ES\": \"Afganistán\", \"COUNTRY_FR\": \"Afghanistan\"},\n", - " {\"GID_0\": \"XAD\", \"COUNTRY_ES\": \"Akrotiri y Dhekelia\", \"COUNTRY_FR\": \"Akrotiri et Dhekelia\"},\n", - " {\"GID_0\": \"ALB\", \"COUNTRY_ES\": \"Albania\", \"COUNTRY_FR\": \"Albanie\"},\n", - " {\"GID_0\": \"DZA\", 
\"COUNTRY_ES\": \"Argelia\", \"COUNTRY_FR\": \"Algérie\"},\n", - " {\"GID_0\": \"ASM\", \"COUNTRY_ES\": \"Samoa Americana\", \"COUNTRY_FR\": \"Samoa américaines\"},\n", - " {\"GID_0\": \"AND\", \"COUNTRY_ES\": \"Andorra\", \"COUNTRY_FR\": \"Andorre\"},\n", - " {\"GID_0\": \"AGO\", \"COUNTRY_ES\": \"Angola\", \"COUNTRY_FR\": \"Angola\"},\n", - " {\"GID_0\": \"AIA\", \"COUNTRY_ES\": \"Anguila\", \"COUNTRY_FR\": \"Anguilla\"},\n", - " {\"GID_0\": \"ATA\", \"COUNTRY_ES\": \"Antártida\", \"COUNTRY_FR\": \"Antarctique\"},\n", - " {\"GID_0\": \"ATG\", \"COUNTRY_ES\": \"Antigua y Barbuda\", \"COUNTRY_FR\": \"Antigua-et-Barbuda\"},\n", - " {\"GID_0\": \"ARG\", \"COUNTRY_ES\": \"Argentina\", \"COUNTRY_FR\": \"Argentine\"},\n", - " {\"GID_0\": \"ARM\", \"COUNTRY_ES\": \"Armenia\", \"COUNTRY_FR\": \"Arménie\"},\n", - " {\"GID_0\": \"ABW\", \"COUNTRY_ES\": \"Aruba\", \"COUNTRY_FR\": \"Aruba\"},\n", - " {\"GID_0\": \"AUS\", \"COUNTRY_ES\": \"Australia\", \"COUNTRY_FR\": \"Australie\"},\n", - " {\"GID_0\": \"AUT\", \"COUNTRY_ES\": \"Austria\", \"COUNTRY_FR\": \"Autriche\"},\n", - " {\"GID_0\": \"AZE\", \"COUNTRY_ES\": \"Azerbaiyán\", \"COUNTRY_FR\": \"Azerbaïdjan\"},\n", - " {\"GID_0\": \"BHS\", \"COUNTRY_ES\": \"Bahamas\", \"COUNTRY_FR\": \"Bahamas\"},\n", - " {\"GID_0\": \"BHR\", \"COUNTRY_ES\": \"Baréin\", \"COUNTRY_FR\": \"Bahreïn\"},\n", - " {\"GID_0\": \"BGD\", \"COUNTRY_ES\": \"Bangladés\", \"COUNTRY_FR\": \"Bangladesh\"},\n", - " {\"GID_0\": \"BRB\", \"COUNTRY_ES\": \"Barbados\", \"COUNTRY_FR\": \"Barbade\"},\n", - " {\"GID_0\": \"BLR\", \"COUNTRY_ES\": \"Bielorrusia\", \"COUNTRY_FR\": \"Biélorussie\"},\n", - " {\"GID_0\": \"BEL\", \"COUNTRY_ES\": \"Bélgica\", \"COUNTRY_FR\": \"Belgique\"},\n", - " {\"GID_0\": \"BLZ\", \"COUNTRY_ES\": \"Belice\", \"COUNTRY_FR\": \"Belize\"},\n", - " {\"GID_0\": \"BEN\", \"COUNTRY_ES\": \"Benín\", \"COUNTRY_FR\": \"Bénin\"},\n", - " {\"GID_0\": \"BMU\", \"COUNTRY_ES\": \"Bermudas\", \"COUNTRY_FR\": \"Bermudes\"},\n", - " {\"GID_0\": 
\"BTN\", \"COUNTRY_ES\": \"Bután\", \"COUNTRY_FR\": \"Bhoutan\"},\n", - " {\"GID_0\": \"BOL\", \"COUNTRY_ES\": \"Bolivia\", \"COUNTRY_FR\": \"Bolivie\"},\n", - " {\"GID_0\": \"BES\", \"COUNTRY_ES\": \"Bonaire, San Eustaquio y Saba\", \"COUNTRY_FR\": \"Bonaire, Saint-Eustache et Saba\"},\n", - " {\"GID_0\": \"BIH\", \"COUNTRY_ES\": \"Bosnia y Herzegovina\", \"COUNTRY_FR\": \"Bosnie-Herzégovine\"},\n", - " {\"GID_0\": \"BWA\", \"COUNTRY_ES\": \"Botsuana\", \"COUNTRY_FR\": \"Botswana\"},\n", - " {\"GID_0\": \"BVT\", \"COUNTRY_ES\": \"Isla Bouvet\", \"COUNTRY_FR\": \"Île Bouvet\"},\n", - " {\"GID_0\": \"BRA\", \"COUNTRY_ES\": \"Brasil\", \"COUNTRY_FR\": \"Brésil\"},\n", - " {\"GID_0\": \"IOT\", \"COUNTRY_ES\": \"Territorio Británico del Océano Índico\", \"COUNTRY_FR\": \"Territoire britannique de l'océan Indien\"},\n", - " {\"GID_0\": \"VGB\", \"COUNTRY_ES\": \"Islas Vírgenes Británicas\", \"COUNTRY_FR\": \"Îles Vierges britanniques\"},\n", - " {\"GID_0\": \"BRN\", \"COUNTRY_ES\": \"Brunéi\", \"COUNTRY_FR\": \"Brunei\"},\n", - " {\"GID_0\": \"BGR\", \"COUNTRY_ES\": \"Bulgaria\", \"COUNTRY_FR\": \"Bulgarie\"},\n", - " {\"GID_0\": \"BFA\", \"COUNTRY_ES\": \"Burkina Faso\", \"COUNTRY_FR\": \"Burkina Faso\"},\n", - " {\"GID_0\": \"BDI\", \"COUNTRY_ES\": \"Burundi\", \"COUNTRY_FR\": \"Burundi\"},\n", - " {\"GID_0\": \"CPV\", \"COUNTRY_ES\": \"Cabo Verde\", \"COUNTRY_FR\": \"Cap-Vert\"},\n", - " {\"GID_0\": \"KHM\", \"COUNTRY_ES\": \"Camboya\", \"COUNTRY_FR\": \"Cambodge\"},\n", - " {\"GID_0\": \"CMR\", \"COUNTRY_ES\": \"Camerún\", \"COUNTRY_FR\": \"Cameroun\"},\n", - " {\"GID_0\": \"CAN\", \"COUNTRY_ES\": \"Canadá\", \"COUNTRY_FR\": \"Canada\"},\n", - " {\"GID_0\": \"XCA\", \"COUNTRY_ES\": \"Mar Caspio\", \"COUNTRY_FR\": \"Mer Caspienne\"},\n", - " {\"GID_0\": \"CYM\", \"COUNTRY_ES\": \"Islas Caimán\", \"COUNTRY_FR\": \"Îles Caïmans\"},\n", - " {\"GID_0\": \"CAF\", \"COUNTRY_ES\": \"República Centroafricana\", \"COUNTRY_FR\": \"République centrafricaine\"},\n", - " 
{\"GID_0\": \"TCD\", \"COUNTRY_ES\": \"Chad\", \"COUNTRY_FR\": \"Tchad\"},\n", - " {\"GID_0\": \"CHL\", \"COUNTRY_ES\": \"Chile\", \"COUNTRY_FR\": \"Chili\"},\n", - " {\"GID_0\": \"CHN\", \"COUNTRY_ES\": \"China\", \"COUNTRY_FR\": \"Chine\"},\n", - " {\"GID_0\": \"CXR\", \"COUNTRY_ES\": \"Isla de Navidad\", \"COUNTRY_FR\": \"Île Christmas\"},\n", - " {\"GID_0\": \"XCL\", \"COUNTRY_ES\": \"Isla Clipperton\", \"COUNTRY_FR\": \"Île Clipperton\"},\n", - " {\"GID_0\": \"CCK\", \"COUNTRY_ES\": \"Islas Cocos\", \"COUNTRY_FR\": \"Îles Cocos\"},\n", - " {\"GID_0\": \"COL\", \"COUNTRY_ES\": \"Colombia\", \"COUNTRY_FR\": \"Colombie\"},\n", - " {\"GID_0\": \"COM\", \"COUNTRY_ES\": \"Comoras\", \"COUNTRY_FR\": \"Comores\"},\n", - " {\"GID_0\": \"COK\", \"COUNTRY_ES\": \"Islas Cook\", \"COUNTRY_FR\": \"Îles Cook\"},\n", - " {\"GID_0\": \"CRI\", \"COUNTRY_ES\": \"Costa Rica\", \"COUNTRY_FR\": \"Costa Rica\"},\n", - " {\"GID_0\": \"HRV\", \"COUNTRY_ES\": \"Croacia\", \"COUNTRY_FR\": \"Croatie\"},\n", - " {\"GID_0\": \"CUB\", \"COUNTRY_ES\": \"Cuba\", \"COUNTRY_FR\": \"Cuba\"},\n", - " {\"GID_0\": \"CUW\", \"COUNTRY_ES\": \"Curazao\", \"COUNTRY_FR\": \"Curaçao\"},\n", - " {\"GID_0\": \"CYP\", \"COUNTRY_ES\": \"Chipre\", \"COUNTRY_FR\": \"Chypre\"},\n", - " {\"GID_0\": \"CZE\", \"COUNTRY_ES\": \"Chequia\", \"COUNTRY_FR\": \"Tchéquie\"},\n", - " {\"GID_0\": \"CIV\", \"COUNTRY_ES\": \"Costa de Marfil\", \"COUNTRY_FR\": \"Côte d'Ivoire\"},\n", - " {\"GID_0\": \"COD\", \"COUNTRY_ES\": \"República Democrática del Congo\", \"COUNTRY_FR\": \"République démocratique du Congo\"},\n", - " {\"GID_0\": \"DNK\", \"COUNTRY_ES\": \"Dinamarca\", \"COUNTRY_FR\": \"Danemark\"},\n", - " {\"GID_0\": \"DJI\", \"COUNTRY_ES\": \"Yibuti\", \"COUNTRY_FR\": \"Djibouti\"},\n", - " {\"GID_0\": \"DMA\", \"COUNTRY_ES\": \"Dominica\", \"COUNTRY_FR\": \"Dominique\"},\n", - " {\"GID_0\": \"DOM\", \"COUNTRY_ES\": \"República Dominicana\", \"COUNTRY_FR\": \"République dominicaine\"},\n", - " {\"GID_0\": \"ECU\", 
\"COUNTRY_ES\": \"Ecuador\", \"COUNTRY_FR\": \"Équateur\"},\n", - " {\"GID_0\": \"EGY\", \"COUNTRY_ES\": \"Egipto\", \"COUNTRY_FR\": \"Égypte\"},\n", - " {\"GID_0\": \"SLV\", \"COUNTRY_ES\": \"El Salvador\", \"COUNTRY_FR\": \"Salvador\"},\n", - " {\"GID_0\": \"GNQ\", \"COUNTRY_ES\": \"Guinea Ecuatorial\", \"COUNTRY_FR\": \"Guinée équatoriale\"},\n", - " {\"GID_0\": \"ERI\", \"COUNTRY_ES\": \"Eritrea\", \"COUNTRY_FR\": \"Érythrée\"},\n", - " {\"GID_0\": \"EST\", \"COUNTRY_ES\": \"Estonia\", \"COUNTRY_FR\": \"Estonie\"},\n", - " {\"GID_0\": \"ETH\", \"COUNTRY_ES\": \"Etiopía\", \"COUNTRY_FR\": \"Éthiopie\"},\n", - " {\"GID_0\": \"FLK\", \"COUNTRY_ES\": \"Islas Malvinas\", \"COUNTRY_FR\": \"Îles Malouines\"},\n", - " {\"GID_0\": \"FRO\", \"COUNTRY_ES\": \"Islas Feroe\", \"COUNTRY_FR\": \"Îles Féroé\"},\n", - " {\"GID_0\": \"FJI\", \"COUNTRY_ES\": \"Fiyi\", \"COUNTRY_FR\": \"Fidji\"},\n", - " {\"GID_0\": \"FIN\", \"COUNTRY_ES\": \"Finlandia\", \"COUNTRY_FR\": \"Finlande\"},\n", - " {\"GID_0\": \"FRA\", \"COUNTRY_ES\": \"Francia\", \"COUNTRY_FR\": \"France\"},\n", - " {\"GID_0\": \"GUF\", \"COUNTRY_ES\": \"Guayana Francesa\", \"COUNTRY_FR\": \"Guyane française\"},\n", - " {\"GID_0\": \"PYF\", \"COUNTRY_ES\": \"Polinesia Francesa\", \"COUNTRY_FR\": \"Polynésie française\"},\n", - " {\"GID_0\": \"ATF\", \"COUNTRY_ES\": \"Territorios Australes Franceses\", \"COUNTRY_FR\": \"Terres australes françaises\"},\n", - " {\"GID_0\": \"GAB\", \"COUNTRY_ES\": \"Gabón\", \"COUNTRY_FR\": \"Gabon\"},\n", - " {\"GID_0\": \"GMB\", \"COUNTRY_ES\": \"Gambia\", \"COUNTRY_FR\": \"Gambie\"},\n", - " {\"GID_0\": \"GEO\", \"COUNTRY_ES\": \"Georgia\", \"COUNTRY_FR\": \"Géorgie\"},\n", - " {\"GID_0\": \"DEU\", \"COUNTRY_ES\": \"Alemania\", \"COUNTRY_FR\": \"Allemagne\"},\n", - " {\"GID_0\": \"GHA\", \"COUNTRY_ES\": \"Ghana\", \"COUNTRY_FR\": \"Ghana\"},\n", - " {\"GID_0\": \"GIB\", \"COUNTRY_ES\": \"Gibraltar\", \"COUNTRY_FR\": \"Gibraltar\"},\n", - " {\"GID_0\": \"GRC\", \"COUNTRY_ES\": 
\"Grecia\", \"COUNTRY_FR\": \"Grèce\"},\n", - " {\"GID_0\": \"GRL\", \"COUNTRY_ES\": \"Groenlandia\", \"COUNTRY_FR\": \"Groenland\"},\n", - " {\"GID_0\": \"GRD\", \"COUNTRY_ES\": \"Granada\", \"COUNTRY_FR\": \"Grenade\"},\n", - " {\"GID_0\": \"GLP\", \"COUNTRY_ES\": \"Guadalupe\", \"COUNTRY_FR\": \"Guadeloupe\"},\n", - " {\"GID_0\": \"GUM\", \"COUNTRY_ES\": \"Guam\", \"COUNTRY_FR\": \"Guam\"},\n", - " {\"GID_0\": \"GTM\", \"COUNTRY_ES\": \"Guatemala\", \"COUNTRY_FR\": \"Guatemala\"},\n", - " {\"GID_0\": \"GGY\", \"COUNTRY_ES\": \"Guernesey\", \"COUNTRY_FR\": \"Guernesey\"},\n", - " {\"GID_0\": \"GIN\", \"COUNTRY_ES\": \"Guinea\", \"COUNTRY_FR\": \"Guinée\"},\n", - " {\"GID_0\": \"GNB\", \"COUNTRY_ES\": \"Guinea-Bisáu\", \"COUNTRY_FR\": \"Guinée-Bissau\"},\n", - " {\"GID_0\": \"GUY\", \"COUNTRY_ES\": \"Guyana\", \"COUNTRY_FR\": \"Guyana\"},\n", - " {\"GID_0\": \"HTI\", \"COUNTRY_ES\": \"Haití\", \"COUNTRY_FR\": \"Haïti\"},\n", - " {\"GID_0\": \"HMD\", \"COUNTRY_ES\": \"Isla Heard y McDonald\", \"COUNTRY_FR\": \"Île Heard et îles McDonald\"}, \n", - " {\"GID_0\": \"HND\", \"COUNTRY_ES\": \"Honduras\", \"COUNTRY_FR\": \"Honduras\"},\n", - " {\"GID_0\": \"HUN\", \"COUNTRY_ES\": \"Hungría\", \"COUNTRY_FR\": \"Hongrie\"},\n", - " {\"GID_0\": \"ISL\", \"COUNTRY_ES\": \"Islandia\", \"COUNTRY_FR\": \"Islande\"},\n", - " {\"GID_0\": \"IND\", \"COUNTRY_ES\": \"India\", \"COUNTRY_FR\": \"Inde\"},\n", - " {\"GID_0\": \"IDN\", \"COUNTRY_ES\": \"Indonesia\", \"COUNTRY_FR\": \"Indonésie\"},\n", - " {\"GID_0\": \"IRN\", \"COUNTRY_ES\": \"Irán\", \"COUNTRY_FR\": \"Iran\"},\n", - " {\"GID_0\": \"IRQ\", \"COUNTRY_ES\": \"Irak\", \"COUNTRY_FR\": \"Irak\"},\n", - " {\"GID_0\": \"IRL\", \"COUNTRY_ES\": \"Irlanda\", \"COUNTRY_FR\": \"Irlande\"},\n", - " {\"GID_0\": \"IMN\", \"COUNTRY_ES\": \"Isla de Man\", \"COUNTRY_FR\": \"Île de Man\"},\n", - " {\"GID_0\": \"ISR\", \"COUNTRY_ES\": \"Israel\", \"COUNTRY_FR\": \"Israël\"},\n", - " {\"GID_0\": \"ITA\", \"COUNTRY_ES\": \"Italia\", 
\"COUNTRY_FR\": \"Italie\"},\n", - " {\"GID_0\": \"JAM\", \"COUNTRY_ES\": \"Jamaica\", \"COUNTRY_FR\": \"Jamaïque\"},\n", - " {\"GID_0\": \"JPN\", \"COUNTRY_ES\": \"Japón\", \"COUNTRY_FR\": \"Japon\"},\n", - " {\"GID_0\": \"JEY\", \"COUNTRY_ES\": \"Jersey\", \"COUNTRY_FR\": \"Jersey\"},\n", - " {\"GID_0\": \"JOR\", \"COUNTRY_ES\": \"Jordania\", \"COUNTRY_FR\": \"Jordanie\"},\n", - " {\"GID_0\": \"KAZ\", \"COUNTRY_ES\": \"Kazajistán\", \"COUNTRY_FR\": \"Kazakhstan\"},\n", - " {\"GID_0\": \"KEN\", \"COUNTRY_ES\": \"Kenia\", \"COUNTRY_FR\": \"Kenya\"},\n", - " {\"GID_0\": \"KIR\", \"COUNTRY_ES\": \"Kiribati\", \"COUNTRY_FR\": \"Kiribati\"},\n", - " {\"GID_0\": \"XKO\", \"COUNTRY_ES\": \"Kosovo\", \"COUNTRY_FR\": \"Kosovo\"},\n", - " {\"GID_0\": \"KWT\", \"COUNTRY_ES\": \"Kuwait\", \"COUNTRY_FR\": \"Koweït\"},\n", - " {\"GID_0\": \"KGZ\", \"COUNTRY_ES\": \"Kirguistán\", \"COUNTRY_FR\": \"Kirghizistan\"},\n", - " {\"GID_0\": \"LAO\", \"COUNTRY_ES\": \"Laos\", \"COUNTRY_FR\": \"Laos\"},\n", - " {\"GID_0\": \"LVA\", \"COUNTRY_ES\": \"Letonia\", \"COUNTRY_FR\": \"Lettonie\"},\n", - " {\"GID_0\": \"LBN\", \"COUNTRY_ES\": \"Líbano\", \"COUNTRY_FR\": \"Liban\"},\n", - " {\"GID_0\": \"LSO\", \"COUNTRY_ES\": \"Lesoto\", \"COUNTRY_FR\": \"Lesotho\"},\n", - " {\"GID_0\": \"LBR\", \"COUNTRY_ES\": \"Liberia\", \"COUNTRY_FR\": \"Liberia\"},\n", - " {\"GID_0\": \"LBY\", \"COUNTRY_ES\": \"Libia\", \"COUNTRY_FR\": \"Libye\"},\n", - " {\"GID_0\": \"LIE\", \"COUNTRY_ES\": \"Liechtenstein\", \"COUNTRY_FR\": \"Liechtenstein\"},\n", - " {\"GID_0\": \"LTU\", \"COUNTRY_ES\": \"Lituania\", \"COUNTRY_FR\": \"Lituanie\"},\n", - " {\"GID_0\": \"LUX\", \"COUNTRY_ES\": \"Luxemburgo\", \"COUNTRY_FR\": \"Luxembourg\"},\n", - " {\"GID_0\": \"MDG\", \"COUNTRY_ES\": \"Madagascar\", \"COUNTRY_FR\": \"Madagascar\"},\n", - " {\"GID_0\": \"MWI\", \"COUNTRY_ES\": \"Malaui\", \"COUNTRY_FR\": \"Malawi\"},\n", - " {\"GID_0\": \"MYS\", \"COUNTRY_ES\": \"Malasia\", \"COUNTRY_FR\": \"Malaisie\"},\n", - " 
{\"GID_0\": \"MDV\", \"COUNTRY_ES\": \"Maldivas\", \"COUNTRY_FR\": \"Maldives\"},\n", - " {\"GID_0\": \"MLI\", \"COUNTRY_ES\": \"Malí\", \"COUNTRY_FR\": \"Mali\"},\n", - " {\"GID_0\": \"MLT\", \"COUNTRY_ES\": \"Malta\", \"COUNTRY_FR\": \"Malte\"},\n", - " {\"GID_0\": \"MHL\", \"COUNTRY_ES\": \"Islas Marshall\", \"COUNTRY_FR\": \"Îles Marshall\"},\n", - " {\"GID_0\": \"MTQ\", \"COUNTRY_ES\": \"Martinica\", \"COUNTRY_FR\": \"Martinique\"},\n", - " {\"GID_0\": \"MRT\", \"COUNTRY_ES\": \"Mauritania\", \"COUNTRY_FR\": \"Mauritanie\"},\n", - " {\"GID_0\": \"MUS\", \"COUNTRY_ES\": \"Mauricio\", \"COUNTRY_FR\": \"Maurice\"},\n", - " {\"GID_0\": \"MYT\", \"COUNTRY_ES\": \"Mayotte\", \"COUNTRY_FR\": \"Mayotte\"},\n", - " {\"GID_0\": \"FSM\", \"COUNTRY_ES\": \"Micronesia\", \"COUNTRY_FR\": \"Micronésie\"},\n", - " {\"GID_0\": \"MDA\", \"COUNTRY_ES\": \"Moldavia\", \"COUNTRY_FR\": \"Moldavie\"},\n", - " {\"GID_0\": \"MCO\", \"COUNTRY_ES\": \"Mónaco\", \"COUNTRY_FR\": \"Monaco\"},\n", - " {\"GID_0\": \"MNG\", \"COUNTRY_ES\": \"Mongolia\", \"COUNTRY_FR\": \"Mongolie\"},\n", - " {\"GID_0\": \"MNE\", \"COUNTRY_ES\": \"Montenegro\", \"COUNTRY_FR\": \"Monténégro\"},\n", - " {\"GID_0\": \"MSR\", \"COUNTRY_ES\": \"Montserrat\", \"COUNTRY_FR\": \"Montserrat\"},\n", - " {\"GID_0\": \"MAR\", \"COUNTRY_ES\": \"Marruecos\", \"COUNTRY_FR\": \"Maroc\"},\n", - " {\"GID_0\": \"MOZ\", \"COUNTRY_ES\": \"Mozambique\", \"COUNTRY_FR\": \"Mozambique\"},\n", - " {\"GID_0\": \"MMR\", \"COUNTRY_ES\": \"Myanmar\", \"COUNTRY_FR\": \"Myanmar\"},\n", - " {\"GID_0\": \"MEX\", \"COUNTRY_ES\": \"México\", \"COUNTRY_FR\": \"Mexique\"},\n", - " {\"GID_0\": \"NAM\", \"COUNTRY_ES\": \"Namibia\", \"COUNTRY_FR\": \"Namibie\"},\n", - " {\"GID_0\": \"NRU\", \"COUNTRY_ES\": \"Nauru\", \"COUNTRY_FR\": \"Nauru\"},\n", - " {\"GID_0\": \"NPL\", \"COUNTRY_ES\": \"Nepal\", \"COUNTRY_FR\": \"Népal\"},\n", - " {\"GID_0\": \"NLD\", \"COUNTRY_ES\": \"Países Bajos\", \"COUNTRY_FR\": \"Pays-Bas\"},\n", - " {\"GID_0\": \"NCL\", 
\"COUNTRY_ES\": \"Nueva Caledonia\", \"COUNTRY_FR\": \"Nouvelle-Calédonie\"},\n", - " {\"GID_0\": \"NZL\", \"COUNTRY_ES\": \"Nueva Zelanda\", \"COUNTRY_FR\": \"Nouvelle-Zélande\"},\n", - " {\"GID_0\": \"NIC\", \"COUNTRY_ES\": \"Nicaragua\", \"COUNTRY_FR\": \"Nicaragua\"},\n", - " {\"GID_0\": \"NER\", \"COUNTRY_ES\": \"Níger\", \"COUNTRY_FR\": \"Niger\"},\n", - " {\"GID_0\": \"NGA\", \"COUNTRY_ES\": \"Nigeria\", \"COUNTRY_FR\": \"Nigéria\"},\n", - " {\"GID_0\": \"NIU\", \"COUNTRY_ES\": \"Niue\", \"COUNTRY_FR\": \"Niue\"},\n", - " {\"GID_0\": \"NFK\", \"COUNTRY_ES\": \"Isla Norfolk\", \"COUNTRY_FR\": \"Île Norfolk\"},\n", - " {\"GID_0\": \"PRK\", \"COUNTRY_ES\": \"Corea del Norte\", \"COUNTRY_FR\": \"Corée du Nord\"},\n", - " {\"GID_0\": \"MKD\", \"COUNTRY_ES\": \"Macedonia del Norte\", \"COUNTRY_FR\": \"Macédoine du Nord\"},\n", - " {\"GID_0\": \"ZNC\", \"COUNTRY_ES\": \"Chipre del Norte\", \"COUNTRY_FR\": \"Chypre du Nord\"},\n", - " {\"GID_0\": \"MNP\", \"COUNTRY_ES\": \"Islas Marianas del Norte\", \"COUNTRY_FR\": \"Îles Mariannes du Nord\"},\n", - " {\"GID_0\": \"NOR\", \"COUNTRY_ES\": \"Noruega\", \"COUNTRY_FR\": \"Norvège\"},\n", - " {\"GID_0\": \"OMN\", \"COUNTRY_ES\": \"Omán\", \"COUNTRY_FR\": \"Oman\"},\n", - " {\"GID_0\": \"PAK\", \"COUNTRY_ES\": \"Pakistán\", \"COUNTRY_FR\": \"Pakistan\"},\n", - " {\"GID_0\": \"PLW\", \"COUNTRY_ES\": \"Palaos\", \"COUNTRY_FR\": \"Palaos\"},\n", - " {\"GID_0\": \"PSE\", \"COUNTRY_ES\": \"Palestina\", \"COUNTRY_FR\": \"Palestine\"},\n", - " {\"GID_0\": \"PAN\", \"COUNTRY_ES\": \"Panamá\", \"COUNTRY_FR\": \"Panama\"},\n", - " {\"GID_0\": \"PNG\", \"COUNTRY_ES\": \"Papúa Nueva Guinea\", \"COUNTRY_FR\": \"Papouasie-Nouvelle-Guinée\"},\n", - " {\"GID_0\": \"XPI\", \"COUNTRY_ES\": \"Islas Paracelso\", \"COUNTRY_FR\": \"Îles Paracels\"},\n", - " {\"GID_0\": \"PRY\", \"COUNTRY_ES\": \"Paraguay\", \"COUNTRY_FR\": \"Paraguay\"},\n", - " {\"GID_0\": \"PER\", \"COUNTRY_ES\": \"Perú\", \"COUNTRY_FR\": \"Pérou\"},\n", - " {\"GID_0\": 
\"PHL\", \"COUNTRY_ES\": \"Filipinas\", \"COUNTRY_FR\": \"Philippines\"},\n", - " {\"GID_0\": \"PCN\", \"COUNTRY_ES\": \"Islas Pitcairn\", \"COUNTRY_FR\": \"Îles Pitcairn\"},\n", - " {\"GID_0\": \"POL\", \"COUNTRY_ES\": \"Polonia\", \"COUNTRY_FR\": \"Pologne\"},\n", - " {\"GID_0\": \"PRT\", \"COUNTRY_ES\": \"Portugal\", \"COUNTRY_FR\": \"Portugal\"},\n", - " {\"GID_0\": \"PRI\", \"COUNTRY_ES\": \"Puerto Rico\", \"COUNTRY_FR\": \"Porto Rico\"},\n", - " {\"GID_0\": \"QAT\", \"COUNTRY_ES\": \"Catar\", \"COUNTRY_FR\": \"Qatar\"},\n", - " {\"GID_0\": \"COG\", \"COUNTRY_ES\": \"República del Congo\", \"COUNTRY_FR\": \"République du Congo\"},\n", - " {\"GID_0\": \"ROU\", \"COUNTRY_ES\": \"Rumania\", \"COUNTRY_FR\": \"Roumanie\"},\n", - " {\"GID_0\": \"RUS\", \"COUNTRY_ES\": \"Rusia\", \"COUNTRY_FR\": \"Russie\"},\n", - " {\"GID_0\": \"RWA\", \"COUNTRY_ES\": \"Ruanda\", \"COUNTRY_FR\": \"Rwanda\"},\n", - " {\"GID_0\": \"REU\", \"COUNTRY_ES\": \"Reunión\", \"COUNTRY_FR\": \"La Réunion\"},\n", - " {\"GID_0\": \"SHN\", \"COUNTRY_ES\": \"Santa Elena, Ascensión y Tristán de Acuña\", \"COUNTRY_FR\": \"Sainte-Hélène, Ascension et Tristan da Cunha\"},\n", - " {\"GID_0\": \"KNA\", \"COUNTRY_ES\": \"San Cristóbal y Nieves\", \"COUNTRY_FR\": \"Saint-Christophe-et-Niévès\"},\n", - " {\"GID_0\": \"LCA\", \"COUNTRY_ES\": \"Santa Lucía\", \"COUNTRY_FR\": \"Sainte-Lucie\"},\n", - " {\"GID_0\": \"SPM\", \"COUNTRY_ES\": \"San Pedro y Miquelón\", \"COUNTRY_FR\": \"Saint-Pierre-et-Miquelon\"},\n", - " {\"GID_0\": \"VCT\", \"COUNTRY_ES\": \"San Vicente y las Granadinas\", \"COUNTRY_FR\": \"Saint-Vincent-et-les-Grenadines\"},\n", - " {\"GID_0\": \"BLM\", \"COUNTRY_ES\": \"San Bartolomé\", \"COUNTRY_FR\": \"Saint-Barthélemy\"},\n", - " {\"GID_0\": \"MAF\", \"COUNTRY_ES\": \"San Martín\", \"COUNTRY_FR\": \"Saint-Martin\"},\n", - " {\"GID_0\": \"WSM\", \"COUNTRY_ES\": \"Samoa\", \"COUNTRY_FR\": \"Samoa\"},\n", - " {\"GID_0\": \"SMR\", \"COUNTRY_ES\": \"San Marino\", \"COUNTRY_FR\": 
\"Saint-Marin\"},\n", - " {\"GID_0\": \"SAU\", \"COUNTRY_ES\": \"Arabia Saudita\", \"COUNTRY_FR\": \"Arabie Saoudite\"},\n", - " {\"GID_0\": \"SEN\", \"COUNTRY_ES\": \"Senegal\", \"COUNTRY_FR\": \"Sénégal\"},\n", - " {\"GID_0\": \"SRB\", \"COUNTRY_ES\": \"Serbia\", \"COUNTRY_FR\": \"Serbie\"},\n", - " {\"GID_0\": \"SYC\", \"COUNTRY_ES\": \"Seychelles\", \"COUNTRY_FR\": \"Seychelles\"},\n", - " {\"GID_0\": \"SLE\", \"COUNTRY_ES\": \"Sierra Leona\", \"COUNTRY_FR\": \"Sierra Leone\"},\n", - " {\"GID_0\": \"SGP\", \"COUNTRY_ES\": \"Singapur\", \"COUNTRY_FR\": \"Singapour\"},\n", - " {\"GID_0\": \"SXM\", \"COUNTRY_ES\": \"Sint Maarten\", \"COUNTRY_FR\": \"Saint-Martin\"},\n", - " {\"GID_0\": \"SVK\", \"COUNTRY_ES\": \"Eslovaquia\", \"COUNTRY_FR\": \"Slovaquie\"},\n", - " {\"GID_0\": \"SVN\", \"COUNTRY_ES\": \"Eslovenia\", \"COUNTRY_FR\": \"Slovénie\"},\n", - " {\"GID_0\": \"SLB\", \"COUNTRY_ES\": \"Islas Salomón\", \"COUNTRY_FR\": \"Îles Salomon\"},\n", - " {\"GID_0\": \"SOM\", \"COUNTRY_ES\": \"Somalia\", \"COUNTRY_FR\": \"Somalie\"},\n", - " {\"GID_0\": \"ZAF\", \"COUNTRY_ES\": \"Sudáfrica\", \"COUNTRY_FR\": \"Afrique du Sud\"},\n", - " {\"GID_0\": \"SGS\", \"COUNTRY_ES\": \"Georgia del Sur y las Islas Sandwich del Sur\", \"COUNTRY_FR\": \"Géorgie du Sud et îles Sandwich du Sud\"},\n", - " {\"GID_0\": \"KOR\", \"COUNTRY_ES\": \"Corea del Sur\", \"COUNTRY_FR\": \"Corée du Sud\"},\n", - " {\"GID_0\": \"SSD\", \"COUNTRY_ES\": \"Sudán del Sur\", \"COUNTRY_FR\": \"Soudan du Sud\"},\n", - " {\"GID_0\": \"ESP\", \"COUNTRY_ES\": \"España\", \"COUNTRY_FR\": \"Espagne\"},\n", - " {\"GID_0\": \"XSP\", \"COUNTRY_ES\": \"Islas Spratly\", \"COUNTRY_FR\": \"Îles Spratleys\"},\n", - " {\"GID_0\": \"LKA\", \"COUNTRY_ES\": \"Sri Lanka\", \"COUNTRY_FR\": \"Sri Lanka\"},\n", - " {\"GID_0\": \"SDN\", \"COUNTRY_ES\": \"Sudán\", \"COUNTRY_FR\": \"Soudan\"},\n", - " {\"GID_0\": \"SUR\", \"COUNTRY_ES\": \"Surinam\", \"COUNTRY_FR\": \"Suriname\"},\n", - " {\"GID_0\": \"SJM\", \"COUNTRY_ES\": 
\"Svalbard y Jan Mayen\", \"COUNTRY_FR\": \"Svalbard et Jan Mayen\"},\n", - " {\"GID_0\": \"SWZ\", \"COUNTRY_ES\": \"Suazilandia\", \"COUNTRY_FR\": \"Swaziland\"},\n", - " {\"GID_0\": \"SWE\", \"COUNTRY_ES\": \"Suecia\", \"COUNTRY_FR\": \"Suède\"},\n", - " {\"GID_0\": \"CHE\", \"COUNTRY_ES\": \"Suiza\", \"COUNTRY_FR\": \"Suisse\"},\n", - " {\"GID_0\": \"SYR\", \"COUNTRY_ES\": \"Siria\", \"COUNTRY_FR\": \"Syrie\"},\n", - " {\"GID_0\": \"STP\", \"COUNTRY_ES\": \"Santo Tomé y Príncipe\", \"COUNTRY_FR\": \"São Tomé-et-Principe\"},\n", - " {\"GID_0\": \"TWN\", \"COUNTRY_ES\": \"Taiwán\", \"COUNTRY_FR\": \"Taïwan\"},\n", - " {\"GID_0\": \"TJK\", \"COUNTRY_ES\": \"Tayikistán\", \"COUNTRY_FR\": \"Tadjikistan\"},\n", - " {\"GID_0\": \"TZA\", \"COUNTRY_ES\": \"Tanzania\", \"COUNTRY_FR\": \"Tanzanie\"},\n", - " {\"GID_0\": \"THA\", \"COUNTRY_ES\": \"Tailandia\", \"COUNTRY_FR\": \"Thaïlande\"},\n", - " {\"GID_0\": \"TLS\", \"COUNTRY_ES\": \"Timor-Leste\", \"COUNTRY_FR\": \"Timor-Leste\"},\n", - " {\"GID_0\": \"TGO\", \"COUNTRY_ES\": \"Togo\", \"COUNTRY_FR\": \"Togo\"},\n", - " {\"GID_0\": \"TKL\", \"COUNTRY_ES\": \"Tokelau\", \"COUNTRY_FR\": \"Tokelau\"},\n", - " {\"GID_0\": \"TON\", \"COUNTRY_ES\": \"Tonga\", \"COUNTRY_FR\": \"Tonga\"},\n", - " {\"GID_0\": \"TTO\", \"COUNTRY_ES\": \"Trinidad y Tobago\", \"COUNTRY_FR\": \"Trinité-et-Tobago\"},\n", - " {\"GID_0\": \"TUN\", \"COUNTRY_ES\": \"Túnez\", \"COUNTRY_FR\": \"Tunisie\"},\n", - " {\"GID_0\": \"TUR\", \"COUNTRY_ES\": \"Turquía\", \"COUNTRY_FR\": \"Turquie\"},\n", - " {\"GID_0\": \"TKM\", \"COUNTRY_ES\": \"Turkmenistán\", \"COUNTRY_FR\": \"Turkménistan\"},\n", - " {\"GID_0\": \"TCA\", \"COUNTRY_ES\": \"Islas Turcas y Caicos\", \"COUNTRY_FR\": \"Îles Turques-et-Caïques\"},\n", - " {\"GID_0\": \"TUV\", \"COUNTRY_ES\": \"Tuvalu\", \"COUNTRY_FR\": \"Tuvalu\"},\n", - " {\"GID_0\": \"UGA\", \"COUNTRY_ES\": \"Uganda\", \"COUNTRY_FR\": \"Ouganda\"},\n", - " {\"GID_0\": \"UKR\", \"COUNTRY_ES\": \"Ucrania\", \"COUNTRY_FR\": 
\"Ukraine\"},\n", - " {\"GID_0\": \"ARE\", \"COUNTRY_ES\": \"Emiratos Árabes Unidos\", \"COUNTRY_FR\": \"Émirats Arabes Unis\"},\n", - " {\"GID_0\": \"GBR\", \"COUNTRY_ES\": \"Reino Unido\", \"COUNTRY_FR\": \"Royaume-Uni\"},\n", - " {\"GID_0\": \"USA\", \"COUNTRY_ES\": \"Estados Unidos\", \"COUNTRY_FR\": \"États-Unis\"},\n", - " {\"GID_0\": \"UMI\", \"COUNTRY_ES\": \"Islas Ultramarinas Menores de los Estados Unidos\", \"COUNTRY_FR\": \"Îles mineures éloignées des États-Unis\"},\n", - " {\"GID_0\": \"URY\", \"COUNTRY_ES\": \"Uruguay\", \"COUNTRY_FR\": \"Uruguay\"},\n", - " {\"GID_0\": \"UZB\", \"COUNTRY_ES\": \"Uzbekistán\", \"COUNTRY_FR\": \"Ouzbékistan\"},\n", - " {\"GID_0\": \"VUT\", \"COUNTRY_ES\": \"Vanuatu\", \"COUNTRY_FR\": \"Vanuatu\"},\n", - " {\"GID_0\": \"VAT\", \"COUNTRY_ES\": \"Ciudad del Vaticano\", \"COUNTRY_FR\": \"Cité du Vatican\"},\n", - " {\"GID_0\": \"VEN\", \"COUNTRY_ES\": \"Venezuela\", \"COUNTRY_FR\": \"Venezuela\"},\n", - " {\"GID_0\": \"VNM\", \"COUNTRY_ES\": \"Vietnam\", \"COUNTRY_FR\": \"Vietnam\"},\n", - " {\"GID_0\": \"VIR\", \"COUNTRY_ES\": \"Islas Vírgenes de los Estados Unidos\", \"COUNTRY_FR\": \"Îles Vierges des États-Unis\"},\n", - " {\"GID_0\": \"WLF\", \"COUNTRY_ES\": \"Wallis y Futuna\", \"COUNTRY_FR\": \"Wallis-et-Futuna\"},\n", - " {\"GID_0\": \"ESH\", \"COUNTRY_ES\": \"Sahara Occidental\", \"COUNTRY_FR\": \"Sahara occidental\"},\n", - " {\"GID_0\": \"YEM\", \"COUNTRY_ES\": \"Yemen\", \"COUNTRY_FR\": \"Yémen\"},\n", - " {\"GID_0\": \"ZMB\", \"COUNTRY_ES\": \"Zambia\", \"COUNTRY_FR\": \"Zambie\"},\n", - " {\"GID_0\": \"ZWE\", \"COUNTRY_ES\": \"Zimbabue\", \"COUNTRY_FR\": \"Zimbabwe\"},\n", - " {\"GID_0\": \"ALA\", \"COUNTRY_ES\": \"Islas Åland\", \"COUNTRY_FR\": \"Îles Åland\"}]\n", - "\n", - "df = pd.DataFrame(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 286, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Found GeoPackage: 
/home/mambauser/data/gadm/raw/temp_preprocess/gadm_410-levels/gadm_410-levels.gpkg\n", + "Found GeoPackage: /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/temp_preprocess/gadm_410-levels/gadm_410-levels.gpkg\n", "Selected layer: ADM_0\n" ] } @@ -735,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 287, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -841,7 +530,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -854,7 +543,53 @@ }, { "cell_type": "code", - "execution_count": 309, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download country translations\n", + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "input_path\n", + "\n", + "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", + "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=translations_csv_url,\n", + " file=translations_csv_output,\n", + " operation=\"r\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def add_translations(df, translations_csv_path):\n", + " translations_df = pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[])\n", + " \n", + " df = df.merge(translations_df[['code', 'name_es', 'name_fr']], left_on='GID_0', right_on='code', how='left')\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "translations_path = input_path.joinpath('locations_translated.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -882,8 +617,8 @@ " geometry\n", " GID_0\n", " 
area_km2\n", - " COUNTRY_ES\n", - " COUNTRY_FR\n", + " name_es\n", + " name_fr\n", " \n", " \n", " \n", @@ -1005,7 +740,7 @@ "204 Zambia POLYGON ((25.87834 -17.97218, 25.87034 -17.970... \n", "205 Zimbabwe POLYGON ((32.70425 -18.96022, 32.70537 -18.965... \n", "\n", - " GID_0 area_km2 COUNTRY_ES COUNTRY_FR \n", + " GID_0 area_km2 name_es name_fr \n", "0 AFG 644050.28 Afganistán Afghanistan \n", "1 XAD 233.64 Akrotiri y Dhekelia Akrotiri et Dhekelia \n", "2 ALB 28689.62 Albania Albanie \n", @@ -1021,29 +756,36 @@ "[206 rows x 6 columns]" ] }, - "execution_count": 309, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Add translations\n", - "gdf_updated = gdf_updated.merge(df, on='GID_0')\n", - "gdf_updated" + "gdf_translated = add_translations(gdf_updated, translations_path).drop(columns=['code'])\n", + "gdf_translated" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 206/206 [05:43<00:00, 1.67s/it]\n" + ] + } + ], "source": [ "final_gadm = await simplify_async(gdf_updated)" ] }, { "cell_type": "code", - "execution_count": 312, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -1053,7 +795,7 @@ }, { "cell_type": "code", - "execution_count": 313, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -1063,7 +805,7 @@ }, { "cell_type": "code", - "execution_count": 314, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1086,7 +828,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -1097,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, 
"outputs": [], "source": [ @@ -1108,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1124,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -1139,7 +881,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -1180,26 +922,26 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_160/3601108936.py:5: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + "/tmp/ipykernel_3376415/3601108936.py:5: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", " ).to_file(filename=output_file.as_posix(), driver=\"ESRI Shapefile\", encoding=\"utf-8\")\n", - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'mpa_zone_id' to 'mpa_zone_i'\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'mpa_zone_id' to 'mpa_zone_i'\n", " ogr_write(\n", - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'designation' to 'designatio'\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'designation' to 'designatio'\n", " ogr_write(\n", - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", 
" ogr_write(\n", - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'establishment_stage' to 'establishm'\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'establishment_stage' to 'establishm'\n", " ogr_write(\n", - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'protection_mpaguide_level' to 'protection'\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'protection_mpaguide_level' to 'protection'\n", " ogr_write(\n", - "/opt/conda/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'protection_level' to 'protecti_1'\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'protection_level' to 'protecti_1'\n", " ogr_write(\n" ] } @@ -1251,7 +993,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -1261,21 +1003,6 @@ "pipe = \"protectedseas\"" ] }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "ps_csv_url = \"ProtectedSeas/ProtectedSeas_06142023.csv\"\n", - "ps_csv_output = input_path.joinpath(ps_csv_url.split(\"/\")[-1])\n", - "\n", - "ps_geometries_url = (\n", - " \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.zip\"\n", - ")\n", - "ps_geometries_output = input_path.joinpath(ps_geometries_url.split(\"/\")[-1])" - ] - }, { "cell_type": "code", "execution_count": 24, @@ -1292,6 +1019,21 @@ "remote_path = working_folder.get_remote_path(step)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ps_csv_url = 
\"ProtectedSeas/ProtectedSeas_06142023.csv\"\n", + "ps_csv_output = input_path.joinpath(ps_csv_url.split(\"/\")[-1])\n", + "\n", + "ps_geometries_url = (\n", + " \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.zip\"\n", + ")\n", + "ps_geometries_output = input_path.joinpath(ps_geometries_url.split(\"/\")[-1])" + ] + }, { "cell_type": "code", "execution_count": 25, @@ -1339,18 +1081,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/mambauser/src/pipelines/processors.py:77: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", - " return df[mask1][mask2].reset_index()\n" - ] - } - ], + "outputs": [], "source": [ "# transform data\n", "# TODO: Modify the preprocessing steps so we do not eliminate the geometries that does not intersect with MPAs - do to a change in the processing methodology\n", @@ -1431,7 +1164,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -1442,7 +1175,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -1457,34 +1190,27 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "working_folder = FileConventionHandler(pipe)\n", - "input_path = working_folder.pipe_raw_path\n", - "temp_working_path = working_folder.get_temp_file_path(step)\n", - "\n", - "output_path = working_folder.get_processed_step_path(step)\n", - "output_file = working_folder.get_step_fmt_file_path(step, \"shp\")\n", - "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", - "remote_path = working_folder.get_remote_path(step)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ - "{'id': 'marine-shp', 'title': 'WDPA_WDOECM_Aug2024_Public_marine_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_WDOECM_Aug2024_Public_marine_shp.zip', 'hasFailed': False, 'token': 'marine'}\n" + "{'id': 'marine-shp', 'title': 'WDPA_WDOECM_Sep2024_Public_marine_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_WDOECM_Sep2024_Public_marine_shp.zip', 'hasFailed': False, 'token': 'marine'}\n" ] } ], "source": [ + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = working_folder.get_temp_file_path(step)\n", + "\n", + "output_path = working_folder.get_processed_step_path(step)\n", + "output_file = working_folder.get_step_fmt_file_path(step, \"shp\")\n", + "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", + "remote_path = working_folder.get_remote_path(step)\n", + "\n", "# download data\n", "r = requests.post(url=mpa_url, data=mpa_body)\n", "r.raise_for_status()\n", @@ -1503,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -1520,7 +1246,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1568,7 +1294,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -1595,6 +1321,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'vizzuality_processed_data/mpa/preprocess/mpa_preprocess.zip'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_path" + ] + }, { "cell_type": "code", "execution_count": 19, @@ -1615,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 37, "metadata": {}, "outputs": [], 
"source": [ @@ -1626,7 +1372,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -1641,7 +1387,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -1657,17 +1403,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id': 'wdpa-shp', 'title': 'WDPA_Sep2024_Public_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_Sep2024_Public_shp.zip', 'hasFailed': False, 'token': 'wdpa'}\n" - ] - } - ], + "outputs": [], "source": [ "# download data\n", "r = requests.post(url=mpa_url, data=mpa_body)\n", @@ -1687,7 +1425,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -1704,7 +1442,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -1753,19 +1491,368 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "final_wdpa_terrestrial = await simplify_async(gdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "metadata": {}, - "outputs": [], - "source": [ + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 85/292261 [00:00<14:27, 336.92it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%|▎ | 661/292261 [00:07<145:23:41, 1.79s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|██ | 4100/292261 [00:10<00:50, 5746.09it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object 
has no attribute 'geoms'\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 5%|██████▋ | 13673/292261 [00:12<02:06, 2195.25it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 21%|█████████████████████████████▋ | 60200/292261 [00:20<01:26, 2696.80it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 22%|████████████████████████████████▏ | 65454/292261 [00:22<02:10, 1744.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 32%|██████████████████████████████████████████████▋ | 94837/292261 [00:27<00:41, 4733.72it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 34%|█████████████████████████████████████████████████ | 100205/292261 [00:28<00:47, 4031.78it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 36%|███████████████████████████████████████████████████▎ | 104962/292261 [00:29<00:43, 4304.70it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 41%|██████████████████████████████████████████████████████████▎ | 119249/292261 [00:32<00:35, 4855.21it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 46%|█████████████████████████████████████████████████████████████████▋ | 134186/292261 [00:35<00:52, 3035.13it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 51%|████████████████████████████████████████████████████████████████████████▎ | 147855/292261 [00:38<00:42, 3390.00it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 53%|███████████████████████████████████████████████████████████████████████████▋ | 154770/292261 [00:40<00:37, 3677.63it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 54%|████████████████████████████████████████████████████████████████████████████▌ | 156511/292261 [00:40<01:10, 1915.83it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 56%|███████████████████████████████████████████████████████████████████████████████▍ | 162422/292261 [00:42<00:31, 4181.06it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 58%|██████████████████████████████████████████████████████████████████████████████████▌ | 168614/292261 [00:43<00:55, 2234.02it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 68%|█████████████████████████████████████████████████████████████████████████████████████████████████▋ | 199564/292261 [00:49<00:19, 4753.37it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 
70%|████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 205065/292261 [00:50<00:27, 3198.44it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 219986/292261 [00:53<00:17, 4065.50it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 246638/292261 [00:58<00:10, 4368.04it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 271598/292261 [01:03<00:06, 3242.58it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 275485/292261 [01:03<00:04, 3707.50it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 278285/292261 [01:04<00:02, 6507.33it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + " 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 290011/292261 [01:11<00:00, 5892.57it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292261/292261 [03:13<00:00, 1507.66it/s]\n" + ] + } + ], + "source": [ + "final_wdpa_terrestrial = await simplify_async(gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ "# save data & zip it\n", "final_wdpa_terrestrial.to_file(\n", " filename=output_file,\n", @@ -1777,41 +1864,690 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.gpkg')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output_file" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['0', '1'], dtype=object)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_wdpa_terrestrial['MARINE'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# LOAD\n", + "## load zipped file to GCS\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# clean unzipped files\n", + "rm_tree(temp_working_path) if temp_working_path.exists() else None\n", + "rm_tree(output_path) if output_path.exists() else None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Protected planet intermediate all" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "force_clean = True\n", + "step = \"preprocess\"\n", + "pipe = \"pa\"" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "mpa_url = \"https://www.protectedplanet.net/downloads\"\n", + "mpa_body = {\n", + " \"domain\": \"general\",\n", + " \"format\": \"shp\",\n", + " \"token\": \"wdpa\",\n", + " \"id\": 76011,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = working_folder.get_temp_file_path(step)\n", + "\n", + "output_path = working_folder.get_processed_step_path(step)\n", + "output_file = working_folder.get_step_fmt_file_path(step, \"gpkg\")\n", + "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", + "remote_path = working_folder.get_remote_path(step)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'id': 'wdpa-shp', 'title': 'WDPA_Sep2024_Public_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_Sep2024_Public_shp.zip', 'hasFailed': False, 'token': 'wdpa'}\n" + ] + } + ], + "source": [ + "# download data\n", + "r = requests.post(url=mpa_url, data=mpa_body)\n", + "r.raise_for_status()\n", + "\n", + "download_url = r.json().get(\"url\")\n", + "input_file_name = f'{r.json().get(\"title\")}.zip'\n", + 
"print(r.json())\n", + "\n", + "# input_file = downloadFile(\n", + "# url=download_url,\n", + "# output_path=input_path,\n", + "# overwrite=force_clean,\n", + "# file=input_file_name,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# unzip file twice due how data is provisioned by protected planet\n", + "shutil.unpack_archive(\n", + " input_file,\n", + " temp_working_path,\n", + " \"zip\",\n", + ")\n", + "\n", + "for file in temp_working_path.glob(\"*.zip\"):\n", + " shutil.unpack_archive(file, temp_working_path.joinpath(file.stem), \"zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "# load data & Transform it\n", + "unziped_folders = []\n", + "for file in temp_working_path.glob(\"*/*.shp\"):\n", + " df = (\n", + " gpd.read_file(file)\n", + " .pipe(filter_by_methodology)\n", + " .pipe(transform_points)\n", + " .pipe(clean_geometries)\n", + " )\n", + " unziped_folders.append(df)\n", + "\n", + "# merge datasets\n", + "gdf = gpd.GeoDataFrame(\n", + " pd.concat(unziped_folders, ignore_index=True),\n", + " crs=unziped_folders[0].crs,\n", + ")\n", + "\n", + "gdf.drop(\n", + " columns=list(\n", + " set(gdf.columns)\n", + " - set(\n", + " [\n", + " \"geometry\",\n", + " \"WDPAID\",\n", + " \"WDPA_PID\",\n", + " \"PA_DEF\",\n", + " \"NAME\",\n", + " \"PARENT_ISO\",\n", + " \"DESIG_ENG\",\n", + " \"IUCN_CAT\",\n", + " \"STATUS\",\n", + " \"STATUS_YR\",\n", + " \"GIS_AREA\",\n", + " \"GIS_M_AREA\",\n", + " \"MARINE\",\n", + " ]\n", + " )\n", + " ),\n", + " inplace=True,\n", + ")\n", + "gdf[\"WDPAID\"] = pd.to_numeric(gdf[\"WDPAID\"], downcast=\"integer\")" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 298912/298912 [03:53<00:00, 1277.78it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▉ | 1817/298912 [00:11<03:42, 1338.09it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|█▉ | 3731/298912 [00:12<03:25, 1433.85it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|██▏ | 4223/298912 [00:12<01:23, 3536.39it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 4%|██████ | 11698/298912 [00:15<04:00, 1191.93it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 21%|████████████████████████████████ | 61318/298912 [00:27<03:03, 1298.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 22%|██████████████████████████████████▉ | 66972/298912 [00:29<02:28, 1566.84it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 32%|██████████████████████████████████████████████████▌ | 96777/298912 [00:35<01:04, 3139.64it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 
34%|█████████████████████████████████████████████████████▏ | 102462/298912 [00:37<00:46, 4270.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 36%|███████████████████████████████████████████████████████▍ | 106818/298912 [00:38<01:33, 2059.87it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 41%|██████████████████████████████████████████████████████████████▉ | 121477/298912 [00:41<01:20, 2212.60it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 43%|███████████████████████████████████████████████████████████████████ | 129353/298912 [00:44<01:10, 2404.69it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 46%|██████████████████████████████████████████████████████████████████████▊ | 136616/298912 [00:46<01:06, 2457.71it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 50%|████████████████████████████████████████████████████████████████████████████▊ | 148130/298912 [00:50<01:02, 2399.55it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 51%|██████████████████████████████████████████████████████████████████████████████▍ | 151376/298912 [00:51<01:09, 2121.47it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 
54%|███████████████████████████████████████████████████████████████████████████████████ | 160280/298912 [00:53<01:55, 1197.48it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 55%|█████████████████████████████████████████████████████████████████████████████████████▌ | 164997/298912 [00:54<01:16, 1760.31it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 56%|██████████████████████████████████████████████████████████████████████████████████████▍ | 166577/298912 [00:55<01:03, 2072.04it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 58%|█████████████████████████████████████████████████████████████████████████████████████████▌ | 172769/298912 [00:56<01:01, 2037.28it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 58%|██████████████████████████████████████████████████████████████████████████████████████████▎ | 174238/298912 [00:57<00:30, 4024.73it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 201035/298912 [01:03<00:17, 5566.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 69%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 205073/298912 [01:04<00:17, 
5454.62it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 210501/298912 [01:05<00:27, 3184.24it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 225210/298912 [01:08<00:17, 4259.37it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 225947/298912 [01:09<00:48, 1498.54it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 229699/298912 [01:10<00:23, 2896.14it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 252333/298912 [01:15<00:15, 3001.02it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Polygon' object has no attribute 'geoms'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 277458/298912 
[01:21<00:07, 2831.56it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 281117/298912 [01:22<00:12, 1449.83it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 284183/298912 [01:23<00:04, 3294.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 296106/298912 [01:32<00:00, 3532.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 298912/298912 [03:53<00:00, 2.59it/s]" + ] + } + ], + "source": [ + "final_wdpa = await simplify_async(gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/home/mambauser/data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.gpkg')" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "output_file" + "# save data & zip it\n", + "final_wdpa.to_file(\n", + " filename=output_file,\n", + " 
driver=\"GPKG\",\n", + " layer=\"name\",\n", + " encoding=\"utf-8\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 75, "metadata": {}, - "outputs": [ - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", - "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", - "\u001b[1;31mClick here for more info. \n", - "\u001b[1;31mView Jupyter log for further details." - ] - } - ], + "outputs": [], "source": [ "# LOAD\n", "## load zipped file to GCS\n", @@ -1924,20 +2660,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/home/mambauser/data/habitats/raw/temp_preprocess')" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "temp_working_path" ] @@ -1953,202 +2678,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PEAKIDDEPTHHEIGHTLONGLATAREA2DFILTERgeometry
026000-254711482.76250084.979736982.0283370POLYGON ((2.91249 84.82976, 2.76249 84.79636, ...
126157-308412969.14305684.935292348.4730550POLYGON ((9.99309 84.93526, 9.25139 84.82696, ...
226158-304313429.18333384.938070367.5403800POLYGON ((9.07499 85.04636, 9.18329 85.03806, ...
326228-314213798.74861184.907514299.4436360POLYGON ((9.79859 84.90756, 8.83199 84.82416, ...
426229-314613838.88750084.913070309.5884920POLYGON ((8.88749 84.83806, 8.81249 84.83806, ...
...........................
334474999430-2981376-142.295833-74.566097819.6088010POLYGON ((-142.29582 -74.72444, -142.46251 -74...
334484999462-2951274-142.250000-74.570264777.5980791POLYGON ((-142.25001 -74.72864, -142.41671 -74...
334494999913-3483288-164.179167-74.7660971000.0230881POLYGON ((-164.01251 -74.93274, -164.17921 -74...
334505000862-27391060-158.162500-75.141097814.4262340POLYGON ((-158.16251 -75.28274, -158.30421 -75...
334515000879-27101104-158.116667-75.145264762.5183281POLYGON ((-158.11671 -75.27864, -158.25001 -75...
\n", - "

33452 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " PEAKID DEPTH HEIGHT LONG LAT AREA2D FILTER \\\n", - "0 26000 -2547 1148 2.762500 84.979736 982.028337 0 \n", - "1 26157 -3084 1296 9.143056 84.935292 348.473055 0 \n", - "2 26158 -3043 1342 9.183333 84.938070 367.540380 0 \n", - "3 26228 -3142 1379 8.748611 84.907514 299.443636 0 \n", - "4 26229 -3146 1383 8.887500 84.913070 309.588492 0 \n", - "... ... ... ... ... ... ... ... \n", - "33447 4999430 -298 1376 -142.295833 -74.566097 819.608801 0 \n", - "33448 4999462 -295 1274 -142.250000 -74.570264 777.598079 1 \n", - "33449 4999913 -348 3288 -164.179167 -74.766097 1000.023088 1 \n", - "33450 5000862 -2739 1060 -158.162500 -75.141097 814.426234 0 \n", - "33451 5000879 -2710 1104 -158.116667 -75.145264 762.518328 1 \n", - "\n", - " geometry \n", - "0 POLYGON ((2.91249 84.82976, 2.76249 84.79636, ... \n", - "1 POLYGON ((9.99309 84.93526, 9.25139 84.82696, ... \n", - "2 POLYGON ((9.07499 85.04636, 9.18329 85.03806, ... \n", - "3 POLYGON ((9.79859 84.90756, 8.83199 84.82416, ... \n", - "4 POLYGON ((8.88749 84.83806, 8.81249 84.83806, ... \n", - "... ... \n", - "33447 POLYGON ((-142.29582 -74.72444, -142.46251 -74... \n", - "33448 POLYGON ((-142.25001 -74.72864, -142.41671 -74... \n", - "33449 POLYGON ((-164.01251 -74.93274, -164.17921 -74... \n", - "33450 POLYGON ((-158.16251 -75.28274, -158.30421 -75... \n", - "33451 POLYGON ((-158.11671 -75.27864, -158.25001 -75... 
\n", - "\n", - "[33452 rows x 8 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "first" ] diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb index 07e7e131..5fa31e98 100644 --- a/data/notebooks/pipes_mock/locations.ipynb +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -24,8 +24,11 @@ "import geopandas as gpd\n", "import numpy as np\n", "import json\n", + "import dotenv\n", + "\n", + "dotenv.load_dotenv()\n", "\n", - "scripts_dir = Path(\"../../\").joinpath(\"src\")\n", + "scripts_dir = Path(\".\").joinpath(\"src\")\n", "if scripts_dir not in sys.path:\n", " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", "\n", @@ -82,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -93,47 +96,77 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipe_eez = \"eez\"\n", "pipe_eez_dir = FileConventionHandler(pipe_eez)\n", "pipe_gadm = \"gadm\"\n", "pipe_gadm_dir = FileConventionHandler(pipe_gadm)\n", "\n", - "output_file = 
pipe_gadm_dir.get_processed_step_path(current_step).joinpath(\"locations.json\")\n", + "output_file = pipe_gadm_dir.get_processed_step_path(current_step).joinpath(\"locations_all.json\")\n", "\n", "# Download the EEZ file && unzip it\n", "download_and_unzip_if_needed(pipe_eez_dir, prev_step, mysettings)\n", "\n", - "# Download the EEZ file && unzip it\n", + "# Download the gadm file && unzip it\n", "download_and_unzip_if_needed(pipe_gadm_dir, prev_step, mysettings)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "from typing import Union, List\n", + "from typing import List\n", "import pandera as pa\n", "from pandera.typing import Index, Series\n", - "from pandera.typing.geopandas import GeoDataFrame, GeoSeries\n", "import pandas as pd\n", "\n", "class LocationSchemaAll(pa.DataFrameModel):\n", " id: Index[int] = pa.Field(gt=0, coerce=True)\n", " code: Series[str] = pa.Field(coerce=True)\n", " name: Series[str] = pa.Field(coerce=True)\n", - " totalMarineArea: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", - " totalLandArea: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", + " name_es: Series[str] = pa.Field(coerce=True)\n", + " name_fr: Series[str] = pa.Field(coerce=True)\n", + " total_marine_area: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", + " total_terrestrial_area: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", " type: Series[str] = pa.Field(\n", " unique_values_eq=[\"country\", \"worldwide\", \"region\", \"highseas\"], coerce=True\n", " )\n", " groups: Series[List[int]] = pa.Field(coerce=True)\n", - " bounds: Series[List[float]] = pa.Field(coerce=True)\n", + " marine_bounds: Series[List[float]] = pa.Field(coerce=True, nullable=True)\n", + " terrestrial_bounds: Series[List[float]] = pa.Field(coerce=True, nullable=True)\n", + "\n", + "def add_translations(df, translations_csv_path):\n", + " translations_df = 
pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[])\n", + " \n", + " df = df.merge(translations_df[['code', 'name_es', 'name_fr']], left_on='iso', right_on='code', how='left')\n", + " \n", + " return df\n", "\n", "def calculate_gadm_area(df: pd.DataFrame) -> pd.DataFrame:\n", " glob = gpd.GeoDataFrame(\n", @@ -187,16 +220,16 @@ " )\n", " )\n", "\n", - "def combine_bounds(marine_bounds, land_bounds):\n", - " # Check if marine bounds are valid\n", - " if isinstance(marine_bounds, list) and len(marine_bounds) == 4:\n", - " return marine_bounds\n", - " # If marine bounds are not valid, check land bounds\n", - " elif isinstance(land_bounds, list) and len(land_bounds) == 4:\n", - " return land_bounds\n", - " # If neither bounds are valid, return an empty list\n", - " else:\n", - " return []\n", + "# def combine_bounds(marine_bounds, land_bounds):\n", + "# # Check if marine bounds are valid\n", + "# if isinstance(marine_bounds, list) and len(marine_bounds) == 4:\n", + "# return marine_bounds\n", + "# # If marine bounds are not valid, check land bounds\n", + "# elif isinstance(land_bounds, list) and len(land_bounds) == 4:\n", + "# return land_bounds\n", + "# # If neither bounds are valid, return an empty list\n", + "# else:\n", + "# return []\n", "\n", "def combine_columns(df, col1, col2, new_col):\n", " \"\"\"\n", @@ -217,9 +250,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], + "source": [ + "# # Download country translations\n", + "working_folder = FileConventionHandler(pipe_gadm)\n", + "input_path = working_folder.pipe_raw_path\n", + "input_path\n", + "\n", + "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", + "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", + "\n", + "# writeReadGCP(\n", + "# credentials=mysettings.GCS_KEYFILE_JSON,\n", + "# bucket_name=mysettings.GCS_BUCKET,\n", + "# 
blob_name=translations_csv_url,\n", + "# file=translations_csv_output,\n", + "# operation=\"r\",\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3347624/1577571524.py:35: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", + " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n" + ] + } + ], "source": [ "# Process EEZ data (marine data)\n", "locations = (\n", @@ -232,11 +297,13 @@ " .pipe(add_bbox)\n", " .pipe(add_groups_and_members)\n", " .pipe(add_location_name)\n", + " .pipe(add_translations, translations_csv_output)\n", " .rename(\n", " columns={\n", - " \"iso\": \"code\",\n", - " \"AREA_KM2\": \"totalMarineArea\",\n", + " \n", + " \"AREA_KM2\": \"total_marine_area\",\n", " \"location_type\": \"type\",\n", + " \"bounds\":'marine_bounds'\n", " }\n", " )\n", ").reset_index(drop=True)\n", @@ -244,16 +311,15 @@ "locations.drop(\n", " columns=list(\n", " set(locations.columns) -\n", - " set([\"code\", \"name\", \"totalMarineArea\", \"type\", \"groups\", \"bounds\", \"id\"])\n", + " set([\"code\", \"name\", \"name_es\", \"name_fr\", \"total_marine_area\", \"marine_bounds\", \"type\", \"groups\", \"id\"])\n", " ),\n", " inplace=True,\n", ")\n", "\n", "\n", - "# Create a lookup dictionary for IDs from EEZ data\n", - "id_lookup = locations.set_index('code')['id'].to_dict()\n", + "id_lookup = locations.set_index('code')['id'].to_dict() # Create a lookup dictionary for IDs from EEZ data\n", "\n", - "# Process GADM data (land data)\n", + "# Process GADM data \n", "locations_land = (\n", " gpd.read_file(pipe_gadm_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", " .rename(columns={\"GID_0\": \"iso\", 'area_km2': 'AREA_KM2'})\n", @@ -263,35 +329,31 @@ " .pipe(add_bbox)\n", " .pipe(add_groups_and_members_land)\n", " .pipe(add_location_name)\n", + " 
.pipe(add_translations, translations_csv_output)\n", " .rename(\n", " columns={\n", - " \"iso\": \"code\",\n", - " \"AREA_KM2\": \"totalLandArea\",\n", + " \"AREA_KM2\": \"total_terrestrial_area\",\n", " \"location_type\": \"type\",\n", + " \"bounds\": \"terrestrial_bounds\"\n", " }\n", " )\n", ").reset_index(drop=True)\n", "\n", - "# Apply the EEZ IDs to the GADM dataset\n", - "locations_land['id'] = locations_land['code'].map(id_lookup)\n", + "locations_land['id'] = locations_land['code'].map(id_lookup) # Apply the EEZ IDs to the GADM dataset\n", "\n", - "# Identify the NaN values in the id column\n", - "nan_mask = locations_land['id'].isna()\n", + "nan_mask = locations_land['id'].isna() # Identify the NaN values in the id column\n", "\n", - "# Generate new IDs for any GADM rows without an EEZ match\n", "new_ids = pd.Series(\n", " range(max(id_lookup.values()) + 1, max(id_lookup.values()) + 1 + nan_mask.sum()),\n", " index=locations_land[nan_mask].index\n", - ")\n", + ") # Generate new IDs for any GADM rows without an EEZ match\n", "\n", - "# Assign the new IDs to the NaN values in the id column\n", - "locations_land['id'] = locations_land['id'].fillna(new_ids).astype(int)\n", + "locations_land['id'] = locations_land['id'].fillna(new_ids).astype(int) # Assign the new IDs to the NaN values in the id column\n", "\n", - "# Drop unnecessary columns in GADM data\n", "locations_land.drop(\n", " columns=list(\n", " set(locations_land.columns) -\n", - " set([\"code\", \"name\", \"totalLandArea\", \"type\", \"groups\", \"bounds\", \"id\"])\n", + " set([\"code\", \"name\", \"name_es\", \"name_fr\", \"total_terrestrial_area\", \"type\", \"groups\", \"terrestrial_bounds\", \"id\"])\n", " ),\n", " inplace=True,\n", ")\n", @@ -301,21 +363,24 @@ " locations, locations_land,\n", " on=['code', 'id'],\n", " suffixes=('_marine', '_land'),\n", - " how='outer' # Use 'outer' join to keep all records\n", + " how='outer' \n", ")\n", "\n", - "# Replace NaN values in TotalMarineArea and 
TotalLandArea with 0\n", - "combined_locations['totalMarineArea'] = combined_locations['totalMarineArea'].fillna(0)\n", - "combined_locations['totalLandArea'] = combined_locations['totalLandArea'].fillna(0)\n", - "combined_locations['id'] = combined_locations['id'].astype(int)\n", + "# Combine data from land and marine for each base column\n", + "base_columns = ['type', 'groups', 'name', 'name_es', 'name_fr']\n", + "for base_col in base_columns:\n", + " marine_col = f\"{base_col}_marine\"\n", + " land_col = f\"{base_col}_land\"\n", + " combined_locations = combine_columns(combined_locations, marine_col, land_col, base_col)\n", "\n", - "# Combine bounding boxes from both datasets\n", - "combined_locations['bounds'] = combined_locations.apply(lambda row: combine_bounds(row['bounds_marine'], row['bounds_land']), axis=1)\n", + "# Fill NaN values with 0 for each column\n", + "columns_to_fill = ['total_marine_area', 'total_terrestrial_area']\n", + "for col in columns_to_fill:\n", + " combined_locations[col] = combined_locations[col].fillna(0)\n", + "\n", + "# Force the id column to be an integer\n", + "combined_locations['id'] = combined_locations['id'].astype(int)\n", "\n", - "# Combine data from land and marine\n", - "combined_locations = combine_columns(combined_locations, 'type_marine', 'type_land', 'type')\n", - "combined_locations = combine_columns(combined_locations, 'groups_marine', 'groups_land', 'groups')\n", - "combined_locations = combine_columns(combined_locations, 'name_marine', 'name_land', 'name')\n", "\n", "# Drop unnecessary columns\n", "combined_locations.drop(\n", @@ -324,11 +389,12 @@ ")\n", "combined_locations = combined_locations.reset_index(drop=True)\n", "\n", + "# Force the index to have the values in id column (so they follow the same order in the previous table)\n", "combined_locations['index'] = combined_locations['id']\n", "combined_locations.set_index('index', inplace=True)\n", "combined_locations.sort_index(inplace=True)\n", "\n", - "# 
Step 8: Prepare final JSON output (stored in gadm folder)\n", + "# Prepare final JSON output (stored in gadm folder)\n", "output_locations_combined = {\n", " \"version\": 2,\n", " \"data\": {\n", @@ -338,7 +404,7 @@ " },\n", "}\n", "\n", - "# Step 9: Write the output to a JSON file (stored in gadm folder)\n", + "# Write the output to a JSON file (stored in gadm folder)\n", "with open(output_file, \"w\") as f:\n", " json.dump(output_locations_combined, f)\n", "\n", @@ -347,20 +413,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "## Create locations_code (stored in gadm folder)\n", - "(combined_locations[['id', 'code']]\n", + "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", " .to_csv(pipe_gadm_dir.get_processed_step_path(current_step)\n", - " .joinpath('locations_code.csv'), index=False))\n" + " .joinpath('locations_code_all.csv'), index=False))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "## Save locations_code in data_commons/data folder\n", + "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", + " .to_csv(scripts_dir.joinpath('data_commons/data/locations_code_all.csv'), index=False))" ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb new file mode 100644 index 00000000..6aececfd --- /dev/null +++ b/data/notebooks/pipes_mock/precalc_sofia.ipynb @@ -0,0 +1,3719 @@ +{ + "cells": [ + { + "cell_type": 
"code", + "execution_count": 194, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "from pathlib import Path\n", + "import time\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import json\n", + "import dotenv\n", + "import os\n", + "\n", + "dotenv.load_dotenv()\n", + "\n", + "scripts_dir = Path(\".\").joinpath(\"src\")\n", + "if scripts_dir not in sys.path:\n", + " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", + "\n", + "from helpers.strapi import Strapi\n", + "from helpers.settings import get_settings, Settings\n", + "from helpers.file_handler import FileConventionHandler\n", + "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", + "\n", + "from pipelines.output_schemas import (\n", + " FPLSchema,\n", + " ProtectionLevelSchema,\n", + " MPAsSchema,\n", + " HabitatsSchema,\n", + " LocationSchema,\n", + " ProtectedAreaExtentSchema,\n", + ")\n", + "from pipelines.processors import (\n", + " add_envelope,\n", + " add_location_iso,\n", + " expand_multiple_locations,\n", + " add_region_iso,\n", + " calculate_eez_area,\n", + " add_bbox,\n", + " add_groups_and_members,\n", + " add_location_name,\n", + " output,\n", + " clean_geometries,\n", + " filter_by_exluding_propossed_mpas,\n", + " spatial_join,\n", + " process_mpa_data,\n", + " assign_iso3,\n", + " calculate_global_area,\n", + " separate_parent_iso,\n", + " calculate_stats_cov,\n", + " coverage_stats,\n", + " mpaatlas_filter_stablishment,\n", + " process_mpaatlas_data,\n", + " calculate_stats,\n", + " fix_monaco,\n", + " batch_export,\n", + " calculate_area,\n", + " define_is_child,\n", + " set_child_id,\n", + " add_child_parent_relationship,\n", + " columns_to_lower,\n", + " extract_wdpaid_mpaatlas,\n", + " simplify_async,\n", + " 
process_tpa_data,\n", + ")\n", + "\n", + "logging.basicConfig(level=logging.DEBUG)\n", + "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"fiona\").setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mysettings = get_settings()\n", + "prev_step = \"preprocess\"\n", + "current_step = \"stats\"" + ] + }, + { + "cell_type": "code", + "execution_count": 225, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Dict\n", + "import pandera as pa\n", + "from pandera.typing import Index, Series\n", + "\n", + "def change_ata_to_abnj(df):\n", + " \"\"\"\n", + " Changes values in the parent_iso column from 'ATA' to 'ABNJ' as there is no 'ATA' stats in Protected Planet.\n", + " \"\"\"\n", + " # Count the occurrences of 'ATA'\n", + " count_changes = df['parent_iso'].value_counts().get('ATA', 0)\n", + " \n", + " # Replace 'ATA' with 'ABNJ'\n", + " df['parent_iso'] = df['parent_iso'].replace('ATA', 'ABNJ')\n", + "\n", + " return df\n", + "\n", + "\n", + "def add_total_marine_area(df):\n", + " # Read the JSON file\n", + " with open(scripts_dir.joinpath('data_commons/data/locations_all.json'), 'r') as f:\n", + " locations_data = json.load(f)\n", + " \n", + " # Access the nested dictionary\n", + " locations_dict = locations_data.get('data', {}).get('api::location.location', {})\n", + " \n", + " # Create a lookup dictionary from the nested dictionary\n", + " marine_area_lookup = {item['code']: item['total_marine_area'] for item in locations_dict.values()}\n", + " \n", + " # Identify the column that contains the word 'iso'\n", + " iso_column = [col for col in df.columns if 'iso' in col][0]\n", + "\n", + " # Perform the mapping using the identified column\n", + " df['total_marine_area'] = df[iso_column].map(marine_area_lookup)\n", + " \n", + " return df\n", + "\n", + "def 
add_total_terrestrial_area(df):\n", + " # Read the JSON file\n", + " with open(scripts_dir.joinpath('data_commons/data/locations_all.json'), 'r') as f:\n", + " locations_data = json.load(f)\n", + " \n", + " # Access the nested dictionary\n", + " locations_dict = locations_data.get('data', {}).get('api::location.location', {})\n", + " \n", + " # Create a lookup dictionary from the nested dictionary\n", + " marine_area_lookup = {item['code']: item['total_terrestrial_area'] for item in locations_dict.values()}\n", + " \n", + " # Identify the column that contains the word 'iso'\n", + " iso_column = [col for col in df.columns if 'iso' in col][0]\n", + "\n", + " # Perform the mapping using the identified column\n", + " df['total_terrestrial_area'] = df[iso_column].map(marine_area_lookup)\n", + " \n", + " return df\n", + "\n", + "def add_pa_oecm_percentages(df):\n", + " # Calculate the total protectedAreasCount for each year and iso_3\n", + " total_counts = df.groupby(['year', 'iso_3'])['protectedAreasCount'].transform('sum')\n", + "\n", + " # Calculate the counts for PA_DEF == 0 and PA_DEF == 1\n", + " df['oecm_count'] = df['protectedAreasCount'].where(df['PA_DEF'] == 0, 0)\n", + " df['pa_count'] = df['protectedAreasCount'].where(df['PA_DEF'] == 1, 0)\n", + "\n", + " # Calculate the percentages\n", + " df['oecms'] = df.groupby(['year', 'iso_3'])['oecm_count'].transform('sum') / total_counts * 100\n", + " df['pas'] = df.groupby(['year', 'iso_3'])['pa_count'].transform('sum') / total_counts * 100\n", + "\n", + " # Aggregate the results and fill NaN values with 0\n", + " final_df = df.groupby(['year', 'iso_3']).agg(\n", + " area=('area', 'sum'),\n", + " protected_areas_count=('protectedAreasCount', 'sum'),\n", + " oecms=('oecms', 'first'),\n", + " pas=('pas', 'first')\n", + " ).reset_index().fillna(0)\n", + "\n", + " return final_df\n", + "\n", + "def calculate_coverage_percentage_mpa(df):\n", + " if 'total_marine_area' in df.columns:\n", + " df['coverage'] = 
(df['protected_area'] / df['total_marine_area']) * 100\n", + " elif 'total_terrestrial_area' in df.columns:\n", + " df['coverage'] = (df['protected_area'] / df['total_terrestrial_area']) * 100\n", + " else:\n", + " df['coverage'] = np.nan\n", + "\n", + " return df\n", + "\n", + "def calculate_coverage_percentage_mpatlas(df):\n", + " df['percentage'] = (df['area_km2'] / df['total_marine_area']) * 100\n", + " return df\n", + "\n", + "def calculate_coverage_percentage_pa(df):\n", + " if 'total_marine_area' in df.columns:\n", + " df['coverage'] = (df['area_km2'] / df['total_marine_area']) * 100\n", + " elif 'total_terrestrial_area' in df.columns:\n", + " df['coverage'] = (df['area_km2'] / df['total_terrestrial_area']) * 100\n", + " else:\n", + " df['coverage'] = np.nan\n", + "\n", + " return df\n", + "\n", + "def calculate_global_contribution(df):\n", + " if 'total_marine_area' in df.columns:\n", + " df['global_contribution'] = (df['protected_area'] / 361000000) * 100\n", + " elif 'total_terrestrial_area' in df.columns:\n", + " df['global_contribution'] = (df['protected_area'] / 134954835) * 100\n", + " else:\n", + " df['global_contribution'] = np.nan\n", + " return df\n", + "\n", + "def add_is_last_year(df):\n", + " # Find the latest year for each iso_3\n", + " latest_years = df.groupby('iso_3')['year'].transform('max')\n", + " \n", + " # Create the is_last_year column\n", + " df['is_last_year'] = df['year'] == latest_years\n", + " \n", + " return df\n", + "\n", + "def add_environment(df):\n", + " \"\"\"\n", + " Adds a column 'environment' based on the presence of 'totalMarineArea' or 'totalLandArea'.\n", + "\n", + " Parameters:\n", + " df (pd.DataFrame): The input DataFrame.\n", + "\n", + " Returns:\n", + " pd.DataFrame: The DataFrame with the 'environment' column added.\n", + " \"\"\"\n", + " if 'total_marine_area' in df.columns:\n", + " df['environment'] = 'marine'\n", + " elif 'total_terrestrial_area' in df.columns:\n", + " df['environment'] = 'terrestrial'\n", + 
" else:\n", + " df['environment'] = 'unknown' \n", + " \n", + " return df\n", + "\n", + "def coverage_stats2(\n", + " df: pd.DataFrame,\n", + " area_col: str = \"area\",\n", + " sort_vals: List[str] = [\"iso_3\", \"year\"],\n", + ") -> pd.DataFrame:\n", + " \"\"\"only relevant to get the coverage numbers for mpa\"\"\"\n", + " return df.assign(\n", + " protected_area=(\n", + " df.sort_values(by=sort_vals)[area_col]\n", + " - df.sort_values(by=sort_vals)\n", + " .groupby(sort_vals)[area_col]\n", + " .shift(-1, fill_value=0)\n", + " .reset_index(drop=True)\n", + " ).round(2),\n", + " )\n", + "\n", + "def process_mpaatlas_data(gdf: gpd.GeoDataFrame) -> pd.DataFrame:\n", + " return (\n", + " gdf.dissolve(by=[\"protecti_1\", \"iso_3\"], aggfunc={\"name\": \"count\"})\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area_km2\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + "\n", + "def separate_parent_iso(df: pd.DataFrame, iso_column=\"iso_3\", separator=\";\") -> pd.DataFrame:\n", + " df[iso_column] = (\n", + " df[iso_column].str.replace(\" \", \"\").str.replace(\":\", separator).str.split(separator)\n", + " )\n", + " return df.explode(iso_column)\n", + "\n", + "def output2(\n", + " df: pd.DataFrame, iso_column: str, rep_d: dict, rename: Dict[str, str], drop_cols: List[str]\n", + ") -> pd.DataFrame:\n", + " \"\"\"Output function formatter for the data.\n", + "\n", + " Args:\n", + " df (pd.DataFrame): The DataFrame to process.\n", + " iso_column (str): The column containing the ISO codes.\n", + " rep_d (dict): A dictionary of values to replace.\n", + " rename (Dict[str, str]): A dictionary of columns to rename.\n", + " drop_cols (List[str]): A list of columns to drop.\n", + "\n", + " Returns:\n", + " pd.DataFrame: The processed DataFrame.\n", + " \"\"\"\n", + " if iso_column:\n", + " locations_code = pd.read_csv(scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"))\n", + " df = df.join(locations_code.set_index(\"code\"), on=iso_column, 
how=\"left\")\n", + " return (\n", + " df.replace(rep_d)\n", + " .rename(columns=rename)\n", + " .drop(columns=drop_cols)\n", + " .assign(\n", + " id=df.index + 1,\n", + " )\n", + " .set_index(\"id\")\n", + " )\n", + "\n", + "def set_child_id_pa(\n", + " df: pd.DataFrame | gpd.GeoDataFrame, columns: list[str] = [\"wdpa_pid\"]\n", + ") -> pd.DataFrame | gpd.GeoDataFrame:\n", + " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", + "\n", + "\n", + "class NewProtectedAreaExtentSchema(pa.DataFrameModel):\n", + " id: Index[int] = pa.Field(gt=0, coerce=True)\n", + " location: Series[int] = pa.Field(gt=0, coerce=True)\n", + " protected_area: Series[float] = pa.Field(ge=0, coerce=True)\n", + " protected_areas_count: Series[int] = pa.Field(ge=0, coerce=True)\n", + " oecms: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", + " pas: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", + " coverage: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", + " global_contribution: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", + " year: Series[int] = pa.Field(ge=2000, coerce=True)\n", + " is_last_year: Series[bool] = pa.Field(coerce=True)\n", + " environment: Series[str] = pa.Field(isin=[\"marine\", \"terrestrial\"], coerce=True)\n", + "\n", + "class NewProtectionLevelSchema(pa.DataFrameModel):\n", + " id: Index[int] = pa.Field(gt=0, coerce=True)\n", + " location: Series[int] = pa.Field(gt=0, coerce=True)\n", + " mpaa_protection_level: Series[int] = pa.Field(ge=0, coerce=True)\n", + " year: Series[int] = pa.Field(gt=1900, coerce=True)\n", + " area: Series[float] = pa.Field(ge=0, coerce=True)\n", + " percentage: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", + "\n", + "class PAsSchema(pa.DataFrameModel):\n", + " id: Index[int] = pa.Field(gt=0, coerce=True)\n", + " wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", + " child_id: Series[str] = pa.Field(coerce=True)\n", + " name: Series[str] = 
pa.Field(coerce=True)\n", + " year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True)\n", + " area: Series[float] = pa.Field(ge=0, coerce=True)\n", + " bbox: Series[List[float]] = pa.Field(coerce=True)\n", + " location: Series[int] = pa.Field(ge=0, coerce=True)\n", + " protection_status: Series[int] = pa.Field(ge=0, nullable=True)\n", + " mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", + " mpaa_protection_level: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", + " pa_iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True)\n", + " designation: Series[str] = pa.Field(coerce=True, nullable=True)\n", + " is_child: Series[bool] = pa.Field(coerce=True)\n", + " children: Series[List[int]] = pa.Field(coerce=True, nullable=True)\n", + " data_source: Series[int] = pa.Field(coerce=True)\n", + " coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True)\n", + " environment: Series[str] = pa.Field(isin=[\"marine\", \"terrestrial\"], coerce=True)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coverage stats - Mpas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", + "The steps are:\n", + "1. Load eez\n", + "2. Spatial inner Join the eez dataset with the Mpas one\n", + "3. Assign the location iso\n", + "4. dissolve by location iso and cummulative year\n", + "5. calculate the area for global regions and eez countries\n", + "6. prepare the data to be ingested in strapi\n", + "7. 
upload the data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n" + ] + } + ], + "source": [ + "pipe = \"mpa\"\n", + "strapi_collection = \"\"\n", + "\n", + "pipe_dir_eez = FileConventionHandler(\"eez\")\n", + "pipe_dir_mpas = FileConventionHandler(pipe)\n", + "output_file = pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\n", + " \"mpa_landmask_strapi.csv\"\n", + ")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", + "# Download the mpas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)\n", + "\n", + "# Load the data\n", + "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:21<00:00, 1.78s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas))" + ] + }, + { + "cell_type": "code", + "execution_count": 
6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATSTATUSSTATUS_YRPARENT_ISOGIS_M_AREAgeometryindex_rightGEONAMEMRGIDAREA_KM2POL_TYPEISO_SOV1ISO_SOV2ISO_SOV3
0555624810.0555624810_D1Ross Sea Region Marine Protected AreaMarine Protected Area (CCAMLR)Not ReportedDesignated2017.0ABNJ326507.190744POLYGON ((150 -62.5, 150.90909 -62.5, 151.8181...0.0High Seas63203.0212881389.0High SeasABNJNoneNone
\n", + "
" + ], + "text/plain": [ + " WDPAID WDPA_PID PA_DEF NAME \\\n", + "0 555624810.0 555624810_D 1 Ross Sea Region Marine Protected Area \n", + "\n", + " DESIG_ENG IUCN_CAT STATUS STATUS_YR \\\n", + "0 Marine Protected Area (CCAMLR) Not Reported Designated 2017.0 \n", + "\n", + " PARENT_ISO GIS_M_AREA \\\n", + "0 ABNJ 326507.190744 \n", + "\n", + " geometry index_right GEONAME \\\n", + "0 POLYGON ((150 -62.5, 150.90909 -62.5, 151.8181... 0.0 High Seas \n", + "\n", + " MRGID AREA_KM2 POL_TYPE ISO_SOV1 ISO_SOV2 ISO_SOV3 \n", + "0 63203.0 212881389.0 High Seas ABNJ None None " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eez_mpas_data_join.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyogrio._io:Created 17,697 records\n" + ] + } + ], + "source": [ + "# # To get an idea of the spatial join results\n", + "# eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3).to_file(\n", + "# pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\"mpas_sjoin.shp\"), driver=\"ESRI Shapefile\"\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:31<00:00, 15.09s/it]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 281/282 [00:20<00:01, 1.84s/it]" + ] + } + ], + "source": [ + "final_data = await process_mpa_data(\n", + " eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", + " range(2011, 
time.localtime().tm_year + 1),\n", + " [\"PA_DEF\", \"iso_3\"],\n", + " {\"protectedAreasCount\": \"sum\"},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yeariso_3areaprotected_areas_countoecmspastotal_marine_areaprotected_areacoverageglobal_contributionis_last_yearenvironment
02010ABNJ996236.12549829.00.00000100.00000212881389.0996236.130.4679770.275966Falsemarine
12010AF129790.939474427.02.3419297.6580814878058.0129790.940.8723650.035953Falsemarine
\n", + "
" + ], + "text/plain": [ + " year iso_3 area protected_areas_count oecms pas \\\n", + "0 2010 ABNJ 996236.125498 29.0 0.00000 100.00000 \n", + "1 2010 AF 129790.939474 427.0 2.34192 97.65808 \n", + "\n", + " total_marine_area protected_area coverage global_contribution \\\n", + "0 212881389.0 996236.13 0.467977 0.275966 \n", + "1 14878058.0 129790.94 0.872365 0.035953 \n", + "\n", + " is_last_year environment \n", + "0 False marine \n", + "1 False marine " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coverage = (\n", + " final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", + " .pipe(separate_parent_iso, \"iso_3\")\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .replace(\n", + " {\n", + " \"iso_3\": {\n", + " \"ATA\": \"ABNJ\",\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " \"GIB\": \"GBR\",\n", + " }\n", + " }\n", + " )\n", + " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", + " .pipe(add_pa_oecm_percentages)\n", + " .pipe(add_total_marine_area)\n", + " .pipe(coverage_stats2)\n", + " .pipe(calculate_coverage_percentage_mpa)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", + "\n", + "\n", + "NewProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"area\", \"iso_3\", 'total_marine_area'],\n", + " )\n", + ").to_csv(\n", + " output_file,\n", + " index=True,\n", + ")\n", + "coverage.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST 
https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/mpa_coverage.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi_collection = \"protection-coverage-stat\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 2300)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mpa atlas - country stats Fully or highly protected" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", + "The steps are:\n", + "1. Load eez\n", + "2. Spatial inner Join the eez dataset with the Mpaatlas one\n", + "3. iso assign using the sovereign one provided by mpaatlas\n", + "4. dissolve by location\n", + "5. calculate the area for global regions and eez countries using Mollweide projection\n", + "6. prepare the data to be ingested in strapi\n", + "7. 
upload the data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + } + ], + "source": [ + "pipe = \"mpaatlas\"\n", + "strapi_collection = \"mpaa-protection-level-stat\"\n", + "\n", + "pipe_dir_eez = FileConventionHandler(\"eez\")\n", + "pipe_dir_mpaatlas = FileConventionHandler(pipe)\n", + "output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\n", + " \"mpaatlas_protection_level.csv\"\n", + ")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", + "# Download the mpas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", + "\n", + "# Load the data\n", + "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 9.59it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 2.95s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "eez_mpaatlas_data_join = await spatial_join(\n", + " eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# To get an idea of the spatial join results\n", + "# eez_mpaatlas_data_join.to_file(\n", + "# pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin.shp\"),\n", + "# driver=\"ESRI Shapefile\",\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyogrio._io:Created 54 records\n" + ] + } + ], + "source": [ + "eez_mpaatlas_data_join.dissolve(by=[\"protecti_1\", \"location_i\"], aggfunc={\"name\": \"count\"}).reset_index().to_file(\n", + "pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin_dissolved.shp\"),\n", + "driver=\"ESRI Shapefile\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "result = (\n", + " eez_mpaatlas_data_join.rename(columns={\"location_i\": \"iso_3\"})\n", + " .pipe(process_mpaatlas_data) \n", + " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", + " .pipe(separate_parent_iso)\n", + " .replace(\n", + " {\n", + " \"location_i\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .pipe(add_region_iso, iso_column=\"iso_3\")\n", + " .pipe(calculate_stats, 
gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", + " .query('protecti_1 != \"less protected or unknown\"')\n", + " .pipe(fix_monaco, iso_column=\"iso_3\", area_column=\"area_km2\")\n", + " .pipe(add_total_marine_area)\n", + " .pipe(calculate_coverage_percentage_mpatlas)\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso_3\",\n", + " rep_d={\n", + " \"protecti_1\": {\n", + " \"fully or highly protected\": 1,\n", + " }\n", + " },\n", + " rename={\"protecti_1\": \"mpaa_protection_level\", \"area_km2\": \"area\"},\n", + " drop_cols=[\"total_marine_area\", \"iso_3\"],\n", + " )\n", + ")\n", + "\n", + "NewProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", + " output_file, index=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/mpaatlas_protection_level.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi_collection = \"mpaa-protection-level-stat\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Protected seas - fishing protection level" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "pipe = \"protectedseas\"\n", + "strapi_collection = \"fishing-protection-level-stat\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"protectedseas_stats.xlsx\")\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"lfp.csv\")\n", + "\n", + "# Download the protected seas file && unzip it\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"vizzuality_processed_data/protectedseas/preprocess/protectedseas_stats.xlsx\",\n", + " file=input_file,\n", + " operation=\"r\",\n", + ")\n", + "\n", + "# Load the data\n", + "protectedseas_intermediate = pd.read_excel(input_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_teriso_sovincludes_multi_jurisdictional_areaslfparea_sqkmtotal_areapct_total
320NaNESPTrue5142.9730101011023.7760.014141
321NaNESPTrue41639.6820761011023.7760.162180
322NaNESPTrue3214532.8498001011023.77621.219367
323NaNESPTrue215064.1327701011023.7761.489988
324NaNESPTrue1779644.1388001011023.77677.114323
\n", + "
" + ], + "text/plain": [ + " iso_ter iso_sov includes_multi_jurisdictional_areas lfp area_sqkm \\\n", + "320 NaN ESP True 5 142.973010 \n", + "321 NaN ESP True 4 1639.682076 \n", + "322 NaN ESP True 3 214532.849800 \n", + "323 NaN ESP True 2 15064.132770 \n", + "324 NaN ESP True 1 779644.138800 \n", + "\n", + " total_area pct_total \n", + "320 1011023.776 0.014141 \n", + "321 1011023.776 0.162180 \n", + "322 1011023.776 21.219367 \n", + "323 1011023.776 1.489988 \n", + "324 1011023.776 77.114323 " + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protectedseas_intermediate[\n", + " (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " )\n", + " | (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", + " & ~protectedseas_intermediate.iso_sov.isin(\n", + " protectedseas_intermediate[\n", + " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " ].iso_sov.unique()\n", + " )\n", + " )\n", + "][protectedseas_intermediate.iso_sov.eq(\"ESP\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "final = (\n", + " protectedseas_intermediate[\n", + " (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " )\n", + " | (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", + " & ~protectedseas_intermediate.iso_sov.isin(\n", + " protectedseas_intermediate[\n", + " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " ].iso_sov.unique()\n", + " )\n", + " )\n", + " ].replace(\n", + " {\n", + " \"lfp\": {\n", + " 5: \"highly\",\n", + " 4: \"highly\",\n", + " 3: 
\"moderately\",\n", + " 2: \"less\",\n", + " 1: \"less\",\n", + " },\n", + " }\n", + " ).groupby([\"iso_sov\", \"lfp\"]).agg({\"area_sqkm\": \"sum\", \"total_area\": \"max\"}).reset_index()\n", + " .pipe(\n", + " calculate_global_area,\n", + " gby_col=[\"lfp\"],\n", + " iso_column=\"iso_sov\",\n", + " agg_ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", + " )\n", + " .pipe(add_region_iso, iso_column=\"iso_sov\")\n", + " .pipe(\n", + " calculate_stats,\n", + " gby_col=[\"lfp\"],\n", + " ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", + " iso_column=\"iso_sov\",\n", + " )\n", + " .pipe(lambda x: x.assign(pct=round((x.area_sqkm / x.total_area)*100, 2)))\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso_sov\",\n", + " rep_d={\n", + " \"lfp\": {\n", + " \"highly\": 1,\n", + " \"moderately\": 2,\n", + " \"less\": 3,\n", + " }\n", + " },\n", + " rename={\"lfp\": \"fishing_protection_level\", \"area_sqkm\": \"area\"},\n", + " drop_cols=[\"iso_sov\", \"total_area\"],\n", + " )\n", + ")\n", + "FPLSchema(final[final.location.notna()]).to_csv(output_file, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/lfp.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 500)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.importCollectionData(\n", 
+ "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country detail table data - all together WIP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " 1- lower case the columns \n", + "2- separate locations whose regime is in dispute or under a joint regime \n", + "3- calculate area for mpaatlas data \n", + "4- rename columns for merge \n", + "5- merge mpaatlas and mpa data identifying the source \n", + "6- identify child resources and set them as children \n", + "7- calculate bbox \n", + "8- set child resources \n", + "9- prepare output for batch export \n", + "10- upload data to strapi " + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/pa/processed/pa_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/pa/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"pa\"\n", + "strapi_collection_pas = \"pa\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", + "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + 
"metadata": {}, + "outputs": [], + "source": [ + "# Load the data\n", + "pa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", + " clean_geometries\n", + ")\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + 
"\u001b[0;31mKeyError\u001b[0m: 'environment'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/series.py:1298\u001b[0m, in \u001b[0;36mSeries.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 1297\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1298\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_with_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwarn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwarn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m 1300\u001b[0m \u001b[38;5;66;03m# We have a scalar (or for MultiIndex or object-dtype, scalar-like)\u001b[39;00m\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;66;03m# key that is not present in self.index.\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/series.py:1370\u001b[0m, in \u001b[0;36mSeries._set_with_engine\u001b[0;34m(self, key, value, warn)\u001b[0m\n\u001b[1;32m 1369\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_set_with_engine\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, value, warn: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1370\u001b[0m loc \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1372\u001b[0m \u001b[38;5;66;03m# this is equivalent to self._values[key] = value\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n", + "\u001b[0;31mKeyError\u001b[0m: 'environment'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[92], line 48\u001b[0m\n\u001b[1;32m 1\u001b[0m init_table \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mpa_intermediate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns_to_lower\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mseparate_parent_iso\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miso_column\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparent_iso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchange_ata_to_abnj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrename\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparent_iso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43miso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstatus_yr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43myear\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgis_m_area\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marea_km2_marine\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgis_area\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marea_km2_terrestrial\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mstatus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43massign\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprotected_planet\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mmpaatlas_intermediate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcalculate_area\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextract_wdpaid_mpaatlas\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mseparate_parent_iso\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miso_column\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocation_i\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrename\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocation_i\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43miso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_pid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdesignatio\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdesig_eng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43massign\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmpaatlas\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmpa_zone_i\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mInt64\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43miso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCOK\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNZL\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mIOT\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGBR\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNIU\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNZL\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSHN\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGBR\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSJM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNOR\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUMI\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUSA\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNCL\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mFRA\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 46\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_pid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_pid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msource\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m---> 48\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocess_area_and_environment\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 49\u001b[0m )\n", + "File 
\u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/generic.py:6231\u001b[0m, in \u001b[0;36mNDFrame.pipe\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 6230\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m common\u001b[38;5;241m.\u001b[39mpipe(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m), func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m-> 6231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcommon\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/common.py:502\u001b[0m, in \u001b[0;36mpipe\u001b[0;34m(obj, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 502\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[91], line 18\u001b[0m, in 
\u001b[0;36mprocess_area_and_environment\u001b[0;34m(gdf)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmarine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m0\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 17\u001b[0m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124marea_km2\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124marea_km2_terrestrial\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m---> 18\u001b[0m \u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43menvironment\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mterrestrial\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 19\u001b[0m new_rows\u001b[38;5;241m.\u001b[39mappend(row)\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmarine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/series.py:1322\u001b[0m, in \u001b[0;36mSeries.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 1319\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_values(key, value)\n\u001b[1;32m 1320\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1321\u001b[0m \u001b[38;5;66;03m# GH#12862 adding a new key to the Series\u001b[39;00m\n\u001b[0;32m-> 1322\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 1324\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
(\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, LossySetitemError):\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;66;03m# The key was OK, but we cannot set the value losslessly\u001b[39;00m\n\u001b[1;32m 1326\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39mget_loc(key)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexing.py:911\u001b[0m, in \u001b[0;36m_LocationIndexer.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 908\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_valid_setitem_indexer(key)\n\u001b[1;32m 910\u001b[0m iloc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miloc\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39miloc\n\u001b[0;32m--> 911\u001b[0m \u001b[43miloc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_with_indexer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexing.py:1932\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_with_indexer\u001b[0;34m(self, indexer, value, name)\u001b[0m\n\u001b[1;32m 1929\u001b[0m indexer, missing \u001b[38;5;241m=\u001b[39m convert_missing_indexer(indexer)\n\u001b[1;32m 1931\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m missing:\n\u001b[0;32m-> 1932\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_with_indexer_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1933\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1935\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloc\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1936\u001b[0m \u001b[38;5;66;03m# must come after setting of missing\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexing.py:2238\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_with_indexer_missing\u001b[0;34m(self, indexer, value)\u001b[0m\n\u001b[1;32m 2231\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n\u001b[1;32m 2232\u001b[0m \u001b[38;5;66;03m# TODO: re-issue this with setitem-specific message?\u001b[39;00m\n\u001b[1;32m 2233\u001b[0m warnings\u001b[38;5;241m.\u001b[39mfilterwarnings(\n\u001b[1;32m 2234\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2235\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe behavior of Index.insert with object-dtype is deprecated\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2236\u001b[0m category\u001b[38;5;241m=\u001b[39m\u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 2237\u001b[0m )\n\u001b[0;32m-> 2238\u001b[0m new_index \u001b[38;5;241m=\u001b[39m \u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minsert\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2240\u001b[0m \u001b[38;5;66;03m# we have a coerced indexer, 
e.g. a float\u001b[39;00m\n\u001b[1;32m 2241\u001b[0m \u001b[38;5;66;03m# that matches in an int64 Index, so\u001b[39;00m\n\u001b[1;32m 2242\u001b[0m \u001b[38;5;66;03m# we will not create a duplicate index, rather\u001b[39;00m\n\u001b[1;32m 2243\u001b[0m \u001b[38;5;66;03m# index to that element\u001b[39;00m\n\u001b[1;32m 2244\u001b[0m \u001b[38;5;66;03m# e.g. 0.0 -> 0\u001b[39;00m\n\u001b[1;32m 2245\u001b[0m \u001b[38;5;66;03m# GH#12246\u001b[39;00m\n\u001b[1;32m 2246\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m index\u001b[38;5;241m.\u001b[39mis_unique:\n\u001b[1;32m 2247\u001b[0m \u001b[38;5;66;03m# pass new_index[-1:] instead if [new_index[-1]]\u001b[39;00m\n\u001b[1;32m 2248\u001b[0m \u001b[38;5;66;03m# so that we retain dtype\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:7012\u001b[0m, in \u001b[0;36mIndex.insert\u001b[0;34m(self, loc, item)\u001b[0m\n\u001b[1;32m 7009\u001b[0m loc \u001b[38;5;241m=\u001b[39m loc \u001b[38;5;28;01mif\u001b[39;00m loc \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m loc \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 7010\u001b[0m new_values[loc] \u001b[38;5;241m=\u001b[39m item\n\u001b[0;32m-> 7012\u001b[0m out \u001b[38;5;241m=\u001b[39m Index\u001b[38;5;241m.\u001b[39m_with_infer(new_values, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m)\n\u001b[1;32m 7013\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 7014\u001b[0m using_pyarrow_string_dtype()\n\u001b[1;32m 7015\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m is_string_dtype(out\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m 7016\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m new_values\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 7017\u001b[0m ):\n\u001b[1;32m 
7018\u001b[0m out \u001b[38;5;241m=\u001b[39m out\u001b[38;5;241m.\u001b[39mastype(new_values\u001b[38;5;241m.\u001b[39mdtype)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:1671\u001b[0m, in \u001b[0;36mIndex.name\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1666\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[1;32m 1668\u001b[0m \u001b[38;5;66;03m# --------------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 1669\u001b[0m \u001b[38;5;66;03m# Name-Centric Methods\u001b[39;00m\n\u001b[0;32m-> 1671\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 1672\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mname\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Hashable:\n\u001b[1;32m 1673\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1674\u001b[0m \u001b[38;5;124;03m Return Index or MultiIndex name.\u001b[39;00m\n\u001b[1;32m 1675\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1682\u001b[0m \u001b[38;5;124;03m 'x'\u001b[39;00m\n\u001b[1;32m 1683\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 1684\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "init_table = (\n", + " pd.concat(\n", + " [\n", + " (\n", + " pa_intermediate.pipe(columns_to_lower)\n", + " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", + " .pipe(change_ata_to_abnj)\n", + " .rename(\n", + " columns={\n", + " \"parent_iso\": \"iso\",\n", + " \"status_yr\": \"year\",\n", + " \"gis_m_area\": \"area_km2_marine\",\n", + " \"gis_area\": \"area_km2_terrestrial\",\n", + " }\n", + " )\n", + " .drop(columns=['status'])\n", + " .assign(source=\"protected_planet\")\n", + " ),\n", + " (\n", + " mpaatlas_intermediate.pipe(calculate_area)\n", + " 
.pipe(extract_wdpaid_mpaatlas)\n", + " .pipe(separate_parent_iso, iso_column=\"location_i\")\n", + " .rename(\n", + " columns={\n", + " \"location_i\": \"iso\",\n", + " \"wdpa_id\": \"wdpa_pid\",\n", + " \"designatio\": \"desig_eng\",\n", + " }\n", + " )\n", + " .assign(source=\"mpaatlas\")\n", + " .astype({\"mpa_zone_i\": \"Int64\"})\n", + " ),\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .reset_index(drop=True)\n", + " .replace(\n", + " {\n", + " \"iso\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .sort_values(by=[\"wdpa_pid\", \"source\"], ascending=[True, False])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "# to be run if things change a lot in the future\n", + "iucn_cat = pd.DataFrame(\n", + " {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", + " index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", + ")\n", + "iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", + "\n", + "iucn_cat = pd.read_csv(\n", + " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "def define_is_child(\n", + " gdf: pd.DataFrame | gpd.GeoDataFrame,\n", + " gby: str = \"wdpaid\",\n", + " env_col: str = \"environment\",\n", + " sort_by: dict[str, bool] = {\"wdpa_pid\": True, \"source\": False},\n", + " col_name: str = \"is_child\",\n", + ") -> pd.DataFrame | gpd.GeoDataFrame:\n", + " return gdf.assign(\n", + " **{\n", + " col_name: np.where(\n", + " gdf.index.isin(\n", + " gdf.sort_values(by=list(sort_by.keys()), 
ascending=list(sort_by.values()))\n", + " .groupby([gby, env_col]) # Group by wdpaid and environment\n", + " .nth(slice(1, None))\n", + " .index\n", + " ),\n", + " True,\n", + " False,\n", + " )\n", + " }\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"Columns not found: 'data_source'\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[110], line 65\u001b[0m\n\u001b[1;32m 1\u001b[0m pa_table \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2\u001b[0m \u001b[43minit_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43madd_bbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbbox\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdefine_is_child\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mset_child_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpaid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mis_child\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(add_total_areas)\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(calculate_coverage_percentage_pa)\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(add_environment)\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# output,\u001b[39;49;00m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# iso_column=\"iso\",\u001b[39;49;00m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# rep_d={\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"status\": {\u001b[39;49;00m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Adopted\": 4,\u001b[39;49;00m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"implemented\": 6,\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Established\": 6,\u001b[39;49;00m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Designated\": 5,\u001b[39;49;00m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Proposed\": 3,\u001b[39;49;00m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Inscribed\": 3,\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m 
\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown\": 1,\u001b[39;49;00m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"pa_def\": {\"0\": 2, \"1\": 1},\u001b[39;49;00m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"year\": {0: pd.NA},\u001b[39;49;00m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"iucn_cat\": dict(\u001b[39;49;00m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# iucn_cat[[\"slug\"]]\u001b[39;49;00m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .reset_index(drop=False)\u001b[39;49;00m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .iloc[:, [1, 0]]\u001b[39;49;00m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .to_dict(orient=\"tight\")[\"data\"]\u001b[39;49;00m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# ),\u001b[39;49;00m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\u001b[39;49;00m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"protection\": {\u001b[39;49;00m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"full\": 3,\u001b[39;49;00m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"light\": 4,\u001b[39;49;00m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"incompatible\": 5,\u001b[39;49;00m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"high\": 6,\u001b[39;49;00m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"minimal\": 7,\u001b[39;49;00m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown\": 8,\u001b[39;49;00m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown/to be determined\": 
8,\u001b[39;49;00m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"establishm\": {\u001b[39;49;00m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"actively managed\": 4,\u001b[39;49;00m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"implemented\": 6,\u001b[39;49;00m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"designated\": 5,\u001b[39;49;00m\n\u001b[1;32m 45\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Designated\": 5,\u001b[39;49;00m\n\u001b[1;32m 46\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"proposed or committed\": 3,\u001b[39;49;00m\n\u001b[1;32m 47\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Proposed\": 3,\u001b[39;49;00m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Inscribed\": 3,\u001b[39;49;00m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Established\": 5,\u001b[39;49;00m\n\u001b[1;32m 50\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Adopted\": 5,\u001b[39;49;00m\n\u001b[1;32m 51\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown\": 1,\u001b[39;49;00m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# rename={\u001b[39;49;00m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"pa_def\": \"protection_status\",\u001b[39;49;00m\n\u001b[1;32m 56\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"area_km2\": \"area\",\u001b[39;49;00m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"iucn_cat\": \"pa_iucn_category\",\u001b[39;49;00m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"desig_eng\": 
\"designation\",\u001b[39;49;00m\n\u001b[1;32m 59\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"protection\": \"mpaa_protection_level\",\u001b[39;49;00m\n\u001b[1;32m 60\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"establishm\": \"mpaa_establishment_stage\",\u001b[39;49;00m\n\u001b[1;32m 61\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"source\": \"data_source\",\u001b[39;49;00m\n\u001b[1;32m 62\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\u001b[39;49;00m\n\u001b[1;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# )\u001b[39;49;00m\n\u001b[0;32m---> 65\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43madd_child_parent_relationship\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;66;03m# .astype(\u001b[39;00m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;66;03m# {\u001b[39;00m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# \"year\": \"Int32\",\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# \"pa_iucn_category\": \"Int64\",\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;66;03m# \"protection_status\": \"Int64\",\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;66;03m# }\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;66;03m# .query(\"coverage <= 100\") \u001b[39;00m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;66;03m# .sort_index()\u001b[39;00m\n\u001b[1;32m 75\u001b[0m )\n\u001b[1;32m 76\u001b[0m pa_table\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m5\u001b[39m)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/generic.py:6231\u001b[0m, in \u001b[0;36mNDFrame.pipe\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6229\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 6230\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m common\u001b[38;5;241m.\u001b[39mpipe(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m), func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m-> 6231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcommon\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/common.py:502\u001b[0m, in \u001b[0;36mpipe\u001b[0;34m(obj, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 502\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/dev/skytruth-30x30/data/src/pipelines/processors.py:696\u001b[0m, in \u001b[0;36madd_child_parent_relationship\u001b[0;34m(df, gby, cols)\u001b[0m\n\u001b[1;32m 691\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21madd_child_parent_relationship\u001b[39m(\n\u001b[1;32m 692\u001b[0m df: pd\u001b[38;5;241m.\u001b[39mDataFrame \u001b[38;5;241m|\u001b[39m gpd\u001b[38;5;241m.\u001b[39mGeoDataFrame,\n\u001b[1;32m 693\u001b[0m gby: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwdpaid\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 694\u001b[0m cols: \u001b[38;5;28mlist\u001b[39m \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwdpaid\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwdpa_pid\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mis_child\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_source\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 695\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame \u001b[38;5;241m|\u001b[39m gpd\u001b[38;5;241m.\u001b[39mGeoDataFrame:\n\u001b[0;32m--> 696\u001b[0m groups: pd\u001b[38;5;241m.\u001b[39mSeries \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgby\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcols\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mapply(define_childs_ids)\n\u001b[1;32m 697\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchildren\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 698\u001b[0m pd\u001b[38;5;241m.\u001b[39mDataFrame([[a, b] \u001b[38;5;28;01mfor\u001b[39;00m a, b \u001b[38;5;129;01min\u001b[39;00m groups\u001b[38;5;241m.\u001b[39mvalues], columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparent\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchildren\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 699\u001b[0m 
\u001b[38;5;241m.\u001b[39mdropna(subset\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparent\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 700\u001b[0m \u001b[38;5;241m.\u001b[39mset_index(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparent\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 701\u001b[0m )\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/groupby/generic.py:1951\u001b[0m, in \u001b[0;36mDataFrameGroupBy.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1944\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(key) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 1945\u001b[0m \u001b[38;5;66;03m# if len == 1, then it becomes a SeriesGroupBy and this is actually\u001b[39;00m\n\u001b[1;32m 1946\u001b[0m \u001b[38;5;66;03m# valid syntax, so don't raise\u001b[39;00m\n\u001b[1;32m 1947\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1948\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot subset columns with a tuple with more than one element. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1949\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUse a list instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1950\u001b[0m )\n\u001b[0;32m-> 1951\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/base.py:239\u001b[0m, in \u001b[0;36mSelectionMixin.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mintersection(key)) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(key)):\n\u001b[1;32m 238\u001b[0m bad_keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mset\u001b[39m(key)\u001b[38;5;241m.\u001b[39mdifference(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[0;32m--> 239\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumns not found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(bad_keys)[\u001b[38;5;241m1\u001b[39m:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gotitem(\u001b[38;5;28mlist\u001b[39m(key), ndim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + 
"\u001b[0;31mKeyError\u001b[0m: \"Columns not found: 'data_source'\"" + ] + } + ], + "source": [ + "pa_table = (\n", + " init_table.pipe(add_bbox, \"bbox\")\n", + " .pipe(define_is_child)\n", + " .pipe(set_child_id)\n", + " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", + " .reset_index(drop=True)\n", + " # .pipe(add_total_areas)\n", + " # .pipe(calculate_coverage_percentage_pa)\n", + " # .pipe(add_environment)\n", + " # .pipe(\n", + " # output,\n", + " # iso_column=\"iso\",\n", + " # rep_d={\n", + " # \"status\": {\n", + " # \"Adopted\": 4,\n", + " # \"implemented\": 6,\n", + " # \"Established\": 6,\n", + " # \"Designated\": 5,\n", + " # \"Proposed\": 3,\n", + " # \"Inscribed\": 3,\n", + " # \"unknown\": 1,\n", + " # },\n", + " # \"pa_def\": {\"0\": 2, \"1\": 1},\n", + " # \"year\": {0: pd.NA},\n", + " # \"iucn_cat\": dict(\n", + " # iucn_cat[[\"slug\"]]\n", + " # .reset_index(drop=False)\n", + " # .iloc[:, [1, 0]]\n", + " # .to_dict(orient=\"tight\")[\"data\"]\n", + " # ),\n", + " # \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\n", + " # \"protection\": {\n", + " # \"full\": 3,\n", + " # \"light\": 4,\n", + " # \"incompatible\": 5,\n", + " # \"high\": 6,\n", + " # \"minimal\": 7,\n", + " # \"unknown\": 8,\n", + " # \"unknown/to be determined\": 8,\n", + " # },\n", + " # \"establishm\": {\n", + " # \"actively managed\": 4,\n", + " # \"implemented\": 6,\n", + " # \"designated\": 5,\n", + " # \"Designated\": 5,\n", + " # \"proposed or committed\": 3,\n", + " # \"Proposed\": 3,\n", + " # \"Inscribed\": 3,\n", + " # \"Established\": 5,\n", + " # \"Adopted\": 5,\n", + " # \"unknown\": 1,\n", + " # },\n", + " # },\n", + " # rename={\n", + " # \"pa_def\": \"protection_status\",\n", + " # \"area_km2\": \"area\",\n", + " # \"iucn_cat\": \"pa_iucn_category\",\n", + " # \"desig_eng\": \"designation\",\n", + " # \"protection\": \"mpaa_protection_level\",\n", + " # \"establishm\": \"mpaa_establishment_stage\",\n", + " # \"source\": 
\"data_source\",\n", + " # },\n", + " # drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\n", + " # )\n", + " .pipe(add_child_parent_relationship)\n", + " # .astype(\n", + " # {\n", + " # \"year\": \"Int32\",\n", + " # \"pa_iucn_category\": \"Int64\",\n", + " # \"protection_status\": \"Int64\",\n", + " # }\n", + " # )\n", + " # .query(\"coverage <= 100\") \n", + " # .sort_index()\n", + ")\n", + "pa_table.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wdpaidwdpa_pidpa_defnamedesig_engiucn_catmarineyearisogeometrysourcempa_zone_iestablishmprotectionprotecti_1area_km2environmentbboxis_childchild_id
19056100672100672_A1Ivvavik National Park Of CanadaNational ParkII01984.0CANMULTIPOLYGON (((-140.83302 69.63132, -140.8350...protected_planet<NA>NaNNaNNaN9695.837607terrestrial[-141.000000001, 68.556807999, -138.1338199979...False100672_A
19057100672100672_B1Ivvavik National Park Of CanadaNational ParkII11984.0CANMULTIPOLYGON (((-139.78657 69.59821, -139.7872...protected_planet<NA>NaNNaNNaN79.375056terrestrial[-140.894068268, 69.19278843000001, -138.37542...True100672_B
19058100672100672_B1Ivvavik National Park Of CanadaNational ParkII11984.0CANMULTIPOLYGON (((-139.78657 69.59821, -139.7872...protected_planet<NA>NaNNaNNaN52.170080marine[-140.894068268, 69.19278843000001, -138.37542...True100672_B
\n", + "
" + ], + "text/plain": [ + " wdpaid wdpa_pid pa_def name \\\n", + "19056 100672 100672_A 1 Ivvavik National Park Of Canada \n", + "19057 100672 100672_B 1 Ivvavik National Park Of Canada \n", + "19058 100672 100672_B 1 Ivvavik National Park Of Canada \n", + "\n", + " desig_eng iucn_cat marine year iso \\\n", + "19056 National Park II 0 1984.0 CAN \n", + "19057 National Park II 1 1984.0 CAN \n", + "19058 National Park II 1 1984.0 CAN \n", + "\n", + " geometry source \\\n", + "19056 MULTIPOLYGON (((-140.83302 69.63132, -140.8350... protected_planet \n", + "19057 MULTIPOLYGON (((-139.78657 69.59821, -139.7872... protected_planet \n", + "19058 MULTIPOLYGON (((-139.78657 69.59821, -139.7872... protected_planet \n", + "\n", + " mpa_zone_i establishm protection protecti_1 area_km2 \\\n", + "19056 NaN NaN NaN 9695.837607 \n", + "19057 NaN NaN NaN 79.375056 \n", + "19058 NaN NaN NaN 52.170080 \n", + "\n", + " environment bbox \\\n", + "19056 terrestrial [-141.000000001, 68.556807999, -138.1338199979... \n", + "19057 terrestrial [-140.894068268, 69.19278843000001, -138.37542... \n", + "19058 marine [-140.894068268, 69.19278843000001, -138.37542... \n", + "\n", + " is_child child_id \n", + "19056 False 100672_A \n", + "19057 True 100672_B \n", + "19058 True 100672_B " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pa_table[pa_table['name'] == 'Ivvavik National Park Of Canada']" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wdpaidwdpa_pidprotection_statusnamedesignationpa_iucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxis_childchild_idcoverageenvironmentlocationchildren
id
2137100672100672_A1Ivvavik National Park Of CanadaNational Park2198439.2018113NaNNaN[-141.000000001, 68.556807999, -138.1338199979...False100672_A0.000680marine29.0[2138]
2138100672100672_B1Ivvavik National Park Of CanadaNational Park2198452.1700803NaNNaN[-140.894068268, 69.19278843000001, -138.37542...True100672_B0.000905marine29.0NaN
\n", + "
" + ], + "text/plain": [ + " wdpaid wdpa_pid protection_status name \\\n", + "id \n", + "2137 100672 100672_A 1 Ivvavik National Park Of Canada \n", + "2138 100672 100672_B 1 Ivvavik National Park Of Canada \n", + "\n", + " designation pa_iucn_category year area data_source \\\n", + "id \n", + "2137 National Park 2 1984 39.201811 3 \n", + "2138 National Park 2 1984 52.170080 3 \n", + "\n", + " mpaa_establishment_stage mpaa_protection_level \\\n", + "id \n", + "2137 NaN NaN \n", + "2138 NaN NaN \n", + "\n", + " bbox is_child child_id \\\n", + "id \n", + "2137 [-141.000000001, 68.556807999, -138.1338199979... False 100672_A \n", + "2138 [-140.894068268, 69.19278843000001, -138.37542... True 100672_B \n", + "\n", + " coverage environment location children \n", + "id \n", + "2137 0.000680 marine 29.0 [2138] \n", + "2138 0.000905 marine 29.0 NaN " + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpa_table[mpa_table['name'] == 'Ivvavik National Park Of Canada']" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wdpaidwdpa_pidprotection_statusnamedesignationpa_iucn_categoryareayeardata_sourcebboxis_childchild_idcoverageenvironmentlocationchildren
id
17036100672100672_A1Ivvavik National Park Of CanadaNational Park29695.83760719843[-141.000000001, 68.556807999, -138.1338199979...False100672_A0.097898terrestrial29.0[17037]
17037100672100672_B1Ivvavik National Park Of CanadaNational Park279.37505619843[-140.894068268, 69.19278843000001, -138.37542...True100672_B0.000801terrestrial29.0NaN
\n", + "
" + ], + "text/plain": [ + " wdpaid wdpa_pid protection_status name \\\n", + "id \n", + "17036 100672 100672_A 1 Ivvavik National Park Of Canada \n", + "17037 100672 100672_B 1 Ivvavik National Park Of Canada \n", + "\n", + " designation pa_iucn_category area year data_source \\\n", + "id \n", + "17036 National Park 2 9695.837607 1984 3 \n", + "17037 National Park 2 79.375056 1984 3 \n", + "\n", + " bbox is_child child_id \\\n", + "id \n", + "17036 [-141.000000001, 68.556807999, -138.1338199979... False 100672_A \n", + "17037 [-140.894068268, 69.19278843000001, -138.37542... True 100672_B \n", + "\n", + " coverage environment location children \n", + "id \n", + "17036 0.097898 terrestrial 29.0 [17037] \n", + "17037 0.000801 terrestrial 29.0 NaN " + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tpa_table[tpa_table['name'] == 'Ivvavik National Park Of Canada']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PAsSchema(mpa_table[mpa_table.location.notna()]).to_csv(output_file_mpas, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# todo investigate the issue with area as null" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# batch_export(\n", + "# mpa_table[mpa_table.area.notna()],\n", + "# 5000,\n", + "# PAsSchema,\n", + "# pipe_dir.get_processed_step_path(current_step),\n", + "# \"mpa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_mpas,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", + "# init_table[\n", + "# (\n", 
+ "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", + "# .groupby(\"wdpaid\")\n", + "# .transform(\"size\")\n", + "# .gt(1)\n", + "# )\n", + "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", + "# ].groupby(\"wdpaid\")\n", + "# .geometry.apply(lambda x: x.union_all())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### upload data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for i in range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country mpas detail table data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " 1- lower case the columns \n", + "2- separate locations whose regime is in dispute or under a joint regime \n", + "3- calculate area for mpaatlas data \n", + "4- rename columns for merge \n", + "5- merge mpaatlas and mpa data identifying the source \n", + "6- identify child resources and set them as children \n", + "7- calculate bbox \n", + "8- set child resources \n", + "9- prepare output for batch export \n", + "10- upload data to strapi " + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n", + 
"/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" + ] + }, + "execution_count": 256, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa\"\n", + "strapi_collection_mpas = \"mpa\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", + "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the data\n", + "mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\")).pipe(\n", + " clean_geometries\n", + ")\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": {}, + "outputs": [], + "source": [ + "init_table = (\n", + " pd.concat(\n", + " [\n", + " (\n", + " mpa_intermediate.pipe(columns_to_lower)\n", + " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", + " .pipe(change_ata_to_abnj)\n", + " .rename(\n", + " columns={\n", + " \"parent_iso\": \"iso\",\n", + " \"status_yr\": \"year\",\n", + " \"gis_m_area\": \"area_km2\",\n", + " }\n", + " ).drop(columns=['status'])\n", + " ).assign(source=\"protected_planet\"),\n", + " (\n", + " mpaatlas_intermediate.pipe(calculate_area)\n", + " .pipe(extract_wdpaid_mpaatlas)\n", + " 
.pipe(separate_parent_iso, iso_column=\"location_i\")\n", + " .rename(\n", + " columns={\n", + " \"location_i\": \"iso\",\n", + " \"wdpa_id\": \"wdpa_pid\",\n", + " \"designatio\": \"desig_eng\",\n", + " }\n", + " )\n", + " ).assign(source=\"mpaatlas\"\n", + " ).astype({\"mpa_zone_i\": \"Int64\"}),\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .reset_index(drop=True)\n", + " .replace(\n", + " {\n", + " \"iso\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "# to be run if things change a lot in the future\n", + "# iucn_cat = pd.DataFrame(\n", + "# {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", + "# index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", + "# )\n", + "# iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", + "\n", + "iucn_cat = pd.read_csv(\n", + " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [], + "source": [ + "mpa_table = (\n", + " init_table.pipe(add_bbox, \"bbox\")\n", + " .pipe(define_is_child)\n", + " .pipe(set_child_id)\n", + " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", + " .reset_index(drop=True)\n", + " .pipe(add_total_marine_area)\n", + " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(add_environment)\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso\",\n", + " rep_d={\n", + " \"status\": {\n", + " \"Adopted\": 4,\n", 
+ " \"implemented\": 6,\n", + " \"Established\": 6,\n", + " \"Designated\": 5,\n", + " \"Proposed\": 3,\n", + " \"Inscribed\": 3,\n", + " \"unknown\": 1,\n", + " },\n", + " \"pa_def\": {\"0\": 2, \"1\": 1},\n", + " \"year\": {0: pd.NA},\n", + " \"iucn_cat\": dict(\n", + " iucn_cat[[\"slug\"]]\n", + " .reset_index(drop=False)\n", + " .iloc[:, [1, 0]]\n", + " .to_dict(orient=\"tight\")[\"data\"]\n", + " ),\n", + " \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\n", + " \"protection\": {\n", + " \"full\": 3,\n", + " \"light\": 4,\n", + " \"incompatible\": 5,\n", + " \"high\": 6,\n", + " \"minimal\": 7,\n", + " \"unknown\": 8,\n", + " \"unknown/to be determined\": 8,\n", + " },\n", + " \"establishm\": {\n", + " \"actively managed\": 4,\n", + " \"implemented\": 6,\n", + " \"designated\": 5,\n", + " \"Designated\": 5,\n", + " \"proposed or committed\": 3,\n", + " \"Proposed\": 3,\n", + " \"Inscribed\": 3,\n", + " \"Established\": 5,\n", + " \"Adopted\": 5,\n", + " \"unknown\": 1,\n", + " },\n", + " },\n", + " rename={\n", + " \"pa_def\": \"protection_status\",\n", + " \"area_km2\": \"area\",\n", + " \"iucn_cat\": \"pa_iucn_category\",\n", + " \"desig_eng\": \"designation\",\n", + " \"protection\": \"mpaa_protection_level\",\n", + " \"establishm\": \"mpaa_establishment_stage\",\n", + " \"source\": \"data_source\",\n", + " },\n", + " drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\n", + " )\n", + " .pipe(add_child_parent_relationship)\n", + " .astype(\n", + " {\n", + " \"year\": \"Int32\",\n", + " \"pa_iucn_category\": \"Int64\",\n", + " \"protection_status\": \"Int64\",\n", + " }\n", + " )\n", + " .query(\"coverage <= 100\") \n", + " .sort_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": {}, + "outputs": [], + "source": [ + "# Validate and save\n", + "PAsSchema(mpa_table[mpa_table.location.notna()]).to_csv(output_file_mpas, index=True)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# todo investigate the issue with area as null" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# batch_export(\n", + "# mpa_table[mpa_table.area.notna()],\n", + "# 5000,\n", + "# PAsSchema,\n", + "# pipe_dir.get_processed_step_path(current_step),\n", + "# \"mpa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_mpas,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", + "# init_table[\n", + "# (\n", + "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", + "# .groupby(\"wdpaid\")\n", + "# .transform(\"size\")\n", + "# .gt(1)\n", + "# )\n", + "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", + "# ].groupby(\"wdpaid\")\n", + "# .geometry.apply(lambda x: x.union_all())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### upload data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for i in range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Terrestrial pas - detail table data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "1- lower case the columns \n", + "2- separate locations whose regime is in dispute or under a joint regime \n", + "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", + "4- rename columns for merge \n", + "5- identify child resources and set them as children \n", + "6- calculate bbox \n", + "7- set child resources \n", + "8- prepare output for batch export \n", + "9- upload data to strapi " + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + ] + }, + "execution_count": 261, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "strapi_collection_mpas = \"mpa-terrestrial\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [], + "source": [ + "init_table = (\n", + " pd.concat(\n", + " [\n", + " (\n", + " tpa_intermediate.pipe(columns_to_lower)\n", + " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", + " .query(\"parent_iso != 'ATA' and parent_iso != 'ABNJ'\")\n", + " .rename(\n", + " columns={\n", + " \"parent_iso\": \"iso\",\n", + " \"status_yr\": \"year\",\n", + " \"gis_area\": \"area_km2\",\n", + " }\n", + " ).drop(columns=['status'])\n", + " ).assign(source=\"protected_planet\"),\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .reset_index(drop=True)\n", + " .replace(\n", + " {\n", + " \"iso\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [], + "source": [ + "tpa_table = (\n", + " init_table.pipe(add_bbox, \"bbox\")\n", + " .pipe(define_is_child)\n", + " .pipe(set_child_id_pa)\n", + " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", + " .reset_index(drop=True)\n", + " .pipe(add_total_terrestrial_area)\n", + " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(add_environment)\n", + " .pipe(\n", + " output2,\n", + " iso_column=\"iso\",\n", + " rep_d={\n", + " \"pa_def\": {\"0\": 2, \"1\": 1},\n", + " \"year\": {0: pd.NA},\n", + " \"iucn_cat\": dict(\n", + " iucn_cat[[\"slug\"]]\n", + " .reset_index(drop=False)\n", + " .iloc[:, [1, 0]]\n", + " .to_dict(orient=\"tight\")[\"data\"]\n", + " ),\n", + " \"source\": {\"protected_planet\": 3},\n", + " },\n", + " rename={\n", + " \"pa_def\": \"protection_status\",\n", + " \"area_km2\": \"area\",\n", + " \"iucn_cat\": \"pa_iucn_category\",\n", + " 
\"desig_eng\": \"designation\",\n", + " \"source\": \"data_source\",\n", + " },\n", + " drop_cols=[\"geometry\", \"iso\", \"marine\", \"total_terrestrial_area\"]\n", + " )\n", + " .pipe(add_child_parent_relationship)\n", + " .astype(\n", + " {\n", + " \"year\": \"Int32\",\n", + " \"pa_iucn_category\": \"Int64\",\n", + " \"protection_status\": \"Int64\",\n", + " }\n", + " )\n", + " .query(\"coverage <= 100\") \n", + " .sort_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "metadata": {}, + "outputs": [], + "source": [ + "# Add col mpaa_protection_level and mpa_establishment_stage to the table to validate it\n", + "tpa_table['mpaa_protection_level'] = np.nan\n", + "tpa_table['mpaa_establishment_stage'] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "metadata": {}, + "outputs": [], + "source": [ + "# Validate and save\n", + "PAsSchema(tpa_table[tpa_table.location.notna()]).to_csv(output_file_tpas, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wdpaidwdpa_pidprotection_statusnamedesignationpa_iucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxis_childchild_idcoverageenvironmentlocationchildren
id
1111Diamond Reef and Salt Fish Tail ReefMarine Reserve1197314.6361353NaNNaN[-61.88691617799998, 17.184972703000028, -61.8...False10.013119marine15.0NaN
2221Palaster ReefMarine Reserve119733.8456233NaNNaN[-61.771742115999984, 17.520006550999994, -61....False20.003447marine15.0NaN
\n", + "
" + ], + "text/plain": [ + " wdpaid wdpa_pid protection_status name \\\n", + "id \n", + "1 1 1 1 Diamond Reef and Salt Fish Tail Reef \n", + "2 2 2 1 Palaster Reef \n", + "\n", + " designation pa_iucn_category year area data_source \\\n", + "id \n", + "1 Marine Reserve 1 1973 14.636135 3 \n", + "2 Marine Reserve 1 1973 3.845623 3 \n", + "\n", + " mpaa_establishment_stage mpaa_protection_level \\\n", + "id \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "\n", + " bbox is_child child_id \\\n", + "id \n", + "1 [-61.88691617799998, 17.184972703000028, -61.8... False 1 \n", + "2 [-61.771742115999984, 17.520006550999994, -61.... False 2 \n", + "\n", + " coverage environment location children \n", + "id \n", + "1 0.013119 marine 15.0 NaN \n", + "2 0.003447 marine 15.0 NaN " + ] + }, + "execution_count": 274, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create final table with all the data\n", + "final_table = pd.concat([mpa_table, tpa_table])\n", + "final_table.index = range(1, len(final_table) + 1)\n", + "final_table.index.name = 'id'\n", + "final_table.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "metadata": {}, + "outputs": [], + "source": [ + "pipe_dir_pa = FileConventionHandler(\"pa\")\n", + "output_file_pas = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")\n", + "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pas, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 283, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/pa.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file_pas,\n", + 
" operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# batch_export(\n", + "# mpa_table[mpa_table.area.notna()],\n", + "# 5000,\n", + "# PAsSchema,\n", + "# pipe_dir.get_processed_step_path(current_step),\n", + "# \"mpa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_mpas,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", + "# init_table[\n", + "# (\n", + "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", + "# .groupby(\"wdpaid\")\n", + "# .transform(\"size\")\n", + "# .gt(1)\n", + "# )\n", + "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", + "# ].groupby(\"wdpaid\")\n", + "# .geometry.apply(lambda x: x.union_all())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### upload data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for i in range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PA coverage" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "strapi_collection_mpas = \"mpa-terrestrial\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the data\n", + "tpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", + " clean_geometries\n", + ")\n", + "gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 210, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gadm.sindex\n", + "tpa_intermediate.sindex" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from typing import Tuple, List\n", + "import sys\n", + "from pathlib import Path\n", 
+ "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import asyncio\n", + "from tqdm.asyncio import tqdm\n", + "from itertools import product\n", + "from shapely.geometry import box\n", + "\n", + "\n", + "scripts_dir = Path(\".\").joinpath(\"src\")\n", + "if scripts_dir not in sys.path:\n", + " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", + "\n", + "from pipelines.utils import background\n", + "from pipelines.processors import calculate_area, get_matches, repair_geometry, arrange_dimensions, clean_geometries, simplify_async\n", + "\n", + "logging.basicConfig(level=logging.DEBUG)\n", + "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", + "logging.getLogger(\"fiona\").setLevel(logging.WARNING)\n", + "logger = logging.getLogger(\"notebook\")\n", + "\n", + "\n", + "def split_by_year(\n", + " gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", + ") -> List[gpd.GeoDataFrame]:\n", + " \"\"\"Split data by year. 
relevant for MPA data.(coverage indicator)\"\"\"\n", + " prior_2010 = (\n", + " gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + " .dissolve(\n", + " by=[\"iso_3\"],\n", + " )\n", + " .assign(year=2010)\n", + " .reset_index()\n", + " )\n", + "\n", + " after_2010 = (\n", + " gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + " .rename(columns={\"STATUS_YR\": \"year\"})\n", + " )\n", + " return [prior_2010, after_2010]\n", + "\n", + "\n", + "def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", + " \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", + " minx, miny, maxx, maxy = bounds\n", + " x = np.arange(minx, maxx, cell_size)\n", + " y = np.arange(miny, maxy, cell_size)\n", + " polygons = [\n", + " {\n", + " \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", + " \"cell_id\": f\"{i}_{j}\",\n", + " }\n", + " for i, j in product(x, y)\n", + " ]\n", + " return gpd.GeoDataFrame(polygons)\n", + "\n", + "\n", + "def subdivide_grid(\n", + " grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", + ") -> List:\n", + " subdivided_elements = []\n", + " for grid_element in grid_gdf.geometry:\n", + " candidates = get_matches(grid_element, gdf)\n", + " density = len(candidates)\n", + " if density > max_complexity:\n", + " \n", + " subdivision_cellsize = max_cellsize / 2\n", + " # Subdivide the grid element recursively\n", + " subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", + " subdivided_elements.extend(\n", + " subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity)\n", + " )\n", + " elif density > 0:\n", + " subdivided_elements.append(grid_element)\n", + "\n", + " return subdivided_elements\n", + "\n", + "\n", + "def create_density_based_grid(\n", + " gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000\n", + ") -> gpd.GeoDataFrame:\n", + " # Get the bounds 
of the GeoDataFrame\n", + " minx, miny, maxx, maxy = gdf.total_bounds\n", + "\n", + " # Create an initial grid\n", + " grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", + "\n", + " # Subdivide grid elements based on density and complexity\n", + " subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", + "\n", + " return gpd.GeoDataFrame(geometry=subdivided_elements)\n", + "\n", + "# TODO: refactor this so old function maintains functionality for marine areas\n", + "\n", + "def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", + " result = []\n", + " gdf[\"already_processed\"] = False\n", + " for geometry in grid_gdf.geometry:\n", + " candidates = get_matches(geometry, gdf)\n", + " subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", + " gdf.loc[subset.index, \"already_processed\"] = True\n", + " if not subset.empty:\n", + " result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", + " return result\n", + "\n", + "\n", + "@background\n", + "def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", + " try:\n", + " bbox = df_large_chunk.total_bounds\n", + "\n", + " candidates = get_matches(box(*bbox), df_small.geometry)\n", + " if len(candidates) > 0:\n", + " subset = df_small.loc[candidates.index].clip(box(*bbox))\n", + "\n", + " result = (\n", + " df_large_chunk.sjoin(subset, how=\"inner\")\n", + " .clip(subset.geometry)\n", + " .reset_index(drop=True)\n", + " )\n", + " result.geometry = result.geometry.apply(repair_geometry)\n", + " else:\n", + " result = gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", + " return result\n", + " except Exception as e:\n", + " logging.error(e)\n", + " return gpd.GeoDataFrame()\n", + " finally:\n", + " pbar.update(1)\n", + "\n", + "\n", + "async def spatial_join(\n", + " geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"Create spatial join between 
two GeoDataFrames.\"\"\"\n", + " # we build the spatial index for the larger GeoDataFrame\n", + " smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", + "\n", + " logger.info(f\"Processing {len(larger_dim)} elements\")\n", + "\n", + " grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", + "\n", + " logger.info(f\"grid created with {len(grid)} cells\")\n", + "\n", + " list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", + "\n", + " logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", + "\n", + " with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", + " new_df = await asyncio.gather(\n", + " *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", + " )\n", + "\n", + " return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", + "\n", + "\n", + "@background\n", + "def spatial_dissolve_chunk(geometry, gdf, pbar):\n", + "\n", + " try:\n", + " candidates = get_matches(\n", + " geometry,\n", + " gdf.geometry,\n", + " )\n", + " subset = gdf.loc[candidates.index]\n", + "\n", + " result = pd.concat(\n", + " subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", + " ).copy()\n", + "\n", + " data_chunk = [\n", + " (\n", + " result[result[\"year\"] <= 2010]\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + " ]\n", + " for year in range(2011, 2025):\n", + " data_chunk.append(\n", + " result[result[\"year\"] <= year]\n", + " .dissolve(\n", + " by=[\"iso_3\"],\n", + " )\n", + " .assign(year=year)\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + "\n", + " return pd.concat(data_chunk, ignore_index=True)\n", + " except Exception as e:\n", + " logging.error(e)\n", + " return gpd.GeoDataFrame()\n", + " finally:\n", + " pbar.update(1)\n", + "\n", + "async 
def process_grid(gdf):\n", + " grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", + " with tqdm(total=grid_gdf.shape[0]) as pbar:\n", + " pbar = tqdm(total=len(grid_gdf), desc=\"Processing grid elements\")\n", + " result = await asyncio.gather(*[spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values])\n", + " return result\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 212, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 206/206 [00:04<00:00, 42.41it/s]\n", + "INFO:notebook:Processing 286305 elements\n", + "INFO:notebook:grid created with 433 cells\n", + "INFO:notebook:grid split into 392 chunks\n", + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [10:01<00:00, 1.53s/it]\n", + "INFO:pyogrio._io:Created 290,561 records\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████ | 269/392 [01:06<00:11, 10.71it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [10:01<00:00, 122.96s/it]" + ] + } + ], + "source": [ + "wdpa_subset = tpa_intermediate[\n", + " ~(\n", + " (tpa_intermediate.bounds.minx < -181)\n", + " | (tpa_intermediate.bounds.miny < -91)\n", + " | (tpa_intermediate.bounds.maxx > 181)\n", + " | (tpa_intermediate.bounds.maxy > 91)\n", + " 
)\n", + "].reset_index(drop=True)\n", + "\n", + "gadm_sync = await simplify_async(gadm)\n", + "sjoin_gdf = await spatial_join(wdpa_subset, gadm_sync)\n", + "\n", + "# save sjoin_gdf to file\n", + "sjoin_gdf.to_file(pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\"), driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 213, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOgeometryindex_rightCOUNTRYGID_0area_km2
0555624810555624810_A1Ross Sea Region Marine Protected AreaMarine Protected Area (CCAMLR)Not Reported11.521513e+06Designated2017ABNJMULTIPOLYGON (((179.79727 -84.3402, 179.77745 ...6.0AntarcticaATA12088229.65
15555656815555656811RangatiraNature ReserveIa12.173346e+00Designated1977NZLPOLYGON ((-176.17405 -44.35403, -176.1742 -44....127.0New ZealandNZL268885.81
\n", + "
" + ], + "text/plain": [ + " WDPAID WDPA_PID PA_DEF NAME \\\n", + "0 555624810 555624810_A 1 Ross Sea Region Marine Protected Area \n", + "1 555565681 555565681 1 Rangatira \n", + "\n", + " DESIG_ENG IUCN_CAT MARINE GIS_AREA \\\n", + "0 Marine Protected Area (CCAMLR) Not Reported 1 1.521513e+06 \n", + "1 Nature Reserve Ia 1 2.173346e+00 \n", + "\n", + " STATUS STATUS_YR PARENT_ISO \\\n", + "0 Designated 2017 ABNJ \n", + "1 Designated 1977 NZL \n", + "\n", + " geometry index_right \\\n", + "0 MULTIPOLYGON (((179.79727 -84.3402, 179.77745 ... 6.0 \n", + "1 POLYGON ((-176.17405 -44.35403, -176.1742 -44.... 127.0 \n", + "\n", + " COUNTRY GID_0 area_km2 \n", + "0 Antarctica ATA 12088229.65 \n", + "1 New Zealand NZL 268885.81 " + ] + }, + "execution_count": 213, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sjoin_gdf.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# save sjoin_gdf to file\n", + "sjoin_gdf.to_file(pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\"), driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "sjoin_gdf = gpd.read_file(pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")).pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['WDPAID', 'WDPA_PID', 'PA_DEF', 'NAME', 'DESIG_ENG', 'IUCN_CAT',\n", + " 'MARINE', 'GIS_AREA', 'STATUS', 'STATUS_YR', 'PARENT_ISO', 'index_righ',\n", + " 'COUNTRY', 'GID_0', 'area_km2', 'geometry'],\n", + " dtype='object')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sjoin_gdf.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PA_DEFGID_0oecmpa
0AFG0.025.0
1AGO0.037.0
2ALB0.0117.0
3AND0.023.0
4ARE0.054.0
5ARG0.0403.0
6ARM0.068.0
7ATA0.09.0
8ATG0.010.0
9AUS0.011234.0
\n", + "
" + ], + "text/plain": [ + "PA_DEF GID_0 oecm pa\n", + "0 AFG 0.0 25.0\n", + "1 AGO 0.0 37.0\n", + "2 ALB 0.0 117.0\n", + "3 AND 0.0 23.0\n", + "4 ARE 0.0 54.0\n", + "5 ARG 0.0 403.0\n", + "6 ARM 0.0 68.0\n", + "7 ATA 0.0 9.0\n", + "8 ATG 0.0 10.0\n", + "9 AUS 0.0 11234.0" + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_oecms = (\n", + " sjoin_gdf.groupby([\"GID_0\", \"PA_DEF\"])\n", + " .agg({\"PA_DEF\": \"count\"})\n", + " .rename(columns={\"PA_DEF\": \"count\"})\n", + " .reset_index()\n", + " .pivot(index=\"GID_0\", columns=\"PA_DEF\", values=\"count\")\n", + " .fillna(0)\n", + " .reset_index()\n", + " .rename(columns={\"0\": \"oecm\", \"1\": \"pa\"})\n", + ")\n", + "# ).reset_index().pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\").reset_index(names=[\"PA_DEF\"], level=0, drop=True)\n", + "\n", + "result_oecms.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# async def process_mpa_data(\n", + "# gdf: gpd.GeoDataFrame, loop: list[int], by: list[str], aggfunc: dict\n", + "# ) -> pd.DataFrame:\n", + "# \"\"\"process protected planet data. 
relevant for acc coverage extent by year indicator.\"\"\"\n", + "# # we split the data by <= year so we can accumulate the coverage\n", + "# base = split_by_year(gdf)\n", + "\n", + "# result_to_iter = pd.concat(base, ignore_index=True).copy()\n", + "\n", + "# with tqdm(total=len(loop)) as pbar: # we create a progress bar\n", + "# new_df = await asyncio.gather(\n", + "# *(spatial_dissolve_chunk(year, result_to_iter, pbar, by, aggfunc) for year in loop)\n", + "# )\n", + "# return pd.concat(\n", + "# [base[0].pipe(calculate_area, \"area\", None).drop(columns=[\"geometry\"]), *new_df],\n", + "# ignore_index=True,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# final_data = await process_mpa_data(\n", + "# eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", + "# range(2011, time.localtime().tm_year + 1),\n", + "# [\"PA_DEF\", \"iso_3\"],\n", + "# {\"protectedAreasCount\": \"sum\"},\n", + "# )\n", + "# coverage = (\n", + "# final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", + "# .pipe(separate_parent_iso, \"iso_3\")\n", + "# .pipe(add_region_iso, \"iso_3\")\n", + "# .replace(\n", + "# {\n", + "# \"iso_3\": {\n", + "# \"ATA\": \"ABNJ\",\n", + "# \"COK\": \"NZL\",\n", + "# \"IOT\": \"GBR\",\n", + "# \"NIU\": \"NZL\",\n", + "# \"SHN\": \"GBR\",\n", + "# \"SJM\": \"NOR\",\n", + "# \"UMI\": \"USA\",\n", + "# \"NCL\": \"FRA\",\n", + "# \"GIB\": \"GBR\",\n", + "# }\n", + "# }\n", + "# )\n", + "# .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", + "# .pipe(add_pa_oecm_percentages)\n", + "# .pipe(add_total_marine_area)\n", + "# .pipe(coverage_stats2)\n", + "# .pipe(calculate_coverage_percentage_mpa)\n", + "# .pipe(calculate_global_contribution)\n", + "# .pipe(add_is_last_year)\n", + "# .pipe(add_environment)\n", + "# )\n", + "\n", + "\n", + "# NewProtectedAreaExtentSchema(\n", + "# coverage.pipe(\n", 
+ "# output,\n", + "# \"iso_3\",\n", + "# {},\n", + "# {},\n", + "# [\"area\", \"iso_3\", 'total_marine_area'],\n", + "# )\n", + "# ).to_csv(\n", + "# output_file,\n", + "# index=True,\n", + "# )\n", + "# coverage.head(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 092279681e883aae2a66c51d9585d2b4feabf35e Mon Sep 17 00:00:00 2001 From: Alicia Date: Fri, 27 Sep 2024 08:27:44 +0200 Subject: [PATCH 08/16] fixed conflicts --- data/notebooks/pipes_mock/precalc.ipynb | 65 +- data/notebooks/test_bygrid.ipynb | 10432 +++++++++++++++++++++- data/src/pipelines/processors.py | 5 +- 3 files changed, 10462 insertions(+), 40 deletions(-) diff --git a/data/notebooks/pipes_mock/precalc.ipynb b/data/notebooks/pipes_mock/precalc.ipynb index d1fbef2e..4f8658de 100644 --- a/data/notebooks/pipes_mock/precalc.ipynb +++ b/data/notebooks/pipes_mock/precalc.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -95,9 +95,20 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Strapi setup\n", "strapi = Strapi(url=mysettings.STRAPI_URL)\n", @@ -106,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -116,7 +127,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": 
true + }, "source": [ "### Locations (eez + regions)" ] @@ -239,7 +252,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Habitats" ] @@ -334,7 +349,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Coverage stats - Mpas" ] @@ -557,7 +574,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Mpa atlas - country stats Fully or highly protected" ] @@ -783,7 +802,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Protected seas - fishing protection level" ] @@ -1107,7 +1128,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -1126,7 +1147,7 @@ "PosixPath('/home/mambauser/data/mpaatlas/processed/preprocess')" ] }, - "execution_count": 16, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1473,16 +1494,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1493,9 +1514,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'mpa_folder' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, 
\u001b[38;5;241m4\u001b[39m):\n\u001b[1;32m 2\u001b[0m strapi\u001b[38;5;241m.\u001b[39mimportCollectionData(\n\u001b[1;32m 3\u001b[0m strapi_collection_mpas,\n\u001b[0;32m----> 4\u001b[0m \u001b[43mmpa_folder\u001b[49m\u001b[38;5;241m.\u001b[39mjoinpath(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmpa_detail_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 5\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'mpa_folder' is not defined" + ] + } + ], "source": [ "for i in range(0, 4):\n", " strapi.importCollectionData(\n", diff --git a/data/notebooks/test_bygrid.ipynb b/data/notebooks/test_bygrid.ipynb index 4a274e53..76185b4c 100644 --- a/data/notebooks/test_bygrid.ipynb +++ b/data/notebooks/test_bygrid.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -205,8 +205,8 @@ "\n", "@background\n", "def spatial_dissolve_chunk(geometry, gdf, pbar):\n", - "\n", " try:\n", + " logger.info(\"Processing chunk\")\n", " candidates = get_matches(\n", " geometry,\n", " gdf.geometry,\n", @@ -246,17 +246,30 @@ "\n", "async def process_grid(gdf):\n", " grid_gdf = 
create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", - " with tqdm(total=grid_gdf.shape[0]) as pbar:\n", - " pbar = tqdm(total=len(grid_gdf), desc=\"Processing grid elements\")\n", - " result = await asyncio.gather(*[spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values])\n", + " logger.info(f\"grid created with {grid_gdf.shape[0]} cells\")\n", + "\n", + " with tqdm(total=grid_gdf.shape[0], desc=\"Processing grid elements\") as pbar:\n", + " jobs = [spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values]\n", + " result = await asyncio.gather(*jobs)\n", " return result" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "gadm = gpd.read_file(\"../data/gadm/processed/preprocess/gadm_preprocess.shp\").pipe(clean_geometries)\n", "wdpa = gpd.read_file(\n", @@ -268,9 +281,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 206/206 [02:20<00:00, 1.46it/s]\n", + "INFO:notebook:Processing 286305 elements\n", + "INFO:notebook:grid created with 433 cells\n", + "INFO:notebook:grid split into 392 chunks\n", + "100%|██████████| 392/392 [17:24<00:00, 2.66s/it] \n" + ] + } + ], "source": [ "wdpa_subset = wdpa[\n", " ~(\n", @@ -287,25 +312,10392 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [], + "source": [ + "sjoin_gdf.rename(columns={\"GID_0\": \"iso_3\"}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOgeometryindex_rightCOUNTRYiso_3area_km2COUNTRY_ESCOUNTRY_FR
\n", + "
" + ], + "text/plain": [ + "Empty GeoDataFrame\n", + "Columns: [WDPAID, WDPA_PID, PA_DEF, NAME, DESIG_ENG, IUCN_CAT, MARINE, GIS_AREA, STATUS, STATUS_YR, PARENT_ISO, geometry, index_right, COUNTRY, iso_3, area_km2, COUNTRY_ES, COUNTRY_FR]\n", + "Index: []" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# test that we have not produce duplicates\n", - "sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"GID_0\"], keep=False)].sort_values(\n", + "sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", " \"WDPA_PID\"\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:notebook:grid created with 425 cells\n", + "Processing grid elements: 100%|██████████| 425/425 [22:39<00:00, 3.20s/it] \n" + ] + } + ], "source": [ "data = await process_grid(sjoin_gdf)" ] }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 1395.028044\n", + " 1 NaN ATA None 2018 1395.028044\n", + " 2 NaN ATA None 2019 1395.028044\n", + " 3 NaN ATA None 2020 1395.028044\n", + " 4 NaN ATA None 2021 1395.028044\n", + " 5 NaN ATA None 2022 1395.028044\n", + " 6 NaN ATA None 2023 1395.028044\n", + " 7 NaN ATA None 2024 1395.028044,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1977 2010 69.358222\n", + " 1 NaN NZL 1977 2011 81.690847\n", + " 2 NaN NZL 1977 2012 81.690847\n", + " 3 NaN NZL 1977 2013 81.690847\n", + " 4 NaN NZL 1977 2014 81.690847\n", + " 5 NaN NZL 1977 2015 81.690847\n", + " 6 NaN NZL 1977 2016 81.690847\n", + " 7 NaN NZL 1977 2017 81.690847\n", + " 8 NaN NZL 1977 2018 81.690847\n", + " 9 NaN NZL 1977 2019 81.690847\n", + " 10 NaN NZL 1977 2020 81.690847\n", + " 11 NaN NZL 1977 2021 
81.690847\n", + " 12 NaN NZL 1977 2022 81.690847\n", + " 13 NaN NZL 1977 2023 81.690847\n", + " 14 NaN NZL 1977 2024 81.690847,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1934 2010 31.955832\n", + " 1 NaN NZL 1934 2011 31.955832\n", + " 2 NaN NZL 1934 2012 31.955832\n", + " 3 NaN NZL 1934 2013 31.955832\n", + " 4 NaN NZL 1934 2014 31.955832\n", + " 5 NaN NZL 1934 2015 31.955832\n", + " 6 NaN NZL 1934 2016 31.955832\n", + " 7 NaN NZL 1934 2017 31.955832\n", + " 8 NaN NZL 1934 2018 31.955832\n", + " 9 NaN NZL 1934 2019 31.955832\n", + " 10 NaN NZL 1934 2020 31.955832\n", + " 11 NaN NZL 1934 2021 31.955832\n", + " 12 NaN NZL 1934 2022 31.955832\n", + " 13 NaN NZL 1934 2023 31.955832\n", + " 14 NaN NZL 1934 2024 31.955832,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FJI 2007 2010 91.743770\n", + " 1 1.0 TON 1992 2010 80.706751\n", + " 2 NaN FJI 2007 2011 91.743770\n", + " 3 NaN TON 1992 2011 80.706751\n", + " 4 NaN FJI 2007 2012 91.743770\n", + " 5 NaN TON 1992 2012 80.706751\n", + " 6 NaN FJI 2007 2013 91.743770\n", + " 7 NaN TON 1992 2013 80.706751\n", + " 8 NaN FJI 2007 2014 91.743770\n", + " 9 NaN TON 1992 2014 80.706751\n", + " 10 NaN FJI 2007 2015 91.743770\n", + " 11 NaN TON 1992 2015 80.706751\n", + " 12 NaN FJI 2007 2016 91.743770\n", + " 13 NaN TON 1992 2016 80.706751\n", + " 14 NaN FJI 2007 2017 91.743770\n", + " 15 NaN TON 1992 2017 80.706751\n", + " 16 NaN FJI 2007 2018 91.743770\n", + " 17 NaN TON 1992 2018 80.706751\n", + " 18 NaN FJI 2007 2019 91.743770\n", + " 19 NaN TON 1992 2019 80.706751\n", + " 20 NaN FJI 2007 2020 91.743770\n", + " 21 NaN TON 1992 2020 80.706751\n", + " 22 NaN FJI 2007 2021 91.743770\n", + " 23 NaN TON 1992 2021 80.706751\n", + " 24 NaN FJI 2007 2022 91.743770\n", + " 25 NaN TON 1992 2022 80.706751\n", + " 26 NaN FJI 2007 2023 91.743770\n", + " 27 NaN TON 1992 2023 80.706751\n", + " 28 NaN FJI 2007 2024 91.743770\n", + " 29 NaN TON 1992 2024 80.706751,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 
USA 1994 2010 13.107477\n", + " 1 1.0 WSM 1978 2010 182.478923\n", + " 2 NaN USA 1994 2011 13.107477\n", + " 3 NaN WSM 1978 2011 182.478923\n", + " 4 NaN USA 1994 2012 13.107477\n", + " 5 NaN WSM 1978 2012 182.478923\n", + " 6 NaN USA 1994 2013 13.107477\n", + " 7 NaN WSM 1978 2013 182.478923\n", + " 8 NaN USA 1994 2014 13.107477\n", + " 9 NaN WSM 1978 2014 182.478923\n", + " 10 NaN USA 1994 2015 13.107477\n", + " 11 NaN WSM 1978 2015 182.691180\n", + " 12 NaN USA 1994 2016 13.107477\n", + " 13 NaN WSM 1978 2016 182.787717\n", + " 14 NaN USA 1994 2017 13.107477\n", + " 15 NaN WSM 1978 2017 182.787717\n", + " 16 NaN USA 1994 2018 13.107477\n", + " 17 NaN WSM 1978 2018 182.787717\n", + " 18 NaN USA 1994 2019 13.107477\n", + " 19 NaN WSM 1978 2019 182.787717\n", + " 20 NaN USA 1994 2020 13.107477\n", + " 21 NaN WSM 1978 2020 182.787717\n", + " 22 NaN USA 1994 2021 13.107477\n", + " 23 NaN WSM 1978 2021 182.787717\n", + " 24 NaN USA 1994 2022 13.107477\n", + " 25 NaN WSM 1978 2022 182.787717\n", + " 26 NaN USA 1994 2023 13.107477\n", + " 27 NaN WSM 1978 2023 182.787717\n", + " 28 NaN USA 1994 2024 13.107477\n", + " 29 NaN WSM 1978 2024 182.787717,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1974 2010 4.540545\n", + " 1 NaN USA 1974 2011 4.540545\n", + " 2 NaN USA 1974 2012 4.540545\n", + " 3 NaN USA 1974 2013 4.540545\n", + " 4 NaN USA 1974 2014 4.540545\n", + " 5 NaN USA 1974 2015 4.540545\n", + " 6 NaN USA 1974 2016 4.540545\n", + " 7 NaN USA 1974 2017 4.540545\n", + " 8 NaN USA 1974 2018 4.540545\n", + " 9 NaN USA 1974 2019 4.540545\n", + " 10 NaN USA 1974 2020 4.540545\n", + " 11 NaN USA 1974 2021 4.540545\n", + " 12 NaN USA 1974 2022 4.540545\n", + " 13 NaN USA 1974 2023 4.540545\n", + " 14 NaN USA 1974 2024 4.540545,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1980 2010 3717.390307\n", + " 1 NaN USA 1980 2011 3717.390307\n", + " 2 NaN USA 1980 2012 3717.390307\n", + " 3 NaN USA 1980 2013 3717.390307\n", + " 4 NaN USA 1980 2014 
3717.390307\n", + " 5 NaN USA 1980 2015 3717.390307\n", + " 6 NaN USA 1980 2016 3717.390307\n", + " 7 NaN USA 1980 2017 3717.390307\n", + " 8 NaN USA 1980 2018 3717.390307\n", + " 9 NaN USA 1980 2019 3717.390307\n", + " 10 NaN USA 1980 2020 3717.390307\n", + " 11 NaN USA 1980 2021 3717.390307\n", + " 12 NaN USA 1980 2022 3717.390307\n", + " 13 NaN USA 1980 2023 3717.390307\n", + " 14 NaN USA 1980 2024 3717.390307,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 300.808099\n", + " 1 NaN USA 0 2011 300.808099\n", + " 2 NaN USA 0 2012 300.808099\n", + " 3 NaN RUS None 2013 787.548260\n", + " 4 NaN USA 0 2013 300.808099\n", + " 5 NaN RUS None 2014 787.548260\n", + " 6 NaN USA 0 2014 300.808099\n", + " 7 NaN RUS None 2015 787.548260\n", + " 8 NaN USA 0 2015 300.808099\n", + " 9 NaN RUS None 2016 787.548260\n", + " 10 NaN USA 0 2016 300.808099\n", + " 11 NaN RUS None 2017 787.548260\n", + " 12 NaN USA 0 2017 300.808099\n", + " 13 NaN RUS None 2018 787.548260\n", + " 14 NaN USA 0 2018 300.808099\n", + " 15 NaN RUS None 2019 787.548260\n", + " 16 NaN USA 0 2019 300.808099\n", + " 17 NaN RUS None 2020 787.548260\n", + " 18 NaN USA 0 2020 300.808099\n", + " 19 NaN RUS None 2021 787.548260\n", + " 20 NaN USA 0 2021 300.808099\n", + " 21 NaN RUS None 2022 787.548260\n", + " 22 NaN USA 0 2022 300.808099\n", + " 23 NaN RUS None 2023 787.548260\n", + " 24 NaN USA 0 2023 300.808099\n", + " 25 NaN RUS None 2024 787.548260\n", + " 26 NaN USA 0 2024 300.808099,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2004 2010 5159.412345\n", + " 1 NaN RUS 2004 2011 5159.412345\n", + " 2 NaN RUS 2004 2012 5159.412345\n", + " 3 NaN RUS 2004 2013 18698.599197\n", + " 4 NaN RUS 2004 2014 18698.599197\n", + " 5 NaN RUS 2004 2015 18698.599197\n", + " 6 NaN RUS 2004 2016 18698.599197\n", + " 7 NaN RUS 2004 2017 18698.599197\n", + " 8 NaN RUS 2004 2018 18698.599197\n", + " 9 NaN RUS 2004 2019 18698.599197\n", + " 10 NaN RUS 2004 2020 18698.599197\n", + " 11 NaN RUS 2004 
2021 18698.599197\n", + " 12 NaN RUS 2004 2022 18698.599197\n", + " 13 NaN RUS 2004 2023 18698.599197\n", + " 14 NaN RUS 2004 2024 18698.599197,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 10535.996579\n", + " 1 NaN ATA None 2018 10535.996579\n", + " 2 NaN ATA None 2019 10535.996579\n", + " 3 NaN ATA None 2020 10535.996579\n", + " 4 NaN ATA None 2021 10535.996579\n", + " 5 NaN ATA None 2022 10535.996579\n", + " 6 NaN ATA None 2023 10535.996579\n", + " 7 NaN ATA None 2024 10535.996579,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1998 2010 58.831702\n", + " 1 NaN NZL 1998 2011 58.831702\n", + " 2 NaN NZL 1998 2012 58.831702\n", + " 3 NaN NZL 1998 2013 58.831702\n", + " 4 NaN NZL 1998 2014 58.831702\n", + " 5 NaN NZL 1998 2015 58.831702\n", + " 6 NaN NZL 1998 2016 58.831702\n", + " 7 NaN NZL 1998 2017 58.831702\n", + " 8 NaN NZL 1998 2018 58.831702\n", + " 9 NaN NZL 1998 2019 58.831702\n", + " 10 NaN NZL 1998 2020 58.831702\n", + " 11 NaN NZL 1998 2021 58.831702\n", + " 12 NaN NZL 1998 2022 58.831702\n", + " 13 NaN NZL 1998 2023 58.831702\n", + " 14 NaN NZL 1998 2024 58.831702,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1988 2010 20.878294\n", + " 1 NaN USA 1988 2011 20.878294\n", + " 2 NaN USA 1988 2012 20.878294\n", + " 3 NaN USA 1988 2013 20.878294\n", + " 4 NaN USA 1988 2014 20.878294\n", + " 5 NaN USA 1988 2015 20.878294\n", + " 6 NaN USA 1988 2016 20.878294\n", + " 7 NaN USA 1988 2017 20.878294\n", + " 8 NaN USA 1988 2018 20.878294\n", + " 9 NaN USA 1988 2019 20.878294\n", + " 10 NaN USA 1988 2020 20.878294\n", + " 11 NaN USA 1988 2021 20.878294\n", + " 12 NaN USA 1988 2022 20.878294\n", + " 13 NaN USA 1988 2023 20.878294\n", + " 14 NaN USA 1988 2024 20.878294,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 2001 2010 2.991209\n", + " 1 NaN USA 2001 2011 2.991209\n", + " 2 NaN USA 2001 2012 2.991209\n", + " 3 NaN USA 2001 2013 2.991209\n", + " 4 NaN USA 2001 2014 2.991209\n", + " 5 NaN USA 2001 2015 
2.991209\n", + " 6 NaN USA 2001 2016 2.991209\n", + " 7 NaN USA 2001 2017 2.991209\n", + " 8 NaN USA 2001 2018 2.991209\n", + " 9 NaN USA 2001 2019 2.991209\n", + " 10 NaN USA 2001 2020 2.991209\n", + " 11 NaN USA 2001 2021 2.991209\n", + " 12 NaN USA 2001 2022 2.991209\n", + " 13 NaN USA 2001 2023 2.991209\n", + " 14 NaN USA 2001 2024 2.991209,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 2.322748\n", + " 1 NaN USA 0 2011 2.322748\n", + " 2 NaN USA 0 2012 2.322748\n", + " 3 NaN USA 0 2013 2.322748\n", + " 4 NaN USA 0 2014 2.322748\n", + " 5 NaN USA 0 2015 2.322748\n", + " 6 NaN USA 0 2016 2.322748\n", + " 7 NaN USA 0 2017 2.322748\n", + " 8 NaN USA 0 2018 2.322748\n", + " 9 NaN USA 0 2019 2.322748\n", + " 10 NaN USA 0 2020 2.322748\n", + " 11 NaN USA 0 2021 2.322748\n", + " 12 NaN USA 0 2022 2.322748\n", + " 13 NaN USA 0 2023 2.322748\n", + " 14 NaN USA 0 2024 2.322748,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1980 2010 3981.595787\n", + " 1 NaN USA 1980 2011 3981.595787\n", + " 2 NaN USA 1980 2012 3981.595787\n", + " 3 NaN USA 1980 2013 3981.595787\n", + " 4 NaN USA 1980 2014 3981.595787\n", + " 5 NaN USA 1980 2015 3981.595787\n", + " 6 NaN USA 1980 2016 3981.595787\n", + " 7 NaN USA 1980 2017 3981.595787\n", + " 8 NaN USA 1980 2018 3981.595787\n", + " 9 NaN USA 1980 2019 3981.595787\n", + " 10 NaN USA 1980 2020 3981.595787\n", + " 11 NaN USA 1980 2021 3981.595787\n", + " 12 NaN USA 1980 2022 3981.595787\n", + " 13 NaN USA 1980 2023 3981.595787\n", + " 14 NaN USA 1980 2024 3981.595787,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1980 2010 88110.936742\n", + " 1 NaN USA 1980 2011 88110.936742\n", + " 2 NaN USA 1980 2012 88110.936742\n", + " 3 NaN USA 1980 2013 88110.936742\n", + " 4 NaN USA 1980 2014 88110.936742\n", + " 5 NaN USA 1980 2015 88110.936742\n", + " 6 NaN USA 1980 2016 88110.936742\n", + " 7 NaN USA 1980 2017 88110.936742\n", + " 8 NaN USA 1980 2018 88110.936742\n", + " 9 NaN USA 1980 2019 
88110.936742\n", + " 10 NaN USA 1980 2020 88110.936742\n", + " 11 NaN USA 1980 2021 88110.936742\n", + " 12 NaN USA 1980 2022 88110.936742\n", + " 13 NaN USA 1980 2023 88110.936742\n", + " 14 NaN USA 1980 2024 88110.936742,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 29137.893617\n", + " 1 NaN USA 0 2011 29137.893617\n", + " 2 NaN USA 0 2012 29137.893617\n", + " 3 NaN RUS None 2013 192.503365\n", + " 4 NaN USA 0 2013 29137.893617\n", + " 5 NaN RUS None 2014 192.503365\n", + " 6 NaN USA 0 2014 29137.893617\n", + " 7 NaN RUS None 2015 192.503365\n", + " 8 NaN USA 0 2015 29137.893617\n", + " 9 NaN RUS None 2016 192.503365\n", + " 10 NaN USA 0 2016 29137.893617\n", + " 11 NaN RUS None 2017 192.503365\n", + " 12 NaN USA 0 2017 29137.893617\n", + " 13 NaN RUS None 2018 192.503365\n", + " 14 NaN USA 0 2018 29137.893617\n", + " 15 NaN RUS None 2019 192.503365\n", + " 16 NaN USA 0 2019 29137.893617\n", + " 17 NaN RUS None 2020 192.503365\n", + " 18 NaN USA 0 2020 29137.893617\n", + " 19 NaN RUS None 2021 192.503365\n", + " 20 NaN USA 0 2021 29137.893617\n", + " 21 NaN RUS None 2022 192.503365\n", + " 22 NaN USA 0 2022 29137.893617\n", + " 23 NaN RUS None 2023 192.503365\n", + " 24 NaN USA 0 2023 29137.893617\n", + " 25 NaN RUS None 2024 192.503365\n", + " 26 NaN USA 0 2024 29137.893617,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 5107.135048\n", + " 1 NaN ATA None 2018 5107.135048\n", + " 2 NaN ATA None 2019 5107.135048\n", + " 3 NaN ATA None 2020 5107.135048\n", + " 4 NaN ATA None 2021 5107.135048\n", + " 5 NaN ATA None 2022 5107.135048\n", + " 6 NaN ATA None 2023 5107.135048\n", + " 7 NaN ATA None 2024 5107.135048,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1903 2010 1.123862\n", + " 1 NaN NZL 1903 2011 1.123862\n", + " 2 NaN NZL 1903 2012 1.123862\n", + " 3 NaN NZL 1903 2013 1.123862\n", + " 4 NaN NZL 1903 2014 1.123862\n", + " 5 NaN NZL 1903 2015 1.123862\n", + " 6 NaN NZL 1903 2016 1.123862\n", + " 7 NaN NZL 
1903 2017 1.123862\n", + " 8 NaN NZL 1903 2018 1.123862\n", + " 9 NaN NZL 1903 2019 1.123862\n", + " 10 NaN NZL 1903 2020 1.123862\n", + " 11 NaN NZL 1903 2021 1.123862\n", + " 12 NaN NZL 1903 2022 1.123862\n", + " 13 NaN NZL 1903 2023 1.123862\n", + " 14 NaN NZL 1903 2024 1.123862,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 2002 2010 2909.660445\n", + " 1 NaN USA 2002 2011 2937.101316\n", + " 2 NaN USA 2002 2012 2937.101316\n", + " 3 NaN USA 2002 2013 2937.101316\n", + " 4 NaN USA 2002 2014 2951.510032\n", + " 5 NaN USA 2002 2015 2951.510032\n", + " 6 NaN USA 2002 2016 2953.577367\n", + " 7 NaN USA 2002 2017 2953.577367\n", + " 8 NaN USA 2002 2018 2953.577367\n", + " 9 NaN USA 2002 2019 2953.577367\n", + " 10 NaN USA 2002 2020 2953.577367\n", + " 11 NaN USA 2002 2021 2953.577367\n", + " 12 NaN USA 2002 2022 2953.577367\n", + " 13 NaN USA 2002 2023 2953.577367\n", + " 14 NaN USA 2002 2024 2953.577367,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1976 2010 125874.707990\n", + " 1 NaN USA 1976 2011 125875.242812\n", + " 2 NaN USA 1976 2012 125875.944714\n", + " 3 NaN USA 1976 2013 125875.944714\n", + " 4 NaN USA 1976 2014 125875.944714\n", + " 5 NaN USA 1976 2015 125875.944714\n", + " 6 NaN USA 1976 2016 125875.944714\n", + " 7 NaN USA 1976 2017 125875.944714\n", + " 8 NaN USA 1976 2018 125875.944714\n", + " 9 NaN USA 1976 2019 125875.944714\n", + " 10 NaN USA 1976 2020 125875.944714\n", + " 11 NaN USA 1976 2021 125875.944714\n", + " 12 NaN USA 1976 2022 125875.944714\n", + " 13 NaN USA 1976 2023 125875.944714\n", + " 14 NaN USA 1976 2024 125875.944714,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1980 2010 88912.166478\n", + " 1 NaN USA 1980 2011 88912.166478\n", + " 2 NaN USA 1980 2012 88912.166478\n", + " 3 NaN USA 1980 2013 88912.166478\n", + " 4 NaN USA 1980 2014 88912.166478\n", + " 5 NaN USA 1980 2015 88912.166478\n", + " 6 NaN USA 1980 2016 88912.166478\n", + " 7 NaN USA 1980 2017 88912.166478\n", + " 8 NaN USA 1980 
2018 88912.166478\n", + " 9 NaN USA 1980 2019 88912.166478\n", + " 10 NaN USA 1980 2020 88912.166478\n", + " 11 NaN USA 1980 2021 88912.166478\n", + " 12 NaN USA 1980 2022 88912.166478\n", + " 13 NaN USA 1980 2023 88912.166478\n", + " 14 NaN USA 1980 2024 88912.166478,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 246.243478\n", + " 1 NaN ATA None 2018 246.243478\n", + " 2 NaN ATA None 2019 246.243478\n", + " 3 NaN ATA None 2020 246.243478\n", + " 4 NaN ATA None 2021 246.243478\n", + " 5 NaN ATA None 2022 246.243478\n", + " 6 NaN ATA None 2023 246.243478\n", + " 7 NaN ATA None 2024 246.243478,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1979 2010 63298.385102\n", + " 1 1.0 USA 1990 2010 85529.450479\n", + " 2 NaN CAN 1979 2011 63298.385102\n", + " 3 NaN USA 1990 2011 85529.450479\n", + " 4 NaN CAN 1979 2012 63298.385102\n", + " 5 NaN USA 1990 2012 85529.450479\n", + " 6 NaN CAN 1979 2013 63298.385102\n", + " 7 NaN USA 1990 2013 85529.450479\n", + " 8 NaN CAN 1979 2014 63298.385102\n", + " 9 NaN USA 1990 2014 85529.450479\n", + " 10 NaN CAN 1979 2015 63298.385102\n", + " 11 NaN USA 1990 2015 85529.450479\n", + " 12 NaN CAN 1979 2016 63298.385102\n", + " 13 NaN USA 1990 2016 85529.450479\n", + " 14 NaN CAN 1979 2017 63298.385102\n", + " 15 NaN USA 1990 2017 85529.450479\n", + " 16 NaN CAN 1979 2018 63298.385102\n", + " 17 NaN USA 1990 2018 85529.450479\n", + " 18 NaN CAN 1979 2019 63298.385102\n", + " 19 NaN USA 1990 2019 85529.450479\n", + " 20 NaN CAN 1979 2020 63298.385102\n", + " 21 NaN USA 1990 2020 85529.450479\n", + " 22 NaN CAN 1979 2021 63298.385102\n", + " 23 NaN USA 1990 2021 85529.450479\n", + " 24 NaN CAN 1979 2022 63298.385102\n", + " 25 NaN USA 1990 2022 85529.450479\n", + " 26 NaN CAN 1979 2023 63298.385102\n", + " 27 NaN USA 1990 2023 85529.450479\n", + " 28 NaN CAN 1979 2024 63298.385102\n", + " 29 NaN USA 1990 2024 85529.450479,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1980 2010 96820.874955\n", 
+ " 1 1.0 USA 0 2010 135226.135957\n", + " 2 NaN CAN 1980 2011 96820.874955\n", + " 3 NaN USA 0 2011 135226.135957\n", + " 4 NaN CAN 1980 2012 96820.874955\n", + " 5 NaN USA 0 2012 135226.135957\n", + " 6 NaN CAN 1980 2013 96820.874955\n", + " 7 NaN USA 0 2013 135226.135957\n", + " 8 NaN CAN 1980 2014 96820.874955\n", + " 9 NaN USA 0 2014 135226.135957\n", + " 10 NaN CAN 1980 2015 96820.874955\n", + " 11 NaN USA 0 2015 135226.135957\n", + " 12 NaN CAN 1980 2016 96820.874955\n", + " 13 NaN USA 0 2016 135226.135957\n", + " 14 NaN CAN 1980 2017 96820.874955\n", + " 15 NaN USA 0 2017 135226.135957\n", + " 16 NaN CAN 1980 2018 96820.874955\n", + " 17 NaN USA 0 2018 135226.135957\n", + " 18 NaN CAN 1980 2019 96820.874955\n", + " 19 NaN USA 0 2019 135226.135957\n", + " 20 NaN CAN 1980 2020 96820.874955\n", + " 21 NaN USA 0 2020 135226.135957\n", + " 22 NaN CAN 1980 2021 96820.874955\n", + " 23 NaN USA 0 2021 135226.135957\n", + " 24 NaN CAN 1980 2022 96820.874955\n", + " 25 NaN USA 0 2022 135226.135957\n", + " 26 NaN CAN 1980 2023 96820.874955\n", + " 27 NaN USA 0 2023 135226.135957\n", + " 28 NaN CAN 1980 2024 96820.874955\n", + " 29 NaN USA 0 2024 135226.135957,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1988 2010 5577.732003\n", + " 1 NaN CAN 1988 2011 5577.732003\n", + " 2 NaN CAN 1988 2012 5577.732003\n", + " 3 NaN CAN 1988 2013 5577.732003\n", + " 4 NaN CAN 1988 2014 5577.732003\n", + " 5 NaN CAN 1988 2015 5577.732003\n", + " 6 NaN CAN 1988 2016 5577.732003\n", + " 7 NaN CAN 1988 2017 5577.732003\n", + " 8 NaN CAN 1988 2018 5577.732003\n", + " 9 NaN CAN 1988 2019 5579.799668\n", + " 10 NaN CAN 1988 2020 5579.799668\n", + " 11 NaN CAN 1988 2021 5579.799668\n", + " 12 NaN CAN 1988 2022 5579.799668\n", + " 13 NaN CAN 1988 2023 5579.799668\n", + " 14 NaN CAN 1988 2024 5579.799668,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1976 2010 62763.953036\n", + " 1 1.0 USA 1976 2010 64769.545867\n", + " 2 NaN CAN 1976 2011 64478.798879\n", + " 3 NaN 
USA 1976 2011 64769.545867\n", + " 4 NaN CAN 1976 2012 68784.536164\n", + " 5 NaN USA 1976 2012 65570.369151\n", + " 6 NaN CAN 1976 2013 68784.536164\n", + " 7 NaN USA 1976 2013 65570.369151\n", + " 8 NaN CAN 1976 2014 68784.536164\n", + " 9 NaN USA 1976 2014 65570.415380\n", + " 10 NaN CAN 1976 2015 68784.536164\n", + " 11 NaN USA 1976 2015 65571.376120\n", + " 12 NaN CAN 1976 2016 68784.536164\n", + " 13 NaN USA 1976 2016 65571.376120\n", + " 14 NaN CAN 1976 2017 68784.536164\n", + " 15 NaN USA 1976 2017 65571.376120\n", + " 16 NaN CAN 1976 2018 68784.536164\n", + " 17 NaN USA 1976 2018 65571.376120\n", + " 18 NaN CAN 1976 2019 68784.536164\n", + " 19 NaN USA 1976 2019 65571.376120\n", + " 20 NaN CAN 1976 2020 68784.536164\n", + " 21 NaN USA 1976 2020 65571.376120\n", + " 22 NaN CAN 1976 2021 73030.248109\n", + " 23 NaN USA 1976 2021 65571.376120\n", + " 24 NaN CAN 1976 2022 73030.248109\n", + " 25 NaN USA 1976 2022 65571.376120\n", + " 26 NaN CAN 1976 2023 73030.248109\n", + " 27 NaN USA 1976 2023 65571.376120\n", + " 28 NaN CAN 1976 2024 73030.248109\n", + " 29 NaN USA 1976 2024 65571.376120,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2004 2010 16729.137271\n", + " 1 1.0 USA 1993 2010 2561.197076\n", + " 2 NaN CAN 2004 2011 16729.137271\n", + " 3 NaN USA 1993 2011 2561.197076\n", + " 4 NaN CAN 2004 2012 16729.137271\n", + " 5 NaN USA 1993 2012 2561.197076\n", + " 6 NaN CAN 2004 2013 16729.137271\n", + " 7 NaN USA 1993 2013 2561.197076\n", + " 8 NaN CAN 2004 2014 16729.137271\n", + " 9 NaN USA 1993 2014 2561.197076\n", + " 10 NaN CAN 2004 2015 16729.137271\n", + " 11 NaN USA 1993 2015 2561.197076\n", + " 12 NaN CAN 2004 2016 16729.137271\n", + " 13 NaN USA 1993 2016 2561.197076\n", + " 14 NaN CAN 2004 2017 16729.137271\n", + " 15 NaN USA 1993 2017 2561.197076\n", + " 16 NaN CAN 2004 2018 16729.137271\n", + " 17 NaN USA 1993 2018 2561.197076\n", + " 18 NaN CAN 2004 2019 22182.981987\n", + " 19 NaN USA 1993 2019 2561.197076\n", + " 20 NaN CAN 2004 
2020 22182.981987\n", + " 21 NaN USA 1993 2020 2561.197076\n", + " 22 NaN CAN 2004 2021 54251.983932\n", + " 23 NaN USA 1993 2021 2561.197076\n", + " 24 NaN CAN 2004 2022 54251.983932\n", + " 25 NaN USA 1993 2022 2561.197076\n", + " 26 NaN CAN 2004 2023 62608.732038\n", + " 27 NaN USA 1993 2023 2561.197076\n", + " 28 NaN CAN 2004 2024 62608.732038\n", + " 29 NaN USA 1993 2024 2561.197076,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 1988 2010 29.345375\n", + " 1 NaN GBR 1988 2011 29.345375\n", + " 2 NaN GBR 1988 2012 29.345375\n", + " 3 NaN GBR 1988 2013 29.345375\n", + " 4 NaN GBR 1988 2014 29.345375\n", + " 5 NaN GBR 1988 2015 29.345375\n", + " 6 NaN GBR 1988 2016 29.345375\n", + " 7 NaN GBR 1988 2017 29.345375\n", + " 8 NaN GBR 1988 2018 29.345375\n", + " 9 NaN GBR 1988 2019 29.345375\n", + " 10 NaN GBR 1988 2020 29.345375\n", + " 11 NaN GBR 1988 2021 29.345375\n", + " 12 NaN GBR 1988 2022 29.345375\n", + " 13 NaN GBR 1988 2023 29.345375\n", + " 14 NaN GBR 1988 2024 29.345375,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1938 2010 355.471742\n", + " 1 NaN USA 1938 2011 355.471742\n", + " 2 NaN USA 1938 2012 355.471742\n", + " 3 NaN USA 1938 2013 355.471742\n", + " 4 NaN USA 1938 2014 355.471742\n", + " 5 NaN USA 1938 2015 355.471742\n", + " 6 NaN USA 1938 2016 355.471742\n", + " 7 NaN USA 1938 2017 355.471742\n", + " 8 NaN USA 1938 2018 355.471742\n", + " 9 NaN USA 1938 2019 355.471742\n", + " 10 NaN USA 1938 2020 355.471742\n", + " 11 NaN USA 1938 2021 355.471742\n", + " 12 NaN USA 1938 2022 355.471742\n", + " 13 NaN USA 1938 2023 355.471742\n", + " 14 NaN USA 1938 2024 355.471742,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 29804.423380\n", + " 1 NaN USA 0 2011 30181.275364\n", + " 2 NaN USA 0 2012 30240.085033\n", + " 3 NaN USA 0 2013 30321.350094\n", + " 4 NaN USA 0 2014 30357.777085\n", + " 5 NaN USA 0 2015 31078.870253\n", + " 6 NaN USA 0 2016 31125.648457\n", + " 7 NaN USA 0 2017 31130.266885\n", + " 8 NaN 
USA 0 2018 31137.308418\n", + " 9 NaN USA 0 2019 31288.556725\n", + " 10 NaN USA 0 2020 31302.487017\n", + " 11 NaN USA 0 2021 31302.487017\n", + " 12 NaN USA 0 2022 31302.487017\n", + " 13 NaN USA 0 2023 31302.487017\n", + " 14 NaN USA 0 2024 31302.487017,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1974 2010 63328.498165\n", + " 1 1.0 USA 1935 2010 27037.801131\n", + " 2 NaN CAN 1974 2011 63385.696182\n", + " 3 NaN USA 1935 2011 27061.993622\n", + " 4 NaN CAN 1974 2012 63386.343733\n", + " 5 NaN USA 1935 2012 27071.184923\n", + " 6 NaN CAN 1974 2013 63540.683553\n", + " 7 NaN USA 1935 2013 27416.547227\n", + " 8 NaN CAN 1974 2014 63540.683553\n", + " 9 NaN USA 1935 2014 27489.617020\n", + " 10 NaN CAN 1974 2015 63540.683553\n", + " 11 NaN USA 1935 2015 27500.301109\n", + " 12 NaN CAN 1974 2016 63665.464828\n", + " 13 NaN USA 1935 2016 27500.347211\n", + " 14 NaN CAN 1974 2017 63665.464828\n", + " 15 NaN USA 1935 2017 27500.347211\n", + " 16 NaN CAN 1974 2018 63667.012813\n", + " 17 NaN USA 1935 2018 27500.484889\n", + " 18 NaN CAN 1974 2019 63784.578003\n", + " 19 NaN USA 1935 2019 27521.857545\n", + " 20 NaN CAN 1974 2020 63797.368571\n", + " 21 NaN USA 1935 2020 27521.857545\n", + " 22 NaN CAN 1974 2021 63807.579645\n", + " 23 NaN USA 1935 2021 27521.857545\n", + " 24 NaN CAN 1974 2022 63949.904261\n", + " 25 NaN USA 1935 2022 27521.857545\n", + " 26 NaN CAN 1974 2023 63961.757844\n", + " 27 NaN USA 1935 2023 27521.857545\n", + " 28 NaN CAN 1974 2024 63961.757844\n", + " 29 NaN USA 1935 2024 27521.857545,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2000 2010 47901.052975\n", + " 1 1.0 USA 1980 2010 0.536548\n", + " 2 NaN CAN 2000 2011 47901.158396\n", + " 3 NaN USA 1980 2011 0.536548\n", + " 4 NaN CAN 2000 2012 72970.438121\n", + " 5 NaN USA 1980 2012 0.536548\n", + " 6 NaN CAN 2000 2013 75519.384536\n", + " 7 NaN USA 1980 2013 0.536548\n", + " 8 NaN CAN 2000 2014 75519.384536\n", + " 9 NaN USA 1980 2014 0.536548\n", + " 10 NaN CAN 
2000 2015 75520.013256\n", + " 11 NaN USA 1980 2015 0.536548\n", + " 12 NaN CAN 2000 2016 75520.013256\n", + " 13 NaN USA 1980 2016 0.536548\n", + " 14 NaN CAN 2000 2017 75520.013256\n", + " 15 NaN USA 1980 2017 0.536548\n", + " 16 NaN CAN 2000 2018 80453.061564\n", + " 17 NaN USA 1980 2018 0.536548\n", + " 18 NaN CAN 2000 2019 80496.789462\n", + " 19 NaN USA 1980 2019 0.536548\n", + " 20 NaN CAN 2000 2020 80865.359730\n", + " 21 NaN USA 1980 2020 0.536548\n", + " 22 NaN CAN 2000 2021 80865.359730\n", + " 23 NaN USA 1980 2021 0.536548\n", + " 24 NaN CAN 2000 2022 80865.601647\n", + " 25 NaN USA 1980 2022 0.536548\n", + " 26 NaN CAN 2000 2023 80868.497205\n", + " 27 NaN USA 1980 2023 0.536548\n", + " 28 NaN CAN 2000 2024 80868.497205\n", + " 29 NaN USA 1980 2024 0.536548,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1994 2010 49011.150452\n", + " 1 NaN CAN 1994 2011 49011.150452\n", + " 2 NaN CAN 1994 2012 49011.150452\n", + " 3 NaN CAN 1994 2013 49011.150452\n", + " 4 NaN CAN 1994 2014 49011.150452\n", + " 5 NaN CAN 1994 2015 49011.150452\n", + " 6 NaN CAN 1994 2016 49011.150452\n", + " 7 NaN CAN 1994 2017 49011.150452\n", + " 8 NaN CAN 1994 2018 49011.150452\n", + " 9 NaN CAN 1994 2019 54070.794769\n", + " 10 NaN CAN 1994 2020 54070.794769\n", + " 11 NaN CAN 1994 2021 54070.794769\n", + " 12 NaN CAN 1994 2022 54070.794769\n", + " 13 NaN CAN 1994 2023 54070.794769\n", + " 14 NaN CAN 1994 2024 54070.794769,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MEX 1994 2010 1027.001034\n", + " 1 NaN MEX 1994 2011 1027.001034\n", + " 2 NaN MEX 1994 2012 1029.490215\n", + " 3 NaN MEX 1994 2013 1029.490215\n", + " 4 NaN MEX 1994 2014 1029.490215\n", + " 5 NaN MEX 1994 2015 1029.490215\n", + " 6 NaN MEX 1994 2016 1029.490215\n", + " 7 NaN MEX 1994 2017 1029.490215\n", + " 8 NaN MEX 1994 2018 1029.490215\n", + " 9 NaN MEX 1994 2019 1029.490215\n", + " 10 NaN MEX 1994 2020 1029.490215\n", + " 11 NaN MEX 1994 2021 1029.490215\n", + " 12 NaN MEX 1994 2022 
1030.291985\n", + " 13 NaN MEX 1994 2023 1030.894302\n", + " 14 NaN MEX 1994 2024 1030.894302,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MEX 2008 2010 79485.526545\n", + " 1 1.0 USA 1984 2010 46879.731453\n", + " 2 NaN MEX 2008 2011 79527.550293\n", + " 3 NaN USA 1984 2011 46911.222599\n", + " 4 NaN MEX 2008 2012 79617.008667\n", + " 5 NaN USA 1984 2012 46928.352715\n", + " 6 NaN MEX 2008 2013 79862.370389\n", + " 7 NaN USA 1984 2013 47064.675166\n", + " 8 NaN MEX 2008 2014 79885.180826\n", + " 9 NaN USA 1984 2014 47969.685784\n", + " 10 NaN MEX 2008 2015 79891.693596\n", + " 11 NaN USA 1984 2015 47971.364099\n", + " 12 NaN MEX 2008 2016 79896.499363\n", + " 13 NaN USA 1984 2016 48013.396423\n", + " 14 NaN MEX 2008 2017 79907.204469\n", + " 15 NaN USA 1984 2017 48016.451525\n", + " 16 NaN MEX 2008 2018 79907.204469\n", + " 17 NaN USA 1984 2018 48016.496604\n", + " 18 NaN MEX 2008 2019 79973.777227\n", + " 19 NaN USA 1984 2019 48178.346956\n", + " 20 NaN MEX 2008 2020 79973.777227\n", + " 21 NaN USA 1984 2020 48186.014220\n", + " 22 NaN MEX 2008 2021 79973.777226\n", + " 23 NaN USA 1984 2021 48186.014220\n", + " 24 NaN MEX 2008 2022 79976.706127\n", + " 25 NaN USA 1984 2022 48186.014220\n", + " 26 NaN MEX 2008 2023 80199.902339\n", + " 27 NaN USA 1984 2023 48186.014220\n", + " 28 NaN MEX 2008 2024 80199.902339\n", + " 29 NaN USA 1984 2024 48186.014220,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 166531.990301\n", + " 1 NaN USA 0 2011 166539.576102\n", + " 2 NaN USA 0 2012 166823.425811\n", + " 3 NaN USA 0 2013 167421.967330\n", + " 4 NaN USA 0 2014 170620.100252\n", + " 5 NaN USA 0 2015 171747.454862\n", + " 6 NaN USA 0 2016 171817.108917\n", + " 7 NaN USA 0 2017 171818.837413\n", + " 8 NaN USA 0 2018 171825.200098\n", + " 9 NaN USA 0 2019 175562.277299\n", + " 10 NaN USA 0 2020 175562.277299\n", + " 11 NaN USA 0 2021 175562.277299\n", + " 12 NaN USA 0 2022 175562.277299\n", + " 13 NaN USA 0 2023 175562.277299\n", + " 14 NaN USA 0 
2024 175562.277299,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1956 2010 54559.373665\n", + " 1 1.0 USA 1990 2010 52547.287589\n", + " 2 NaN CAN 1956 2011 54559.373665\n", + " 3 NaN USA 1990 2011 52598.551673\n", + " 4 NaN CAN 1956 2012 54559.373665\n", + " 5 NaN USA 1990 2012 52631.305999\n", + " 6 NaN CAN 1956 2013 54627.243535\n", + " 7 NaN USA 1990 2013 52697.271999\n", + " 8 NaN CAN 1956 2014 54631.211442\n", + " 9 NaN USA 1990 2014 53677.471886\n", + " 10 NaN CAN 1956 2015 56096.974405\n", + " 11 NaN USA 1990 2015 53727.880188\n", + " 12 NaN CAN 1956 2016 56319.988629\n", + " 13 NaN USA 1990 2016 53733.055195\n", + " 14 NaN CAN 1956 2017 56321.906401\n", + " 15 NaN USA 1990 2017 53734.554274\n", + " 16 NaN CAN 1956 2018 56326.606632\n", + " 17 NaN USA 1990 2018 53734.978877\n", + " 18 NaN CAN 1956 2019 57245.454423\n", + " 19 NaN USA 1990 2019 53742.311646\n", + " 20 NaN CAN 1956 2020 57253.555998\n", + " 21 NaN USA 1990 2020 53742.311646\n", + " 22 NaN CAN 1956 2021 57259.192975\n", + " 23 NaN USA 1990 2021 53742.311646\n", + " 24 NaN CAN 1956 2022 57261.925690\n", + " 25 NaN USA 1990 2022 53742.311646\n", + " 26 NaN CAN 1956 2023 58163.353047\n", + " 27 NaN USA 1990 2023 53742.311646\n", + " 28 NaN CAN 1956 2024 58163.353047\n", + " 29 NaN USA 1990 2024 53742.311646,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2000 2010 62679.240848\n", + " 1 NaN CAN 2000 2011 62679.240848\n", + " 2 NaN CAN 2000 2012 62679.240848\n", + " 3 NaN CAN 2000 2013 63033.395345\n", + " 4 NaN CAN 2000 2014 63033.395345\n", + " 5 NaN CAN 2000 2015 76086.820479\n", + " 6 NaN CAN 2000 2016 76086.820479\n", + " 7 NaN CAN 2000 2017 76086.820479\n", + " 8 NaN CAN 2000 2018 88611.274622\n", + " 9 NaN CAN 2000 2019 97927.861844\n", + " 10 NaN CAN 2000 2020 97927.861844\n", + " 11 NaN CAN 2000 2021 97927.861844\n", + " 12 NaN CAN 2000 2022 97927.861844\n", + " 13 NaN CAN 2000 2023 97980.683888\n", + " 14 NaN CAN 2000 2024 97980.683888,\n", + " index iso_3 
STATUS_YR year area\n", + " 0 0.0 CAN 2005 2010 7943.558885\n", + " 1 NaN CAN 2005 2011 7943.558885\n", + " 2 NaN CAN 2005 2012 7943.558885\n", + " 3 NaN CAN 2005 2013 8559.928287\n", + " 4 NaN CAN 2005 2014 8559.928287\n", + " 5 NaN CAN 2005 2015 8559.928287\n", + " 6 NaN CAN 2005 2016 8559.928287\n", + " 7 NaN CAN 2005 2017 8559.928287\n", + " 8 NaN CAN 2005 2018 8568.189109\n", + " 9 NaN CAN 2005 2019 8568.189109\n", + " 10 NaN CAN 2005 2020 8568.189109\n", + " 11 NaN CAN 2005 2021 8568.189109\n", + " 12 NaN CAN 2005 2022 8568.189109\n", + " 13 NaN CAN 2005 2023 8568.189109\n", + " 14 NaN CAN 2005 2024 8568.189109,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHL 1976 2010 48.5258\n", + " 1 NaN CHL 1976 2011 48.5258\n", + " 2 NaN CHL 1976 2012 48.5258\n", + " 3 NaN CHL 1976 2013 48.5258\n", + " 4 NaN CHL 1976 2014 48.5258\n", + " 5 NaN CHL 1976 2015 48.5258\n", + " 6 NaN CHL 1976 2016 48.5258\n", + " 7 NaN CHL 1976 2017 48.5258\n", + " 8 NaN CHL 1976 2018 48.5258\n", + " 9 NaN CHL 1976 2019 48.5258\n", + " 10 NaN CHL 1976 2020 48.5258\n", + " 11 NaN CHL 1976 2021 48.5258\n", + " 12 NaN CHL 1976 2022 48.5258\n", + " 13 NaN CHL 1976 2023 48.5258\n", + " 14 NaN CHL 1976 2024 48.5258,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MEX 1993 2010 52851.480038\n", + " 1 NaN MEX 1993 2011 53007.461565\n", + " 2 NaN MEX 1993 2012 53865.204565\n", + " 3 NaN MEX 1993 2013 53865.204565\n", + " 4 NaN MEX 1993 2014 53865.204607\n", + " 5 NaN MEX 1993 2015 53865.572896\n", + " 6 NaN MEX 1993 2016 53962.783991\n", + " 7 NaN MEX 1993 2017 53967.417909\n", + " 8 NaN MEX 1993 2018 53967.417909\n", + " 9 NaN MEX 1993 2019 53973.459757\n", + " 10 NaN MEX 1993 2020 53973.459757\n", + " 11 NaN MEX 1993 2021 55073.269355\n", + " 12 NaN MEX 1993 2022 55159.201693\n", + " 13 NaN MEX 1993 2023 55915.117916\n", + " 14 NaN MEX 1993 2024 62061.834031,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MEX 2004 2010 61554.360822\n", + " 1 1.0 USA 2010 2010 33959.081565\n", 
+ " 2 NaN MEX 2004 2011 61554.360822\n", + " 3 NaN USA 2010 2011 33976.626968\n", + " 4 NaN MEX 2004 2012 61795.836726\n", + " 5 NaN USA 2010 2012 34021.421504\n", + " 6 NaN MEX 2004 2013 61803.616377\n", + " 7 NaN USA 2010 2013 34021.713942\n", + " 8 NaN MEX 2004 2014 62219.608947\n", + " 9 NaN USA 2010 2014 34022.322345\n", + " 10 NaN MEX 2004 2015 62359.868144\n", + " 11 NaN USA 2010 2015 34118.766661\n", + " 12 NaN MEX 2004 2016 62425.491311\n", + " 13 NaN USA 2010 2016 34124.346263\n", + " 14 NaN MEX 2004 2017 64430.753205\n", + " 15 NaN USA 2010 2017 34124.346263\n", + " 16 NaN MEX 2004 2018 64500.078793\n", + " 17 NaN USA 2010 2018 34124.346263\n", + " 18 NaN MEX 2004 2019 64501.225284\n", + " 19 NaN USA 2010 2019 34124.751802\n", + " 20 NaN MEX 2004 2020 64501.225284\n", + " 21 NaN USA 2010 2020 34124.751802\n", + " 22 NaN MEX 2004 2021 64585.799248\n", + " 23 NaN USA 2010 2021 34124.751802\n", + " 24 NaN MEX 2004 2022 64750.163831\n", + " 25 NaN USA 2010 2022 34124.751802\n", + " 26 NaN MEX 2004 2023 64877.939699\n", + " 27 NaN USA 2010 2023 34124.751802\n", + " 28 NaN MEX 2004 2024 66675.711751\n", + " 29 NaN USA 2010 2024 34124.751802,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 53909.597613\n", + " 1 NaN USA 0 2011 53983.121889\n", + " 2 NaN USA 0 2012 54102.465017\n", + " 3 NaN USA 0 2013 54153.332339\n", + " 4 NaN USA 0 2014 54518.310878\n", + " 5 NaN USA 0 2015 54808.586119\n", + " 6 NaN USA 0 2016 55811.137046\n", + " 7 NaN USA 0 2017 55811.137383\n", + " 8 NaN USA 0 2018 55813.133600\n", + " 9 NaN USA 0 2019 55847.191023\n", + " 10 NaN USA 0 2020 55848.700787\n", + " 11 NaN USA 0 2021 55848.700787\n", + " 12 NaN USA 0 2022 55848.700787\n", + " 13 NaN USA 0 2023 55848.700787\n", + " 14 NaN USA 0 2024 55848.700787,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2001 2010 24056.559254\n", + " 1 1.0 USA 1996 2010 18036.086297\n", + " 2 NaN CAN 2001 2011 24069.218156\n", + " 3 NaN USA 1996 2011 18058.815213\n", + " 4 NaN 
CAN 2001 2012 24080.729517\n", + " 5 NaN USA 1996 2012 18139.432792\n", + " 6 NaN CAN 2001 2013 24978.657711\n", + " 7 NaN USA 1996 2013 18328.587566\n", + " 8 NaN CAN 2001 2014 25740.210557\n", + " 9 NaN USA 1996 2014 18340.586184\n", + " 10 NaN CAN 2001 2015 27851.041563\n", + " 11 NaN USA 1996 2015 18349.800150\n", + " 12 NaN CAN 2001 2016 29246.565624\n", + " 13 NaN USA 1996 2016 18499.848414\n", + " 14 NaN CAN 2001 2017 31549.722212\n", + " 15 NaN USA 1996 2017 18499.848414\n", + " 16 NaN CAN 2001 2018 32051.435995\n", + " 17 NaN USA 1996 2018 18504.213219\n", + " 18 NaN CAN 2001 2019 32117.672027\n", + " 19 NaN USA 1996 2019 18504.213219\n", + " 20 NaN CAN 2001 2020 34051.440145\n", + " 21 NaN USA 1996 2020 19181.354963\n", + " 22 NaN CAN 2001 2021 34077.979252\n", + " 23 NaN USA 1996 2021 19181.354963\n", + " 24 NaN CAN 2001 2022 34100.723067\n", + " 25 NaN USA 1996 2022 19181.354963\n", + " 26 NaN CAN 2001 2023 34359.785533\n", + " 27 NaN USA 1996 2023 19181.354963\n", + " 28 NaN CAN 2001 2024 34359.785533\n", + " 29 NaN USA 1996 2024 19181.354963,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1959 2010 40600.912689\n", + " 1 NaN CAN 1959 2011 40600.912689\n", + " 2 NaN CAN 1959 2012 40600.912689\n", + " 3 NaN CAN 1959 2013 44257.704939\n", + " 4 NaN CAN 1959 2014 44257.704939\n", + " 5 NaN CAN 1959 2015 44257.704939\n", + " 6 NaN CAN 1959 2016 44257.704939\n", + " 7 NaN CAN 1959 2017 44257.704939\n", + " 8 NaN CAN 1959 2018 59769.447507\n", + " 9 NaN CAN 1959 2019 77743.279559\n", + " 10 NaN CAN 1959 2020 77743.279559\n", + " 11 NaN CAN 1959 2021 77743.279559\n", + " 12 NaN CAN 1959 2022 77743.279559\n", + " 13 NaN CAN 1959 2023 77743.279559\n", + " 14 NaN CAN 1959 2024 77743.279559,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1982 2010 41037.460408\n", + " 1 NaN CAN 1982 2011 41037.460408\n", + " 2 NaN CAN 1982 2012 41037.460408\n", + " 3 NaN CAN 1982 2013 41037.460408\n", + " 4 NaN CAN 1982 2014 41037.460408\n", + " 5 NaN CAN 
1982 2015 41037.460408\n", + " 6 NaN CAN 1982 2016 41037.460408\n", + " 7 NaN CAN 1982 2017 41037.460408\n", + " 8 NaN CAN 1982 2018 60045.006773\n", + " 9 NaN CAN 1982 2019 60045.006773\n", + " 10 NaN CAN 1982 2020 60045.006773\n", + " 11 NaN CAN 1982 2021 60045.006773\n", + " 12 NaN CAN 1982 2022 60045.006773\n", + " 13 NaN CAN 1982 2023 60045.006773\n", + " 14 NaN CAN 1982 2024 60045.006773,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1985 2010 6880.48202\n", + " 1 NaN CAN 1985 2011 6880.48202\n", + " 2 NaN CAN 1985 2012 6880.48202\n", + " 3 NaN CAN 1985 2013 6880.48202\n", + " 4 NaN CAN 1985 2014 6880.48202\n", + " 5 NaN CAN 1985 2015 6880.48202\n", + " 6 NaN CAN 1985 2016 6880.48202\n", + " 7 NaN CAN 1985 2017 6880.48202\n", + " 8 NaN CAN 1985 2018 6880.48202\n", + " 9 NaN CAN 1985 2019 6880.48202\n", + " 10 NaN CAN 1985 2020 6880.48202\n", + " 11 NaN CAN 1985 2021 6880.48202\n", + " 12 NaN CAN 1985 2022 6880.48202\n", + " 13 NaN CAN 1985 2023 6880.48202\n", + " 14 NaN CAN 1985 2024 6880.48202,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ECU 2002 2010 7185.70418\n", + " 1 NaN ECU 2002 2011 7185.70418\n", + " 2 NaN ECU 2002 2012 7185.70418\n", + " 3 NaN ECU 2002 2013 7185.70418\n", + " 4 NaN ECU 2002 2014 7185.70418\n", + " 5 NaN ECU 2002 2015 7185.70418\n", + " 6 NaN ECU 2002 2016 7185.70418\n", + " 7 NaN ECU 2002 2017 7185.70418\n", + " 8 NaN ECU 2002 2018 7185.70418\n", + " 9 NaN ECU 2002 2019 7185.70418\n", + " 10 NaN ECU 2002 2020 7185.70418\n", + " 11 NaN ECU 2002 2021 7185.70418\n", + " 12 NaN ECU 2002 2022 7185.70418\n", + " 13 NaN ECU 2002 2023 7185.70418\n", + " 14 NaN ECU 2002 2024 7185.70418,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GTM 1994 2010 643.097107\n", + " 1 1.0 MEX 1999 2010 12.137273\n", + " 2 2.0 SLV 1994 2010 16.899629\n", + " 3 NaN GTM 1994 2011 654.716144\n", + " 4 NaN MEX 1999 2011 12.137273\n", + " 5 NaN SLV 1994 2011 16.899629\n", + " 6 NaN GTM 1994 2012 654.716144\n", + " 7 NaN MEX 1999 2012 
12.137273\n", + " 8 NaN SLV 1994 2012 16.899629\n", + " 9 NaN GTM 1994 2013 664.853233\n", + " 10 NaN MEX 1999 2013 12.137273\n", + " 11 NaN SLV 1994 2013 16.899629\n", + " 12 NaN GTM 1994 2014 729.867036\n", + " 13 NaN MEX 1999 2014 12.137273\n", + " 14 NaN SLV 1994 2014 58.613856\n", + " 15 NaN GTM 1994 2015 729.867036\n", + " 16 NaN MEX 1999 2015 12.137273\n", + " 17 NaN SLV 1994 2015 58.613856\n", + " 18 NaN GTM 1994 2016 787.059407\n", + " 19 NaN MEX 1999 2016 12.137273\n", + " 20 NaN SLV 1994 2016 58.613856\n", + " 21 NaN GTM 1994 2017 800.998351\n", + " 22 NaN MEX 1999 2017 12.137273\n", + " 23 NaN SLV 1994 2017 58.613856\n", + " 24 NaN GTM 1994 2018 802.402616\n", + " 25 NaN MEX 1999 2018 12.137273\n", + " 26 NaN SLV 1994 2018 58.613856\n", + " 27 NaN GTM 1994 2019 806.574721\n", + " 28 NaN MEX 1999 2019 12.137273\n", + " 29 NaN SLV 1994 2019 58.613856\n", + " 30 NaN GTM 1994 2020 806.574721\n", + " 31 NaN MEX 1999 2020 12.137273\n", + " 32 NaN SLV 1994 2020 58.613856\n", + " 33 NaN GTM 1994 2021 806.574721\n", + " 34 NaN MEX 1999 2021 12.137273\n", + " 35 NaN SLV 1994 2021 58.613856\n", + " 36 NaN GTM 1994 2022 806.574721\n", + " 37 NaN MEX 1999 2022 12.137273\n", + " 38 NaN SLV 1994 2022 58.613856\n", + " 39 NaN GTM 1994 2023 806.574721\n", + " 40 NaN MEX 1999 2023 12.137273\n", + " 41 NaN SLV 1994 2023 58.613856\n", + " 42 NaN GTM 1994 2024 806.574721\n", + " 43 NaN MEX 1999 2024 12.137273\n", + " 44 NaN SLV 1994 2024 58.613856,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GTM 2006 2010 13300.695289\n", + " 1 1.0 MEX 1999 2010 60991.760481\n", + " 2 2.0 USA 2005 2010 1049.476805\n", + " 3 NaN GTM 2006 2011 13309.028750\n", + " 4 NaN MEX 1999 2011 62403.233791\n", + " 5 NaN USA 2005 2011 1049.476805\n", + " 6 NaN GTM 2006 2012 13315.558457\n", + " 7 NaN MEX 1999 2012 62880.464214\n", + " 8 NaN USA 2005 2012 1049.476805\n", + " 9 NaN GTM 2006 2013 13318.925780\n", + " 10 NaN MEX 1999 2013 63490.790912\n", + " 11 NaN USA 2005 2013 1049.476805\n", + 
" 12 NaN GTM 2006 2014 13331.015326\n", + " 13 NaN MEX 1999 2014 63516.388862\n", + " 14 NaN USA 2005 2014 1049.476805\n", + " 15 NaN GTM 2006 2015 13332.096861\n", + " 16 NaN MEX 1999 2015 63521.831567\n", + " 17 NaN USA 2005 2015 1049.476805\n", + " 18 NaN GTM 2006 2016 13333.027748\n", + " 19 NaN MEX 1999 2016 66626.179466\n", + " 20 NaN USA 2005 2016 1049.476805\n", + " 21 NaN GTM 2006 2017 13355.465929\n", + " 22 NaN MEX 1999 2017 66677.882723\n", + " 23 NaN USA 2005 2017 1049.476805\n", + " 24 NaN GTM 2006 2018 13356.554988\n", + " 25 NaN MEX 1999 2018 66762.924871\n", + " 26 NaN USA 2005 2018 1049.476805\n", + " 27 NaN GTM 2006 2019 13388.158430\n", + " 28 NaN MEX 1999 2019 66994.281089\n", + " 29 NaN USA 2005 2019 1049.476805\n", + " 30 NaN GTM 2006 2020 13391.276099\n", + " 31 NaN MEX 1999 2020 67039.830815\n", + " 32 NaN USA 2005 2020 1049.476805\n", + " 33 NaN GTM 2006 2021 13391.276099\n", + " 34 NaN MEX 1999 2021 67627.609695\n", + " 35 NaN USA 2005 2021 1049.476805\n", + " 36 NaN GTM 2006 2022 13391.276099\n", + " 37 NaN MEX 1999 2022 67790.768490\n", + " 38 NaN USA 2005 2022 1049.476805\n", + " 39 NaN GTM 2006 2023 14462.818358\n", + " 40 NaN MEX 1999 2023 73007.886100\n", + " 41 NaN USA 2005 2023 1049.476805\n", + " 42 NaN GTM 2006 2024 14462.818358\n", + " 43 NaN MEX 1999 2024 73459.029117\n", + " 44 NaN USA 2005 2024 1049.476805,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MEX 2005 2010 3834.942926\n", + " 1 1.0 USA 1991 2010 18680.900208\n", + " 2 NaN MEX 2005 2011 3834.942926\n", + " 3 NaN USA 1991 2011 18885.795456\n", + " 4 NaN MEX 2005 2012 3834.942926\n", + " 5 NaN USA 1991 2012 18922.642733\n", + " 6 NaN MEX 2005 2013 3834.942926\n", + " 7 NaN USA 1991 2013 18951.088859\n", + " 8 NaN MEX 2005 2014 3834.942926\n", + " 9 NaN USA 1991 2014 18982.026575\n", + " 10 NaN MEX 2005 2015 3834.942926\n", + " 11 NaN USA 1991 2015 19039.253269\n", + " 12 NaN MEX 2005 2016 3834.942926\n", + " 13 NaN USA 1991 2016 19092.220286\n", + " 14 NaN MEX 
2005 2017 3834.942926\n", + " 15 NaN USA 1991 2017 19092.607476\n", + " 16 NaN MEX 2005 2018 3834.942926\n", + " 17 NaN USA 1991 2018 19092.863353\n", + " 18 NaN MEX 2005 2019 3834.942926\n", + " 19 NaN USA 1991 2019 19094.632991\n", + " 20 NaN MEX 2005 2020 3834.942926\n", + " 21 NaN USA 1991 2020 19094.632991\n", + " 22 NaN MEX 2005 2021 3834.942926\n", + " 23 NaN USA 1991 2021 19094.632991\n", + " 24 NaN MEX 2005 2022 3835.653927\n", + " 25 NaN USA 1991 2022 19094.632991\n", + " 26 NaN MEX 2005 2023 3835.653927\n", + " 27 NaN USA 1991 2023 19094.632991\n", + " 28 NaN MEX 2005 2024 3835.653927\n", + " 29 NaN USA 1991 2024 19094.632991,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1994 2010 3262.003244\n", + " 1 NaN USA 1994 2011 3268.286718\n", + " 2 NaN USA 1994 2012 3277.296609\n", + " 3 NaN USA 1994 2013 3277.458101\n", + " 4 NaN USA 1994 2014 3283.018431\n", + " 5 NaN USA 1994 2015 3284.831625\n", + " 6 NaN USA 1994 2016 3300.045915\n", + " 7 NaN USA 1994 2017 3300.045915\n", + " 8 NaN USA 1994 2018 3300.640030\n", + " 9 NaN USA 1994 2019 3301.638982\n", + " 10 NaN USA 1994 2020 3305.233792\n", + " 11 NaN USA 1994 2021 3305.233792\n", + " 12 NaN USA 1994 2022 3305.233792\n", + " 13 NaN USA 1994 2023 3305.233792\n", + " 14 NaN USA 1994 2024 3305.233792,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 4131.039611\n", + " 1 NaN USA 0 2011 4148.638207\n", + " 2 NaN USA 0 2012 4154.230541\n", + " 3 NaN USA 0 2013 4159.406717\n", + " 4 NaN USA 0 2014 4161.583551\n", + " 5 NaN USA 0 2015 4167.842912\n", + " 6 NaN USA 0 2016 4168.335418\n", + " 7 NaN USA 0 2017 4169.557415\n", + " 8 NaN USA 0 2018 4170.158787\n", + " 9 NaN USA 0 2019 4170.769889\n", + " 10 NaN USA 0 2020 4171.066818\n", + " 11 NaN USA 0 2021 4171.066818\n", + " 12 NaN USA 0 2022 4171.066818\n", + " 13 NaN USA 0 2023 4171.066818\n", + " 14 NaN USA 0 2024 4171.066818,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1989 2010 10750.879902\n", + " 1 NaN USA 1989 
2011 10777.891783\n", + " 2 NaN USA 1989 2012 10801.773077\n", + " 3 NaN USA 1989 2013 10821.296984\n", + " 4 NaN USA 1989 2014 10827.957755\n", + " 5 NaN USA 1989 2015 10876.931304\n", + " 6 NaN USA 1989 2016 10899.645188\n", + " 7 NaN USA 1989 2017 10899.851723\n", + " 8 NaN USA 1989 2018 10900.764199\n", + " 9 NaN USA 1989 2019 10901.042815\n", + " 10 NaN USA 1989 2020 10901.042815\n", + " 11 NaN USA 1989 2021 10901.042815\n", + " 12 NaN USA 1989 2022 10901.042815\n", + " 13 NaN USA 1989 2023 10901.042815\n", + " 14 NaN USA 1989 2024 10901.042815,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1996 2010 5719.691097\n", + " 1 NaN USA 1996 2011 5729.116928\n", + " 2 NaN USA 1996 2012 5734.065477\n", + " 3 NaN USA 1996 2013 5735.610216\n", + " 4 NaN USA 1996 2014 5736.249342\n", + " 5 NaN USA 1996 2015 5745.477795\n", + " 6 NaN USA 1996 2016 5746.171597\n", + " 7 NaN USA 1996 2017 5746.588837\n", + " 8 NaN USA 1996 2018 5747.871851\n", + " 9 NaN USA 1996 2019 5748.780713\n", + " 10 NaN USA 1996 2020 5751.727452\n", + " 11 NaN USA 1996 2021 5751.727452\n", + " 12 NaN USA 1996 2022 5751.727452\n", + " 13 NaN USA 1996 2023 5751.727452\n", + " 14 NaN USA 1996 2024 5751.727452,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1960 2010 40997.036108\n", + " 1 1.0 USA 1996 2010 28744.337811\n", + " 2 NaN CAN 1960 2011 61734.448213\n", + " 3 NaN USA 1996 2011 28766.173608\n", + " 4 NaN CAN 1960 2012 65424.569970\n", + " 5 NaN USA 1996 2012 28787.831106\n", + " 6 NaN CAN 1960 2013 65425.301092\n", + " 7 NaN USA 1996 2013 28794.760101\n", + " 8 NaN CAN 1960 2014 65425.301092\n", + " 9 NaN USA 1996 2014 28817.663431\n", + " 10 NaN CAN 1960 2015 65483.204057\n", + " 11 NaN USA 1996 2015 28838.425601\n", + " 12 NaN CAN 1960 2016 65483.204057\n", + " 13 NaN USA 1996 2016 28842.342310\n", + " 14 NaN CAN 1960 2017 65483.204057\n", + " 15 NaN USA 1996 2017 28847.992748\n", + " 16 NaN CAN 1960 2018 69526.609961\n", + " 17 NaN USA 1996 2018 28850.442679\n", + " 18 
NaN CAN 1960 2019 69534.453697\n", + " 19 NaN USA 1996 2019 28850.447454\n", + " 20 NaN CAN 1960 2020 69591.621620\n", + " 21 NaN USA 1996 2020 28853.068474\n", + " 22 NaN CAN 1960 2021 69592.368363\n", + " 23 NaN USA 1996 2021 28853.562114\n", + " 24 NaN CAN 1960 2022 69593.217773\n", + " 25 NaN USA 1996 2022 28853.562114\n", + " 26 NaN CAN 1960 2023 69782.929892\n", + " 27 NaN USA 1996 2023 28853.562114\n", + " 28 NaN CAN 1960 2024 69782.929892\n", + " 29 NaN USA 1996 2024 28853.562114,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2009 2010 39585.449515\n", + " 1 NaN CAN 2009 2011 39585.449515\n", + " 2 NaN CAN 2009 2012 39585.449515\n", + " 3 NaN CAN 2009 2013 39585.449515\n", + " 4 NaN CAN 2009 2014 39585.449515\n", + " 5 NaN CAN 2009 2015 39585.449515\n", + " 6 NaN CAN 2009 2016 39585.449515\n", + " 7 NaN CAN 2009 2017 39585.449515\n", + " 8 NaN CAN 2009 2018 40013.677455\n", + " 9 NaN CAN 2009 2019 40013.677455\n", + " 10 NaN CAN 2009 2020 40013.677455\n", + " 11 NaN CAN 2009 2021 40013.677455\n", + " 12 NaN CAN 2009 2022 40013.677455\n", + " 13 NaN CAN 2009 2023 40013.677455\n", + " 14 NaN CAN 2009 2024 40013.677455,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2003 2010 25689.466148\n", + " 1 NaN CAN 2003 2011 25689.466148\n", + " 2 NaN CAN 2003 2012 25689.466148\n", + " 3 NaN CAN 2003 2013 25689.466148\n", + " 4 NaN CAN 2003 2014 25689.466148\n", + " 5 NaN CAN 2003 2015 25689.466148\n", + " 6 NaN CAN 2003 2016 25689.466148\n", + " 7 NaN CAN 2003 2017 25689.466148\n", + " 8 NaN CAN 2003 2018 25689.466148\n", + " 9 NaN CAN 2003 2019 25691.127910\n", + " 10 NaN CAN 2003 2020 25691.127910\n", + " 11 NaN CAN 2003 2021 25691.127910\n", + " 12 NaN CAN 2003 2022 25691.127910\n", + " 13 NaN CAN 2003 2023 25691.127910\n", + " 14 NaN CAN 2003 2024 25691.127910,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1982 2010 5004.731061\n", + " 1 NaN CAN 1982 2011 5004.731061\n", + " 2 NaN CAN 1982 2012 5004.731061\n", + " 3 NaN CAN 1982 
2013 5004.731061\n", + " 4 NaN CAN 1982 2014 5004.731061\n", + " 5 NaN CAN 1982 2015 5004.731061\n", + " 6 NaN CAN 1982 2016 5004.731061\n", + " 7 NaN CAN 1982 2017 5004.731061\n", + " 8 NaN CAN 1982 2018 5004.731061\n", + " 9 NaN CAN 1982 2019 5007.684260\n", + " 10 NaN CAN 1982 2020 5007.684260\n", + " 11 NaN CAN 1982 2021 5007.684260\n", + " 12 NaN CAN 1982 2022 5007.684260\n", + " 13 NaN CAN 1982 2023 5007.684260\n", + " 14 NaN CAN 1982 2024 5007.684260,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHL 1935 2010 47.610149\n", + " 1 NaN CHL 1935 2011 47.610149\n", + " 2 NaN CHL 1935 2012 47.610149\n", + " 3 NaN CHL 1935 2013 47.610149\n", + " 4 NaN CHL 1935 2014 47.610149\n", + " 5 NaN CHL 1935 2015 47.610149\n", + " 6 NaN CHL 1935 2016 47.610149\n", + " 7 NaN CHL 1935 2017 47.610149\n", + " 8 NaN CHL 1935 2018 47.610149\n", + " 9 NaN CHL 1935 2019 47.610149\n", + " 10 NaN CHL 1935 2020 47.610149\n", + " 11 NaN CHL 1935 2021 47.610149\n", + " 12 NaN CHL 1935 2022 47.610149\n", + " 13 NaN CHL 1935 2023 47.610149\n", + " 14 NaN CHL 1935 2024 47.610149,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 PER 2008 2010 33.705823\n", + " 1 NaN PER 2008 2011 33.829150\n", + " 2 NaN PER 2008 2012 33.829150\n", + " 3 NaN PER 2008 2013 33.829150\n", + " 4 NaN PER 2008 2014 33.829150\n", + " 5 NaN PER 2008 2015 33.829150\n", + " 6 NaN PER 2008 2016 33.829150\n", + " 7 NaN PER 2008 2017 33.829150\n", + " 8 NaN PER 2008 2018 33.829150\n", + " 9 NaN PER 2008 2019 33.829150\n", + " 10 NaN PER 2008 2020 33.829150\n", + " 11 NaN PER 2008 2021 401.221155\n", + " 12 NaN PER 2008 2022 401.221155\n", + " 13 NaN PER 2008 2023 401.221155\n", + " 14 NaN PER 2008 2024 401.221155,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ECU 2006 2010 2792.694560\n", + " 1 1.0 PER 2006 2010 2395.908145\n", + " 2 NaN ECU 2006 2011 2797.869127\n", + " 3 NaN PER 2006 2011 2484.309012\n", + " 4 NaN ECU 2006 2012 2962.106496\n", + " 5 NaN PER 2006 2012 2620.269749\n", + " 6 NaN ECU 
2006 2013 2962.106496\n", + " 7 NaN PER 2006 2013 2620.269749\n", + " 8 NaN ECU 2006 2014 3013.435258\n", + " 9 NaN PER 2006 2014 2620.269749\n", + " 10 NaN ECU 2006 2015 3013.435258\n", + " 11 NaN PER 2006 2015 2620.269749\n", + " 12 NaN ECU 2006 2016 3013.435258\n", + " 13 NaN PER 2006 2016 2624.283721\n", + " 14 NaN ECU 2006 2017 3065.490818\n", + " 15 NaN PER 2006 2017 2631.102010\n", + " 16 NaN ECU 2006 2018 3065.490818\n", + " 17 NaN PER 2006 2018 2631.102010\n", + " 18 NaN ECU 2006 2019 3065.490818\n", + " 19 NaN PER 2006 2019 2631.102010\n", + " 20 NaN ECU 2006 2020 3065.490818\n", + " 21 NaN PER 2006 2020 2631.102010\n", + " 22 NaN ECU 2006 2021 3065.490818\n", + " 23 NaN PER 2006 2021 2631.102010\n", + " 24 NaN ECU 2006 2022 3115.000940\n", + " 25 NaN PER 2006 2022 2680.612133\n", + " 26 NaN ECU 2006 2023 3115.000940\n", + " 27 NaN PER 2006 2023 2680.612133\n", + " 28 NaN ECU 2006 2024 3115.000940\n", + " 29 NaN PER 2006 2024 2680.612133,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 COL 2001 2010 1.205183\n", + " 1 1.0 CRI 2000 2010 16784.925911\n", + " 2 2.0 GTM 1994 2010 507.055222\n", + " 3 3.0 HND 2001 2010 17134.948380\n", + " 4 4.0 NIC 2010 2010 30986.155125\n", + " .. ... ... ... ... ...\n", + " 100 NaN GTM 1994 2024 520.240475\n", + " 101 NaN HND 2001 2024 17155.600796\n", + " 102 NaN NIC 2010 2024 31084.055043\n", + " 103 NaN PAN 1960 2024 14161.675482\n", + " 104 NaN SLV 1994 2024 2494.205105\n", + " \n", + " [105 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BLZ 2005 2010 11166.154776\n", + " 1 1.0 CUB 2002 2010 8767.608711\n", + " 2 2.0 GBR 1986 2010 24.128977\n", + " 3 3.0 GTM 1956 2010 22247.128913\n", + " 4 4.0 HND 1987 2010 24140.575270\n", + " .. ... ... ... ... 
...\n", + " 115 NaN GTM 1956 2024 22547.909747\n", + " 116 NaN HND 1987 2024 26690.967004\n", + " 117 NaN MEX 1995 2024 27576.461590\n", + " 118 NaN NIC 2001 2024 6627.773026\n", + " 119 NaN USA 1998 2024 7.590589\n", + " \n", + " [120 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1969 2010 27179.217719\n", + " 1 NaN USA 1969 2011 27263.420862\n", + " 2 NaN USA 1969 2012 27429.300241\n", + " 3 NaN USA 1969 2013 27546.984111\n", + " 4 NaN USA 1969 2014 27596.780261\n", + " 5 NaN USA 1969 2015 27822.663165\n", + " 6 NaN USA 1969 2016 27986.186397\n", + " 7 NaN USA 1969 2017 27997.516814\n", + " 8 NaN USA 1969 2018 28001.686341\n", + " 9 NaN USA 1969 2019 28016.017996\n", + " 10 NaN USA 1969 2020 28016.235769\n", + " 11 NaN USA 1969 2021 28016.235769\n", + " 12 NaN USA 1969 2022 28016.235769\n", + " 13 NaN USA 1969 2023 28016.235769\n", + " 14 NaN USA 1969 2024 28016.235769,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 6702.351485\n", + " 1 NaN USA 0 2011 6744.125753\n", + " 2 NaN USA 0 2012 6775.399952\n", + " 3 NaN USA 0 2013 6781.951244\n", + " 4 NaN USA 0 2014 6785.648491\n", + " 5 NaN USA 0 2015 6797.316809\n", + " 6 NaN USA 0 2016 6822.156857\n", + " 7 NaN USA 0 2017 6822.918057\n", + " 8 NaN USA 0 2018 6823.833936\n", + " 9 NaN USA 0 2019 6827.638970\n", + " 10 NaN USA 0 2020 6827.715838\n", + " 11 NaN USA 0 2021 6827.715838\n", + " 12 NaN USA 0 2022 6827.715838\n", + " 13 NaN USA 0 2023 6827.715838\n", + " 14 NaN USA 0 2024 6827.715838,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1996 2010 6329.643976\n", + " 1 NaN USA 1996 2011 6336.039527\n", + " 2 NaN USA 1996 2012 6350.347425\n", + " 3 NaN USA 1996 2013 6357.184140\n", + " 4 NaN USA 1996 2014 6362.457685\n", + " 5 NaN USA 1996 2015 6373.424844\n", + " 6 NaN USA 1996 2016 6377.620273\n", + " 7 NaN USA 1996 2017 6378.528812\n", + " 8 NaN USA 1996 2018 6378.528812\n", + " 9 NaN USA 1996 2019 6379.868500\n", + " 10 NaN USA 1996 2020 
6380.296779\n", + " 11 NaN USA 1996 2021 8881.541125\n", + " 12 NaN USA 1996 2022 8881.541125\n", + " 13 NaN USA 1996 2023 8881.541125\n", + " 14 NaN USA 1996 2024 8881.541125,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 2009 2010 14782.450800\n", + " 1 NaN USA 2009 2011 14791.291734\n", + " 2 NaN USA 2009 2012 14832.437671\n", + " 3 NaN USA 2009 2013 14848.971473\n", + " 4 NaN USA 2009 2014 14857.399519\n", + " 5 NaN USA 2009 2015 14876.780978\n", + " 6 NaN USA 2009 2016 14897.428552\n", + " 7 NaN USA 2009 2017 14904.829291\n", + " 8 NaN USA 2009 2018 14977.528310\n", + " 9 NaN USA 2009 2019 14980.434803\n", + " 10 NaN USA 2009 2020 14980.434803\n", + " 11 NaN USA 2009 2021 14980.434803\n", + " 12 NaN USA 2009 2022 14980.434803\n", + " 13 NaN USA 2009 2023 14980.434803\n", + " 14 NaN USA 2009 2024 14980.434803,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1977 2010 954.006482\n", + " 1 1.0 USA 2006 2010 7079.261363\n", + " 2 NaN CAN 1977 2011 954.006482\n", + " 3 NaN USA 2006 2011 7087.991241\n", + " 4 NaN CAN 1977 2012 954.006482\n", + " 5 NaN USA 2006 2012 7101.155373\n", + " 6 NaN CAN 1977 2013 954.006482\n", + " 7 NaN USA 2006 2013 7107.459640\n", + " 8 NaN CAN 1977 2014 958.724200\n", + " 9 NaN USA 2006 2014 7111.094685\n", + " 10 NaN CAN 1977 2015 958.724200\n", + " 11 NaN USA 2006 2015 7115.014212\n", + " 12 NaN CAN 1977 2016 958.724200\n", + " 13 NaN USA 2006 2016 7119.694685\n", + " 14 NaN CAN 1977 2017 958.724200\n", + " 15 NaN USA 2006 2017 7120.450306\n", + " 16 NaN CAN 1977 2018 961.434429\n", + " 17 NaN USA 2006 2018 7120.450306\n", + " 18 NaN CAN 1977 2019 999.762839\n", + " 19 NaN USA 2006 2019 7120.771289\n", + " 20 NaN CAN 1977 2020 1026.290937\n", + " 21 NaN USA 2006 2020 7120.771289\n", + " 22 NaN CAN 1977 2021 1063.788465\n", + " 23 NaN USA 2006 2021 7120.771289\n", + " 24 NaN CAN 1977 2022 1089.902823\n", + " 25 NaN USA 2006 2022 7120.771289\n", + " 26 NaN CAN 1977 2023 1097.471117\n", + " 27 NaN USA 2006 2023 
7120.771289\n", + " 28 NaN CAN 1977 2024 1097.471117\n", + " 29 NaN USA 2006 2024 7120.771289,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1971 2010 69697.706288\n", + " 1 1.0 USA 2008 2010 43195.346995\n", + " 2 NaN CAN 1971 2011 69697.706288\n", + " 3 NaN USA 2008 2011 43200.767826\n", + " 4 NaN CAN 1971 2012 69868.516000\n", + " 5 NaN USA 2008 2012 43217.399976\n", + " 6 NaN CAN 1971 2013 69868.516000\n", + " 7 NaN USA 2008 2013 43227.775344\n", + " 8 NaN CAN 1971 2014 69945.132328\n", + " 9 NaN USA 2008 2014 43233.544755\n", + " 10 NaN CAN 1971 2015 69945.132328\n", + " 11 NaN USA 2008 2015 43246.913659\n", + " 12 NaN CAN 1971 2016 69945.132328\n", + " 13 NaN USA 2008 2016 43251.821737\n", + " 14 NaN CAN 1971 2017 69945.132328\n", + " 15 NaN USA 2008 2017 43252.146102\n", + " 16 NaN CAN 1971 2018 70038.544733\n", + " 17 NaN USA 2008 2018 43252.660174\n", + " 18 NaN CAN 1971 2019 70055.121385\n", + " 19 NaN USA 2008 2019 43252.660174\n", + " 20 NaN CAN 1971 2020 70070.785625\n", + " 21 NaN USA 2008 2020 43252.660174\n", + " 22 NaN CAN 1971 2021 70381.564335\n", + " 23 NaN USA 2008 2021 43252.660174\n", + " 24 NaN CAN 1971 2022 71821.602251\n", + " 25 NaN USA 2008 2022 43252.660174\n", + " 26 NaN CAN 1971 2023 71854.189787\n", + " 27 NaN USA 2008 2023 43252.660174\n", + " 28 NaN CAN 1971 2024 71854.189787\n", + " 29 NaN USA 2008 2024 43252.660174,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1969 2010 18856.620703\n", + " 1 NaN CAN 1969 2011 18856.620703\n", + " 2 NaN CAN 1969 2012 18856.620703\n", + " 3 NaN CAN 1969 2013 18856.620703\n", + " 4 NaN CAN 1969 2014 18856.620703\n", + " 5 NaN CAN 1969 2015 18856.620703\n", + " 6 NaN CAN 1969 2016 18856.620703\n", + " 7 NaN CAN 1969 2017 18856.620703\n", + " 8 NaN CAN 1969 2018 18856.620703\n", + " 9 NaN CAN 1969 2019 18856.620703\n", + " 10 NaN CAN 1969 2020 18856.620703\n", + " 11 NaN CAN 1969 2021 18856.620703\n", + " 12 NaN CAN 1969 2022 18856.620703\n", + " 13 NaN CAN 1969 2023 
18856.620703\n", + " 14 NaN CAN 1969 2024 18856.620703,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2003 2010 18857.800966\n", + " 1 NaN CAN 2003 2011 18857.800966\n", + " 2 NaN CAN 2003 2012 18857.800966\n", + " 3 NaN CAN 2003 2013 18857.800966\n", + " 4 NaN CAN 2003 2014 18857.800966\n", + " 5 NaN CAN 2003 2015 18857.800966\n", + " 6 NaN CAN 2003 2016 18857.800966\n", + " 7 NaN CAN 2003 2017 18857.800966\n", + " 8 NaN CAN 2003 2018 18857.800966\n", + " 9 NaN CAN 2003 2019 18870.484389\n", + " 10 NaN CAN 2003 2020 18870.484389\n", + " 11 NaN CAN 2003 2021 18870.484389\n", + " 12 NaN CAN 2003 2022 18870.484389\n", + " 13 NaN CAN 2003 2023 18870.484389\n", + " 14 NaN CAN 2003 2024 18870.484389,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN CAN None 2019 0.6567\n", + " 1 NaN CAN None 2020 0.6567\n", + " 2 NaN CAN None 2021 0.6567\n", + " 3 NaN CAN None 2022 0.6567\n", + " 4 NaN CAN None 2023 0.6567\n", + " 5 NaN CAN None 2024 0.6567,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 2009 2010 51010.165489\n", + " 1 1.0 CHL 1965 2010 85985.813945\n", + " 2 NaN ARG 2009 2011 51010.165489\n", + " 3 NaN CHL 1965 2011 85985.813945\n", + " 4 NaN ARG 2009 2012 51010.165489\n", + " 5 NaN CHL 1965 2012 85985.813945\n", + " 6 NaN ARG 2009 2013 51010.165489\n", + " 7 NaN CHL 1965 2013 85985.813945\n", + " 8 NaN ARG 2009 2014 51010.165489\n", + " 9 NaN CHL 1965 2014 85985.813945\n", + " 10 NaN ARG 2009 2015 51545.056997\n", + " 11 NaN CHL 1965 2015 85985.813945\n", + " 12 NaN ARG 2009 2016 51694.722834\n", + " 13 NaN CHL 1965 2016 85985.813945\n", + " 14 NaN ARG 2009 2017 51694.722834\n", + " 15 NaN CHL 1965 2017 85985.813945\n", + " 16 NaN ARG 2009 2018 55102.462852\n", + " 17 NaN CHL 1965 2018 89003.238050\n", + " 18 NaN ARG 2009 2019 55247.830786\n", + " 19 NaN CHL 1965 2019 117154.495040\n", + " 20 NaN ARG 2009 2020 55247.830786\n", + " 21 NaN CHL 1965 2020 117154.495040\n", + " 22 NaN ARG 2009 2021 55247.830786\n", + " 23 NaN CHL 1965 2021 
117154.495040\n", + " 24 NaN ARG 2009 2022 55247.830786\n", + " 25 NaN CHL 1965 2022 117154.495040\n", + " 26 NaN ARG 2009 2023 55247.830786\n", + " 27 NaN CHL 1965 2023 117154.495040\n", + " 28 NaN ARG 2009 2024 55247.830786\n", + " 29 NaN CHL 1965 2024 117154.495040,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 1982 2010 27690.817917\n", + " 1 1.0 CHL 1938 2010 42579.958598\n", + " 2 NaN ARG 1982 2011 27690.817917\n", + " 3 NaN CHL 1938 2011 42579.958598\n", + " 4 NaN ARG 1982 2012 27690.817917\n", + " 5 NaN CHL 1938 2012 42579.958598\n", + " 6 NaN ARG 1982 2013 27690.817917\n", + " 7 NaN CHL 1938 2013 42579.958598\n", + " 8 NaN ARG 1982 2014 27690.817917\n", + " 9 NaN CHL 1938 2014 42579.958598\n", + " 10 NaN ARG 1982 2015 27690.817917\n", + " 11 NaN CHL 1938 2015 42633.332069\n", + " 12 NaN ARG 1982 2016 27690.817917\n", + " 13 NaN CHL 1938 2016 42633.332069\n", + " 14 NaN ARG 1982 2017 27692.742531\n", + " 15 NaN CHL 1938 2017 42635.256684\n", + " 16 NaN ARG 1982 2018 28881.767607\n", + " 17 NaN CHL 1938 2018 44865.868349\n", + " 18 NaN ARG 1982 2019 28881.767607\n", + " 19 NaN CHL 1938 2019 44865.868349\n", + " 20 NaN ARG 1982 2020 28881.767607\n", + " 21 NaN CHL 1938 2020 44911.384382\n", + " 22 NaN ARG 1982 2021 28881.767607\n", + " 23 NaN CHL 1938 2021 44911.638156\n", + " 24 NaN ARG 1982 2022 28881.767607\n", + " 25 NaN CHL 1938 2022 45311.066539\n", + " 26 NaN ARG 1982 2023 28881.767607\n", + " 27 NaN CHL 1938 2023 45332.750256\n", + " 28 NaN ARG 1982 2024 28881.767607\n", + " 29 NaN CHL 1938 2024 45443.733134,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 1985 2010 505.623924\n", + " 1 1.0 CHL 2007 2010 3472.922434\n", + " 2 NaN ARG 1985 2011 505.623924\n", + " 3 NaN CHL 2007 2011 3492.604332\n", + " 4 NaN ARG 1985 2012 505.623924\n", + " 5 NaN CHL 2007 2012 3492.604332\n", + " 6 NaN ARG 1985 2013 505.623924\n", + " 7 NaN CHL 2007 2013 3509.016243\n", + " 8 NaN ARG 1985 2014 505.623924\n", + " 9 NaN CHL 2007 2014 3509.016243\n", 
+ " 10 NaN ARG 1985 2015 505.623924\n", + " 11 NaN CHL 2007 2015 3854.846223\n", + " 12 NaN ARG 1985 2016 505.623924\n", + " 13 NaN CHL 2007 2016 3876.200648\n", + " 14 NaN ARG 1985 2017 746.630203\n", + " 15 NaN CHL 2007 2017 3888.697024\n", + " 16 NaN ARG 1985 2018 746.630203\n", + " 17 NaN CHL 2007 2018 3951.903978\n", + " 18 NaN ARG 1985 2019 746.630203\n", + " 19 NaN CHL 2007 2019 3951.903978\n", + " 20 NaN ARG 1985 2020 746.630203\n", + " 21 NaN CHL 2007 2020 4032.218901\n", + " 22 NaN ARG 1985 2021 2440.747014\n", + " 23 NaN CHL 2007 2021 5753.297064\n", + " 24 NaN ARG 1985 2022 2934.644581\n", + " 25 NaN CHL 2007 2022 6249.385727\n", + " 26 NaN ARG 1985 2023 4006.062428\n", + " 27 NaN CHL 2007 2023 7892.412489\n", + " 28 NaN ARG 1985 2024 4006.062428\n", + " 29 NaN CHL 2007 2024 7900.323017,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BOL 1997 2010 3228.652654\n", + " 1 1.0 CHL 1990 2010 97.465695\n", + " 2 2.0 PER 1992 2010 7722.695490\n", + " 3 NaN BOL 1997 2011 3228.652654\n", + " 4 NaN CHL 1990 2011 97.465695\n", + " 5 NaN PER 1992 2011 7913.774008\n", + " 6 NaN BOL 1997 2012 3228.652654\n", + " 7 NaN CHL 1990 2012 97.465695\n", + " 8 NaN PER 1992 2012 7913.774008\n", + " 9 NaN BOL 1997 2013 3228.652654\n", + " 10 NaN CHL 1990 2013 97.465695\n", + " 11 NaN PER 1992 2013 7913.774008\n", + " 12 NaN BOL 1997 2014 3228.652654\n", + " 13 NaN CHL 1990 2014 172.926698\n", + " 14 NaN PER 1992 2014 7913.774008\n", + " 15 NaN BOL 1997 2015 3228.652654\n", + " 16 NaN CHL 1990 2015 172.926698\n", + " 17 NaN PER 1992 2015 7913.774008\n", + " 18 NaN BOL 1997 2016 3228.652654\n", + " 19 NaN CHL 1990 2016 172.926698\n", + " 20 NaN PER 1992 2016 7913.774008\n", + " 21 NaN BOL 1997 2017 3228.652654\n", + " 22 NaN CHL 1990 2017 172.926698\n", + " 23 NaN PER 1992 2017 7913.774008\n", + " 24 NaN BOL 1997 2018 3228.652654\n", + " 25 NaN CHL 1990 2018 172.926698\n", + " 26 NaN PER 1992 2018 7913.774008\n", + " 27 NaN BOL 1997 2019 3228.652654\n", + " 28 NaN CHL 1990 
2019 173.035515\n", + " 29 NaN PER 1992 2019 7913.774008\n", + " 30 NaN BOL 1997 2020 3228.652654\n", + " 31 NaN CHL 1990 2020 173.035515\n", + " 32 NaN PER 1992 2020 7913.774008\n", + " 33 NaN BOL 1997 2021 3228.652654\n", + " 34 NaN CHL 1990 2021 173.057958\n", + " 35 NaN PER 1992 2021 7913.774008\n", + " 36 NaN BOL 1997 2022 3228.652654\n", + " 37 NaN CHL 1990 2022 182.134759\n", + " 38 NaN PER 1992 2022 7913.774008\n", + " 39 NaN BOL 1997 2023 3228.652654\n", + " 40 NaN CHL 1990 2023 182.158236\n", + " 41 NaN PER 1992 2023 7913.774008\n", + " 42 NaN BOL 1997 2024 3228.652654\n", + " 43 NaN CHL 1990 2024 182.158236\n", + " 44 NaN PER 1992 2024 7913.774008,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BOL 1997 2010 1522.603530\n", + " 1 1.0 BRA 1981 2010 143102.913001\n", + " 2 2.0 PER 1997 2010 209882.502530\n", + " 3 NaN BOL 1997 2011 1522.603530\n", + " 4 NaN BRA 1981 2011 143102.913001\n", + " 5 NaN PER 1997 2011 212200.896378\n", + " 6 NaN BOL 1997 2012 1522.603530\n", + " 7 NaN BRA 1981 2012 143102.913001\n", + " 8 NaN PER 1997 2012 212771.896444\n", + " 9 NaN BOL 1997 2013 1522.603530\n", + " 10 NaN BRA 1981 2013 143102.913001\n", + " 11 NaN PER 1997 2013 212800.791643\n", + " 12 NaN BOL 1997 2014 1522.603530\n", + " 13 NaN BRA 1981 2014 143102.913001\n", + " 14 NaN PER 1997 2014 212828.401546\n", + " 15 NaN BOL 1997 2015 1522.603530\n", + " 16 NaN BRA 1981 2015 156595.900906\n", + " 17 NaN PER 1997 2015 226805.860583\n", + " 18 NaN BOL 1997 2016 1522.603530\n", + " 19 NaN BRA 1981 2016 160858.487368\n", + " 20 NaN PER 1997 2016 226922.126630\n", + " 21 NaN BOL 1997 2017 1522.603530\n", + " 22 NaN BRA 1981 2017 160858.487368\n", + " 23 NaN PER 1997 2017 227371.559482\n", + " 24 NaN BOL 1997 2018 1522.603530\n", + " 25 NaN BRA 1981 2018 160858.487368\n", + " 26 NaN PER 1997 2018 230026.536457\n", + " 27 NaN BOL 1997 2019 1522.603530\n", + " 28 NaN BRA 1981 2019 160858.487368\n", + " 29 NaN PER 1997 2019 231384.309487\n", + " 30 NaN BOL 1997 2020 
1522.603530\n", + " 31 NaN BRA 1981 2020 160858.487368\n", + " 32 NaN PER 1997 2020 231417.576283\n", + " 33 NaN BOL 1997 2021 1522.603530\n", + " 34 NaN BRA 1981 2021 162362.658063\n", + " 35 NaN PER 1997 2021 234994.697550\n", + " 36 NaN BOL 1997 2022 1522.603530\n", + " 37 NaN BRA 1981 2022 162362.658063\n", + " 38 NaN PER 1997 2022 235094.783094\n", + " 39 NaN BOL 1997 2023 1522.603530\n", + " 40 NaN BRA 1981 2023 162362.658063\n", + " 41 NaN PER 1997 2023 235185.459563\n", + " 42 NaN BOL 1997 2024 1522.603530\n", + " 43 NaN BRA 1981 2024 162362.658063\n", + " 44 NaN PER 1997 2024 235185.459563,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2002 2010 26734.594433\n", + " 1 1.0 COL 1975 2010 107988.055324\n", + " 2 2.0 ECU 1992 2010 56609.149618\n", + " 3 3.0 PER 2002 2010 84784.666674\n", + " 4 NaN BRA 2002 2011 26734.594433\n", + " 5 NaN COL 1975 2011 108001.376332\n", + " 6 NaN ECU 1992 2011 56655.677568\n", + " 7 NaN PER 2002 2011 94396.856156\n", + " 8 NaN BRA 2002 2012 26734.594433\n", + " 9 NaN COL 1975 2012 108179.313254\n", + " 10 NaN ECU 1992 2012 61576.658386\n", + " 11 NaN PER 2002 2012 100368.759219\n", + " 12 NaN BRA 2002 2013 26734.594433\n", + " 13 NaN COL 1975 2013 108185.021525\n", + " 14 NaN ECU 1992 2013 61576.658386\n", + " 15 NaN PER 2002 2013 100398.129047\n", + " 16 NaN BRA 2002 2014 26734.594433\n", + " 17 NaN COL 1975 2014 108301.032455\n", + " 18 NaN ECU 1992 2014 62510.168570\n", + " 19 NaN PER 2002 2014 100399.275613\n", + " 20 NaN BRA 2002 2015 26734.594433\n", + " 21 NaN COL 1975 2015 108803.274637\n", + " 22 NaN ECU 1992 2015 62510.168570\n", + " 23 NaN PER 2002 2015 104340.238873\n", + " 24 NaN BRA 2002 2016 26734.594433\n", + " 25 NaN COL 1975 2016 109321.595731\n", + " 26 NaN ECU 1992 2016 62531.740483\n", + " 27 NaN PER 2002 2016 104778.721378\n", + " 28 NaN BRA 2002 2017 26734.594433\n", + " 29 NaN COL 1975 2017 109646.917617\n", + " 30 NaN ECU 1992 2017 68906.117667\n", + " 31 NaN PER 2002 2017 107741.716201\n", 
+ " 32 NaN BRA 2002 2018 26734.594433\n", + " 33 NaN COL 1975 2018 112171.495563\n", + " 34 NaN ECU 1992 2018 69264.290718\n", + " 35 NaN PER 2002 2018 116577.765821\n", + " 36 NaN BRA 2002 2019 26734.594433\n", + " 37 NaN COL 1975 2019 112666.532381\n", + " 38 NaN ECU 1992 2019 69558.951518\n", + " 39 NaN PER 2002 2019 116810.043049\n", + " 40 NaN BRA 2002 2020 26734.594433\n", + " 41 NaN COL 1975 2020 113149.982255\n", + " 42 NaN ECU 1992 2020 69606.799986\n", + " 43 NaN PER 2002 2020 116821.617916\n", + " 44 NaN BRA 2002 2021 26734.594433\n", + " 45 NaN COL 1975 2021 113288.106876\n", + " 46 NaN ECU 1992 2021 69943.476396\n", + " 47 NaN PER 2002 2021 116850.202448\n", + " 48 NaN BRA 2002 2022 26734.594433\n", + " 49 NaN COL 1975 2022 113458.305389\n", + " 50 NaN ECU 1992 2022 70053.912249\n", + " 51 NaN PER 2002 2022 116850.202448\n", + " 52 NaN BRA 2002 2023 26734.594433\n", + " 53 NaN COL 1975 2023 114192.494934\n", + " 54 NaN ECU 1992 2023 70426.696176\n", + " 55 NaN PER 2002 2023 116850.202448\n", + " 56 NaN BRA 2002 2024 26734.594433\n", + " 57 NaN COL 1975 2024 114192.494934\n", + " 58 NaN ECU 1992 2024 70426.696176\n", + " 59 NaN PER 2002 2024 116850.202448,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 COL 2001 2010 95091.551564\n", + " 1 1.0 NLD 0 2010 0.580379\n", + " 2 2.0 PAN 1981 2010 14189.675915\n", + " 3 3.0 VEN 1977 2010 84853.363676\n", + " 4 NaN COL 2001 2011 96120.246735\n", + " 5 NaN NLD 0 2011 0.580379\n", + " 6 NaN PAN 1981 2011 14189.675915\n", + " 7 NaN VEN 1977 2011 85273.144373\n", + " 8 NaN COL 2001 2012 96532.942591\n", + " 9 NaN NLD 0 2012 0.580379\n", + " 10 NaN PAN 1981 2012 14189.675915\n", + " 11 NaN VEN 1977 2012 85273.144373\n", + " 12 NaN COL 2001 2013 97461.010626\n", + " 13 NaN NLD 0 2013 0.580379\n", + " 14 NaN PAN 1981 2013 14570.563029\n", + " 15 NaN VEN 1977 2013 85273.144372\n", + " 16 NaN COL 2001 2014 98238.318388\n", + " 17 NaN NLD 0 2014 0.580379\n", + " 18 NaN PAN 1981 2014 14570.563029\n", + " 19 NaN VEN 
1977 2014 85273.144372\n", + " 20 NaN COL 2001 2015 100021.049205\n", + " 21 NaN NLD 0 2015 0.580379\n", + " 22 NaN PAN 1981 2015 14570.968374\n", + " 23 NaN VEN 1977 2015 85273.144372\n", + " 24 NaN COL 2001 2016 100617.873339\n", + " 25 NaN NLD 0 2016 0.580379\n", + " 26 NaN PAN 1981 2016 14570.968374\n", + " 27 NaN VEN 1977 2016 85592.398582\n", + " 28 NaN COL 2001 2017 102936.609655\n", + " 29 NaN NLD 0 2017 0.580379\n", + " 30 NaN PAN 1981 2017 14764.155448\n", + " 31 NaN VEN 1977 2017 85592.398582\n", + " 32 NaN COL 2001 2018 105142.885638\n", + " 33 NaN NLD 0 2018 0.580379\n", + " 34 NaN PAN 1981 2018 14764.155448\n", + " 35 NaN VEN 1977 2018 86942.066219\n", + " 36 NaN COL 2001 2019 108648.076261\n", + " 37 NaN NLD 0 2019 0.580379\n", + " 38 NaN PAN 1981 2019 14765.209193\n", + " 39 NaN VEN 1977 2019 86941.117723\n", + " 40 NaN COL 2001 2020 109228.459312\n", + " 41 NaN NLD 0 2020 0.580379\n", + " 42 NaN PAN 1981 2020 14765.209193\n", + " 43 NaN VEN 1977 2020 86941.117723\n", + " 44 NaN COL 2001 2021 110581.218071\n", + " 45 NaN NLD 0 2021 0.580379\n", + " 46 NaN PAN 1981 2021 14765.209193\n", + " 47 NaN VEN 1977 2021 86941.117723\n", + " 48 NaN COL 2001 2022 110742.620553\n", + " 49 NaN NLD 0 2022 0.580379\n", + " 50 NaN PAN 1981 2022 14765.209193\n", + " 51 NaN VEN 1977 2022 86941.117723\n", + " 52 NaN COL 2001 2023 110934.508098\n", + " 53 NaN NLD 0 2023 0.580379\n", + " 54 NaN PAN 1981 2023 14765.209193\n", + " 55 NaN VEN 1977 2023 86941.117723\n", + " 56 NaN COL 2001 2024 110934.508098\n", + " 57 NaN NLD 0 2024 0.580379\n", + " 58 NaN PAN 1981 2024 14765.209193\n", + " 59 NaN VEN 1977 2024 86941.117723,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BHS 1997 2010 900.402845\n", + " 1 1.0 CUB 1999 2010 8759.698352\n", + " 2 2.0 DOM 1983 2010 10379.940967\n", + " 3 3.0 GBR 1986 2010 445.511158\n", + " 4 4.0 HTI 1983 2010 2281.966969\n", + " .. ... ... ... ... 
...\n", + " 85 NaN CUB 1999 2024 10488.381312\n", + " 86 NaN DOM 1983 2024 11205.551016\n", + " 87 NaN GBR 1986 2024 445.554495\n", + " 88 NaN HTI 1983 2024 4035.470685\n", + " 89 NaN JAM 1969 2024 2209.578562\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BHS 2002 2010 239.129919\n", + " 1 1.0 USA 2000 2010 1534.926160\n", + " 2 NaN BHS 2002 2011 239.129919\n", + " 3 NaN USA 2000 2011 1568.174826\n", + " 4 NaN BHS 2002 2012 890.299348\n", + " 5 NaN USA 2000 2012 1576.534385\n", + " 6 NaN BHS 2002 2013 890.299348\n", + " 7 NaN USA 2000 2013 1592.560578\n", + " 8 NaN BHS 2002 2014 890.299348\n", + " 9 NaN USA 2000 2014 1592.603274\n", + " 10 NaN BHS 2002 2015 1512.653183\n", + " 11 NaN USA 2000 2015 1602.486859\n", + " 12 NaN BHS 2002 2016 1512.653183\n", + " 13 NaN USA 2000 2016 1602.486859\n", + " 14 NaN BHS 2002 2017 1512.653183\n", + " 15 NaN USA 2000 2017 1602.486859\n", + " 16 NaN BHS 2002 2018 1512.653183\n", + " 17 NaN USA 2000 2018 1602.486859\n", + " 18 NaN BHS 2002 2019 1512.653183\n", + " 19 NaN USA 2000 2019 1602.486859\n", + " 20 NaN BHS 2002 2020 1512.653183\n", + " 21 NaN USA 2000 2020 1604.211343\n", + " 22 NaN BHS 2002 2021 1512.653183\n", + " 23 NaN USA 2000 2021 1604.211343\n", + " 24 NaN BHS 2002 2022 1512.653183\n", + " 25 NaN USA 2000 2022 1604.211343\n", + " 26 NaN BHS 2002 2023 1512.653183\n", + " 27 NaN USA 2000 2023 1604.211343\n", + " 28 NaN BHS 2002 2024 1512.653183\n", + " 29 NaN USA 2000 2024 1604.211343,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 13053.121200\n", + " 1 NaN USA 0 2011 13130.216662\n", + " 2 NaN USA 0 2012 13156.740884\n", + " 3 NaN USA 0 2013 13165.673740\n", + " 4 NaN USA 0 2014 13167.065215\n", + " 5 NaN USA 0 2015 13285.052204\n", + " 6 NaN USA 0 2016 13301.947674\n", + " 7 NaN USA 0 2017 13323.981948\n", + " 8 NaN USA 0 2018 13326.173807\n", + " 9 NaN USA 0 2019 13326.288177\n", + " 10 NaN USA 0 2020 13330.287833\n", + " 11 NaN USA 0 2021 
13330.287833\n", + " 12 NaN USA 0 2022 13330.287833\n", + " 13 NaN USA 0 2023 13330.287833\n", + " 14 NaN USA 0 2024 13330.287833,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1978 2010 250.523871\n", + " 1 1.0 USA 0 2010 6434.912072\n", + " 2 NaN CAN 1978 2011 250.523871\n", + " 3 NaN USA 0 2011 6541.137270\n", + " 4 NaN CAN 1978 2012 250.523871\n", + " 5 NaN USA 0 2012 6554.336477\n", + " 6 NaN CAN 1978 2013 250.523871\n", + " 7 NaN USA 0 2013 6583.510295\n", + " 8 NaN CAN 1978 2014 250.523871\n", + " 9 NaN USA 0 2014 6584.879002\n", + " 10 NaN CAN 1978 2015 297.095285\n", + " 11 NaN USA 0 2015 6595.371299\n", + " 12 NaN CAN 1978 2016 297.095285\n", + " 13 NaN USA 0 2016 6621.856161\n", + " 14 NaN CAN 1978 2017 297.095285\n", + " 15 NaN USA 0 2017 6624.340182\n", + " 16 NaN CAN 1978 2018 358.406490\n", + " 17 NaN USA 0 2018 6624.340182\n", + " 18 NaN CAN 1978 2019 392.621722\n", + " 19 NaN USA 0 2019 6624.505653\n", + " 20 NaN CAN 1978 2020 422.407535\n", + " 21 NaN USA 0 2020 6624.505653\n", + " 22 NaN CAN 1978 2021 451.580506\n", + " 23 NaN USA 0 2021 6624.505653\n", + " 24 NaN CAN 1978 2022 530.465352\n", + " 25 NaN USA 0 2022 6624.505653\n", + " 26 NaN CAN 1978 2023 657.097383\n", + " 27 NaN USA 0 2023 6624.505653\n", + " 28 NaN CAN 1978 2024 657.097383\n", + " 29 NaN USA 0 2024 6624.505653,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 975.534589\n", + " 1 NaN USA 0 2011 975.534589\n", + " 2 NaN USA 0 2012 975.534589\n", + " 3 NaN USA 0 2013 975.534589\n", + " 4 NaN USA 0 2014 975.698991\n", + " 5 NaN USA 0 2015 975.698991\n", + " 6 NaN USA 0 2016 975.698991\n", + " 7 NaN USA 0 2017 975.698991\n", + " 8 NaN USA 0 2018 975.774004\n", + " 9 NaN USA 0 2019 975.774004\n", + " 10 NaN USA 0 2020 975.774004\n", + " 11 NaN USA 0 2021 975.774004\n", + " 12 NaN USA 0 2022 975.774004\n", + " 13 NaN USA 0 2023 975.774004\n", + " 14 NaN USA 0 2024 975.774004,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1986 2010 
3934.651607\n", + " 1 NaN USA 1986 2011 3937.114572\n", + " 2 NaN USA 1986 2012 3940.137612\n", + " 3 NaN USA 1986 2013 3943.504891\n", + " 4 NaN USA 1986 2014 3943.900765\n", + " 5 NaN USA 1986 2015 3944.575704\n", + " 6 NaN USA 1986 2016 3945.448607\n", + " 7 NaN USA 1986 2017 3945.453040\n", + " 8 NaN USA 1986 2018 3945.453040\n", + " 9 NaN USA 1986 2019 3945.517624\n", + " 10 NaN USA 1986 2020 3945.517624\n", + " 11 NaN USA 1986 2021 3945.517624\n", + " 12 NaN USA 1986 2022 3945.517624\n", + " 13 NaN USA 1986 2023 3945.517624\n", + " 14 NaN USA 1986 2024 3945.517624,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 2010 2010 11483.784453\n", + " 1 NaN USA 2010 2011 11493.674017\n", + " 2 NaN USA 2010 2012 11505.704052\n", + " 3 NaN USA 2010 2013 11516.496146\n", + " 4 NaN USA 2010 2014 11524.127383\n", + " 5 NaN USA 2010 2015 11525.267681\n", + " 6 NaN USA 2010 2016 11526.066397\n", + " 7 NaN USA 2010 2017 11526.066397\n", + " 8 NaN USA 2010 2018 11526.066397\n", + " 9 NaN USA 2010 2019 11526.066397\n", + " 10 NaN USA 2010 2020 11526.066397\n", + " 11 NaN USA 2010 2021 11526.066397\n", + " 12 NaN USA 2010 2022 11526.066397\n", + " 13 NaN USA 2010 2023 11526.066397\n", + " 14 NaN USA 2010 2024 11526.066397,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 772.529239\n", + " 1 NaN USA 0 2011 778.209712\n", + " 2 NaN USA 0 2012 786.496916\n", + " 3 NaN USA 0 2013 790.027743\n", + " 4 NaN USA 0 2014 793.245708\n", + " 5 NaN USA 0 2015 794.038378\n", + " 6 NaN USA 0 2016 794.432697\n", + " 7 NaN USA 0 2017 794.432697\n", + " 8 NaN USA 0 2018 794.432697\n", + " 9 NaN USA 0 2019 794.432697\n", + " 10 NaN USA 0 2020 794.536283\n", + " 11 NaN USA 0 2021 794.536283\n", + " 12 NaN USA 0 2022 794.536283\n", + " 13 NaN USA 0 2023 794.536283\n", + " 14 NaN USA 0 2024 794.536283,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 2002 2010 1941.969141\n", + " 1 NaN USA 2002 2011 1982.856578\n", + " 2 NaN USA 2002 2012 2003.956676\n", + " 3 NaN 
USA 2002 2013 2018.258801\n", + " 4 NaN USA 2002 2014 2034.051727\n", + " 5 NaN USA 2002 2015 2061.612303\n", + " 6 NaN USA 2002 2016 2070.558535\n", + " 7 NaN USA 2002 2017 2071.570984\n", + " 8 NaN USA 2002 2018 2071.570984\n", + " 9 NaN USA 2002 2019 2071.570984\n", + " 10 NaN USA 2002 2020 2071.743397\n", + " 11 NaN USA 2002 2021 2071.743397\n", + " 12 NaN USA 2002 2022 2071.743397\n", + " 13 NaN USA 2002 2023 2071.743397\n", + " 14 NaN USA 2002 2024 2071.743397,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1963 2010 49365.791454\n", + " 1 1.0 USA 0 2010 2334.285642\n", + " 2 NaN CAN 1963 2011 52596.537535\n", + " 3 NaN USA 0 2011 2347.155672\n", + " 4 NaN CAN 1963 2012 55103.149619\n", + " 5 NaN USA 0 2012 2347.155672\n", + " 6 NaN CAN 1963 2013 55386.915171\n", + " 7 NaN USA 0 2013 2348.402673\n", + " 8 NaN CAN 1963 2014 55960.370377\n", + " 9 NaN USA 0 2014 2396.346097\n", + " 10 NaN CAN 1963 2015 56042.793350\n", + " 11 NaN USA 0 2015 2425.540775\n", + " 12 NaN CAN 1963 2016 56263.411571\n", + " 13 NaN USA 0 2016 2425.696305\n", + " 14 NaN CAN 1963 2017 56940.541202\n", + " 15 NaN USA 0 2017 2425.696305\n", + " 16 NaN CAN 1963 2018 63034.897256\n", + " 17 NaN USA 0 2018 2427.207033\n", + " 18 NaN CAN 1963 2019 64065.801402\n", + " 19 NaN USA 0 2019 2427.207033\n", + " 20 NaN CAN 1963 2020 104799.447252\n", + " 21 NaN USA 0 2020 2428.508856\n", + " 22 NaN CAN 1963 2021 108903.540481\n", + " 23 NaN USA 0 2021 2428.508856\n", + " 24 NaN CAN 1963 2022 109958.711084\n", + " 25 NaN USA 0 2022 2428.508856\n", + " 26 NaN CAN 1963 2023 111310.980300\n", + " 27 NaN USA 0 2023 2428.508856\n", + " 28 NaN CAN 1963 2024 111310.980300\n", + " 29 NaN USA 0 2024 2428.508856,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2008 2010 20597.748156\n", + " 1 NaN CAN 2008 2011 20597.748156\n", + " 2 NaN CAN 2008 2012 20597.748156\n", + " 3 NaN CAN 2008 2013 45887.362289\n", + " 4 NaN CAN 2008 2014 45887.362289\n", + " 5 NaN CAN 2008 2015 45887.362289\n", + 
" 6 NaN CAN 2008 2016 45887.362289\n", + " 7 NaN CAN 2008 2017 45887.362289\n", + " 8 NaN CAN 2008 2018 50515.212128\n", + " 9 NaN CAN 2008 2019 50515.212128\n", + " 10 NaN CAN 2008 2020 71833.742561\n", + " 11 NaN CAN 2008 2021 71833.742561\n", + " 12 NaN CAN 2008 2022 71846.665384\n", + " 13 NaN CAN 2008 2023 71846.665384\n", + " 14 NaN CAN 2008 2024 71846.665384,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1982 2010 19722.543802\n", + " 1 NaN CAN 1982 2011 19722.543802\n", + " 2 NaN CAN 1982 2012 19722.543802\n", + " 3 NaN CAN 1982 2013 19722.543802\n", + " 4 NaN CAN 1982 2014 19722.543802\n", + " 5 NaN CAN 1982 2015 19722.543802\n", + " 6 NaN CAN 1982 2016 19722.543802\n", + " 7 NaN CAN 1982 2017 19722.543802\n", + " 8 NaN CAN 1982 2018 19738.249661\n", + " 9 NaN CAN 1982 2019 19748.724090\n", + " 10 NaN CAN 1982 2020 19748.724090\n", + " 11 NaN CAN 1982 2021 19748.724090\n", + " 12 NaN CAN 1982 2022 19748.724090\n", + " 13 NaN CAN 1982 2023 19748.724090\n", + " 14 NaN CAN 1982 2024 19748.724090,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1995 2010 300.591420\n", + " 1 NaN CAN 1995 2011 300.591420\n", + " 2 NaN CAN 1995 2012 300.591420\n", + " 3 NaN CAN 1995 2013 300.591420\n", + " 4 NaN CAN 1995 2014 300.591420\n", + " 5 NaN CAN 1995 2015 23667.120367\n", + " 6 NaN CAN 1995 2016 23667.120367\n", + " 7 NaN CAN 1995 2017 23667.120367\n", + " 8 NaN CAN 1995 2018 23667.120367\n", + " 9 NaN CAN 1995 2019 23667.147145\n", + " 10 NaN CAN 1995 2020 23667.147145\n", + " 11 NaN CAN 1995 2021 23667.147145\n", + " 12 NaN CAN 1995 2022 23667.147145\n", + " 13 NaN CAN 1995 2023 23667.147145\n", + " 14 NaN CAN 1995 2024 23667.147145,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHL 1945 2010 779.842364\n", + " 1 NaN CHL 1945 2011 779.842364\n", + " 2 NaN CHL 1945 2012 779.842364\n", + " 3 NaN CHL 1945 2013 779.842364\n", + " 4 NaN CHL 1945 2014 779.842364\n", + " 5 NaN CHL 1945 2015 779.842364\n", + " 6 NaN CHL 1945 2016 779.842364\n", + 
" 7 NaN CHL 1945 2017 779.842364\n", + " 8 NaN CHL 1945 2018 779.842364\n", + " 9 NaN CHL 1945 2019 779.842364\n", + " 10 NaN CHL 1945 2020 779.842364\n", + " 11 NaN CHL 1945 2021 779.842364\n", + " 12 NaN CHL 1945 2022 779.842364\n", + " 13 NaN CHL 1945 2023 779.842364\n", + " 14 NaN CHL 1945 2024 779.842364,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 1997 2010 5704.539725\n", + " 1 1.0 CHL 1965 2010 6116.932266\n", + " 2 2.0 GBR 1978 2010 41.532858\n", + " 3 NaN ARG 1997 2011 5704.539725\n", + " 4 NaN CHL 1965 2011 6116.932266\n", + " 5 NaN GBR 1978 2011 41.532858\n", + " 6 NaN ARG 1997 2012 6337.634195\n", + " 7 NaN CHL 1965 2012 6116.932266\n", + " 8 NaN GBR 1978 2012 41.532858\n", + " 9 NaN ARG 1997 2013 6337.634195\n", + " 10 NaN CHL 1965 2013 6116.932266\n", + " 11 NaN GBR 1978 2013 41.532858\n", + " 12 NaN ARG 1997 2014 7449.247291\n", + " 13 NaN CHL 1965 2014 7205.780091\n", + " 14 NaN GBR 1978 2014 41.532858\n", + " 15 NaN ARG 1997 2015 7449.247291\n", + " 16 NaN CHL 1965 2015 7205.780091\n", + " 17 NaN GBR 1978 2015 41.532858\n", + " 18 NaN ARG 1997 2016 7945.108460\n", + " 19 NaN CHL 1965 2016 7205.780091\n", + " 20 NaN GBR 1978 2016 41.532858\n", + " 21 NaN ARG 1997 2017 7945.108460\n", + " 22 NaN CHL 1965 2017 7205.780091\n", + " 23 NaN GBR 1978 2017 41.532858\n", + " 24 NaN ARG 1997 2018 7945.108460\n", + " 25 NaN CHL 1965 2018 7208.615086\n", + " 26 NaN GBR 1978 2018 41.532858\n", + " 27 NaN ARG 1997 2019 7945.108460\n", + " 28 NaN CHL 1965 2019 7208.615086\n", + " 29 NaN GBR 1978 2019 41.532858\n", + " 30 NaN ARG 1997 2020 7945.108460\n", + " 31 NaN CHL 1965 2020 7208.615086\n", + " 32 NaN GBR 1978 2020 41.532858\n", + " 33 NaN ARG 1997 2021 7945.108460\n", + " 34 NaN CHL 1965 2021 7208.615086\n", + " 35 NaN GBR 1978 2021 41.532858\n", + " 36 NaN ARG 1997 2022 7945.108460\n", + " 37 NaN CHL 1965 2022 7208.615086\n", + " 38 NaN GBR 1978 2022 41.532858\n", + " 39 NaN ARG 1997 2023 7945.108460\n", + " 40 NaN CHL 1965 2023 
7208.615086\n", + " 41 NaN GBR 1978 2023 41.532858\n", + " 42 NaN ARG 1997 2024 7945.108460\n", + " 43 NaN CHL 1965 2024 7208.615086\n", + " 44 NaN GBR 1978 2024 41.532858,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 2009 2010 36796.559383\n", + " 1 NaN ARG 2009 2011 36804.668128\n", + " 2 NaN ARG 2009 2012 36804.668128\n", + " 3 NaN ARG 2009 2013 37059.298123\n", + " 4 NaN ARG 2009 2014 37059.298123\n", + " 5 NaN ARG 2009 2015 37059.298123\n", + " 6 NaN ARG 2009 2016 37059.298123\n", + " 7 NaN ARG 2009 2017 38027.545133\n", + " 8 NaN ARG 2009 2018 38027.545133\n", + " 9 NaN ARG 2009 2019 38027.545133\n", + " 10 NaN ARG 2009 2020 38027.545133\n", + " 11 NaN ARG 2009 2021 38027.545133\n", + " 12 NaN ARG 2009 2022 38029.587737\n", + " 13 NaN ARG 2009 2023 38029.587737\n", + " 14 NaN ARG 2009 2024 38029.587737,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 2004 2010 78459.115795\n", + " 1 1.0 CHL 1994 2010 28962.072664\n", + " 2 NaN ARG 2004 2011 80211.213351\n", + " 3 NaN CHL 1994 2011 28962.072664\n", + " 4 NaN ARG 2004 2012 80237.796946\n", + " 5 NaN CHL 1994 2012 28962.072664\n", + " 6 NaN ARG 2004 2013 80237.796946\n", + " 7 NaN CHL 1994 2013 28962.072664\n", + " 8 NaN ARG 2004 2014 80298.465728\n", + " 9 NaN CHL 1994 2014 28962.072664\n", + " 10 NaN ARG 2004 2015 84547.284418\n", + " 11 NaN CHL 1994 2015 28962.072664\n", + " 12 NaN ARG 2004 2016 84547.284418\n", + " 13 NaN CHL 1994 2016 28962.072664\n", + " 14 NaN ARG 2004 2017 84628.231630\n", + " 15 NaN CHL 1994 2017 28962.072664\n", + " 16 NaN ARG 2004 2018 85546.559380\n", + " 17 NaN CHL 1994 2018 28962.072664\n", + " 18 NaN ARG 2004 2019 85546.559380\n", + " 19 NaN CHL 1994 2019 28962.072664\n", + " 20 NaN ARG 2004 2020 85546.559380\n", + " 21 NaN CHL 1994 2020 28962.072664\n", + " 22 NaN ARG 2004 2021 89757.078976\n", + " 23 NaN CHL 1994 2021 30981.165727\n", + " 24 NaN ARG 2004 2022 89759.714516\n", + " 25 NaN CHL 1994 2022 30981.165727\n", + " 26 NaN ARG 2004 2023 
90494.284332\n", + " 27 NaN CHL 1994 2023 31715.735543\n", + " 28 NaN ARG 2004 2024 90494.284332\n", + " 29 NaN CHL 1994 2024 31715.735543,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 1996 2010 39988.509823\n", + " 1 1.0 BOL 1974 2010 162667.325585\n", + " 2 2.0 BRA 2001 2010 29804.848660\n", + " 3 3.0 CHL 1980 2010 37554.742734\n", + " 4 4.0 PER 2009 2010 11030.710081\n", + " .. ... ... ... ... ...\n", + " 85 NaN BOL 1974 2024 163771.869617\n", + " 86 NaN BRA 2001 2024 29804.848660\n", + " 87 NaN CHL 1980 2024 38580.555050\n", + " 88 NaN PER 2009 2024 11036.319137\n", + " 89 NaN PRY 1966 2024 70391.050640\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BOL 1997 2010 136129.689944\n", + " 1 1.0 BRA 1995 2010 374604.457670\n", + " 2 2.0 PER 1997 2010 57073.999425\n", + " 3 NaN BOL 1997 2011 136129.689944\n", + " 4 NaN BRA 1995 2011 386228.352848\n", + " 5 NaN PER 1997 2011 57073.999425\n", + " 6 NaN BOL 1997 2012 136129.689944\n", + " 7 NaN BRA 1995 2012 386229.080160\n", + " 8 NaN PER 1997 2012 57074.209651\n", + " 9 NaN BOL 1997 2013 136129.689944\n", + " 10 NaN BRA 1995 2013 386229.080160\n", + " 11 NaN PER 1997 2013 57074.209651\n", + " 12 NaN BOL 1997 2014 136129.689944\n", + " 13 NaN BRA 1995 2014 386785.393080\n", + " 14 NaN PER 1997 2014 57074.706205\n", + " 15 NaN BOL 1997 2015 136129.689944\n", + " 16 NaN BRA 1995 2015 386787.308790\n", + " 17 NaN PER 1997 2015 57074.706205\n", + " 18 NaN BOL 1997 2016 136129.689944\n", + " 19 NaN BRA 1995 2016 399433.858292\n", + " 20 NaN PER 1997 2016 57101.569358\n", + " 21 NaN BOL 1997 2017 136141.998828\n", + " 22 NaN BRA 1995 2017 399440.216481\n", + " 23 NaN PER 1997 2017 57102.900347\n", + " 24 NaN BOL 1997 2018 137037.263510\n", + " 25 NaN BRA 1995 2018 404003.768222\n", + " 26 NaN PER 1997 2018 57104.025793\n", + " 27 NaN BOL 1997 2019 137037.263510\n", + " 28 NaN BRA 1995 2019 404009.270624\n", + " 29 NaN PER 1997 2019 57133.322771\n", + " 30 NaN BOL 
1997 2020 137037.263510\n", + " 31 NaN BRA 1995 2020 404009.270624\n", + " 32 NaN PER 1997 2020 57133.322771\n", + " 33 NaN BOL 1997 2021 137037.263510\n", + " 34 NaN BRA 1995 2021 404009.270624\n", + " 35 NaN PER 1997 2021 57156.349435\n", + " 36 NaN BOL 1997 2022 137037.263510\n", + " 37 NaN BRA 1995 2022 404009.342575\n", + " 38 NaN PER 1997 2022 57156.349435\n", + " 39 NaN BOL 1997 2023 137037.263510\n", + " 40 NaN BRA 1995 2023 404009.372949\n", + " 41 NaN PER 1997 2023 57156.349435\n", + " 42 NaN BOL 1997 2024 137037.263510\n", + " 43 NaN BRA 1995 2024 404009.372949\n", + " 44 NaN PER 1997 2024 57156.349435,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2002 2010 636066.352098\n", + " 1 1.0 COL 2010 2010 109971.870125\n", + " 2 2.0 GUY 2006 2010 9308.892558\n", + " 3 3.0 PER 2002 2010 3372.504964\n", + " 4 4.0 VEN 1989 2010 245290.131517\n", + " .. ... ... ... ... ...\n", + " 70 NaN BRA 2002 2024 683116.673456\n", + " 71 NaN COL 2010 2024 109972.624133\n", + " 72 NaN GUY 2006 2024 9308.892558\n", + " 73 NaN PER 2002 2024 3372.504964\n", + " 74 NaN VEN 1989 2024 246386.848547\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 1990 2010 124316.072819\n", + " 1 1.0 COL 1963 2010 21284.479341\n", + " 2 2.0 FRA 1988 2010 282.014931\n", + " 3 3.0 GRD 0 2010 70.179348\n", + " 4 4.0 GUY 1962 2010 70642.201952\n", + " .. ... ... ... ... ...\n", + " 145 NaN LCA 1986 2024 244.111009\n", + " 146 NaN NLD 1899 2024 170.423367\n", + " 147 NaN TTO 0 2024 1481.009041\n", + " 148 NaN VCT 0 2024 146.350021\n", + " 149 NaN VEN 1990 2024 325176.716026\n", + " \n", + " [150 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ATG 0 2010 100.971552\n", + " 1 1.0 DMA 1998 2010 161.379924\n", + " 2 2.0 DOM 2004 2010 1840.018735\n", + " 3 3.0 FRA 2004 2010 1643.762247\n", + " 4 4.0 GBR 0 2010 28.607646\n", + " .. ... ... ... ... 
...\n", + " 115 NaN FRA 2004 2024 1644.740061\n", + " 116 NaN GBR 0 2024 39.764523\n", + " 117 NaN KNA 0 2024 55.641489\n", + " 118 NaN NLD 1998 2024 76.979728\n", + " 119 NaN USA 0 2024 641.261859\n", + " \n", + " [120 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 1963 2010 1.402686\n", + " 1 NaN GBR 1963 2011 1.402686\n", + " 2 NaN GBR 1963 2012 1.402686\n", + " 3 NaN GBR 1963 2013 1.402686\n", + " 4 NaN GBR 1963 2014 1.402686\n", + " 5 NaN GBR 1963 2015 1.402686\n", + " 6 NaN GBR 1963 2016 1.402686\n", + " 7 NaN GBR 1963 2017 1.402686\n", + " 8 NaN GBR 1963 2018 1.402686\n", + " 9 NaN GBR 1963 2019 1.402686\n", + " 10 NaN GBR 1963 2020 1.402686\n", + " 11 NaN GBR 1963 2021 1.402686\n", + " 12 NaN GBR 1963 2022 1.402686\n", + " 13 NaN GBR 1963 2023 1.402686\n", + " 14 NaN GBR 1963 2024 1.402686,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2008 2010 1820.522592\n", + " 1 1.0 USA 0 2010 452.631455\n", + " 2 NaN CAN 2008 2011 1851.050573\n", + " 3 NaN USA 0 2011 455.165842\n", + " 4 NaN CAN 2008 2012 1883.468799\n", + " 5 NaN USA 0 2012 458.447854\n", + " 6 NaN CAN 2008 2013 1884.030903\n", + " 7 NaN USA 0 2013 459.795979\n", + " 8 NaN CAN 2008 2014 1885.477773\n", + " 9 NaN USA 0 2014 461.599484\n", + " 10 NaN CAN 2008 2015 1941.698012\n", + " 11 NaN USA 0 2015 462.909677\n", + " 12 NaN CAN 2008 2016 2428.490173\n", + " 13 NaN USA 0 2016 462.909677\n", + " 14 NaN CAN 2008 2017 2436.198938\n", + " 15 NaN USA 0 2017 462.909677\n", + " 16 NaN CAN 2008 2018 2437.685660\n", + " 17 NaN USA 0 2018 462.909677\n", + " 18 NaN CAN 2008 2019 2451.373507\n", + " 19 NaN USA 0 2019 462.909677\n", + " 20 NaN CAN 2008 2020 2479.986493\n", + " 21 NaN USA 0 2020 462.909677\n", + " 22 NaN CAN 2008 2021 2538.052352\n", + " 23 NaN USA 0 2021 462.909677\n", + " 24 NaN CAN 2008 2022 2561.977202\n", + " 25 NaN USA 0 2022 462.909677\n", + " 26 NaN CAN 2008 2023 2657.199819\n", + " 27 NaN USA 0 2023 462.909677\n", + " 28 NaN CAN 2008 2024 
2657.199819\n", + " 29 NaN USA 0 2024 462.909677,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2002 2010 24210.976463\n", + " 1 1.0 USA 1989 2010 4170.269049\n", + " 2 NaN CAN 2002 2011 24271.948158\n", + " 3 NaN USA 1989 2011 4172.298829\n", + " 4 NaN CAN 2002 2012 25184.818879\n", + " 5 NaN USA 1989 2012 4175.827309\n", + " 6 NaN CAN 2002 2013 25203.062178\n", + " 7 NaN USA 1989 2013 4177.690539\n", + " 8 NaN CAN 2002 2014 26495.389003\n", + " 9 NaN USA 1989 2014 4179.287177\n", + " 10 NaN CAN 2002 2015 26745.910355\n", + " 11 NaN USA 1989 2015 4199.668031\n", + " 12 NaN CAN 2002 2016 27577.863318\n", + " 13 NaN USA 1989 2016 4217.926832\n", + " 14 NaN CAN 2002 2017 27762.545300\n", + " 15 NaN USA 1989 2017 4217.926832\n", + " 16 NaN CAN 2002 2018 27924.110239\n", + " 17 NaN USA 1989 2018 4217.926832\n", + " 18 NaN CAN 2002 2019 28882.151028\n", + " 19 NaN USA 1989 2019 4217.926832\n", + " 20 NaN CAN 2002 2020 42994.134041\n", + " 21 NaN USA 1989 2020 4217.926832\n", + " 22 NaN CAN 2002 2021 46890.817663\n", + " 23 NaN USA 1989 2021 4217.926832\n", + " 24 NaN CAN 2002 2022 50496.696459\n", + " 25 NaN USA 1989 2022 4252.985059\n", + " 26 NaN CAN 2002 2023 51782.744943\n", + " 27 NaN USA 1989 2023 4346.801905\n", + " 28 NaN CAN 2002 2024 51782.744943\n", + " 29 NaN USA 1989 2024 4346.801905,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2009 2010 36369.009802\n", + " 1 NaN CAN 2009 2011 36369.009802\n", + " 2 NaN CAN 2009 2012 36369.009802\n", + " 3 NaN CAN 2009 2013 36369.009802\n", + " 4 NaN CAN 2009 2014 36369.009802\n", + " 5 NaN CAN 2009 2015 36369.009802\n", + " 6 NaN CAN 2009 2016 38553.523355\n", + " 7 NaN CAN 2009 2017 38553.523355\n", + " 8 NaN CAN 2009 2018 40049.440620\n", + " 9 NaN CAN 2009 2019 40049.440620\n", + " 10 NaN CAN 2009 2020 45877.610978\n", + " 11 NaN CAN 2009 2021 45877.610978\n", + " 12 NaN CAN 2009 2022 45877.610978\n", + " 13 NaN CAN 2009 2023 45877.610978\n", + " 14 NaN CAN 2009 2024 45877.610978,\n", + " 
index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 2010 2010 18817.414468\n", + " 1 NaN CAN 2010 2011 18817.414468\n", + " 2 NaN CAN 2010 2012 18817.414468\n", + " 3 NaN CAN 2010 2013 18817.414468\n", + " 4 NaN CAN 2010 2014 18817.414468\n", + " 5 NaN CAN 2010 2015 18817.414468\n", + " 6 NaN CAN 2010 2016 18817.414468\n", + " 7 NaN CAN 2010 2017 18817.414468\n", + " 8 NaN CAN 2010 2018 18829.637061\n", + " 9 NaN CAN 2010 2019 18829.637061\n", + " 10 NaN CAN 2010 2020 18829.637061\n", + " 11 NaN CAN 2010 2021 18829.637061\n", + " 12 NaN CAN 2010 2022 18829.637061\n", + " 13 NaN CAN 2010 2023 18829.637061\n", + " 14 NaN CAN 2010 2024 18829.637061,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1977 2010 4964.942908\n", + " 1 NaN DNK 1977 2011 4964.942908\n", + " 2 NaN DNK 1977 2012 4964.942908\n", + " 3 NaN DNK 1977 2013 4964.942908\n", + " 4 NaN DNK 1977 2014 4964.942908\n", + " 5 NaN CAN None 2015 11412.276493\n", + " 6 NaN DNK 1977 2015 4964.942908\n", + " 7 NaN CAN None 2016 11412.276493\n", + " 8 NaN DNK 1977 2016 4964.942908\n", + " 9 NaN CAN None 2017 11412.276493\n", + " 10 NaN DNK 1977 2017 4964.942908\n", + " 11 NaN CAN None 2018 11412.276493\n", + " 12 NaN DNK 1977 2018 4964.942908\n", + " 13 NaN CAN None 2019 11412.276493\n", + " 14 NaN DNK 1977 2019 4964.942908\n", + " 15 NaN CAN None 2020 11412.276493\n", + " 16 NaN DNK 1977 2020 4964.942908\n", + " 17 NaN CAN None 2021 11412.276493\n", + " 18 NaN DNK 1977 2021 4964.942908\n", + " 19 NaN CAN None 2022 11412.276493\n", + " 20 NaN DNK 1977 2022 4964.942908\n", + " 21 NaN CAN None 2023 11412.276493\n", + " 22 NaN DNK 1977 2023 4964.942908\n", + " 23 NaN CAN None 2024 11412.276493\n", + " 24 NaN DNK 1977 2024 4964.942908,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 2001 2010 23.455629\n", + " 1 NaN GBR 2001 2011 23.455629\n", + " 2 NaN GBR 2001 2012 23.455629\n", + " 3 NaN GBR 2001 2013 23.455629\n", + " 4 NaN GBR 2001 2014 23.455629\n", + " 5 NaN GBR 2001 2015 23.455629\n", + " 6 
NaN GBR 2001 2016 23.455629\n", + " 7 NaN GBR 2001 2017 23.455629\n", + " 8 NaN GBR 2001 2018 23.455629\n", + " 9 NaN GBR 2001 2019 23.455629\n", + " 10 NaN GBR 2001 2020 23.455629\n", + " 11 NaN GBR 2001 2021 23.455629\n", + " 12 NaN GBR 2001 2022 23.455629\n", + " 13 NaN GBR 2001 2023 23.455629\n", + " 14 NaN GBR 2001 2024 23.455629,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 2001 2010 5696.564369\n", + " 1 NaN ARG 2001 2011 5696.564369\n", + " 2 NaN ARG 2001 2012 5696.564369\n", + " 3 NaN ARG 2001 2013 5696.564369\n", + " 4 NaN ARG 2001 2014 5696.564369\n", + " 5 NaN ARG 2001 2015 5696.564369\n", + " 6 NaN ARG 2001 2016 5696.564369\n", + " 7 NaN ARG 2001 2017 5696.564369\n", + " 8 NaN ARG 2001 2018 5696.564369\n", + " 9 NaN ARG 2001 2019 5696.564369\n", + " 10 NaN ARG 2001 2020 5696.564369\n", + " 11 NaN ARG 2001 2021 5697.009647\n", + " 12 NaN ARG 2001 2022 5697.009647\n", + " 13 NaN ARG 2001 2023 5697.009647\n", + " 14 NaN ARG 2001 2024 5697.009647,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 2006 2010 28769.264689\n", + " 1 1.0 BRA 2009 2010 17859.734043\n", + " 2 2.0 PRY 1994 2010 8286.047567\n", + " 3 3.0 URY 2010 2010 8872.638616\n", + " 4 NaN ARG 2006 2011 28900.063545\n", + " 5 NaN BRA 2009 2011 17859.734043\n", + " 6 NaN PRY 1994 2011 8286.922823\n", + " 7 NaN URY 2010 2011 9115.012140\n", + " 8 NaN ARG 2006 2012 28904.803093\n", + " 9 NaN BRA 2009 2012 17861.778766\n", + " 10 NaN PRY 1994 2012 8300.623898\n", + " 11 NaN URY 2010 2012 9115.012140\n", + " 12 NaN ARG 2006 2013 28916.922439\n", + " 13 NaN BRA 2009 2013 17862.050250\n", + " 14 NaN PRY 1994 2013 8300.623898\n", + " 15 NaN URY 2010 2013 9126.903779\n", + " 16 NaN ARG 2006 2014 28916.922439\n", + " 17 NaN BRA 2009 2014 17924.687593\n", + " 18 NaN PRY 1994 2014 8300.623898\n", + " 19 NaN URY 2010 2014 9424.776953\n", + " 20 NaN ARG 2006 2015 28916.922439\n", + " 21 NaN BRA 2009 2015 17937.339170\n", + " 22 NaN PRY 1994 2015 8300.623898\n", + " 23 NaN URY 2010 2015 
10013.944879\n", + " 24 NaN ARG 2006 2016 28916.922439\n", + " 25 NaN BRA 2009 2016 17942.387979\n", + " 26 NaN PRY 1994 2016 8303.027663\n", + " 27 NaN URY 2010 2016 10013.944879\n", + " 28 NaN ARG 2006 2017 28916.922439\n", + " 29 NaN BRA 2009 2017 17944.814157\n", + " 30 NaN PRY 1994 2017 8303.027663\n", + " 31 NaN URY 2010 2017 10016.371057\n", + " 32 NaN ARG 2006 2018 29197.658347\n", + " 33 NaN BRA 2009 2018 17950.110784\n", + " 34 NaN PRY 1994 2018 8303.027663\n", + " 35 NaN URY 2010 2018 10016.371057\n", + " 36 NaN ARG 2006 2019 29198.296989\n", + " 37 NaN BRA 2009 2019 18239.713788\n", + " 38 NaN PRY 1994 2019 8303.027663\n", + " 39 NaN URY 2010 2019 10274.684876\n", + " 40 NaN ARG 2006 2020 29198.296989\n", + " 41 NaN BRA 2009 2020 18241.127179\n", + " 42 NaN PRY 1994 2020 8303.027663\n", + " 43 NaN URY 2010 2020 10274.730520\n", + " 44 NaN ARG 2006 2021 29198.296989\n", + " 45 NaN BRA 2009 2021 18270.621302\n", + " 46 NaN PRY 1994 2021 8303.027663\n", + " 47 NaN URY 2010 2021 10274.730520\n", + " 48 NaN ARG 2006 2022 29253.005268\n", + " 49 NaN BRA 2009 2022 18272.009171\n", + " 50 NaN PRY 1994 2022 8360.366388\n", + " 51 NaN URY 2010 2022 10274.730520\n", + " 52 NaN ARG 2006 2023 29253.005268\n", + " 53 NaN BRA 2009 2023 18272.011561\n", + " 54 NaN PRY 1994 2023 8360.366388\n", + " 55 NaN URY 2010 2023 10274.730520\n", + " 56 NaN ARG 2006 2024 29253.005268\n", + " 57 NaN BRA 2009 2024 18272.011561\n", + " 58 NaN PRY 1994 2024 8360.366388\n", + " 59 NaN URY 2010 2024 10274.730520,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARG 1939 2010 2236.417730\n", + " 1 1.0 BOL 2001 2010 58237.604685\n", + " 2 2.0 BRA 1995 2010 111224.569389\n", + " 3 3.0 PRY 1973 2010 41179.545064\n", + " 4 NaN ARG 1939 2011 2236.417730\n", + " 5 NaN BOL 2001 2011 58237.604685\n", + " 6 NaN BRA 1995 2011 111304.185839\n", + " 7 NaN PRY 1973 2011 41276.156887\n", + " 8 NaN ARG 1939 2012 2236.417730\n", + " 9 NaN BOL 2001 2012 58237.604685\n", + " 10 NaN BRA 1995 2012 
111430.608957\n", + " 11 NaN PRY 1973 2012 41319.362835\n", + " 12 NaN ARG 1939 2013 2236.417730\n", + " 13 NaN BOL 2001 2013 58237.604685\n", + " 14 NaN BRA 1995 2013 111533.906726\n", + " 15 NaN PRY 1973 2013 41399.136851\n", + " 16 NaN ARG 1939 2014 2236.417730\n", + " 17 NaN BOL 2001 2014 58237.604685\n", + " 18 NaN BRA 1995 2014 111706.207573\n", + " 19 NaN PRY 1973 2014 41763.637416\n", + " 20 NaN ARG 1939 2015 2236.417730\n", + " 21 NaN BOL 2001 2015 58237.604685\n", + " 22 NaN BRA 1995 2015 111706.207573\n", + " 23 NaN PRY 1973 2015 41836.328118\n", + " 24 NaN ARG 1939 2016 2236.417730\n", + " 25 NaN BOL 2001 2016 58237.604685\n", + " 26 NaN BRA 1995 2016 111746.449251\n", + " 27 NaN PRY 1973 2016 41848.551505\n", + " 28 NaN ARG 1939 2017 2236.417730\n", + " 29 NaN BOL 2001 2017 58237.604685\n", + " 30 NaN BRA 1995 2017 111748.421688\n", + " 31 NaN PRY 1973 2017 41848.551505\n", + " 32 NaN ARG 1939 2018 2236.417730\n", + " 33 NaN BOL 2001 2018 58237.604685\n", + " 34 NaN BRA 1995 2018 111790.225164\n", + " 35 NaN PRY 1973 2018 41848.551505\n", + " 36 NaN ARG 1939 2019 2236.417730\n", + " 37 NaN BOL 2001 2019 58237.604685\n", + " 38 NaN BRA 1995 2019 111790.421833\n", + " 39 NaN PRY 1973 2019 41848.551505\n", + " 40 NaN ARG 1939 2020 2236.417730\n", + " 41 NaN BOL 2001 2020 58237.604685\n", + " 42 NaN BRA 1995 2020 111796.055339\n", + " 43 NaN PRY 1973 2020 41848.551505\n", + " 44 NaN ARG 1939 2021 2236.417730\n", + " 45 NaN BOL 2001 2021 58237.604685\n", + " 46 NaN BRA 1995 2021 111806.146961\n", + " 47 NaN PRY 1973 2021 41848.551505\n", + " 48 NaN ARG 1939 2022 2236.417730\n", + " 49 NaN BOL 2001 2022 58237.604685\n", + " 50 NaN BRA 1995 2022 112920.144428\n", + " 51 NaN PRY 1973 2022 41848.551505\n", + " 52 NaN ARG 1939 2023 2236.417730\n", + " 53 NaN BOL 2001 2023 58237.604685\n", + " 54 NaN BRA 1995 2023 112921.473147\n", + " 55 NaN PRY 1973 2023 41848.551505\n", + " 56 NaN ARG 1939 2024 2236.417730\n", + " 57 NaN BOL 2001 2024 58237.604685\n", + " 58 
NaN BRA 1995 2024 112924.504861\n", + " 59 NaN PRY 1973 2024 41848.551505,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 1987 2010 469138.250035\n", + " 1 NaN BRA 1987 2011 469203.297878\n", + " 2 NaN BRA 1987 2012 470402.278863\n", + " 3 NaN BRA 1987 2013 470402.278863\n", + " 4 NaN BRA 1987 2014 475226.533495\n", + " 5 NaN BRA 1987 2015 475232.496824\n", + " 6 NaN BRA 1987 2016 489632.897081\n", + " 7 NaN BRA 1987 2017 489656.982034\n", + " 8 NaN BRA 1987 2018 489659.532338\n", + " 9 NaN BRA 1987 2019 489659.532338\n", + " 10 NaN BRA 1987 2020 489660.088636\n", + " 11 NaN BRA 1987 2021 489659.533800\n", + " 12 NaN BRA 1987 2022 489659.817592\n", + " 13 NaN BRA 1987 2023 489661.017926\n", + " 14 NaN BRA 1987 2024 489661.017926,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2006 2010 600838.326250\n", + " 1 1.0 FRA 2006 2010 101894.435680\n", + " 2 2.0 GUY 2004 2010 60660.143171\n", + " 3 3.0 SUR 1980 2010 161133.323662\n", + " 4 4.0 VEN 2006 2010 3407.117764\n", + " .. ... ... ... ... ...\n", + " 70 NaN BRA 2006 2024 610094.243242\n", + " 71 NaN FRA 2006 2024 101997.953154\n", + " 72 NaN GUY 2004 2024 66786.165809\n", + " 73 NaN SUR 1980 2024 161133.323662\n", + " 74 NaN VEN 2006 2024 3407.117764\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2006 2010 31.400023\n", + " 1 1.0 BRB 1979 2010 6.869354\n", + " 2 2.0 FRA 1995 2010 4265.236321\n", + " 3 3.0 GUY 2006 2010 1966.850511\n", + " 4 4.0 SUR 2000 2010 7433.507985\n", + " .. ... ... ... ... 
...\n", + " 85 NaN BRB 1979 2024 6.869354\n", + " 86 NaN FRA 1995 2024 4313.248452\n", + " 87 NaN GUY 2006 2024 3150.006473\n", + " 88 NaN SUR 2000 2024 7433.507985\n", + " 89 NaN VEN 2006 2024 496.475458\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1977 2010 23.740587\n", + " 1 NaN CAN 1977 2011 24.248058\n", + " 2 NaN CAN 1977 2012 24.248058\n", + " 3 NaN CAN 1977 2013 24.248058\n", + " 4 NaN CAN 1977 2014 24.248058\n", + " 5 NaN CAN 1977 2015 24.248058\n", + " 6 NaN CAN 1977 2016 24.248058\n", + " 7 NaN CAN 1977 2017 24.248058\n", + " 8 NaN CAN 1977 2018 24.248058\n", + " 9 NaN CAN 1977 2019 24.248058\n", + " 10 NaN CAN 1977 2020 24.248058\n", + " 11 NaN CAN 1977 2021 24.248058\n", + " 12 NaN CAN 1977 2022 24.248058\n", + " 13 NaN CAN 1977 2023 24.248058\n", + " 14 NaN CAN 1977 2024 24.248058,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAN 1998 2010 10710.972576\n", + " 1 1.0 FRA 2005 2010 4.364500\n", + " 2 NaN CAN 1998 2011 10710.972576\n", + " 3 NaN FRA 2005 2011 4.364500\n", + " 4 NaN CAN 1998 2012 10710.972576\n", + " 5 NaN FRA 2005 2012 4.364500\n", + " 6 NaN CAN 1998 2013 10710.972576\n", + " 7 NaN FRA 2005 2013 4.364500\n", + " 8 NaN CAN 1998 2014 10710.972576\n", + " 9 NaN FRA 2005 2014 4.364500\n", + " 10 NaN CAN 1998 2015 21350.950247\n", + " 11 NaN FRA 2005 2015 4.364500\n", + " 12 NaN CAN 1998 2016 21351.509838\n", + " 13 NaN FRA 2005 2016 4.364500\n", + " 14 NaN CAN 1998 2017 21351.509838\n", + " 15 NaN FRA 2005 2017 4.364500\n", + " 16 NaN CAN 1998 2018 21352.479058\n", + " 17 NaN FRA 2005 2018 4.364500\n", + " 18 NaN CAN 1998 2019 21352.635796\n", + " 19 NaN FRA 2005 2019 4.364500\n", + " 20 NaN CAN 1998 2020 22391.736533\n", + " 21 NaN FRA 2005 2020 4.364500\n", + " 22 NaN CAN 1998 2021 22391.736533\n", + " 23 NaN FRA 2005 2021 4.364500\n", + " 24 NaN CAN 1998 2022 22392.169209\n", + " 25 NaN FRA 2005 2022 4.364500\n", + " 26 NaN CAN 1998 2023 22457.128159\n", + " 27 NaN FRA 2005 
2023 4.364500\n", + " 28 NaN CAN 1998 2024 22457.128159\n", + " 29 NaN FRA 2005 2024 4.364500,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1988 2010 368.278009\n", + " 1 NaN DNK 1988 2011 368.278009\n", + " 2 NaN DNK 1988 2012 368.278009\n", + " 3 NaN DNK 1988 2013 368.278009\n", + " 4 NaN DNK 1988 2014 368.278009\n", + " 5 NaN DNK 1988 2015 368.278009\n", + " 6 NaN DNK 1988 2016 368.278009\n", + " 7 NaN DNK 1988 2017 368.278009\n", + " 8 NaN DNK 1988 2018 368.278009\n", + " 9 NaN DNK 1988 2019 368.278009\n", + " 10 NaN DNK 1988 2020 368.278009\n", + " 11 NaN DNK 1988 2021 368.278009\n", + " 12 NaN DNK 1988 2022 368.278009\n", + " 13 NaN DNK 1988 2023 368.278009\n", + " 14 NaN DNK 1988 2024 368.278009,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1970 2010 7572.671289\n", + " 1 NaN DNK 1970 2011 7572.671289\n", + " 2 NaN DNK 1970 2012 7572.671289\n", + " 3 NaN DNK 1970 2013 7572.671289\n", + " 4 NaN DNK 1970 2014 7572.671289\n", + " 5 NaN DNK 1970 2015 7572.671289\n", + " 6 NaN DNK 1970 2016 7572.671289\n", + " 7 NaN DNK 1970 2017 7572.671289\n", + " 8 NaN DNK 1970 2018 7572.671289\n", + " 9 NaN DNK 1970 2019 7572.671289\n", + " 10 NaN DNK 1970 2020 7572.671289\n", + " 11 NaN DNK 1970 2021 7572.671289\n", + " 12 NaN DNK 1970 2022 7572.671289\n", + " 13 NaN DNK 1970 2023 7572.671289\n", + " 14 NaN DNK 1970 2024 7572.671289,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1977 2010 51390.814326\n", + " 1 NaN DNK 1977 2011 51390.814326\n", + " 2 NaN DNK 1977 2012 51390.814326\n", + " 3 NaN DNK 1977 2013 51390.814326\n", + " 4 NaN DNK 1977 2014 51390.814326\n", + " 5 NaN DNK 1977 2015 51390.814326\n", + " 6 NaN DNK 1977 2016 51390.814326\n", + " 7 NaN DNK 1977 2017 51390.814326\n", + " 8 NaN DNK 1977 2018 51390.814326\n", + " 9 NaN DNK 1977 2019 51390.814326\n", + " 10 NaN DNK 1977 2020 51390.814326\n", + " 11 NaN DNK 1977 2021 51390.814326\n", + " 12 NaN DNK 1977 2022 51390.814326\n", + " 13 NaN DNK 1977 2023 51390.814326\n", + " 
14 NaN DNK 1977 2024 51390.814326,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 1999 2010 7308.689207\n", + " 1 NaN BRA 1999 2011 7588.378794\n", + " 2 NaN BRA 1999 2012 7629.962956\n", + " 3 NaN BRA 1999 2013 7632.267048\n", + " 4 NaN BRA 1999 2014 7739.536959\n", + " 5 NaN BRA 1999 2015 7739.836833\n", + " 6 NaN BRA 1999 2016 8229.382590\n", + " 7 NaN BRA 1999 2017 8236.612549\n", + " 8 NaN BRA 1999 2018 8253.837611\n", + " 9 NaN BRA 1999 2019 8270.433110\n", + " 10 NaN BRA 1999 2020 8272.185894\n", + " 11 NaN BRA 1999 2021 8332.895358\n", + " 12 NaN BRA 1999 2022 8414.983340\n", + " 13 NaN BRA 1999 2023 8416.527660\n", + " 14 NaN BRA 1999 2024 8416.527660,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2000 2010 79111.646070\n", + " 1 NaN BRA 2000 2011 82724.212515\n", + " 2 NaN BRA 2000 2012 83223.882373\n", + " 3 NaN BRA 2000 2013 83887.863975\n", + " 4 NaN BRA 2000 2014 84570.804131\n", + " 5 NaN BRA 2000 2015 84623.983202\n", + " 6 NaN BRA 2000 2016 84750.121449\n", + " 7 NaN BRA 2000 2017 85006.424797\n", + " 8 NaN BRA 2000 2018 86101.206149\n", + " 9 NaN BRA 2000 2019 86166.257289\n", + " 10 NaN BRA 2000 2020 86348.912075\n", + " 11 NaN BRA 2000 2021 86532.740208\n", + " 12 NaN BRA 2000 2022 86641.232447\n", + " 13 NaN BRA 2000 2023 86780.541076\n", + " 14 NaN BRA 2000 2024 86798.292750,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2002 2010 142167.390400\n", + " 1 NaN BRA 2002 2011 142227.473613\n", + " 2 NaN BRA 2002 2012 142240.252264\n", + " 3 NaN BRA 2002 2013 142761.545144\n", + " 4 NaN BRA 2002 2014 143118.829516\n", + " 5 NaN BRA 2002 2015 151741.030463\n", + " 6 NaN BRA 2002 2016 151743.259630\n", + " 7 NaN BRA 2002 2017 151957.948670\n", + " 8 NaN BRA 2002 2018 159762.314541\n", + " 9 NaN BRA 2002 2019 159778.609491\n", + " 10 NaN BRA 2002 2020 159779.150415\n", + " 11 NaN BRA 2002 2021 159814.498165\n", + " 12 NaN BRA 2002 2022 159821.429600\n", + " 13 NaN BRA 2002 2023 159852.691292\n", + " 14 NaN BRA 2002 
2024 159926.920312,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2002 2010 122267.399284\n", + " 1 NaN BRA 2002 2011 122371.893905\n", + " 2 NaN BRA 2002 2012 122371.906305\n", + " 3 NaN BRA 2002 2013 122373.128812\n", + " 4 NaN BRA 2002 2014 122837.842616\n", + " 5 NaN BRA 2002 2015 122837.842616\n", + " 6 NaN BRA 2002 2016 122908.838461\n", + " 7 NaN BRA 2002 2017 123132.388855\n", + " 8 NaN BRA 2002 2018 126380.727864\n", + " 9 NaN BRA 2002 2019 126454.900372\n", + " 10 NaN BRA 2002 2020 126454.900372\n", + " 11 NaN BRA 2002 2021 126491.927021\n", + " 12 NaN BRA 2002 2022 126493.258193\n", + " 13 NaN BRA 2002 2023 126493.258193\n", + " 14 NaN BRA 2002 2024 126564.937110,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 2005 2010 973.311882\n", + " 1 NaN DNK 2005 2011 973.311882\n", + " 2 NaN DNK 2005 2012 973.311882\n", + " 3 NaN DNK 2005 2013 973.311882\n", + " 4 NaN DNK 2005 2014 973.311882\n", + " 5 NaN DNK 2005 2015 973.311882\n", + " 6 NaN DNK 2005 2016 973.311882\n", + " 7 NaN DNK 2005 2017 973.311882\n", + " 8 NaN DNK 2005 2018 973.311882\n", + " 9 NaN DNK 2005 2019 973.311882\n", + " 10 NaN DNK 2005 2020 973.311882\n", + " 11 NaN DNK 2005 2021 973.311882\n", + " 12 NaN DNK 2005 2022 973.311882\n", + " 13 NaN DNK 2005 2023 973.311882\n", + " 14 NaN DNK 2005 2024 973.311882,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1988 2010 56084.601152\n", + " 1 NaN DNK 1988 2011 56084.601152\n", + " 2 NaN DNK 1988 2012 56084.601152\n", + " 3 NaN DNK 1988 2013 56084.601152\n", + " 4 NaN DNK 1988 2014 56084.601152\n", + " 5 NaN DNK 1988 2015 56084.601152\n", + " 6 NaN DNK 1988 2016 56084.601152\n", + " 7 NaN DNK 1988 2017 56084.601152\n", + " 8 NaN DNK 1988 2018 56084.601152\n", + " 9 NaN DNK 1988 2019 56084.601152\n", + " 10 NaN DNK 1988 2020 56084.601152\n", + " 11 NaN DNK 1988 2021 56084.601152\n", + " 12 NaN DNK 1988 2022 56084.601152\n", + " 13 NaN DNK 1988 2023 56084.601152\n", + " 14 NaN DNK 1988 2024 56084.601152,\n", + " 
index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1974 2010 140679.761296\n", + " 1 NaN DNK 1974 2011 140679.761296\n", + " 2 NaN DNK 1974 2012 140679.761296\n", + " 3 NaN DNK 1974 2013 140679.761296\n", + " 4 NaN DNK 1974 2014 140679.761296\n", + " 5 NaN DNK 1974 2015 140679.761296\n", + " 6 NaN DNK 1974 2016 140679.761296\n", + " 7 NaN DNK 1974 2017 140679.761296\n", + " 8 NaN DNK 1974 2018 140679.761296\n", + " 9 NaN DNK 1974 2019 140679.761296\n", + " 10 NaN DNK 1974 2020 140679.761296\n", + " 11 NaN DNK 1974 2021 140679.761296\n", + " 12 NaN DNK 1974 2022 140679.761296\n", + " 13 NaN DNK 1974 2023 140679.761296\n", + " 14 NaN DNK 1974 2024 140679.761296,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 2003 2010 3243.523245\n", + " 1 NaN BRA 2003 2011 3250.063559\n", + " 2 NaN BRA 2003 2012 3268.287821\n", + " 3 NaN BRA 2003 2013 3270.080391\n", + " 4 NaN BRA 2003 2014 3275.970109\n", + " 5 NaN BRA 2003 2015 3276.028299\n", + " 6 NaN BRA 2003 2016 3277.592094\n", + " 7 NaN BRA 2003 2017 3291.503146\n", + " 8 NaN BRA 2003 2018 3292.040641\n", + " 9 NaN BRA 2003 2019 3292.040641\n", + " 10 NaN BRA 2003 2020 3292.040641\n", + " 11 NaN BRA 2003 2021 3292.040641\n", + " 12 NaN BRA 2003 2022 3292.040641\n", + " 13 NaN BRA 2003 2023 3293.266054\n", + " 14 NaN BRA 2003 2024 3293.266054,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRA 1999 2010 23556.275267\n", + " 1 NaN BRA 1999 2011 23985.621936\n", + " 2 NaN BRA 1999 2012 24110.510949\n", + " 3 NaN BRA 1999 2013 24110.956074\n", + " 4 NaN BRA 1999 2014 24230.864011\n", + " 5 NaN BRA 1999 2015 24236.870203\n", + " 6 NaN BRA 1999 2016 26528.556919\n", + " 7 NaN BRA 1999 2017 26554.997282\n", + " 8 NaN BRA 1999 2018 27790.447811\n", + " 9 NaN BRA 1999 2019 28014.006736\n", + " 10 NaN BRA 1999 2020 28850.800586\n", + " 11 NaN BRA 1999 2021 28876.914196\n", + " 12 NaN BRA 1999 2022 29950.800075\n", + " 13 NaN BRA 1999 2023 30565.432117\n", + " 14 NaN BRA 1999 2024 30565.432117,\n", + " index iso_3 
STATUS_YR year area\n", + " 0 0.0 BRA 2010 2010 1030.149650\n", + " 1 NaN BRA 2010 2011 1030.226205\n", + " 2 NaN BRA 2010 2012 1117.487104\n", + " 3 NaN BRA 2010 2013 1122.220821\n", + " 4 NaN BRA 2010 2014 1152.457136\n", + " 5 NaN BRA 2010 2015 1152.457136\n", + " 6 NaN BRA 2010 2016 1152.457136\n", + " 7 NaN BRA 2010 2017 1152.457136\n", + " 8 NaN BRA 2010 2018 1300.423844\n", + " 9 NaN BRA 2010 2019 1389.476191\n", + " 10 NaN BRA 2010 2020 1391.515248\n", + " 11 NaN BRA 2010 2021 1439.408808\n", + " 12 NaN BRA 2010 2022 1439.408808\n", + " 13 NaN BRA 2010 2023 1439.408808\n", + " 14 NaN BRA 2010 2024 1439.408808,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 PRT 1990 2010 44.582274\n", + " 1 NaN PRT 1990 2011 66.979948\n", + " 2 NaN PRT 1990 2012 66.979948\n", + " 3 NaN PRT 1990 2013 66.979948\n", + " 4 NaN PRT 1990 2014 66.979948\n", + " 5 NaN PRT 1990 2015 66.979948\n", + " 6 NaN PRT 1990 2016 66.979948\n", + " 7 NaN PRT 1990 2017 66.979948\n", + " 8 NaN PRT 1990 2018 66.979948\n", + " 9 NaN PRT 1990 2019 66.979948\n", + " 10 NaN PRT 1990 2020 66.979948\n", + " 11 NaN PRT 1990 2021 66.979948\n", + " 12 NaN PRT 1990 2022 66.979948\n", + " 13 NaN PRT 1990 2023 66.979948\n", + " 14 NaN PRT 1990 2024 66.979948,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1974 2010 130972.108319\n", + " 1 NaN DNK 1974 2011 130972.108319\n", + " 2 NaN DNK 1974 2012 130972.108319\n", + " 3 NaN DNK 1974 2013 130972.108319\n", + " 4 NaN DNK 1974 2014 130972.108319\n", + " 5 NaN DNK 1974 2015 130972.108319\n", + " 6 NaN DNK 1974 2016 130972.108319\n", + " 7 NaN DNK 1974 2017 130972.108319\n", + " 8 NaN DNK 1974 2018 130972.108319\n", + " 9 NaN DNK 1974 2019 130972.108319\n", + " 10 NaN DNK 1974 2020 130972.108319\n", + " 11 NaN DNK 1974 2021 130972.108319\n", + " 12 NaN DNK 1974 2022 130972.108319\n", + " 13 NaN DNK 1974 2023 130972.108319\n", + " 14 NaN DNK 1974 2024 130972.108319,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1974 2010 
201056.561452\n", + " 1 NaN DNK 1974 2011 201056.561452\n", + " 2 NaN DNK 1974 2012 201056.561452\n", + " 3 NaN DNK 1974 2013 201056.561452\n", + " 4 NaN DNK 1974 2014 201056.561452\n", + " 5 NaN DNK 1974 2015 201056.561452\n", + " 6 NaN DNK 1974 2016 201056.561452\n", + " 7 NaN DNK 1974 2017 201056.561452\n", + " 8 NaN DNK 1974 2018 201056.561452\n", + " 9 NaN DNK 1974 2019 201056.561452\n", + " 10 NaN DNK 1974 2020 201056.561452\n", + " 11 NaN DNK 1974 2021 201056.561452\n", + " 12 NaN DNK 1974 2022 201056.561452\n", + " 13 NaN DNK 1974 2023 201056.561452\n", + " 14 NaN DNK 1974 2024 201056.561452,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CPV 2005 2010 648.113979\n", + " 1 NaN CPV 2005 2011 648.113979\n", + " 2 NaN CPV 2005 2012 648.113979\n", + " 3 NaN CPV 2005 2013 648.119083\n", + " 4 NaN CPV 2005 2014 695.570314\n", + " 5 NaN CPV 2005 2015 695.570314\n", + " 6 NaN CPV 2005 2016 695.570314\n", + " 7 NaN CPV 2005 2017 695.570314\n", + " 8 NaN CPV 2005 2018 695.570314\n", + " 9 NaN CPV 2005 2019 695.570314\n", + " 10 NaN CPV 2005 2020 695.570314\n", + " 11 NaN CPV 2005 2021 695.570314\n", + " 12 NaN CPV 2005 2022 695.570314\n", + " 13 NaN CPV 2005 2023 695.570314\n", + " 14 NaN CPV 2005 2024 695.570314,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 PRT 2009 2010 435.726728\n", + " 1 NaN PRT 2009 2011 488.416377\n", + " 2 NaN PRT 2009 2012 488.416377\n", + " 3 NaN PRT 2009 2013 488.420140\n", + " 4 NaN PRT 2009 2014 488.420140\n", + " 5 NaN PRT 2009 2015 488.420140\n", + " 6 NaN PRT 2009 2016 488.420140\n", + " 7 NaN PRT 2009 2017 488.420140\n", + " 8 NaN PRT 2009 2018 488.420140\n", + " 9 NaN PRT 2009 2019 488.420140\n", + " 10 NaN PRT 2009 2020 488.420140\n", + " 11 NaN PRT 2009 2021 488.420140\n", + " 12 NaN PRT 2009 2022 488.420140\n", + " 13 NaN PRT 2009 2023 488.420140\n", + " 14 NaN PRT 2009 2024 488.420140,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ISL 1987 2010 669.449652\n", + " 1 NaN ISL 1987 2011 684.471268\n", + " 2 
NaN ISL 1987 2012 684.716105\n", + " 3 NaN ISL 1987 2013 684.742701\n", + " 4 NaN ISL 1987 2014 685.503953\n", + " 5 NaN ISL 1987 2015 685.503953\n", + " 6 NaN ISL 1987 2016 686.203826\n", + " 7 NaN ISL 1987 2017 686.203826\n", + " 8 NaN ISL 1987 2018 686.203826\n", + " 9 NaN ISL 1987 2019 686.203826\n", + " 10 NaN ISL 1987 2020 688.646481\n", + " 11 NaN ISL 1987 2021 689.530726\n", + " 12 NaN ISL 1987 2022 690.094594\n", + " 13 NaN ISL 1987 2023 690.094594\n", + " 14 NaN ISL 1987 2024 690.094594,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1988 2010 87723.065320\n", + " 1 1.0 ISL 2005 2010 1064.731303\n", + " 2 NaN DNK 1988 2011 89572.947566\n", + " 3 NaN ISL 2005 2011 1069.876875\n", + " 4 NaN DNK 1988 2012 89572.947566\n", + " 5 NaN ISL 2005 2012 1069.876875\n", + " 6 NaN DNK 1988 2013 89572.947566\n", + " 7 NaN ISL 2005 2013 1069.876875\n", + " 8 NaN DNK 1988 2014 89572.947566\n", + " 9 NaN ISL 2005 2014 1069.876875\n", + " 10 NaN DNK 1988 2015 89572.947566\n", + " 11 NaN ISL 2005 2015 1069.876875\n", + " 12 NaN DNK 1988 2016 89572.947566\n", + " 13 NaN ISL 2005 2016 1069.876875\n", + " 14 NaN DNK 1988 2017 89572.947566\n", + " 15 NaN ISL 2005 2017 1069.876875\n", + " 16 NaN DNK 1988 2018 89572.947566\n", + " 17 NaN ISL 2005 2018 1069.876875\n", + " 18 NaN DNK 1988 2019 89572.947566\n", + " 19 NaN ISL 2005 2019 1069.876875\n", + " 20 NaN DNK 1988 2020 89572.947566\n", + " 21 NaN ISL 2005 2020 1069.876875\n", + " 22 NaN DNK 1988 2021 89572.947566\n", + " 23 NaN ISL 2005 2021 1172.305564\n", + " 24 NaN DNK 1988 2022 89572.947566\n", + " 25 NaN ISL 2005 2022 1172.305564\n", + " 26 NaN DNK 1988 2023 89572.947566\n", + " 27 NaN ISL 2005 2023 1172.305564\n", + " 28 NaN DNK 1988 2024 89572.947566\n", + " 29 NaN ISL 2005 2024 1172.305564,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1988 2010 169200.104879\n", + " 1 NaN DNK 1988 2011 169200.104879\n", + " 2 NaN DNK 1988 2012 169200.104879\n", + " 3 NaN DNK 1988 2013 169200.104879\n", + " 4 
NaN DNK 1988 2014 169200.104879\n", + " 5 NaN DNK 1988 2015 169200.104879\n", + " 6 NaN DNK 1988 2016 169200.104879\n", + " 7 NaN DNK 1988 2017 169200.104879\n", + " 8 NaN DNK 1988 2018 169200.104879\n", + " 9 NaN DNK 1988 2019 169200.104879\n", + " 10 NaN DNK 1988 2020 169200.104879\n", + " 11 NaN DNK 1988 2021 169200.104879\n", + " 12 NaN DNK 1988 2022 169200.104879\n", + " 13 NaN DNK 1988 2023 169200.104879\n", + " 14 NaN DNK 1988 2024 169200.104879,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 2005 2010 10.270585\n", + " 1 NaN GBR 2005 2011 10.270585\n", + " 2 NaN GBR 2005 2012 10.270585\n", + " 3 NaN GBR 2005 2013 10.270585\n", + " 4 NaN GBR 2005 2014 19.120202\n", + " 5 NaN GBR 2005 2015 19.120202\n", + " 6 NaN GBR 2005 2016 19.120202\n", + " 7 NaN GBR 2005 2017 19.120202\n", + " 8 NaN GBR 2005 2018 19.120202\n", + " 9 NaN GBR 2005 2019 19.120202\n", + " 10 NaN GBR 2005 2020 19.120202\n", + " 11 NaN GBR 2005 2021 19.120202\n", + " 12 NaN GBR 2005 2022 19.120202\n", + " 13 NaN GBR 2005 2023 19.120202\n", + " 14 NaN GBR 2005 2024 19.120202,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GIN 1943 2010 36426.908715\n", + " 1 1.0 GMB 2007 2010 3111.720409\n", + " 2 2.0 GNB 0 2010 8650.923931\n", + " 3 3.0 LBR 2003 2010 4578.115920\n", + " 4 4.0 MLI 2004 2010 3262.757855\n", + " .. ... ... ... ... 
...\n", + " 100 NaN GNB 0 2024 9568.348737\n", + " 101 NaN LBR 2003 2024 4578.115920\n", + " 102 NaN MLI 2004 2024 10009.502293\n", + " 103 NaN SEN 0 2024 29301.101927\n", + " 104 NaN SLE 2003 2024 14477.911877\n", + " \n", + " [105 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MLI 1959 2010 82.257427\n", + " 1 1.0 MRT 0 2010 6421.135262\n", + " 2 2.0 SEN 0 2010 27648.719556\n", + " 3 NaN MLI 1959 2011 82.257427\n", + " 4 NaN MRT 0 2011 6421.135262\n", + " 5 NaN SEN 0 2011 27651.340294\n", + " 6 NaN MLI 1959 2012 82.257427\n", + " 7 NaN MRT 0 2012 6421.135262\n", + " 8 NaN SEN 0 2012 27651.340294\n", + " 9 NaN MLI 1959 2013 260.376655\n", + " 10 NaN MRT 0 2013 6421.135262\n", + " 11 NaN SEN 0 2013 27651.340294\n", + " 12 NaN MLI 1959 2014 260.376655\n", + " 13 NaN MRT 0 2014 6421.135262\n", + " 14 NaN SEN 0 2014 27651.340294\n", + " 15 NaN MLI 1959 2015 260.376655\n", + " 16 NaN MRT 0 2015 6421.135262\n", + " 17 NaN SEN 0 2015 27651.340294\n", + " 18 NaN MLI 1959 2016 260.376655\n", + " 19 NaN MRT 0 2016 6421.135262\n", + " 20 NaN SEN 0 2016 27651.340294\n", + " 21 NaN MLI 1959 2017 260.376655\n", + " 22 NaN MRT 0 2017 6421.135262\n", + " 23 NaN SEN 0 2017 27651.340294\n", + " 24 NaN MLI 1959 2018 260.376655\n", + " 25 NaN MRT 0 2018 6421.135262\n", + " 26 NaN SEN 0 2018 27651.340294\n", + " 27 NaN MLI 1959 2019 260.376655\n", + " 28 NaN MRT 0 2019 6421.135262\n", + " 29 NaN SEN 0 2019 27657.844935\n", + " 30 NaN MLI 1959 2020 260.376655\n", + " 31 NaN MRT 0 2020 6421.135262\n", + " 32 NaN SEN 0 2020 27657.844935\n", + " 33 NaN MLI 1959 2021 260.376655\n", + " 34 NaN MRT 0 2021 6421.135262\n", + " 35 NaN SEN 0 2021 27657.844935\n", + " 36 NaN MLI 1959 2022 260.376655\n", + " 37 NaN MRT 0 2022 6421.135262\n", + " 38 NaN SEN 0 2022 27657.844935\n", + " 39 NaN MLI 1959 2023 260.376655\n", + " 40 NaN MRT 0 2023 6421.135262\n", + " 41 NaN SEN 0 2023 27657.844935\n", + " 42 NaN MLI 1959 2024 260.376655\n", + " 43 NaN MRT 0 2024 6421.135262\n", 
+ " 44 NaN SEN 0 2024 27657.844935,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ESH 2006 2010 1549.962889\n", + " 1 1.0 ESP 2010 2010 3516.241211\n", + " 2 2.0 MAR 2006 2010 1781.387109\n", + " 3 3.0 PRT 1990 2010 476.210864\n", + " 4 NaN ESH 2006 2011 1549.962889\n", + " 5 NaN ESP 2010 2011 3520.974818\n", + " 6 NaN MAR 2006 2011 1781.387109\n", + " 7 NaN PRT 1990 2011 476.210864\n", + " 8 NaN ESH 2006 2012 1549.962889\n", + " 9 NaN ESP 2010 2012 3520.974818\n", + " 10 NaN MAR 2006 2012 1781.387109\n", + " 11 NaN PRT 1990 2012 476.210864\n", + " 12 NaN ESH 2006 2013 1549.962889\n", + " 13 NaN ESP 2010 2013 3520.974818\n", + " 14 NaN MAR 2006 2013 1781.387109\n", + " 15 NaN PRT 1990 2013 476.210864\n", + " 16 NaN ESH 2006 2014 1549.962889\n", + " 17 NaN ESP 2010 2014 3520.974818\n", + " 18 NaN MAR 2006 2014 1781.387109\n", + " 19 NaN PRT 1990 2014 476.210864\n", + " 20 NaN ESH 2006 2015 1549.962889\n", + " 21 NaN ESP 2010 2015 3520.974818\n", + " 22 NaN MAR 2006 2015 1781.387109\n", + " 23 NaN PRT 1990 2015 476.210864\n", + " 24 NaN ESH 2006 2016 1549.962889\n", + " 25 NaN ESP 2010 2016 3520.974818\n", + " 26 NaN MAR 2006 2016 1781.387109\n", + " 27 NaN PRT 1990 2016 476.210864\n", + " 28 NaN ESH 2006 2017 1549.962889\n", + " 29 NaN ESP 2010 2017 3520.974818\n", + " 30 NaN MAR 2006 2017 1781.387109\n", + " 31 NaN PRT 1990 2017 476.430774\n", + " 32 NaN ESH 2006 2018 1549.962889\n", + " 33 NaN ESP 2010 2018 3520.974818\n", + " 34 NaN MAR 2006 2018 1781.387109\n", + " 35 NaN PRT 1990 2018 478.271580\n", + " 36 NaN ESH 2006 2019 1549.962889\n", + " 37 NaN ESP 2010 2019 3520.974818\n", + " 38 NaN MAR 2006 2019 1781.387109\n", + " 39 NaN PRT 1990 2019 478.271580\n", + " 40 NaN ESH 2006 2020 1549.962889\n", + " 41 NaN ESP 2010 2020 3520.974818\n", + " 42 NaN MAR 2006 2020 1781.387109\n", + " 43 NaN PRT 1990 2020 478.271580\n", + " 44 NaN ESH 2006 2021 1549.962889\n", + " 45 NaN ESP 2010 2021 3520.974818\n", + " 46 NaN MAR 2006 2021 1786.382420\n", + " 47 NaN 
PRT 1990 2021 478.994319\n", + " 48 NaN ESH 2006 2022 1549.962889\n", + " 49 NaN ESP 2010 2022 3695.081593\n", + " 50 NaN MAR 2006 2022 1786.382420\n", + " 51 NaN PRT 1990 2022 478.994319\n", + " 52 NaN ESH 2006 2023 1549.962889\n", + " 53 NaN ESP 2010 2023 3695.081593\n", + " 54 NaN MAR 2006 2023 3681.782587\n", + " 55 NaN PRT 1990 2023 478.994319\n", + " 56 NaN ESH 2006 2024 1549.962889\n", + " 57 NaN ESP 2010 2024 3695.081593\n", + " 58 NaN MAR 2006 2024 3681.782587\n", + " 59 NaN PRT 1990 2024 478.994319,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IRL 2010 2010 446.809554\n", + " 1 NaN IRL 2010 2011 450.762615\n", + " 2 NaN IRL 2010 2012 450.762615\n", + " 3 NaN IRL 2010 2013 450.762615\n", + " 4 NaN IRL 2010 2014 450.762615\n", + " 5 NaN IRL 2010 2015 450.762615\n", + " 6 NaN IRL 2010 2016 451.207233\n", + " 7 NaN IRL 2010 2017 456.480300\n", + " 8 NaN IRL 2010 2018 460.004572\n", + " 9 NaN IRL 2010 2019 488.765545\n", + " 10 NaN IRL 2010 2020 488.765545\n", + " 11 NaN IRL 2010 2021 516.104879\n", + " 12 NaN IRL 2010 2022 548.795009\n", + " 13 NaN IRL 2010 2023 548.795009\n", + " 14 NaN IRL 2010 2024 548.795009,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ISL 1978 2010 8835.691519\n", + " 1 NaN ISL 1978 2011 8835.691519\n", + " 2 NaN ISL 1978 2012 8835.691519\n", + " 3 NaN ISL 1978 2013 8835.691519\n", + " 4 NaN ISL 1978 2014 8835.691519\n", + " 5 NaN ISL 1978 2015 8835.691519\n", + " 6 NaN ISL 1978 2016 8835.691519\n", + " 7 NaN ISL 1978 2017 8835.691519\n", + " 8 NaN ISL 1978 2018 8835.691519\n", + " 9 NaN ISL 1978 2019 8836.203272\n", + " 10 NaN ISL 1978 2020 8987.662639\n", + " 11 NaN ISL 1978 2021 8987.662639\n", + " 12 NaN ISL 1978 2022 8987.662639\n", + " 13 NaN ISL 1978 2023 8987.662639\n", + " 14 NaN ISL 1978 2024 8987.662639,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1974 2010 969.593126\n", + " 1 1.0 ISL 1977 2010 9535.973707\n", + " 2 NaN DNK 1974 2011 969.593126\n", + " 3 NaN ISL 1977 2011 9541.111943\n", + " 4 
NaN DNK 1974 2012 969.593126\n", + " 5 NaN ISL 1977 2012 9551.691016\n", + " 6 NaN DNK 1974 2013 969.593126\n", + " 7 NaN ISL 1977 2013 9569.927072\n", + " 8 NaN DNK 1974 2014 969.593126\n", + " 9 NaN ISL 1977 2014 9569.927072\n", + " 10 NaN DNK 1974 2015 969.593126\n", + " 11 NaN ISL 1977 2015 9569.927072\n", + " 12 NaN DNK 1974 2016 969.593126\n", + " 13 NaN ISL 1977 2016 9643.789879\n", + " 14 NaN DNK 1974 2017 969.593126\n", + " 15 NaN ISL 1977 2017 9643.789879\n", + " 16 NaN DNK 1974 2018 969.593126\n", + " 17 NaN ISL 1977 2018 9643.789879\n", + " 18 NaN DNK 1974 2019 969.593126\n", + " 19 NaN ISL 1977 2019 9645.274382\n", + " 20 NaN DNK 1974 2020 969.593126\n", + " 21 NaN ISL 1977 2020 9893.281664\n", + " 22 NaN DNK 1974 2021 969.593126\n", + " 23 NaN ISL 1977 2021 10003.725732\n", + " 24 NaN DNK 1974 2022 969.593126\n", + " 25 NaN ISL 1977 2022 10004.459238\n", + " 26 NaN DNK 1974 2023 969.593126\n", + " 27 NaN ISL 1977 2023 10004.459238\n", + " 28 NaN DNK 1974 2024 969.593126\n", + " 29 NaN ISL 1977 2024 10004.459238,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1988 2010 23756.7598\n", + " 1 NaN DNK 1988 2011 23756.7598\n", + " 2 NaN DNK 1988 2012 23756.7598\n", + " 3 NaN DNK 1988 2013 23756.7598\n", + " 4 NaN DNK 1988 2014 23756.7598\n", + " 5 NaN DNK 1988 2015 23756.7598\n", + " 6 NaN DNK 1988 2016 23756.7598\n", + " 7 NaN DNK 1988 2017 23756.7598\n", + " 8 NaN DNK 1988 2018 23756.7598\n", + " 9 NaN DNK 1988 2019 23756.7598\n", + " 10 NaN DNK 1988 2020 23756.7598\n", + " 11 NaN DNK 1988 2021 23756.7598\n", + " 12 NaN DNK 1988 2022 23756.7598\n", + " 13 NaN DNK 1988 2023 23756.7598\n", + " 14 NaN DNK 1988 2024 23756.7598,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BFA 1955 2010 26491.289607\n", + " 1 1.0 CIV 0 2010 91168.929578\n", + " 2 2.0 GHA 1950 2010 37708.650961\n", + " 3 3.0 GIN 1956 2010 35862.787141\n", + " 4 4.0 LBR 2003 2010 9050.002769\n", + " .. ... ... ... ... 
...\n", + " 115 NaN GIN 1956 2024 36625.636719\n", + " 116 NaN LBR 2003 2024 9734.910180\n", + " 117 NaN MLI 1985 2024 54204.792569\n", + " 118 NaN NER 1970 2024 9482.386227\n", + " 119 NaN TGO 1954 2024 115.738921\n", + " \n", + " [120 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BFA 1970 2010 6164.623244\n", + " 1 1.0 MLI 1989 2010 49936.198741\n", + " 2 2.0 MRT 1948 2010 62.567165\n", + " 3 3.0 NER 1970 2010 6164.623244\n", + " 4 NaN BFA 1970 2011 6164.623244\n", + " 5 NaN MLI 1989 2011 49936.198741\n", + " 6 NaN MRT 1948 2011 62.567165\n", + " 7 NaN NER 1970 2011 6164.623244\n", + " 8 NaN BFA 1970 2012 6164.623244\n", + " 9 NaN MLI 1989 2012 49936.198741\n", + " 10 NaN MRT 1948 2012 62.567165\n", + " 11 NaN NER 1970 2012 6164.623244\n", + " 12 NaN BFA 1970 2013 6164.623244\n", + " 13 NaN MLI 1989 2013 49936.198741\n", + " 14 NaN MRT 1948 2013 62.567165\n", + " 15 NaN NER 1970 2013 6164.623244\n", + " 16 NaN BFA 1970 2014 6164.623244\n", + " 17 NaN MLI 1989 2014 49936.198741\n", + " 18 NaN MRT 1948 2014 62.567165\n", + " 19 NaN NER 1970 2014 6164.623244\n", + " 20 NaN BFA 1970 2015 6164.623244\n", + " 21 NaN MLI 1989 2015 49936.198741\n", + " 22 NaN MRT 1948 2015 62.567165\n", + " 23 NaN NER 1970 2015 6164.623244\n", + " 24 NaN BFA 1970 2016 6164.623244\n", + " 25 NaN MLI 1989 2016 49936.198741\n", + " 26 NaN MRT 1948 2016 62.567165\n", + " 27 NaN NER 1970 2016 6164.623244\n", + " 28 NaN BFA 1970 2017 6164.623244\n", + " 29 NaN MLI 1989 2017 49936.198741\n", + " 30 NaN MRT 1948 2017 62.567165\n", + " 31 NaN NER 1970 2017 6164.623244\n", + " 32 NaN BFA 1970 2018 6164.623244\n", + " 33 NaN MLI 1989 2018 49936.198741\n", + " 34 NaN MRT 1948 2018 62.567165\n", + " 35 NaN NER 1970 2018 6164.623244\n", + " 36 NaN BFA 1970 2019 6164.623244\n", + " 37 NaN MLI 1989 2019 49936.198741\n", + " 38 NaN MRT 1948 2019 62.567165\n", + " 39 NaN NER 1970 2019 6164.623244\n", + " 40 NaN BFA 1970 2020 6164.623244\n", + " 41 NaN MLI 1989 2020 
49936.198741\n", + " 42 NaN MRT 1948 2020 62.567165\n", + " 43 NaN NER 1970 2020 6164.623244\n", + " 44 NaN BFA 1970 2021 6164.623244\n", + " 45 NaN MLI 1989 2021 49936.198741\n", + " 46 NaN MRT 1948 2021 62.567165\n", + " 47 NaN NER 1970 2021 6164.623244\n", + " 48 NaN BFA 1970 2022 6164.623244\n", + " 49 NaN MLI 1989 2022 49936.198741\n", + " 50 NaN MRT 1948 2022 62.567165\n", + " 51 NaN NER 1970 2022 6164.623244\n", + " 52 NaN BFA 1970 2023 6164.623244\n", + " 53 NaN MLI 1989 2023 49936.198741\n", + " 54 NaN MRT 1948 2023 62.567165\n", + " 55 NaN NER 1970 2023 6164.623244\n", + " 56 NaN BFA 1970 2024 6164.623244\n", + " 57 NaN MLI 1989 2024 49936.198741\n", + " 58 NaN MRT 1948 2024 62.567165\n", + " 59 NaN NER 1970 2024 6164.623244,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DZA 2001 2010 5111.986464\n", + " 1 1.0 MAR 1991 2010 6257.028756\n", + " 2 NaN DZA 2001 2011 5111.986464\n", + " 3 NaN MAR 1991 2011 6257.028756\n", + " 4 NaN DZA 2001 2012 5111.986464\n", + " 5 NaN MAR 1991 2012 6257.028756\n", + " 6 NaN DZA 2001 2013 5111.986464\n", + " 7 NaN MAR 1991 2013 6257.028756\n", + " 8 NaN DZA 2001 2014 5111.986464\n", + " 9 NaN MAR 1991 2014 6257.028756\n", + " 10 NaN DZA 2001 2015 5111.986464\n", + " 11 NaN MAR 1991 2015 6257.028756\n", + " 12 NaN DZA 2001 2016 5111.986464\n", + " 13 NaN MAR 1991 2016 6257.028756\n", + " 14 NaN DZA 2001 2017 5111.986464\n", + " 15 NaN MAR 1991 2017 6257.028756\n", + " 16 NaN DZA 2001 2018 5111.986464\n", + " 17 NaN MAR 1991 2018 6261.670954\n", + " 18 NaN DZA 2001 2019 5111.986464\n", + " 19 NaN MAR 1991 2019 6367.316646\n", + " 20 NaN DZA 2001 2020 5111.986464\n", + " 21 NaN MAR 1991 2020 6367.316646\n", + " 22 NaN DZA 2001 2021 5111.986464\n", + " 23 NaN MAR 1991 2021 6392.619863\n", + " 24 NaN DZA 2001 2022 5111.986464\n", + " 25 NaN MAR 1991 2022 6392.619863\n", + " 26 NaN DZA 2001 2023 6899.445097\n", + " 27 NaN MAR 1991 2023 12399.546314\n", + " 28 NaN DZA 2001 2024 6899.445097\n", + " 29 NaN MAR 1991 2024 
12399.546314,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 8.843091\n", + " 1 1.0 AUT 2007 2010 8.843091\n", + " 2 2.0 BEL 2007 2010 8.843091\n", + " 3 3.0 BGR 2007 2010 8.843091\n", + " 4 4.0 BIH 2007 2010 8.843091\n", + " .. ... ... ... ... ...\n", + " 310 NaN PRT 1989 2024 26013.120229\n", + " 311 NaN ROU 2007 2024 8.843091\n", + " 312 NaN SVK 2007 2024 8.843091\n", + " 313 NaN SVN 2007 2024 8.843091\n", + " 314 NaN UKR 2007 2024 8.843091\n", + " \n", + " [315 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 1969 2010 16.611946\n", + " 1 NaN FRA 1969 2011 16.611946\n", + " 2 NaN FRA 1969 2012 16.611946\n", + " 3 NaN FRA 1969 2013 16.611946\n", + " 4 NaN FRA 1969 2014 16.611946\n", + " 5 NaN FRA 1969 2015 16.611946\n", + " 6 NaN FRA 1969 2016 16.611946\n", + " 7 NaN FRA 1969 2017 16.611946\n", + " 8 NaN FRA 1969 2018 16.611946\n", + " 9 NaN FRA 1969 2019 16.611946\n", + " 10 NaN FRA 1969 2020 16.611946\n", + " 11 NaN FRA 1969 2021 16.616763\n", + " 12 NaN FRA 1969 2022 16.616763\n", + " 13 NaN FRA 1969 2023 16.616763\n", + " 14 NaN FRA 1969 2024 16.616763,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 1967 2010 2489.866715\n", + " 1 1.0 IRL 1986 2010 7466.461131\n", + " 2 NaN GBR 1967 2011 2491.177033\n", + " 3 NaN IRL 1986 2011 7468.721056\n", + " 4 NaN GBR 1967 2012 2510.738850\n", + " 5 NaN IRL 1986 2012 7484.407413\n", + " 6 NaN GBR 1967 2013 2512.786880\n", + " 7 NaN IRL 1986 2013 7484.527732\n", + " 8 NaN GBR 1967 2014 2518.950529\n", + " 9 NaN IRL 1986 2014 7484.534879\n", + " 10 NaN GBR 1967 2015 2519.065034\n", + " 11 NaN IRL 1986 2015 7484.534879\n", + " 12 NaN GBR 1967 2016 2521.110082\n", + " 13 NaN IRL 1986 2016 7531.773722\n", + " 14 NaN GBR 1967 2017 2522.735274\n", + " 15 NaN IRL 1986 2017 7621.931236\n", + " 16 NaN GBR 1967 2018 2527.331200\n", + " 17 NaN IRL 1986 2018 7720.258503\n", + " 18 NaN GBR 1967 2019 2527.331202\n", + " 19 NaN IRL 1986 2019 7971.983620\n", + " 20 NaN GBR 1967 
2020 2527.331202\n", + " 21 NaN IRL 1986 2020 8097.853806\n", + " 22 NaN GBR 1967 2021 2570.312794\n", + " 23 NaN IRL 1986 2021 8832.617551\n", + " 24 NaN GBR 1967 2022 2602.095654\n", + " 25 NaN IRL 1986 2022 9056.933250\n", + " 26 NaN GBR 1967 2023 2602.095654\n", + " 27 NaN IRL 1986 2023 9056.933250\n", + " 28 NaN GBR 1967 2024 2602.095654\n", + " 29 NaN IRL 1986 2024 9056.933250,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2001 2010 12453.556762\n", + " 1 1.0 GBR 0 2010 20.822905\n", + " 2 NaN FRA 2001 2011 12539.410313\n", + " 3 NaN GBR 0 2011 20.823284\n", + " 4 NaN FRA 2001 2012 12686.462446\n", + " 5 NaN GBR 0 2012 20.823284\n", + " 6 NaN FRA 2001 2013 12705.236723\n", + " 7 NaN GBR 0 2013 20.823284\n", + " 8 NaN FRA 2001 2014 14634.188853\n", + " 9 NaN GBR 0 2014 20.823284\n", + " 10 NaN FRA 2001 2015 14740.741647\n", + " 11 NaN GBR 0 2015 22.642854\n", + " 12 NaN FRA 2001 2016 14855.025951\n", + " 13 NaN GBR 0 2016 22.642854\n", + " 14 NaN FRA 2001 2017 15045.455782\n", + " 15 NaN GBR 0 2017 22.642854\n", + " 16 NaN FRA 2001 2018 15141.730262\n", + " 17 NaN GBR 0 2018 22.647996\n", + " 18 NaN FRA 2001 2019 17142.864898\n", + " 19 NaN GBR 0 2019 22.647996\n", + " 20 NaN FRA 2001 2020 17145.566233\n", + " 21 NaN GBR 0 2020 22.647996\n", + " 22 NaN FRA 2001 2021 17466.477812\n", + " 23 NaN GBR 0 2021 22.647996\n", + " 24 NaN FRA 2001 2022 17468.195796\n", + " 25 NaN GBR 0 2022 22.647996\n", + " 26 NaN FRA 2001 2023 17468.239933\n", + " 27 NaN GBR 0 2023 22.647996\n", + " 28 NaN FRA 2001 2024 17468.239933\n", + " 29 NaN GBR 0 2024 22.647996,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 1986 2010 7126.701449\n", + " 1 NaN GBR 1986 2011 7129.148567\n", + " 2 NaN GBR 1986 2012 7131.397488\n", + " 3 NaN GBR 1986 2013 7135.405484\n", + " 4 NaN GBR 1986 2014 7136.231730\n", + " 5 NaN GBR 1986 2015 7136.747283\n", + " 6 NaN GBR 1986 2016 7136.976981\n", + " 7 NaN GBR 1986 2017 7147.177279\n", + " 8 NaN GBR 1986 2018 7147.789850\n", + " 9 
NaN GBR 1986 2019 7149.468513\n", + " 10 NaN GBR 1986 2020 7149.477333\n", + " 11 NaN GBR 1986 2021 7149.477333\n", + " 12 NaN GBR 1986 2022 7161.821517\n", + " 13 NaN GBR 1986 2023 7162.114278\n", + " 14 NaN GBR 1986 2024 7162.114278,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 0 2010 7135.954134\n", + " 1 NaN GBR 0 2011 7435.814331\n", + " 2 NaN GBR 0 2012 7436.292469\n", + " 3 NaN GBR 0 2013 7438.810784\n", + " 4 NaN GBR 0 2014 7440.655445\n", + " 5 NaN GBR 0 2015 7440.756527\n", + " 6 NaN GBR 0 2016 7475.223759\n", + " 7 NaN GBR 0 2017 7475.421597\n", + " 8 NaN GBR 0 2018 7476.677921\n", + " 9 NaN GBR 0 2019 7479.243029\n", + " 10 NaN GBR 0 2020 7480.871340\n", + " 11 NaN GBR 0 2021 7481.236999\n", + " 12 NaN GBR 0 2022 7486.765518\n", + " 13 NaN GBR 0 2023 7486.884905\n", + " 14 NaN GBR 0 2024 7486.884905,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2004 2010 32.365594\n", + " 1 1.0 GBR 2008 2010 9807.264271\n", + " 2 NaN FRA 2004 2011 32.424260\n", + " 3 NaN GBR 2008 2011 9808.816960\n", + " 4 NaN FRA 2004 2012 32.424260\n", + " 5 NaN GBR 2008 2012 9811.475065\n", + " 6 NaN FRA 2004 2013 32.424260\n", + " 7 NaN GBR 2008 2013 9812.589315\n", + " 8 NaN FRA 2004 2014 32.424260\n", + " 9 NaN GBR 2008 2014 9817.996447\n", + " 10 NaN FRA 2004 2015 32.424260\n", + " 11 NaN GBR 2008 2015 9818.002028\n", + " 12 NaN FRA 2004 2016 32.424260\n", + " 13 NaN GBR 2008 2016 9818.063012\n", + " 14 NaN FRA 2004 2017 32.491536\n", + " 15 NaN GBR 2008 2017 9818.575650\n", + " 16 NaN FRA 2004 2018 32.491536\n", + " 17 NaN GBR 2008 2018 9819.174867\n", + " 18 NaN FRA 2004 2019 32.491536\n", + " 19 NaN GBR 2008 2019 9819.318221\n", + " 20 NaN FRA 2004 2020 32.491536\n", + " 21 NaN GBR 2008 2020 9819.790826\n", + " 22 NaN FRA 2004 2021 32.491536\n", + " 23 NaN GBR 2008 2021 9842.099049\n", + " 24 NaN FRA 2004 2022 32.491536\n", + " 25 NaN GBR 2008 2022 9842.125333\n", + " 26 NaN FRA 2004 2023 32.499454\n", + " 27 NaN GBR 2008 2023 9842.288005\n", + " 28 
NaN FRA 2004 2024 32.499454\n", + " 29 NaN GBR 2008 2024 9842.288005,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 1966 2010 7656.877875\n", + " 1 NaN GBR 1966 2011 7661.245942\n", + " 2 NaN GBR 1966 2012 7662.999767\n", + " 3 NaN GBR 1966 2013 7667.552266\n", + " 4 NaN GBR 1966 2014 7669.996366\n", + " 5 NaN GBR 1966 2015 7673.222999\n", + " 6 NaN GBR 1966 2016 7716.396239\n", + " 7 NaN GBR 1966 2017 7716.432746\n", + " 8 NaN GBR 1966 2018 7717.407357\n", + " 9 NaN GBR 1966 2019 7718.556282\n", + " 10 NaN GBR 1966 2020 7719.103489\n", + " 11 NaN GBR 1966 2021 7723.278630\n", + " 12 NaN GBR 1966 2022 7723.658468\n", + " 13 NaN GBR 1966 2023 7723.702280\n", + " 14 NaN GBR 1966 2024 7723.702280,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 GBR 0 2010 30181.555606\n", + " 1 1.0 IRL 2004 2010 790.298963\n", + " 2 NaN GBR 0 2011 30187.761969\n", + " 3 NaN IRL 2004 2011 797.281457\n", + " 4 NaN DNK None 2012 31.955601\n", + " 5 NaN GBR 0 2012 30189.580037\n", + " 6 NaN IRL 2004 2012 798.195858\n", + " 7 NaN DNK None 2013 31.955601\n", + " 8 NaN GBR 0 2013 30193.655107\n", + " 9 NaN IRL 2004 2013 798.195858\n", + " 10 NaN DNK None 2014 31.955601\n", + " 11 NaN GBR 0 2014 30195.194497\n", + " 12 NaN IRL 2004 2014 798.195858\n", + " 13 NaN DNK None 2015 31.955601\n", + " 14 NaN GBR 0 2015 30199.996169\n", + " 15 NaN IRL 2004 2015 798.195858\n", + " 16 NaN DNK None 2016 31.955601\n", + " 17 NaN GBR 0 2016 30200.615984\n", + " 18 NaN IRL 2004 2016 799.207440\n", + " 19 NaN DNK None 2017 31.955601\n", + " 20 NaN GBR 0 2017 30202.426416\n", + " 21 NaN IRL 2004 2017 799.666711\n", + " 22 NaN DNK None 2018 31.955601\n", + " 23 NaN GBR 0 2018 30204.023336\n", + " 24 NaN IRL 2004 2018 814.886712\n", + " 25 NaN DNK None 2019 31.955601\n", + " 26 NaN GBR 0 2019 30204.029924\n", + " 27 NaN IRL 2004 2019 852.220306\n", + " 28 NaN DNK None 2020 31.955601\n", + " 29 NaN GBR 0 2020 30204.035232\n", + " 30 NaN IRL 2004 2020 852.220306\n", + " 31 NaN DNK None 2021 
31.955601\n", + " 32 NaN GBR 0 2021 30205.556711\n", + " 33 NaN IRL 2004 2021 937.738222\n", + " 34 NaN DNK None 2022 31.955601\n", + " 35 NaN GBR 0 2022 30209.474434\n", + " 36 NaN IRL 2004 2022 945.844296\n", + " 37 NaN DNK None 2023 31.955601\n", + " 38 NaN GBR 0 2023 30209.529537\n", + " 39 NaN IRL 2004 2023 945.844296\n", + " 40 NaN DNK None 2024 31.955601\n", + " 41 NaN GBR 0 2024 30209.529537\n", + " 42 NaN IRL 2004 2024 945.844296,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CMR 2003 2010 1645.166779\n", + " 1 1.0 GAB 1986 2010 15204.622692\n", + " 2 2.0 GNQ 1990 2010 2772.727066\n", + " 3 3.0 NGA 0 2010 736.379400\n", + " 4 4.0 STP 2006 2010 533.105993\n", + " .. ... ... ... ... ...\n", + " 70 NaN CMR 2003 2024 3309.101416\n", + " 71 NaN GAB 1986 2024 15523.984617\n", + " 72 NaN GNQ 1990 2024 2772.727066\n", + " 73 NaN NGA 0 2024 767.162758\n", + " 74 NaN STP 2006 2024 533.105993\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BEN 2005 2010 51103.491245\n", + " 1 1.0 BFA 1980 2010 28155.481036\n", + " 2 2.0 CMR 2010 2010 9474.042670\n", + " 3 3.0 GHA 1992 2010 4756.331018\n", + " 4 4.0 MLI 1970 2010 1728.119704\n", + " .. ... ... ... ... ...\n", + " 115 NaN GHA 1992 2024 4756.331018\n", + " 116 NaN MLI 1970 2024 9248.875554\n", + " 117 NaN NER 0 2024 38061.253093\n", + " 118 NaN NGA 2010 2024 91605.334656\n", + " 119 NaN TGO 2005 2024 8209.607990\n", + " \n", + " [120 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BFA 1970 2010 900.593107\n", + " 1 1.0 DZA 2003 2010 2027.092429\n", + " 2 2.0 LBY 1982 2010 1492.254523\n", + " 3 3.0 MLI 1970 2010 20111.284656\n", + " 4 4.0 NER 1970 2010 68679.243740\n", + " .. ... ... ... ... 
...\n", + " 70 NaN BFA 1970 2024 5472.581781\n", + " 71 NaN DZA 2003 2024 2027.092429\n", + " 72 NaN LBY 1982 2024 1492.254523\n", + " 73 NaN MLI 1970 2024 24680.093794\n", + " 74 NaN NER 1970 2024 85914.963533\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DZA 1982 2010 70521.534520\n", + " 1 1.0 LBY 1982 2010 55206.904776\n", + " 2 2.0 NER 1982 2010 55206.904776\n", + " 3 3.0 TUN 2010 2010 10070.626005\n", + " 4 NaN DZA 1982 2011 70521.534520\n", + " 5 NaN LBY 1982 2011 55206.904776\n", + " 6 NaN NER 1982 2011 55206.904776\n", + " 7 NaN TUN 2010 2011 10070.626005\n", + " 8 NaN DZA 1982 2012 70521.534520\n", + " 9 NaN LBY 1982 2012 55206.904776\n", + " 10 NaN NER 1982 2012 55206.904776\n", + " 11 NaN TUN 2010 2012 10123.153146\n", + " 12 NaN DZA 1982 2013 70521.534520\n", + " 13 NaN LBY 1982 2013 55206.904776\n", + " 14 NaN NER 1982 2013 55206.904776\n", + " 15 NaN TUN 2010 2013 10123.153146\n", + " 16 NaN DZA 1982 2014 70521.534520\n", + " 17 NaN LBY 1982 2014 55206.904776\n", + " 18 NaN NER 1982 2014 55206.904776\n", + " 19 NaN TUN 2010 2014 10123.153146\n", + " 20 NaN DZA 1982 2015 70521.534520\n", + " 21 NaN LBY 1982 2015 55206.904776\n", + " 22 NaN NER 1982 2015 55206.904776\n", + " 23 NaN TUN 2010 2015 10123.153146\n", + " 24 NaN DZA 1982 2016 70521.534520\n", + " 25 NaN LBY 1982 2016 55206.904776\n", + " 26 NaN NER 1982 2016 55206.904776\n", + " 27 NaN TUN 2010 2016 10123.153146\n", + " 28 NaN DZA 1982 2017 70521.534520\n", + " 29 NaN LBY 1982 2017 55206.904776\n", + " 30 NaN NER 1982 2017 55206.904776\n", + " 31 NaN TUN 2010 2017 10123.153146\n", + " 32 NaN DZA 1982 2018 70521.534520\n", + " 33 NaN LBY 1982 2018 55206.904776\n", + " 34 NaN NER 1982 2018 55206.904776\n", + " 35 NaN TUN 2010 2018 10123.153146\n", + " 36 NaN DZA 1982 2019 70521.534520\n", + " 37 NaN LBY 1982 2019 55206.904776\n", + " 38 NaN NER 1982 2019 55206.904776\n", + " 39 NaN TUN 2010 2019 10123.153146\n", + " 40 NaN DZA 1982 2020 
70521.534520\n", + " 41 NaN LBY 1982 2020 55206.904776\n", + " 42 NaN NER 1982 2020 55206.904776\n", + " 43 NaN TUN 2010 2020 10123.153146\n", + " 44 NaN DZA 1982 2021 70521.534520\n", + " 45 NaN LBY 1982 2021 55206.904776\n", + " 46 NaN NER 1982 2021 55206.904776\n", + " 47 NaN TUN 2010 2021 10123.153146\n", + " 48 NaN DZA 1982 2022 70521.534520\n", + " 49 NaN LBY 1982 2022 55206.904776\n", + " 50 NaN NER 1982 2022 55206.904776\n", + " 51 NaN TUN 2010 2022 10123.153146\n", + " 52 NaN DZA 1982 2023 70521.534520\n", + " 53 NaN LBY 1982 2023 55206.904776\n", + " 54 NaN NER 1982 2023 55206.904776\n", + " 55 NaN TUN 2010 2023 10123.153146\n", + " 56 NaN DZA 1982 2024 70521.534520\n", + " 57 NaN LBY 1982 2024 55206.904776\n", + " 58 NaN NER 1982 2024 55206.904776\n", + " 59 NaN TUN 2010 2024 10123.153146,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 3.323951\n", + " 1 1.0 AND 2001 2010 5140.801755\n", + " 2 2.0 AUT 2007 2010 3.323951\n", + " 3 3.0 BEL 2007 2010 3.323951\n", + " 4 4.0 BGR 2007 2010 3.323951\n", + " .. ... ... ... ... 
...\n", + " 295 NaN ROU 2007 2024 3.323951\n", + " 296 NaN SVK 2007 2024 3.323951\n", + " 297 NaN SVN 2007 2024 3.323951\n", + " 298 NaN TUN 2010 2024 1684.956106\n", + " 299 NaN UKR 2007 2024 3.323951\n", + " \n", + " [300 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 1999 2010 49202.546090\n", + " 1 NaN FRA 1999 2011 49274.690769\n", + " 2 NaN FRA 1999 2012 49434.993808\n", + " 3 NaN FRA 1999 2013 49503.081410\n", + " 4 NaN FRA 1999 2014 49599.005466\n", + " 5 NaN FRA 1999 2015 49767.596752\n", + " 6 NaN FRA 1999 2016 50049.937643\n", + " 7 NaN FRA 1999 2017 50233.941056\n", + " 8 NaN FRA 1999 2018 51538.154141\n", + " 9 NaN FRA 1999 2019 52484.273473\n", + " 10 NaN FRA 1999 2020 52485.869910\n", + " 11 NaN FRA 1999 2021 52737.005293\n", + " 12 NaN FRA 1999 2022 52743.646575\n", + " 13 NaN FRA 1999 2023 52744.093571\n", + " 14 NaN FRA 1999 2024 52748.143583,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 2.686574\n", + " 1 1.0 AUT 2007 2010 2.686574\n", + " 2 2.0 BEL 2007 2010 3453.752701\n", + " 3 3.0 BGR 2007 2010 2.686574\n", + " 4 4.0 BIH 2007 2010 2.686574\n", + " .. ... ... ... ... ...\n", + " 280 NaN POL 2007 2024 2.686574\n", + " 281 NaN ROU 2007 2024 2.686574\n", + " 282 NaN SVK 2007 2024 2.686574\n", + " 283 NaN SVN 2007 2024 2.686574\n", + " 284 NaN UKR 2007 2024 2.686574\n", + " \n", + " [285 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 2.110611\n", + " 1 1.0 AUT 2007 2010 2.110611\n", + " 2 2.0 BEL 2007 2010 2.110611\n", + " 3 3.0 BGR 2007 2010 2.110611\n", + " 4 4.0 BIH 2007 2010 2.110611\n", + " .. ... ... ... ... 
...\n", + " 250 NaN POL 2007 2024 2.110611\n", + " 251 NaN ROU 2007 2024 2.110611\n", + " 252 NaN SVK 2007 2024 2.110611\n", + " 253 NaN SVN 2007 2024 2.110611\n", + " 254 NaN UKR 2007 2024 2.110611\n", + " \n", + " [255 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 10.317626\n", + " 1 1.0 AUT 2007 2010 10.317626\n", + " 2 2.0 BEL 2007 2010 11.950758\n", + " 3 3.0 BGR 2007 2010 10.317626\n", + " 4 4.0 BIH 2007 2010 10.317626\n", + " .. ... ... ... ... ...\n", + " 265 NaN POL 2007 2024 10.317626\n", + " 266 NaN ROU 2007 2024 10.317626\n", + " 267 NaN SVK 2007 2024 10.317626\n", + " 268 NaN SVN 2007 2024 10.317626\n", + " 269 NaN UKR 2007 2024 10.317626\n", + " \n", + " [270 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 7.681977\n", + " 1 1.0 AUT 2009 2010 87.207955\n", + " 2 2.0 BEL 2007 2010 7.681977\n", + " 3 3.0 BGR 2007 2010 7.681977\n", + " 4 4.0 BIH 2007 2010 7.681977\n", + " .. ... ... ... ... ...\n", + " 265 NaN POL 2007 2024 7.681977\n", + " 266 NaN ROU 2007 2024 7.681977\n", + " 267 NaN SVK 2007 2024 7.681977\n", + " 268 NaN SVN 2007 2024 7.681977\n", + " 269 NaN UKR 2007 2024 7.681977\n", + " \n", + " [270 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHE 1994 2010 485.181967\n", + " 1 1.0 DEU 2004 2010 2541.445925\n", + " 2 2.0 FRA 2002 2010 286.586247\n", + " 3 NaN CHE 1994 2011 485.181967\n", + " 4 NaN DEU 2004 2011 2541.445925\n", + " 5 NaN FRA 2002 2011 286.586247\n", + " 6 NaN CHE 1994 2012 485.181967\n", + " 7 NaN DEU 2004 2012 2541.445925\n", + " 8 NaN FRA 2002 2012 286.736354\n", + " 9 NaN CHE 1994 2013 499.041721\n", + " 10 NaN DEU 2004 2013 2555.305666\n", + " 11 NaN FRA 2002 2013 286.736267\n", + " 12 NaN CHE 1994 2014 499.041721\n", + " 13 NaN DEU 2004 2014 2555.305666\n", + " 14 NaN FRA 2002 2014 286.736267\n", + " 15 NaN CHE 1994 2015 499.041721\n", + " 16 NaN DEU 2004 2015 2555.305666\n", + " 17 NaN FRA 2002 2015 286.736267\n", + " 
18 NaN CHE 1994 2016 503.450310\n", + " 19 NaN DEU 2004 2016 2555.306067\n", + " 20 NaN FRA 2002 2016 286.852865\n", + " 21 NaN CHE 1994 2017 513.563511\n", + " 22 NaN DEU 2004 2017 2585.544144\n", + " 23 NaN FRA 2002 2017 300.742337\n", + " 24 NaN CHE 1994 2018 522.996012\n", + " 25 NaN DEU 2004 2018 2585.600552\n", + " 26 NaN FRA 2002 2018 300.742337\n", + " 27 NaN CHE 1994 2019 589.885766\n", + " 28 NaN DEU 2004 2019 3142.347890\n", + " 29 NaN FRA 2002 2019 316.886990\n", + " 30 NaN CHE 1994 2020 589.885766\n", + " 31 NaN DEU 2004 2020 3142.347890\n", + " 32 NaN FRA 2002 2020 316.886990\n", + " 33 NaN CHE 1994 2021 589.885766\n", + " 34 NaN DEU 2004 2021 3144.875968\n", + " 35 NaN FRA 2002 2021 319.248673\n", + " 36 NaN CHE 1994 2022 589.885766\n", + " 37 NaN DEU 2004 2022 3144.880402\n", + " 38 NaN FRA 2002 2022 319.261204\n", + " 39 NaN CHE 1994 2023 760.761068\n", + " 40 NaN DEU 2004 2023 3145.967319\n", + " 41 NaN FRA 2002 2023 319.261805\n", + " 42 NaN CHE 1994 2024 760.761068\n", + " 43 NaN DEU 2004 2024 3145.967319\n", + " 44 NaN FRA 2002 2024 319.261805,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2008 2010 5257.048470\n", + " 1 1.0 FRA 2008 2010 3128.883764\n", + " 2 NaN DEU 2008 2011 5259.109284\n", + " 3 NaN FRA 2008 2011 3128.883764\n", + " 4 NaN DEU 2008 2012 5259.109284\n", + " 5 NaN FRA 2008 2012 3138.066486\n", + " 6 NaN DEU 2008 2013 5261.088651\n", + " 7 NaN FRA 2008 2013 3138.080023\n", + " 8 NaN DEU 2008 2014 5271.674351\n", + " 9 NaN FRA 2008 2014 3138.550845\n", + " 10 NaN DEU 2008 2015 5274.981168\n", + " 11 NaN FRA 2008 2015 3138.589821\n", + " 12 NaN DEU 2008 2016 5277.612945\n", + " 13 NaN FRA 2008 2016 3138.881253\n", + " 14 NaN DEU 2008 2017 5311.905884\n", + " 15 NaN FRA 2008 2017 3157.543298\n", + " 16 NaN DEU 2008 2018 5319.426873\n", + " 17 NaN FRA 2008 2018 3157.551271\n", + " 18 NaN DEU 2008 2019 5537.471684\n", + " 19 NaN FRA 2008 2019 3197.986367\n", + " 20 NaN DEU 2008 2020 5537.916377\n", + " 21 NaN FRA 2008 
2020 3199.293686\n", + " 22 NaN DEU 2008 2021 5544.753356\n", + " 23 NaN FRA 2008 2021 3205.386363\n", + " 24 NaN DEU 2008 2022 5544.753356\n", + " 25 NaN FRA 2008 2022 3205.421978\n", + " 26 NaN DEU 2008 2023 5544.753356\n", + " 27 NaN FRA 2008 2023 3205.421978\n", + " 28 NaN DEU 2008 2024 5544.753356\n", + " 29 NaN FRA 2008 2024 3205.421978,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUT 1978 2010 313.568205\n", + " 1 1.0 CHE 2008 2010 341.804407\n", + " 2 2.0 DEU 1995 2010 1822.616473\n", + " 3 3.0 LIE 2000 2010 71.013081\n", + " 4 NaN AUT 1978 2011 313.568205\n", + " 5 NaN CHE 2008 2011 341.804407\n", + " 6 NaN DEU 1995 2011 1824.342188\n", + " 7 NaN LIE 2000 2011 71.153712\n", + " 8 NaN AUT 1978 2012 313.568205\n", + " 9 NaN CHE 2008 2012 341.804407\n", + " 10 NaN DEU 1995 2012 1824.342188\n", + " 11 NaN LIE 2000 2012 71.153712\n", + " 12 NaN AUT 1978 2013 390.074942\n", + " 13 NaN CHE 2008 2013 341.804407\n", + " 14 NaN DEU 1995 2013 1838.952427\n", + " 15 NaN LIE 2000 2013 71.799567\n", + " 16 NaN AUT 1978 2014 390.074942\n", + " 17 NaN CHE 2008 2014 341.804407\n", + " 18 NaN DEU 1995 2014 1838.952427\n", + " 19 NaN LIE 2000 2014 71.799567\n", + " 20 NaN AUT 1978 2015 390.074942\n", + " 21 NaN CHE 2008 2015 341.804407\n", + " 22 NaN DEU 1995 2015 1846.648346\n", + " 23 NaN LIE 2000 2015 71.799567\n", + " 24 NaN AUT 1978 2016 391.397452\n", + " 25 NaN CHE 2008 2016 341.804407\n", + " 26 NaN DEU 1995 2016 1853.599648\n", + " 27 NaN LIE 2000 2016 71.799567\n", + " 28 NaN AUT 1978 2017 391.579034\n", + " 29 NaN CHE 2008 2017 344.221770\n", + " 30 NaN DEU 1995 2017 1874.690330\n", + " 31 NaN LIE 2000 2017 71.799567\n", + " 32 NaN AUT 1978 2018 391.579034\n", + " 33 NaN CHE 2008 2018 350.641179\n", + " 34 NaN DEU 1995 2018 1874.690330\n", + " 35 NaN LIE 2000 2018 71.799567\n", + " 36 NaN AUT 1978 2019 391.579034\n", + " 37 NaN CHE 2008 2019 471.892081\n", + " 38 NaN DEU 1995 2019 2087.892054\n", + " 39 NaN LIE 2000 2019 71.799567\n", + " 40 NaN AUT 1978 
2020 496.014041\n", + " 41 NaN CHE 2008 2020 471.892081\n", + " 42 NaN DEU 1995 2020 2186.300349\n", + " 43 NaN LIE 2000 2020 71.799567\n", + " 44 NaN AUT 1978 2021 496.014041\n", + " 45 NaN CHE 2008 2021 471.892081\n", + " 46 NaN DEU 1995 2021 2186.488671\n", + " 47 NaN LIE 2000 2021 71.799567\n", + " 48 NaN AUT 1978 2022 498.360746\n", + " 49 NaN CHE 2008 2022 471.892631\n", + " 50 NaN DEU 1995 2022 2186.489221\n", + " 51 NaN LIE 2000 2022 71.799567\n", + " 52 NaN AUT 1978 2023 498.360746\n", + " 53 NaN CHE 2008 2023 555.863278\n", + " 54 NaN DEU 1995 2023 2186.489221\n", + " 55 NaN LIE 2000 2023 71.799567\n", + " 56 NaN AUT 1978 2024 498.360746\n", + " 57 NaN CHE 2008 2024 555.863278\n", + " 58 NaN DEU 1995 2024 2186.489221\n", + " 59 NaN LIE 2000 2024 71.799567,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2007 2010 4313.658283\n", + " 1 NaN DEU 2007 2011 4314.832091\n", + " 2 NaN DEU 2007 2012 4317.661221\n", + " 3 NaN DEU 2007 2013 4317.662180\n", + " 4 NaN DEU 2007 2014 4317.668036\n", + " 5 NaN DEU 2007 2015 4317.668161\n", + " 6 NaN DEU 2007 2016 4317.770899\n", + " 7 NaN DEU 2007 2017 4318.402252\n", + " 8 NaN DEU 2007 2018 4318.402252\n", + " 9 NaN DEU 2007 2019 4660.599738\n", + " 10 NaN DEU 2007 2020 4660.599738\n", + " 11 NaN DEU 2007 2021 4660.599738\n", + " 12 NaN DEU 2007 2022 4660.599738\n", + " 13 NaN DEU 2007 2023 4660.599738\n", + " 14 NaN DEU 2007 2024 4660.599738,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BEL 2000 2010 1109.797842\n", + " 1 1.0 DEU 2009 2010 437.945382\n", + " 2 2.0 FRA 0 2010 110.743407\n", + " 3 3.0 LUX 2009 2010 920.136260\n", + " 4 4.0 NLD 0 2010 82.042346\n", + " .. ... ... ... ... 
...\n", + " 70 NaN BEL 2000 2024 2533.501065\n", + " 71 NaN DEU 2009 2024 520.747250\n", + " 72 NaN FRA 0 2024 337.435521\n", + " 73 NaN LUX 2009 2024 1078.302695\n", + " 74 NaN NLD 0 2024 157.514014\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BEL 2003 2010 814.705569\n", + " 1 1.0 DEU 2005 2010 836.567329\n", + " 2 2.0 NLD 2003 2010 1510.476012\n", + " 3 NaN BEL 2003 2011 814.705569\n", + " 4 NaN DEU 2005 2011 836.601149\n", + " 5 NaN NLD 2003 2011 1510.510963\n", + " 6 NaN BEL 2003 2012 814.705569\n", + " 7 NaN DEU 2005 2012 836.601149\n", + " 8 NaN NLD 2003 2012 1510.510963\n", + " 9 NaN BEL 2003 2013 836.586168\n", + " 10 NaN DEU 2005 2013 858.893743\n", + " 11 NaN NLD 2003 2013 1631.812849\n", + " 12 NaN BEL 2003 2014 895.335412\n", + " 13 NaN DEU 2005 2014 1263.433458\n", + " 14 NaN NLD 2003 2014 1812.938327\n", + " 15 NaN BEL 2003 2015 1736.470031\n", + " 16 NaN DEU 2005 2015 1272.733734\n", + " 17 NaN NLD 2003 2015 2526.899261\n", + " 18 NaN BEL 2003 2016 1736.470031\n", + " 19 NaN DEU 2005 2016 1349.367543\n", + " 20 NaN NLD 2003 2016 2549.936530\n", + " 21 NaN BEL 2003 2017 2186.665542\n", + " 22 NaN DEU 2005 2017 1740.390515\n", + " 23 NaN NLD 2003 2017 2877.670441\n", + " 24 NaN BEL 2003 2018 2194.487020\n", + " 25 NaN DEU 2005 2018 1742.146789\n", + " 26 NaN NLD 2003 2018 2880.056324\n", + " 27 NaN BEL 2003 2019 2247.659314\n", + " 28 NaN DEU 2005 2019 1742.146789\n", + " 29 NaN NLD 2003 2019 2993.455165\n", + " 30 NaN BEL 2003 2020 2251.172541\n", + " 31 NaN DEU 2005 2020 1742.146789\n", + " 32 NaN NLD 2003 2020 2993.455165\n", + " 33 NaN BEL 2003 2021 2261.067478\n", + " 34 NaN DEU 2005 2021 1742.146789\n", + " 35 NaN NLD 2003 2021 2993.473919\n", + " 36 NaN BEL 2003 2022 2286.372769\n", + " 37 NaN DEU 2005 2022 1742.146789\n", + " 38 NaN NLD 2003 2022 2998.077044\n", + " 39 NaN BEL 2003 2023 2313.167884\n", + " 40 NaN DEU 2005 2023 1742.146789\n", + " 41 NaN NLD 2003 2023 2998.275893\n", + " 42 NaN 
BEL 2003 2024 2313.167884\n", + " 43 NaN DEU 2005 2024 1742.146789\n", + " 44 NaN NLD 2003 2024 2998.275893,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BEL 1970 2010 625.812817\n", + " 1 1.0 DEU 1976 2010 6594.525700\n", + " 2 2.0 LUX 2005 2010 204.063185\n", + " 3 3.0 NLD 2006 2010 2.934692\n", + " 4 NaN BEL 1970 2011 625.812817\n", + " 5 NaN DEU 1976 2011 6598.116690\n", + " 6 NaN LUX 2005 2011 204.063185\n", + " 7 NaN NLD 2006 2011 2.934692\n", + " 8 NaN BEL 1970 2012 625.812817\n", + " 9 NaN DEU 1976 2012 6598.116690\n", + " 10 NaN LUX 2005 2012 204.064504\n", + " 11 NaN NLD 2006 2012 2.934692\n", + " 12 NaN BEL 1970 2013 626.628068\n", + " 13 NaN DEU 1976 2013 6625.283164\n", + " 14 NaN LUX 2005 2013 204.064504\n", + " 15 NaN NLD 2006 2013 2.934692\n", + " 16 NaN BEL 1970 2014 626.736879\n", + " 17 NaN DEU 1976 2014 6673.251004\n", + " 18 NaN LUX 2005 2014 204.066325\n", + " 19 NaN NLD 2006 2014 2.934692\n", + " 20 NaN BEL 1970 2015 626.736879\n", + " 21 NaN DEU 1976 2015 6692.895929\n", + " 22 NaN LUX 2005 2015 210.196056\n", + " 23 NaN NLD 2006 2015 2.934692\n", + " 24 NaN BEL 1970 2016 657.454389\n", + " 25 NaN DEU 1976 2016 6741.526975\n", + " 26 NaN LUX 2005 2016 210.200652\n", + " 27 NaN NLD 2006 2016 2.934692\n", + " 28 NaN BEL 1970 2017 657.454389\n", + " 29 NaN DEU 1976 2017 6747.772691\n", + " 30 NaN LUX 2005 2017 222.546539\n", + " 31 NaN NLD 2006 2017 2.934692\n", + " 32 NaN BEL 1970 2018 657.454389\n", + " 33 NaN DEU 1976 2018 6747.788478\n", + " 34 NaN LUX 2005 2018 222.547343\n", + " 35 NaN NLD 2006 2018 2.934692\n", + " 36 NaN BEL 1970 2019 657.456370\n", + " 37 NaN DEU 1976 2019 6747.790459\n", + " 38 NaN LUX 2005 2019 222.547343\n", + " 39 NaN NLD 2006 2019 2.934692\n", + " 40 NaN BEL 1970 2020 657.615752\n", + " 41 NaN DEU 1976 2020 6762.627638\n", + " 42 NaN LUX 2005 2020 230.450691\n", + " 43 NaN NLD 2006 2020 2.934692\n", + " 44 NaN BEL 1970 2021 657.615752\n", + " 45 NaN DEU 1976 2021 6762.627638\n", + " 46 NaN LUX 2005 2021 
232.632644\n", + " 47 NaN NLD 2006 2021 2.934692\n", + " 48 NaN BEL 1970 2022 657.615752\n", + " 49 NaN DEU 1976 2022 6762.627638\n", + " 50 NaN LUX 2005 2022 233.384043\n", + " 51 NaN NLD 2006 2022 2.934692\n", + " 52 NaN BEL 1970 2023 657.615752\n", + " 53 NaN DEU 1976 2023 6762.627638\n", + " 54 NaN LUX 2005 2023 233.384043\n", + " 55 NaN NLD 2006 2023 2.934692\n", + " 56 NaN BEL 1970 2024 657.615752\n", + " 57 NaN DEU 1976 2024 6762.627638\n", + " 58 NaN LUX 2005 2024 233.384043\n", + " 59 NaN NLD 2006 2024 2.934692,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2005 2010 3731.295895\n", + " 1 1.0 NLD 1983 2010 339.283447\n", + " 2 NaN DEU 2005 2011 4053.994846\n", + " 3 NaN NLD 1983 2011 339.283447\n", + " 4 NaN DEU 2005 2012 4555.017690\n", + " 5 NaN NLD 1983 2012 339.428613\n", + " 6 NaN DEU 2005 2013 4859.736913\n", + " 7 NaN NLD 1983 2013 356.015010\n", + " 8 NaN DEU 2005 2014 5041.069798\n", + " 9 NaN NLD 1983 2014 440.544461\n", + " 10 NaN DEU 2005 2015 5157.870965\n", + " 11 NaN NLD 1983 2015 440.544486\n", + " 12 NaN DEU 2005 2016 5523.465219\n", + " 13 NaN NLD 1983 2016 456.368289\n", + " 14 NaN DEU 2005 2017 5566.904255\n", + " 15 NaN NLD 1983 2017 484.674851\n", + " 16 NaN DEU 2005 2018 5620.706718\n", + " 17 NaN NLD 1983 2018 484.674851\n", + " 18 NaN DEU 2005 2019 5620.827058\n", + " 19 NaN NLD 1983 2019 484.674851\n", + " 20 NaN DEU 2005 2020 5756.995102\n", + " 21 NaN NLD 1983 2020 484.674851\n", + " 22 NaN DEU 2005 2021 5756.995102\n", + " 23 NaN NLD 1983 2021 484.674851\n", + " 24 NaN DEU 2005 2022 5756.995102\n", + " 25 NaN NLD 1983 2022 484.674851\n", + " 26 NaN DEU 2005 2023 5756.995102\n", + " 27 NaN NLD 1983 2023 484.674851\n", + " 28 NaN DEU 2005 2024 5756.995102\n", + " 29 NaN NLD 1983 2024 484.674851,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2002 2010 939.873181\n", + " 1 1.0 NLD 1985 2010 2259.833686\n", + " 2 NaN DEU 2002 2011 948.743434\n", + " 3 NaN NLD 1985 2011 2259.840886\n", + " 4 NaN DEU 2002 2012 
950.939774\n", + " 5 NaN NLD 1985 2012 2259.840886\n", + " 6 NaN DEU 2002 2013 984.264532\n", + " 7 NaN NLD 1985 2013 2421.584393\n", + " 8 NaN DEU 2002 2014 1783.982870\n", + " 9 NaN NLD 1985 2014 3420.063854\n", + " 10 NaN DEU 2002 2015 2478.954172\n", + " 11 NaN NLD 1985 2015 3891.380924\n", + " 12 NaN DEU 2002 2016 2760.792357\n", + " 13 NaN NLD 1985 2016 4072.410228\n", + " 14 NaN DEU 2002 2017 3464.025560\n", + " 15 NaN NLD 1985 2017 4458.951645\n", + " 16 NaN DEU 2002 2018 3484.452347\n", + " 17 NaN NLD 1985 2018 5201.574168\n", + " 18 NaN DEU 2002 2019 3542.318057\n", + " 19 NaN NLD 1985 2019 5201.574168\n", + " 20 NaN DEU 2002 2020 3547.186918\n", + " 21 NaN NLD 1985 2020 5201.574168\n", + " 22 NaN DEU 2002 2021 3694.398199\n", + " 23 NaN NLD 1985 2021 5562.266350\n", + " 24 NaN DEU 2002 2022 3827.132059\n", + " 25 NaN NLD 1985 2022 5562.266350\n", + " 26 NaN DEU 2002 2023 3827.132059\n", + " 27 NaN NLD 1985 2023 5562.266350\n", + " 28 NaN DEU 2002 2024 3827.132059\n", + " 29 NaN NLD 1985 2024 5562.266350,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 1971 2010 3798.447631\n", + " 1 1.0 FRA 1971 2010 10.154437\n", + " 2 NaN DEU 1971 2011 3836.104712\n", + " 3 NaN FRA 1971 2011 10.154437\n", + " 4 NaN DEU 1971 2012 3860.438861\n", + " 5 NaN FRA 1971 2012 10.154437\n", + " 6 NaN DEU 1971 2013 3862.894104\n", + " 7 NaN FRA 1971 2013 10.154437\n", + " 8 NaN DEU 1971 2014 3862.903038\n", + " 9 NaN FRA 1971 2014 10.154437\n", + " 10 NaN DEU 1971 2015 3864.289822\n", + " 11 NaN FRA 1971 2015 10.154437\n", + " 12 NaN DEU 1971 2016 3864.289822\n", + " 13 NaN FRA 1971 2016 10.154437\n", + " 14 NaN DEU 1971 2017 3865.141382\n", + " 15 NaN FRA 1971 2017 10.154437\n", + " 16 NaN DEU 1971 2018 3865.240320\n", + " 17 NaN FRA 1971 2018 10.154437\n", + " 18 NaN DEU 1971 2019 3865.557221\n", + " 19 NaN FRA 1971 2019 10.154437\n", + " 20 NaN DEU 1971 2020 3865.946754\n", + " 21 NaN FRA 1971 2020 10.154437\n", + " 22 NaN DEU 1971 2021 3865.946754\n", + " 23 NaN 
FRA 1971 2021 10.154437\n", + " 24 NaN DEU 1971 2022 3865.958886\n", + " 25 NaN FRA 1971 2022 10.154437\n", + " 26 NaN DEU 1971 2023 3865.958886\n", + " 27 NaN FRA 1971 2023 10.154437\n", + " 28 NaN DEU 1971 2024 3865.958886\n", + " 29 NaN FRA 1971 2024 10.154437,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2008 2010 6669.829958\n", + " 1 NaN DEU 2008 2011 6783.067059\n", + " 2 NaN DEU 2008 2012 6906.743499\n", + " 3 NaN DEU 2008 2013 7216.435212\n", + " 4 NaN DEU 2008 2014 7305.063313\n", + " 5 NaN DEU 2008 2015 7360.934847\n", + " 6 NaN DEU 2008 2016 7733.339815\n", + " 7 NaN DEU 2008 2017 7807.285206\n", + " 8 NaN DEU 2008 2018 7816.651791\n", + " 9 NaN DEU 2008 2019 7817.405971\n", + " 10 NaN DEU 2008 2020 7863.446885\n", + " 11 NaN DEU 2008 2021 7865.880912\n", + " 12 NaN DEU 2008 2022 7865.881969\n", + " 13 NaN DEU 2008 2023 7865.881969\n", + " 14 NaN DEU 2008 2024 7865.881969,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2008 2010 4894.332972\n", + " 1 NaN DEU 2008 2011 4905.066433\n", + " 2 NaN DEU 2008 2012 4905.066433\n", + " 3 NaN DEU 2008 2013 4949.447601\n", + " 4 NaN DEU 2008 2014 4950.091086\n", + " 5 NaN DEU 2008 2015 4968.436590\n", + " 6 NaN DEU 2008 2016 5091.092669\n", + " 7 NaN DEU 2008 2017 5091.092669\n", + " 8 NaN DEU 2008 2018 5091.485248\n", + " 9 NaN DEU 2008 2019 5116.241003\n", + " 10 NaN DEU 2008 2020 5116.241003\n", + " 11 NaN DEU 2008 2021 5116.241003\n", + " 12 NaN DEU 2008 2022 5116.284140\n", + " 13 NaN DEU 2008 2023 5116.451057\n", + " 14 NaN DEU 2008 2024 5116.451057,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 14.624697\n", + " 1 1.0 AUT 2007 2010 14.624697\n", + " 2 2.0 BEL 2007 2010 14.624697\n", + " 3 3.0 BGR 2007 2010 14.624697\n", + " 4 4.0 BIH 2007 2010 14.624697\n", + " .. ... ... ... ... 
...\n", + " 250 NaN POL 2007 2024 14.624697\n", + " 251 NaN ROU 2007 2024 14.624697\n", + " 252 NaN SVK 2007 2024 14.624697\n", + " 253 NaN SVN 2007 2024 14.624697\n", + " 254 NaN UKR 2007 2024 14.624697\n", + " \n", + " [255 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2005 2010 6361.944068\n", + " 1 NaN DEU 2005 2011 6524.134675\n", + " 2 NaN DEU 2005 2012 6643.811721\n", + " 3 NaN DEU 2005 2013 6813.282585\n", + " 4 NaN DEU 2005 2014 6889.365493\n", + " 5 NaN DEU 2005 2015 6941.578610\n", + " 6 NaN DEU 2005 2016 7151.533620\n", + " 7 NaN DEU 2005 2017 7331.630969\n", + " 8 NaN DEU 2005 2018 7894.855418\n", + " 9 NaN DEU 2005 2019 8405.423033\n", + " 10 NaN DEU 2005 2020 8549.441529\n", + " 11 NaN DEU 2005 2021 9808.080052\n", + " 12 NaN NLD None 2021 549.088094\n", + " 13 NaN DEU 2005 2022 10306.147653\n", + " 14 NaN NLD None 2022 549.088094\n", + " 15 NaN DEU 2005 2023 10306.147653\n", + " 16 NaN NLD None 2023 549.088094\n", + " 17 NaN DEU 2005 2024 10306.147653\n", + " 18 NaN NLD None 2024 549.088094,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2010 2010 920.124600\n", + " 1 1.0 DNK 1984 2010 3803.411236\n", + " 2 2.0 NOR 2002 2010 27537.957291\n", + " 3 NaN DEU 2010 2011 924.544933\n", + " 4 NaN DNK 1984 2011 4004.050480\n", + " 5 NaN NOR 2002 2011 27963.324907\n", + " 6 NaN DEU 2010 2012 926.741931\n", + " 7 NaN DNK 1984 2012 4004.050480\n", + " 8 NaN NOR 2002 2012 27963.324907\n", + " 9 NaN DEU 2010 2013 929.492287\n", + " 10 NaN DNK 1984 2013 4004.050480\n", + " 11 NaN NOR 2002 2013 27971.602549\n", + " 12 NaN DEU 2010 2014 929.492287\n", + " 13 NaN DNK 1984 2014 4004.831528\n", + " 14 NaN NOR 2002 2014 28035.650453\n", + " 15 NaN DEU 2010 2015 939.206678\n", + " 16 NaN DNK 1984 2015 4004.831528\n", + " 17 NaN NOR 2002 2015 28137.042290\n", + " 18 NaN DEU 2010 2016 939.206678\n", + " 19 NaN DNK 1984 2016 4006.039990\n", + " 20 NaN NOR 2002 2016 28285.475044\n", + " 21 NaN DEU 2010 2017 939.443397\n", + " 22 NaN 
DNK 1984 2017 4007.645340\n", + " 23 NaN NOR 2002 2017 28402.111486\n", + " 24 NaN DEU 2010 2018 1015.070037\n", + " 25 NaN DNK 1984 2018 4007.645340\n", + " 26 NaN NOR 2002 2018 28657.731392\n", + " 27 NaN DEU 2010 2019 1016.061303\n", + " 28 NaN DNK 1984 2019 4008.549836\n", + " 29 NaN NOR 2002 2019 28875.182758\n", + " 30 NaN DEU 2010 2020 1028.440470\n", + " 31 NaN DNK 1984 2020 4008.549836\n", + " 32 NaN NOR 2002 2020 28989.875070\n", + " 33 NaN DEU 2010 2021 1058.169378\n", + " 34 NaN DNK 1984 2021 4008.549836\n", + " 35 NaN NOR 2002 2021 29032.579319\n", + " 36 NaN DEU 2010 2022 1058.169378\n", + " 37 NaN DNK 1984 2022 4008.549836\n", + " 38 NaN NOR 2002 2022 29114.743112\n", + " 39 NaN DEU 2010 2023 1058.169378\n", + " 40 NaN DNK 1984 2023 4008.549836\n", + " 41 NaN NOR 2002 2023 29202.798560\n", + " 42 NaN DEU 2010 2024 1058.169378\n", + " 43 NaN DNK 1984 2024 4008.549836\n", + " 44 NaN NOR 2002 2024 29202.798560,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BWA 1931 2010 2.234266\n", + " 1 1.0 NAM 1931 2010 49514.282347\n", + " 2 2.0 ZAF 1978 2010 39764.095428\n", + " 3 NaN BWA 1931 2011 2.234266\n", + " 4 NaN NAM 1931 2011 49514.283918\n", + " 5 NaN ZAF 1978 2011 39764.095428\n", + " 6 NaN BWA 1931 2012 2.234266\n", + " 7 NaN NAM 1931 2012 49514.283918\n", + " 8 NaN ZAF 1978 2012 39853.674237\n", + " 9 NaN BWA 1931 2013 2.234266\n", + " 10 NaN NAM 1931 2013 49515.221483\n", + " 11 NaN ZAF 1978 2013 39875.956192\n", + " 12 NaN BWA 1931 2014 2.234266\n", + " 13 NaN NAM 1931 2014 49515.221483\n", + " 14 NaN ZAF 1978 2014 40813.708080\n", + " 15 NaN BWA 1931 2015 2.234266\n", + " 16 NaN NAM 1931 2015 49515.221483\n", + " 17 NaN ZAF 1978 2015 40876.448975\n", + " 18 NaN BWA 1931 2016 2.234266\n", + " 19 NaN NAM 1931 2016 49515.221483\n", + " 20 NaN ZAF 1978 2016 40877.836543\n", + " 21 NaN BWA 1931 2017 2.234266\n", + " 22 NaN NAM 1931 2017 49515.221483\n", + " 23 NaN ZAF 1978 2017 40886.124054\n", + " 24 NaN BWA 1931 2018 2.234266\n", + " 25 NaN NAM 
1931 2018 49515.221483\n", + " 26 NaN ZAF 1978 2018 41017.326199\n", + " 27 NaN BWA 1931 2019 2.234266\n", + " 28 NaN NAM 1931 2019 49515.221483\n", + " 29 NaN ZAF 1978 2019 41251.993411\n", + " 30 NaN BWA 1931 2020 2.234266\n", + " 31 NaN NAM 1931 2020 49515.221483\n", + " 32 NaN ZAF 1978 2020 41601.978212\n", + " 33 NaN BWA 1931 2021 2.234266\n", + " 34 NaN NAM 1931 2021 49515.221483\n", + " 35 NaN ZAF 1978 2021 41696.177194\n", + " 36 NaN BWA 1931 2022 2.234266\n", + " 37 NaN NAM 1931 2022 49515.221483\n", + " 38 NaN ZAF 1978 2022 41707.786645\n", + " 39 NaN BWA 1931 2023 2.234266\n", + " 40 NaN NAM 1931 2023 49515.221483\n", + " 41 NaN ZAF 1978 2023 41707.786645\n", + " 42 NaN BWA 1931 2024 2.234266\n", + " 43 NaN NAM 1931 2024 49515.221483\n", + " 44 NaN ZAF 1978 2024 41719.261274,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AGO 2009 2010 54114.508016\n", + " 1 1.0 BWA 1931 2010 3104.789172\n", + " 2 2.0 NAM 2001 2010 204572.994296\n", + " 3 3.0 ZAF 1931 2010 11.410916\n", + " 4 NaN AGO 2009 2011 69027.266141\n", + " .. ... ... ... ... 
...\n", + " 69 NaN AGO 2009 2024 73473.089206\n", + " 70 NaN BWA 1931 2024 3104.789172\n", + " 71 NaN NAM 2001 2024 245776.475821\n", + " 72 NaN ZAF 1931 2024 11.410916\n", + " 73 NaN ZMB None 2024 13735.447878\n", + " \n", + " [74 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AGO 1964 2010 40409.026180\n", + " 1 1.0 COD 2006 2010 3412.991220\n", + " 2 NaN AGO 1964 2011 44046.348492\n", + " 3 NaN COD 2006 2011 3412.991220\n", + " 4 NaN ZMB None 2011 3637.322312\n", + " 5 NaN AGO 1964 2012 44046.348492\n", + " 6 NaN COD 2006 2012 3412.991220\n", + " 7 NaN ZMB None 2012 3637.322312\n", + " 8 NaN AGO 1964 2013 44046.348492\n", + " 9 NaN COD 2006 2013 3412.991220\n", + " 10 NaN ZMB None 2013 3637.322312\n", + " 11 NaN AGO 1964 2014 44046.348492\n", + " 12 NaN COD 2006 2014 3412.991220\n", + " 13 NaN ZMB None 2014 3637.322312\n", + " 14 NaN AGO 1964 2015 44046.348492\n", + " 15 NaN COD 2006 2015 3412.991220\n", + " 16 NaN ZMB None 2015 3637.322312\n", + " 17 NaN AGO 1964 2016 44046.348492\n", + " 18 NaN COD 2006 2016 3412.991220\n", + " 19 NaN ZMB None 2016 3637.322312\n", + " 20 NaN AGO 1964 2017 44046.348492\n", + " 21 NaN COD 2006 2017 3412.991220\n", + " 22 NaN ZMB None 2017 3637.322312\n", + " 23 NaN AGO 1964 2018 44046.348492\n", + " 24 NaN COD 2006 2018 3412.991220\n", + " 25 NaN ZMB None 2018 3637.322312\n", + " 26 NaN AGO 1964 2019 44046.348492\n", + " 27 NaN COD 2006 2019 3412.991220\n", + " 28 NaN ZMB None 2019 3637.322312\n", + " 29 NaN AGO 1964 2020 44046.348492\n", + " 30 NaN COD 2006 2020 3412.991220\n", + " 31 NaN ZMB None 2020 3637.322312\n", + " 32 NaN AGO 1964 2021 44046.348492\n", + " 33 NaN COD 2006 2021 3412.991220\n", + " 34 NaN ZMB None 2021 3637.322312\n", + " 35 NaN AGO 1964 2022 44046.348492\n", + " 36 NaN COD 2006 2022 3412.991220\n", + " 37 NaN ZMB None 2022 3637.322312\n", + " 38 NaN AGO 1964 2023 44046.348492\n", + " 39 NaN COD 2006 2023 3412.991220\n", + " 40 NaN ZMB None 2023 3637.322312\n", + " 41 NaN AGO 1964 
2024 44046.348492\n", + " 42 NaN COD 2006 2024 3412.991220\n", + " 43 NaN ZMB None 2024 3637.322312,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAF 2007 2010 20792.341766\n", + " 1 1.0 CMR 2007 2010 44531.658777\n", + " 2 2.0 COD 1983 2010 76735.765578\n", + " 3 3.0 COG 2009 2010 180514.007763\n", + " 4 4.0 GAB 2002 2010 58751.109107\n", + " .. ... ... ... ... ...\n", + " 99 NaN CMR 2007 2024 47266.709614\n", + " 100 NaN COD 1983 2024 79287.408562\n", + " 101 NaN COG 2009 2024 206944.834669\n", + " 102 NaN GAB 2002 2024 70821.890882\n", + " 103 NaN GNQ 2002 2024 6280.457801\n", + " \n", + " [104 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAF 2004 2010 31412.186988\n", + " 1 1.0 CMR 2004 2010 33396.653998\n", + " 2 2.0 NER 2008 2010 6060.661234\n", + " 3 3.0 NGA 0 2010 33118.300064\n", + " 4 4.0 TCD 1953 2010 81249.499597\n", + " .. ... ... ... ... ...\n", + " 70 NaN CAF 2004 2024 31412.186988\n", + " 71 NaN CMR 2004 2024 41464.554811\n", + " 72 NaN NER 2008 2024 6060.661234\n", + " 73 NaN NGA 0 2024 41789.057495\n", + " 74 NaN TCD 1953 2024 93099.796863\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DZA 1982 2010 18054.428890\n", + " 1 1.0 LBY 1982 2010 18054.428890\n", + " 2 2.0 NER 2005 2010 49983.977912\n", + " 3 3.0 TCD 1969 2010 57553.857982\n", + " 4 NaN DZA 1982 2011 18054.428890\n", + " 5 NaN LBY 1982 2011 18054.428890\n", + " 6 NaN NER 2005 2011 49983.977912\n", + " 7 NaN TCD 1969 2011 57553.857982\n", + " 8 NaN DZA 1982 2012 18054.428890\n", + " 9 NaN LBY 1982 2012 18054.428890\n", + " 10 NaN NER 2005 2012 49983.977912\n", + " 11 NaN TCD 1969 2012 57553.857982\n", + " 12 NaN DZA 1982 2013 18054.428890\n", + " 13 NaN LBY 1982 2013 18054.428890\n", + " 14 NaN NER 2005 2013 49983.977912\n", + " 15 NaN TCD 1969 2013 57553.857982\n", + " 16 NaN DZA 1982 2014 18054.428890\n", + " 17 NaN LBY 1982 2014 18054.428890\n", + " 18 NaN NER 2005 2014 49983.977912\n", + " 19 NaN TCD 
1969 2014 57553.857982\n", + " 20 NaN DZA 1982 2015 18054.428890\n", + " 21 NaN LBY 1982 2015 18054.428890\n", + " 22 NaN NER 2005 2015 49983.977912\n", + " 23 NaN TCD 1969 2015 57553.857982\n", + " 24 NaN DZA 1982 2016 18054.428890\n", + " 25 NaN LBY 1982 2016 18054.428890\n", + " 26 NaN NER 2005 2016 49983.977912\n", + " 27 NaN TCD 1969 2016 57553.857982\n", + " 28 NaN DZA 1982 2017 18054.428890\n", + " 29 NaN LBY 1982 2017 18054.428890\n", + " 30 NaN NER 2005 2017 49983.977912\n", + " 31 NaN TCD 1969 2017 57553.857982\n", + " 32 NaN DZA 1982 2018 18054.428890\n", + " 33 NaN LBY 1982 2018 18054.428890\n", + " 34 NaN NER 2005 2018 49983.977912\n", + " 35 NaN TCD 1969 2018 57553.857982\n", + " 36 NaN DZA 1982 2019 18054.428890\n", + " 37 NaN LBY 1982 2019 18054.428890\n", + " 38 NaN NER 2005 2019 134726.733318\n", + " 39 NaN TCD 1969 2019 57553.857982\n", + " 40 NaN DZA 1982 2020 18054.428890\n", + " 41 NaN LBY 1982 2020 18054.428890\n", + " 42 NaN NER 2005 2020 134726.733318\n", + " 43 NaN TCD 1969 2020 57553.857982\n", + " 44 NaN DZA 1982 2021 18054.428890\n", + " 45 NaN LBY 1982 2021 18054.428890\n", + " 46 NaN NER 2005 2021 134726.733318\n", + " 47 NaN TCD 1969 2021 57553.857982\n", + " 48 NaN DZA 1982 2022 18054.428890\n", + " 49 NaN LBY 1982 2022 18054.428890\n", + " 50 NaN NER 2005 2022 134726.733318\n", + " 51 NaN TCD 1969 2022 57553.857982\n", + " 52 NaN DZA 1982 2023 18054.428890\n", + " 53 NaN LBY 1982 2023 18054.428890\n", + " 54 NaN NER 2005 2023 134726.733318\n", + " 55 NaN TCD 1969 2023 57553.857982\n", + " 56 NaN DZA 1982 2024 18054.428890\n", + " 57 NaN LBY 1982 2024 18054.428890\n", + " 58 NaN NER 2005 2024 134726.733318\n", + " 59 NaN TCD 1969 2024 57553.857982,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DZA 1982 2010 789.628149\n", + " 1 1.0 LBY 1982 2010 789.628149\n", + " 2 2.0 NER 1982 2010 789.628149\n", + " 3 3.0 TUN 2009 2010 230.952104\n", + " 4 NaN DZA 1982 2011 789.628149\n", + " 5 NaN LBY 1982 2011 789.628149\n", + " 6 NaN 
NER 1982 2011 789.628149\n", + " 7 NaN TUN 2009 2011 230.952104\n", + " 8 NaN DZA 1982 2012 789.628149\n", + " 9 NaN LBY 1982 2012 789.628149\n", + " 10 NaN NER 1982 2012 789.628149\n", + " 11 NaN TUN 2009 2012 234.800099\n", + " 12 NaN DZA 1982 2013 789.628149\n", + " 13 NaN LBY 1982 2013 789.628149\n", + " 14 NaN NER 1982 2013 789.628149\n", + " 15 NaN TUN 2009 2013 313.931745\n", + " 16 NaN DZA 1982 2014 789.628149\n", + " 17 NaN LBY 1982 2014 789.628149\n", + " 18 NaN NER 1982 2014 789.628149\n", + " 19 NaN TUN 2009 2014 313.931745\n", + " 20 NaN DZA 1982 2015 789.628149\n", + " 21 NaN LBY 1982 2015 789.628149\n", + " 22 NaN NER 1982 2015 789.628149\n", + " 23 NaN TUN 2009 2015 313.931745\n", + " 24 NaN DZA 1982 2016 789.628149\n", + " 25 NaN LBY 1982 2016 789.628149\n", + " 26 NaN NER 1982 2016 789.628149\n", + " 27 NaN TUN 2009 2016 313.931745\n", + " 28 NaN DZA 1982 2017 789.628149\n", + " 29 NaN LBY 1982 2017 789.628149\n", + " 30 NaN NER 1982 2017 789.628149\n", + " 31 NaN TUN 2009 2017 313.931745\n", + " 32 NaN DZA 1982 2018 789.628149\n", + " 33 NaN LBY 1982 2018 789.628149\n", + " 34 NaN NER 1982 2018 789.628149\n", + " 35 NaN TUN 2009 2018 313.931745\n", + " 36 NaN DZA 1982 2019 789.628149\n", + " 37 NaN LBY 1982 2019 789.628149\n", + " 38 NaN NER 1982 2019 789.628149\n", + " 39 NaN TUN 2009 2019 313.931745\n", + " 40 NaN DZA 1982 2020 789.628149\n", + " 41 NaN LBY 1982 2020 789.628149\n", + " 42 NaN NER 1982 2020 789.628149\n", + " 43 NaN TUN 2009 2020 313.931745\n", + " 44 NaN DZA 1982 2021 789.628149\n", + " 45 NaN LBY 1982 2021 789.628149\n", + " 46 NaN NER 1982 2021 789.628149\n", + " 47 NaN TUN 2009 2021 313.931745\n", + " 48 NaN DZA 1982 2022 789.628149\n", + " 49 NaN LBY 1982 2022 789.628149\n", + " 50 NaN NER 1982 2022 789.628149\n", + " 51 NaN TUN 2009 2022 313.931745\n", + " 52 NaN DZA 1982 2023 789.628149\n", + " 53 NaN LBY 1982 2023 789.628149\n", + " 54 NaN NER 1982 2023 789.628149\n", + " 55 NaN TUN 2009 2023 313.931745\n", + " 56 NaN 
DZA 1982 2024 789.628149\n", + " 57 NaN LBY 1982 2024 789.628149\n", + " 58 NaN NER 1982 2024 789.628149\n", + " 59 NaN TUN 2009 2024 313.931745,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 0 2010 3755.635851\n", + " 1 1.0 AUT 2007 2010 46.449862\n", + " 2 2.0 BEL 2007 2010 46.449862\n", + " 3 3.0 BGR 2007 2010 46.449862\n", + " 4 4.0 BIH 0 2010 5992.442893\n", + " .. ... ... ... ... ...\n", + " 340 NaN SVK 2007 2024 46.449862\n", + " 341 NaN SVN 2007 2024 46.449862\n", + " 342 NaN TUN 2010 2024 696.882197\n", + " 343 NaN UKR 2007 2024 46.449862\n", + " 344 NaN XKO 1996 2024 146.566919\n", + " \n", + " [345 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUT 2005 2010 2171.787210\n", + " 1 1.0 CHE 2010 2010 1544.622543\n", + " 2 2.0 ITA 2006 2010 9731.733226\n", + " 3 NaN AUT 2005 2011 2171.787210\n", + " 4 NaN CHE 2010 2011 1544.622543\n", + " 5 NaN ITA 2006 2011 9731.733226\n", + " 6 NaN AUT 2005 2012 2171.787210\n", + " 7 NaN CHE 2010 2012 1544.622543\n", + " 8 NaN ITA 2006 2012 9734.062170\n", + " 9 NaN AUT 2005 2013 2171.787210\n", + " 10 NaN CHE 2010 2013 1544.622543\n", + " 11 NaN ITA 2006 2013 9761.063149\n", + " 12 NaN AUT 2005 2014 2171.787210\n", + " 13 NaN CHE 2010 2014 1619.764246\n", + " 14 NaN ITA 2006 2014 10149.397243\n", + " 15 NaN AUT 2005 2015 2309.007796\n", + " 16 NaN CHE 2010 2015 1619.764246\n", + " 17 NaN ITA 2006 2015 10286.617830\n", + " 18 NaN AUT 2005 2016 2563.887563\n", + " 19 NaN CHE 2010 2016 1679.276752\n", + " 20 NaN ITA 2006 2016 10838.487792\n", + " 21 NaN AUT 2005 2017 2563.887563\n", + " 22 NaN CHE 2010 2017 1692.845912\n", + " 23 NaN ITA 2006 2017 10851.683384\n", + " 24 NaN AUT 2005 2018 2563.887563\n", + " 25 NaN CHE 2010 2018 1693.055262\n", + " 26 NaN ITA 2006 2018 11305.889199\n", + " 27 NaN AUT 2005 2019 2563.888533\n", + " 28 NaN CHE 2010 2019 1693.055262\n", + " 29 NaN ITA 2006 2019 11370.873348\n", + " 30 NaN AUT 2005 2020 2563.888533\n", + " 31 NaN CHE 2010 2020 1693.393227\n", 
+ " 32 NaN ITA 2006 2020 11370.873348\n", + " 33 NaN AUT 2005 2021 2670.746758\n", + " 34 NaN CHE 2010 2021 1695.054393\n", + " 35 NaN ITA 2006 2021 11478.092138\n", + " 36 NaN AUT 2005 2022 2671.126080\n", + " 37 NaN CHE 2010 2022 1695.054393\n", + " 38 NaN ITA 2006 2022 11478.092138\n", + " 39 NaN AUT 2005 2023 2671.126080\n", + " 40 NaN CHE 2010 2023 1716.924619\n", + " 41 NaN ITA 2006 2023 11478.628253\n", + " 42 NaN AUT 2005 2024 2671.126080\n", + " 43 NaN CHE 2010 2024 1716.924619\n", + " 44 NaN ITA 2006 2024 11478.628253,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUT 1991 2010 3085.388896\n", + " 1 1.0 CZE 2001 2010 825.420544\n", + " 2 2.0 DEU 1982 2010 12708.432936\n", + " 3 3.0 ITA 1991 2010 571.458432\n", + " 4 NaN AUT 1991 2011 3085.751979\n", + " 5 NaN CZE 2001 2011 825.420544\n", + " 6 NaN DEU 1982 2011 12708.433708\n", + " 7 NaN ITA 1991 2011 571.458432\n", + " 8 NaN AUT 1991 2012 3085.751979\n", + " 9 NaN CZE 2001 2012 825.420544\n", + " 10 NaN DEU 1982 2012 12734.064075\n", + " 11 NaN ITA 1991 2012 571.458432\n", + " 12 NaN AUT 1991 2013 3188.657660\n", + " 13 NaN CZE 2001 2013 825.420544\n", + " 14 NaN DEU 1982 2013 12741.649887\n", + " 15 NaN ITA 1991 2013 571.458432\n", + " 16 NaN AUT 1991 2014 3188.657660\n", + " 17 NaN CZE 2001 2014 825.420544\n", + " 18 NaN DEU 1982 2014 12743.584630\n", + " 19 NaN ITA 1991 2014 571.458432\n", + " 20 NaN AUT 1991 2015 3311.630345\n", + " 21 NaN CZE 2001 2015 825.420544\n", + " 22 NaN DEU 1982 2015 12819.492349\n", + " 23 NaN ITA 1991 2015 618.523402\n", + " 24 NaN AUT 1991 2016 3545.828058\n", + " 25 NaN CZE 2001 2016 825.420544\n", + " 26 NaN DEU 1982 2016 13452.511162\n", + " 27 NaN ITA 1991 2016 784.042096\n", + " 28 NaN AUT 1991 2017 3545.828116\n", + " 29 NaN CZE 2001 2017 825.420544\n", + " 30 NaN DEU 1982 2017 13458.827488\n", + " 31 NaN ITA 1991 2017 784.042096\n", + " 32 NaN AUT 1991 2018 3545.864585\n", + " 33 NaN CZE 2001 2018 825.420544\n", + " 34 NaN DEU 1982 2018 13463.351562\n", + " 
35 NaN ITA 1991 2018 784.042096\n", + " 36 NaN AUT 1991 2019 3557.361084\n", + " 37 NaN CZE 2001 2019 825.420544\n", + " 38 NaN DEU 1982 2019 13564.957364\n", + " 39 NaN ITA 1991 2019 784.042096\n", + " 40 NaN AUT 1991 2020 3612.584675\n", + " 41 NaN CZE 2001 2020 825.420544\n", + " 42 NaN DEU 1982 2020 13620.180955\n", + " 43 NaN ITA 1991 2020 784.042096\n", + " 44 NaN AUT 1991 2021 3635.797919\n", + " 45 NaN CZE 2001 2021 825.420544\n", + " 46 NaN DEU 1982 2021 13620.180955\n", + " 47 NaN ITA 1991 2021 812.788388\n", + " 48 NaN AUT 1991 2022 3671.340656\n", + " 49 NaN CZE 2001 2022 825.420544\n", + " 50 NaN DEU 1982 2022 13882.734668\n", + " 51 NaN ITA 1991 2022 812.788388\n", + " 52 NaN AUT 1991 2023 3671.701190\n", + " 53 NaN CZE 2001 2023 825.420544\n", + " 54 NaN DEU 1982 2023 13882.734668\n", + " 55 NaN ITA 1991 2023 812.788388\n", + " 56 NaN AUT 1991 2024 3671.701190\n", + " 57 NaN CZE 2001 2024 825.420544\n", + " 58 NaN DEU 1982 2024 13882.734668\n", + " 59 NaN ITA 1991 2024 812.788388,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 12.322206\n", + " 1 1.0 AUT 2004 2010 2641.250199\n", + " 2 2.0 BEL 2007 2010 12.322206\n", + " 3 3.0 BGR 2007 2010 12.322206\n", + " 4 4.0 BIH 2007 2010 12.322206\n", + " .. ... ... ... ... ...\n", + " 250 NaN POL 2007 2024 12.322206\n", + " 251 NaN ROU 2007 2024 12.322206\n", + " 252 NaN SVK 2007 2024 12.322206\n", + " 253 NaN SVN 2004 2024 7897.699822\n", + " 254 NaN UKR 2007 2024 12.322206\n", + " \n", + " [255 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 54.944011\n", + " 1 1.0 AUT 1991 2010 12178.240434\n", + " 2 2.0 BEL 2007 2010 54.944011\n", + " 3 3.0 BGR 2007 2010 54.944011\n", + " 4 4.0 BIH 2007 2010 54.944011\n", + " .. ... ... ... ... 
...\n", + " 250 NaN POL 2007 2024 54.944011\n", + " 251 NaN ROU 2007 2024 54.944011\n", + " 252 NaN SVK 2007 2024 54.944011\n", + " 253 NaN SVN 2007 2024 54.944011\n", + " 254 NaN UKR 2007 2024 54.944011\n", + " \n", + " [255 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 15.668779\n", + " 1 1.0 AUT 2007 2010 15.668779\n", + " 2 2.0 BEL 2007 2010 15.668779\n", + " 3 3.0 BGR 2007 2010 15.668779\n", + " 4 4.0 BIH 2007 2010 15.668779\n", + " .. ... ... ... ... ...\n", + " 250 NaN POL 2007 2024 15.668779\n", + " 251 NaN ROU 2007 2024 15.668779\n", + " 252 NaN SVK 2007 2024 15.668779\n", + " 253 NaN SVN 2007 2024 15.668779\n", + " 254 NaN UKR 2007 2024 15.668779\n", + " \n", + " [255 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2010 2010 11509.371392\n", + " 1 1.0 DNK 1983 2010 0.852640\n", + " 2 NaN DEU 2010 2011 11557.573144\n", + " 3 NaN DNK 1983 2011 0.852665\n", + " 4 NaN DEU 2010 2012 11608.976882\n", + " 5 NaN DNK 1983 2012 0.852665\n", + " 6 NaN DEU 2010 2013 11675.560778\n", + " 7 NaN DNK 1983 2013 0.852665\n", + " 8 NaN DEU 2010 2014 11791.453811\n", + " 9 NaN DNK 1983 2014 0.852665\n", + " 10 NaN DEU 2010 2015 11919.385657\n", + " 11 NaN DNK 1983 2015 0.852665\n", + " 12 NaN DEU 2010 2016 12372.782335\n", + " 13 NaN DNK 1983 2016 0.852665\n", + " 14 NaN DEU 2010 2017 12627.161222\n", + " 15 NaN DNK 1983 2017 0.852665\n", + " 16 NaN DEU 2010 2018 12826.955592\n", + " 17 NaN DNK 1983 2018 0.852665\n", + " 18 NaN DEU 2010 2019 13282.487835\n", + " 19 NaN DNK 1983 2019 0.852665\n", + " 20 NaN DEU 2010 2020 13778.840605\n", + " 21 NaN DNK 1983 2020 0.852665\n", + " 22 NaN DEU 2010 2021 14713.456837\n", + " 23 NaN DNK 1983 2021 0.852665\n", + " 24 NaN DEU 2010 2022 14726.770337\n", + " 25 NaN DNK 1983 2022 0.852665\n", + " 26 NaN DEU 2010 2023 14726.770337\n", + " 27 NaN DNK 1983 2023 0.852665\n", + " 28 NaN DEU 2010 2024 14726.770337\n", + " 29 NaN DNK 1983 2024 0.852665,\n", + " index iso_3 
STATUS_YR year area\n", + " 0 0.0 CZE 1987 2010 6342.559119\n", + " 1 1.0 DEU 1987 2010 10529.149116\n", + " 2 2.0 POL 1974 2010 678.316933\n", + " 3 NaN CZE 1987 2011 6538.704405\n", + " 4 NaN DEU 1987 2011 10880.670562\n", + " 5 NaN POL 1974 2011 679.709968\n", + " 6 NaN CZE 1987 2012 6545.552859\n", + " 7 NaN DEU 1987 2012 10946.423739\n", + " 8 NaN POL 1974 2012 708.683811\n", + " 9 NaN CZE 1987 2013 6569.170467\n", + " 10 NaN DEU 1987 2013 10961.339456\n", + " 11 NaN POL 1974 2013 708.683811\n", + " 12 NaN CZE 1987 2014 6684.935558\n", + " 13 NaN DEU 1987 2014 11253.793013\n", + " 14 NaN POL 1974 2014 708.683811\n", + " 15 NaN CZE 1987 2015 6984.685423\n", + " 16 NaN DEU 1987 2015 11260.279856\n", + " 17 NaN POL 1974 2015 709.905910\n", + " 18 NaN CZE 1987 2016 7010.427850\n", + " 19 NaN DEU 1987 2016 11264.649875\n", + " 20 NaN POL 1974 2016 709.905910\n", + " 21 NaN CZE 1987 2017 7034.831786\n", + " 22 NaN DEU 1987 2017 11564.305993\n", + " 23 NaN POL 1974 2017 709.905910\n", + " 24 NaN CZE 1987 2018 7054.196920\n", + " 25 NaN DEU 1987 2018 11588.971097\n", + " 26 NaN POL 1974 2018 709.905910\n", + " 27 NaN CZE 1987 2019 7057.319148\n", + " 28 NaN DEU 1987 2019 11591.194603\n", + " 29 NaN POL 1974 2019 709.905910\n", + " 30 NaN CZE 1987 2020 7075.458167\n", + " 31 NaN DEU 1987 2020 11638.755268\n", + " 32 NaN POL 1974 2020 709.905910\n", + " 33 NaN CZE 1987 2021 7076.636477\n", + " 34 NaN DEU 1987 2021 11638.850610\n", + " 35 NaN POL 1974 2021 709.905910\n", + " 36 NaN CZE 1987 2022 7076.636643\n", + " 37 NaN DEU 1987 2022 11645.312242\n", + " 38 NaN POL 1974 2022 709.905910\n", + " 39 NaN CZE 1987 2023 7076.644587\n", + " 40 NaN DEU 1987 2023 11645.312242\n", + " 41 NaN POL 1974 2023 709.905910\n", + " 42 NaN CZE 1987 2024 7076.644587\n", + " 43 NaN DEU 1987 2024 11645.312242\n", + " 44 NaN POL 1974 2024 709.905910,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 13.362410\n", + " 1 1.0 AUT 2007 2010 13.362410\n", + " 2 2.0 BEL 2007 2010 
13.362410\n", + " 3 3.0 BGR 2007 2010 13.362410\n", + " 4 4.0 BIH 2007 2010 13.362410\n", + " .. ... ... ... ... ...\n", + " 250 NaN POL 2007 2024 6145.687864\n", + " 251 NaN ROU 2007 2024 13.362410\n", + " 252 NaN SVK 2007 2024 13.362410\n", + " 253 NaN SVN 2007 2024 13.362410\n", + " 254 NaN UKR 2007 2024 13.362410\n", + " \n", + " [255 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 8.482132\n", + " 1 1.0 AUT 2004 2010 2279.556063\n", + " 2 2.0 BEL 2007 2010 8.482132\n", + " 3 3.0 BGR 2007 2010 8.482132\n", + " 4 4.0 BIH 0 2010 714.927019\n", + " .. ... ... ... ... ...\n", + " 280 NaN ROU 2007 2024 8.482132\n", + " 281 NaN SRB 1996 2024 112.872831\n", + " 282 NaN SVK 2007 2024 8.482132\n", + " 283 NaN SVN 1988 2024 5383.010109\n", + " 284 NaN UKR 2007 2024 8.482132\n", + " \n", + " [285 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 16.060056\n", + " 1 1.0 AUT 1981 2010 10958.726349\n", + " 2 2.0 BEL 2007 2010 16.060056\n", + " 3 3.0 BGR 2007 2010 16.060056\n", + " 4 4.0 BIH 2007 2010 16.060056\n", + " .. ... ... ... ... ...\n", + " 265 NaN POL 2007 2024 16.060056\n", + " 266 NaN ROU 2007 2024 16.060056\n", + " 267 NaN SVK 2007 2024 2319.945643\n", + " 268 NaN SVN 2007 2024 16.060056\n", + " 269 NaN UKR 2007 2024 16.060056\n", + " \n", + " [270 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BIH 0 2010 346.351373\n", + " 1 1.0 HRV 1975 2010 1473.609809\n", + " 2 2.0 HUN 1944 2010 4430.699668\n", + " 3 3.0 SRB 0 2010 1471.860563\n", + " 4 4.0 SVK 2010 2010 31.632598\n", + " .. ... ... ... ... 
...\n", + " 84 NaN HRV 1975 2024 3195.253893\n", + " 85 NaN HUN 1944 2024 5189.916912\n", + " 86 NaN SRB 0 2024 2496.548545\n", + " 87 NaN SVK 2010 2024 31.632598\n", + " 88 NaN SVN None 2024 389.102087\n", + " \n", + " [89 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUT 2003 2010 254.677126\n", + " 1 1.0 CZE 1984 2010 2911.029743\n", + " 2 2.0 HUN 1975 2010 4143.409924\n", + " 3 3.0 POL 1984 2010 2930.309099\n", + " 4 4.0 SVK 2010 2010 13074.438241\n", + " .. ... ... ... ... ...\n", + " 70 NaN AUT 2003 2024 254.677126\n", + " 71 NaN CZE 1984 2024 2980.515682\n", + " 72 NaN HUN 1975 2024 4149.128661\n", + " 73 NaN POL 1984 2024 2930.338248\n", + " 74 NaN SVK 2010 2024 13224.641383\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 22.942845\n", + " 1 1.0 AUT 2007 2010 22.942845\n", + " 2 2.0 BEL 2007 2010 22.942845\n", + " 3 3.0 BGR 2007 2010 22.942845\n", + " 4 4.0 BIH 2007 2010 22.942845\n", + " .. ... ... ... ... 
...\n", + " 265 NaN ROU 2007 2024 22.942845\n", + " 266 NaN RUS 2004 2024 468.543903\n", + " 267 NaN SVK 2007 2024 473.315637\n", + " 268 NaN SVN 2007 2024 22.942845\n", + " 269 NaN UKR 2007 2024 22.942845\n", + " \n", + " [270 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 2010 2010 191.288531\n", + " 1 1.0 DNK 1983 2010 1897.771941\n", + " 2 2.0 SWE 1995 2010 14.343676\n", + " 3 NaN DEU 2010 2011 191.360161\n", + " 4 NaN DNK 1983 2011 1977.220918\n", + " 5 NaN SWE 1995 2011 14.343676\n", + " 6 NaN DEU 2010 2012 199.661054\n", + " 7 NaN DNK 1983 2012 1977.315064\n", + " 8 NaN SWE 1995 2012 14.353056\n", + " 9 NaN DEU 2010 2013 199.661054\n", + " 10 NaN DNK 1983 2013 1978.389298\n", + " 11 NaN SWE 1995 2013 14.353056\n", + " 12 NaN DEU 2010 2014 199.661054\n", + " 13 NaN DNK 1983 2014 1978.578355\n", + " 14 NaN SWE 1995 2014 14.421559\n", + " 15 NaN DEU 2010 2015 199.661054\n", + " 16 NaN DNK 1983 2015 1978.734615\n", + " 17 NaN SWE 1995 2015 14.421559\n", + " 18 NaN DEU 2010 2016 199.661054\n", + " 19 NaN DNK 1983 2016 1982.396895\n", + " 20 NaN SWE 1995 2016 14.421559\n", + " 21 NaN DEU 2010 2017 199.661054\n", + " 22 NaN DNK 1983 2017 1982.408721\n", + " 23 NaN SWE 1995 2017 14.421559\n", + " 24 NaN DEU 2010 2018 199.661054\n", + " 25 NaN DNK 1983 2018 1982.408721\n", + " 26 NaN SWE 1995 2018 14.421559\n", + " 27 NaN DEU 2010 2019 199.661054\n", + " 28 NaN DNK 1983 2019 1982.408721\n", + " 29 NaN SWE 1995 2019 14.421559\n", + " 30 NaN DEU 2010 2020 199.661054\n", + " 31 NaN DNK 1983 2020 1982.408721\n", + " 32 NaN SWE 1995 2020 14.547305\n", + " 33 NaN DEU 2010 2021 200.810248\n", + " 34 NaN DNK 1983 2021 1984.888191\n", + " 35 NaN SWE 1995 2021 14.547305\n", + " 36 NaN DEU 2010 2022 200.810248\n", + " 37 NaN DNK 1983 2022 1984.977129\n", + " 38 NaN SWE 1995 2022 14.547305\n", + " 39 NaN DEU 2010 2023 200.810248\n", + " 40 NaN DNK 1983 2023 1985.607290\n", + " 41 NaN SWE 1995 2023 14.547305\n", + " 42 NaN DEU 2010 2024 
200.810248\n", + " 43 NaN DNK 1983 2024 1985.607290\n", + " 44 NaN SWE 1995 2024 14.547305,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1992 2010 334.934609\n", + " 1 1.0 NOR 2009 2010 177.364305\n", + " 2 2.0 SWE 1967 2010 1028.766657\n", + " 3 NaN DNK 1992 2011 356.678847\n", + " 4 NaN NOR 2009 2011 177.364223\n", + " 5 NaN SWE 1967 2011 1031.954207\n", + " 6 NaN DNK 1992 2012 356.678847\n", + " 7 NaN NOR 2009 2012 177.425514\n", + " 8 NaN SWE 1967 2012 1042.698163\n", + " 9 NaN DNK 1992 2013 356.739793\n", + " 10 NaN NOR 2009 2013 189.858276\n", + " 11 NaN SWE 1967 2013 1049.951018\n", + " 12 NaN DNK 1992 2014 356.739793\n", + " 13 NaN NOR 2009 2014 208.296195\n", + " 14 NaN SWE 1967 2014 1058.897086\n", + " 15 NaN DNK 1992 2015 356.739793\n", + " 16 NaN NOR 2009 2015 210.203197\n", + " 17 NaN SWE 1967 2015 1064.880285\n", + " 18 NaN DNK 1992 2016 356.739793\n", + " 19 NaN NOR 2009 2016 213.357727\n", + " 20 NaN SWE 1967 2016 1079.718410\n", + " 21 NaN DNK 1992 2017 356.739793\n", + " 22 NaN NOR 2009 2017 219.386352\n", + " 23 NaN SWE 1967 2017 1100.500059\n", + " 24 NaN DNK 1992 2018 356.739793\n", + " 25 NaN NOR 2009 2018 221.983977\n", + " 26 NaN SWE 1967 2018 1104.788897\n", + " 27 NaN DNK 1992 2019 356.739793\n", + " 28 NaN NOR 2009 2019 224.393818\n", + " 29 NaN SWE 1967 2019 1112.430587\n", + " 30 NaN DNK 1992 2020 356.739793\n", + " 31 NaN NOR 2009 2020 243.525146\n", + " 32 NaN SWE 1967 2020 1116.979046\n", + " 33 NaN DNK 1992 2021 356.739793\n", + " 34 NaN NOR 2009 2021 243.525146\n", + " 35 NaN SWE 1967 2021 1118.643223\n", + " 36 NaN DNK 1992 2022 357.192338\n", + " 37 NaN NOR 2009 2022 244.218439\n", + " 38 NaN SWE 1967 2022 1128.862062\n", + " 39 NaN DNK 1992 2023 357.199286\n", + " 40 NaN NOR 2009 2023 246.133570\n", + " 41 NaN SWE 1967 2023 1138.298427\n", + " 42 NaN DNK 1992 2024 357.199286\n", + " 43 NaN NOR 2009 2024 246.133570\n", + " 44 NaN SWE 1967 2024 1138.298427,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DEU 
1966 2010 17.021505\n", + " 1 1.0 DNK 1992 2010 136.221311\n", + " 2 2.0 SWE 1997 2010 1289.726877\n", + " 3 NaN DEU 1966 2011 17.021505\n", + " 4 NaN DNK 1992 2011 150.899142\n", + " 5 NaN SWE 1997 2011 1299.213732\n", + " 6 NaN DEU 1966 2012 17.021505\n", + " 7 NaN DNK 1992 2012 151.781124\n", + " 8 NaN SWE 1997 2012 1324.257534\n", + " 9 NaN DEU 1966 2013 17.021505\n", + " 10 NaN DNK 1992 2013 151.836258\n", + " 11 NaN SWE 1997 2013 1338.476301\n", + " 12 NaN DEU 1966 2014 17.021505\n", + " 13 NaN DNK 1992 2014 151.836258\n", + " 14 NaN SWE 1997 2014 1349.539440\n", + " 15 NaN DEU 1966 2015 17.021505\n", + " 16 NaN DNK 1992 2015 151.836258\n", + " 17 NaN SWE 1997 2015 1367.703542\n", + " 18 NaN DEU 1966 2016 17.208416\n", + " 19 NaN DNK 1992 2016 151.836258\n", + " 20 NaN SWE 1997 2016 1379.943139\n", + " 21 NaN DEU 1966 2017 17.208416\n", + " 22 NaN DNK 1992 2017 151.836258\n", + " 23 NaN SWE 1997 2017 1383.593454\n", + " 24 NaN DEU 1966 2018 17.208416\n", + " 25 NaN DNK 1992 2018 151.836258\n", + " 26 NaN SWE 1997 2018 1391.681114\n", + " 27 NaN DEU 1966 2019 17.208416\n", + " 28 NaN DNK 1992 2019 151.836258\n", + " 29 NaN SWE 1997 2019 1402.193372\n", + " 30 NaN DEU 1966 2020 17.208416\n", + " 31 NaN DNK 1992 2020 151.836258\n", + " 32 NaN SWE 1997 2020 1408.959050\n", + " 33 NaN DEU 1966 2021 17.208416\n", + " 34 NaN DNK 1992 2021 151.836258\n", + " 35 NaN SWE 1997 2021 1423.652355\n", + " 36 NaN DEU 1966 2022 17.208416\n", + " 37 NaN DNK 1992 2022 151.836258\n", + " 38 NaN SWE 1997 2022 1432.066900\n", + " 39 NaN DEU 1966 2023 17.208416\n", + " 40 NaN DNK 1992 2023 151.836258\n", + " 41 NaN SWE 1997 2023 1437.217578\n", + " 42 NaN DEU 1966 2024 17.208416\n", + " 43 NaN DNK 1992 2024 151.836258\n", + " 44 NaN SWE 1997 2024 1437.217578,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 SWE 1998 2010 3844.202065\n", + " 1 NaN SWE 1998 2011 3851.396131\n", + " 2 NaN SWE 1998 2012 3855.699415\n", + " 3 NaN SWE 1998 2013 3880.363274\n", + " 4 NaN SWE 1998 
2014 3893.480643\n", + " 5 NaN SWE 1998 2015 3902.717954\n", + " 6 NaN SWE 1998 2016 3923.021181\n", + " 7 NaN SWE 1998 2017 3931.256468\n", + " 8 NaN SWE 1998 2018 3949.051192\n", + " 9 NaN SWE 1998 2019 3966.881536\n", + " 10 NaN SWE 1998 2020 3973.821772\n", + " 11 NaN SWE 1998 2021 3983.996898\n", + " 12 NaN SWE 1998 2022 3992.164688\n", + " 13 NaN SWE 1998 2023 3995.490057\n", + " 14 NaN SWE 1998 2024 3995.490057,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NOR 2001 2010 12430.355582\n", + " 1 1.0 SWE 1999 2010 12759.830537\n", + " 2 NaN NOR 2001 2011 12961.179023\n", + " 3 NaN SWE 1999 2011 12792.420518\n", + " 4 NaN NOR 2001 2012 13047.137527\n", + " 5 NaN SWE 1999 2012 12920.272065\n", + " 6 NaN NOR 2001 2013 13141.370341\n", + " 7 NaN SWE 1999 2013 12971.221186\n", + " 8 NaN NOR 2001 2014 13258.261123\n", + " 9 NaN SWE 1999 2014 12999.413955\n", + " 10 NaN NOR 2001 2015 13405.272489\n", + " 11 NaN SWE 1999 2015 13064.890907\n", + " 12 NaN NOR 2001 2016 13455.802415\n", + " 13 NaN SWE 1999 2016 13104.373761\n", + " 14 NaN NOR 2001 2017 13626.792139\n", + " 15 NaN SWE 1999 2017 13219.273629\n", + " 16 NaN NOR 2001 2018 13778.927744\n", + " 17 NaN SWE 1999 2018 13301.117766\n", + " 18 NaN NOR 2001 2019 13856.747407\n", + " 19 NaN SWE 1999 2019 13369.014048\n", + " 20 NaN NOR 2001 2020 13903.668108\n", + " 21 NaN SWE 1999 2020 13432.530241\n", + " 22 NaN NOR 2001 2021 14005.376845\n", + " 23 NaN SWE 1999 2021 13481.938053\n", + " 24 NaN NOR 2001 2022 14035.297027\n", + " 25 NaN SWE 1999 2022 13547.625604\n", + " 26 NaN NOR 2001 2023 14136.824921\n", + " 27 NaN SWE 1999 2023 13582.215637\n", + " 28 NaN NOR 2001 2024 14136.824921\n", + " 29 NaN SWE 1999 2024 13582.215637,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DNK 1936 2010 26.729709\n", + " 1 1.0 POL 2009 2010 247.203775\n", + " 2 2.0 SWE 2002 2010 641.705331\n", + " 3 NaN DNK 1936 2011 28.186057\n", + " 4 NaN POL 2009 2011 247.203775\n", + " 5 NaN SWE 2002 2011 642.774306\n", + " 6 NaN 
DNK 1936 2012 28.186057\n", + " 7 NaN POL 2009 2012 247.274812\n", + " 8 NaN SWE 2002 2012 643.992006\n", + " 9 NaN DNK 1936 2013 28.186057\n", + " 10 NaN POL 2009 2013 247.332149\n", + " 11 NaN SWE 2002 2013 649.785063\n", + " 12 NaN DNK 1936 2014 28.186057\n", + " 13 NaN POL 2009 2014 247.332149\n", + " 14 NaN SWE 2002 2014 650.853977\n", + " 15 NaN DNK 1936 2015 28.186057\n", + " 16 NaN POL 2009 2015 247.332149\n", + " 17 NaN SWE 2002 2015 651.614982\n", + " 18 NaN DNK 1936 2016 28.186057\n", + " 19 NaN POL 2009 2016 247.332149\n", + " 20 NaN SWE 2002 2016 654.188039\n", + " 21 NaN DNK 1936 2017 28.186057\n", + " 22 NaN POL 2009 2017 247.332149\n", + " 23 NaN SWE 2002 2017 655.923260\n", + " 24 NaN DNK 1936 2018 28.186057\n", + " 25 NaN POL 2009 2018 247.332149\n", + " 26 NaN SWE 2002 2018 657.740158\n", + " 27 NaN DNK 1936 2019 28.186057\n", + " 28 NaN POL 2009 2019 247.332149\n", + " 29 NaN SWE 2002 2019 660.173788\n", + " 30 NaN DNK 1936 2020 28.186057\n", + " 31 NaN POL 2009 2020 247.332149\n", + " 32 NaN SWE 2002 2020 661.122950\n", + " 33 NaN DNK 1936 2021 28.186057\n", + " 34 NaN POL 2009 2021 247.332149\n", + " 35 NaN SWE 2002 2021 665.966119\n", + " 36 NaN DNK 1936 2022 28.186057\n", + " 37 NaN POL 2009 2022 247.332149\n", + " 38 NaN SWE 2002 2022 666.720397\n", + " 39 NaN DNK 1936 2023 28.186057\n", + " 40 NaN POL 2009 2023 247.332149\n", + " 41 NaN SWE 2002 2023 669.013673\n", + " 42 NaN DNK 1936 2024 28.186057\n", + " 43 NaN POL 2009 2024 247.332149\n", + " 44 NaN SWE 2002 2024 669.013673,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 SWE 2000 2010 1234.359602\n", + " 1 NaN SWE 2000 2011 1245.334407\n", + " 2 NaN SWE 2000 2012 1255.966799\n", + " 3 NaN SWE 2000 2013 1269.199104\n", + " 4 NaN SWE 2000 2014 1289.240371\n", + " 5 NaN SWE 2000 2015 1295.669330\n", + " 6 NaN SWE 2000 2016 1307.603627\n", + " 7 NaN SWE 2000 2017 1322.662538\n", + " 8 NaN SWE 2000 2018 1350.252615\n", + " 9 NaN SWE 2000 2019 1366.634807\n", + " 10 NaN SWE 2000 2020 
1371.751033\n", + " 11 NaN SWE 2000 2021 1378.864989\n", + " 12 NaN SWE 2000 2022 1387.268637\n", + " 13 NaN SWE 2000 2023 1400.401814\n", + " 14 NaN SWE 2000 2024 1400.401814,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 POL 2009 2010 595.674934\n", + " 1 1.0 RUS 1994 2010 5.323643\n", + " 2 2.0 SWE 2004 2010 25.147410\n", + " 3 NaN POL 2009 2011 595.674934\n", + " 4 NaN RUS 1994 2011 5.323643\n", + " 5 NaN SWE 2004 2011 25.147410\n", + " 6 NaN POL 2009 2012 603.701411\n", + " 7 NaN RUS 1994 2012 5.323643\n", + " 8 NaN SWE 2004 2012 25.509538\n", + " 9 NaN POL 2009 2013 603.701411\n", + " 10 NaN RUS 1994 2013 5.323643\n", + " 11 NaN SWE 2004 2013 25.509538\n", + " 12 NaN POL 2009 2014 603.701411\n", + " 13 NaN RUS 1994 2014 5.323643\n", + " 14 NaN SWE 2004 2014 25.510260\n", + " 15 NaN POL 2009 2015 603.701411\n", + " 16 NaN RUS 1994 2015 5.323643\n", + " 17 NaN SWE 2004 2015 25.510260\n", + " 18 NaN POL 2009 2016 603.701411\n", + " 19 NaN RUS 1994 2016 5.323643\n", + " 20 NaN SWE 2004 2016 25.510260\n", + " 21 NaN POL 2009 2017 603.701411\n", + " 22 NaN RUS 1994 2017 5.323643\n", + " 23 NaN SWE 2004 2017 25.510260\n", + " 24 NaN POL 2009 2018 603.701411\n", + " 25 NaN RUS 1994 2018 5.323643\n", + " 26 NaN SWE 2004 2018 25.510260\n", + " 27 NaN POL 2009 2019 603.701411\n", + " 28 NaN RUS 1994 2019 5.323643\n", + " 29 NaN SWE 2004 2019 25.510260\n", + " 30 NaN POL 2009 2020 603.701411\n", + " 31 NaN RUS 1994 2020 5.323643\n", + " 32 NaN SWE 2004 2020 25.510260\n", + " 33 NaN POL 2009 2021 648.343139\n", + " 34 NaN RUS 1994 2021 5.323643\n", + " 35 NaN SWE 2004 2021 25.510260\n", + " 36 NaN POL 2009 2022 648.343139\n", + " 37 NaN RUS 1994 2022 5.323643\n", + " 38 NaN SWE 2004 2022 25.549015\n", + " 39 NaN POL 2009 2023 648.343139\n", + " 40 NaN RUS 1994 2023 5.323643\n", + " 41 NaN SWE 2004 2023 25.549015\n", + " 42 NaN POL 2009 2024 648.343139\n", + " 43 NaN RUS 1994 2024 5.323643\n", + " 44 NaN SWE 2004 2024 25.549015,\n", + " index iso_3 STATUS_YR year 
area\n", + " 0 0.0 SWE 1999 2010 863.990019\n", + " 1 NaN SWE 1999 2011 871.670845\n", + " 2 NaN SWE 1999 2012 877.776291\n", + " 3 NaN SWE 1999 2013 884.230582\n", + " 4 NaN SWE 1999 2014 892.779835\n", + " 5 NaN SWE 1999 2015 966.382379\n", + " 6 NaN SWE 1999 2016 972.089474\n", + " 7 NaN SWE 1999 2017 980.931947\n", + " 8 NaN SWE 1999 2018 999.631998\n", + " 9 NaN SWE 1999 2019 1003.892670\n", + " 10 NaN SWE 1999 2020 1008.393753\n", + " 11 NaN SWE 1999 2021 1013.749501\n", + " 12 NaN SWE 1999 2022 1019.763906\n", + " 13 NaN SWE 1999 2023 1025.376959\n", + " 14 NaN SWE 1999 2024 1025.376959,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 SWE 1998 2010 1491.320073\n", + " 1 NaN SWE 1998 2011 1502.760525\n", + " 2 NaN SWE 1998 2012 1529.744134\n", + " 3 NaN SWE 1998 2013 1556.198756\n", + " 4 NaN SWE 1998 2014 1567.891214\n", + " 5 NaN SWE 1998 2015 1585.797163\n", + " 6 NaN SWE 1998 2016 1675.590357\n", + " 7 NaN SWE 1998 2017 1693.028322\n", + " 8 NaN SWE 1998 2018 1716.019812\n", + " 9 NaN SWE 1998 2019 1773.443138\n", + " 10 NaN SWE 1998 2020 1786.341549\n", + " 11 NaN SWE 1998 2021 1792.611929\n", + " 12 NaN SWE 1998 2022 1805.151460\n", + " 13 NaN SWE 1998 2023 1822.259444\n", + " 14 NaN SWE 1998 2024 1822.259444,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NOR 1997 2010 12.981370\n", + " 1 1.0 SWE 2005 2010 980.830601\n", + " 2 NaN NOR 1997 2011 12.981370\n", + " 3 NaN SWE 2005 2011 985.490479\n", + " 4 NaN NOR 1997 2012 12.981370\n", + " 5 NaN SWE 2005 2012 987.130471\n", + " 6 NaN NOR 1997 2013 12.981370\n", + " 7 NaN SWE 2005 2013 994.679868\n", + " 8 NaN NOR 1997 2014 12.981370\n", + " 9 NaN SWE 2005 2014 1014.709157\n", + " 10 NaN NOR 1997 2015 12.981370\n", + " 11 NaN SWE 2005 2015 1056.959525\n", + " 12 NaN NOR 1997 2016 12.981370\n", + " 13 NaN SWE 2005 2016 1070.501668\n", + " 14 NaN NOR 1997 2017 12.981370\n", + " 15 NaN SWE 2005 2017 1106.403065\n", + " 16 NaN NOR 1997 2018 12.981370\n", + " 17 NaN SWE 2005 2018 1121.835049\n", + 
" 18 NaN NOR 1997 2019 12.981370\n", + " 19 NaN SWE 2005 2019 1148.514604\n", + " 20 NaN NOR 1997 2020 12.981370\n", + " 21 NaN SWE 2005 2020 1155.929135\n", + " 22 NaN NOR 1997 2021 12.981370\n", + " 23 NaN SWE 2005 2021 1161.362815\n", + " 24 NaN NOR 1997 2022 12.981370\n", + " 25 NaN SWE 2005 2022 1179.933342\n", + " 26 NaN NOR 1997 2023 12.981370\n", + " 27 NaN SWE 2005 2023 1189.640446\n", + " 28 NaN NOR 1997 2024 12.981370\n", + " 29 NaN SWE 2005 2024 1189.640446,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 1998 2010 3.953479\n", + " 1 1.0 SWE 2010 2010 297.668813\n", + " 2 NaN FIN 1998 2011 3.953479\n", + " 3 NaN SWE 2010 2011 300.486754\n", + " 4 NaN FIN 1998 2012 3.953479\n", + " 5 NaN SWE 2010 2012 316.235596\n", + " 6 NaN FIN 1998 2013 3.953479\n", + " 7 NaN SWE 2010 2013 323.367196\n", + " 8 NaN FIN 1998 2014 3.953479\n", + " 9 NaN SWE 2010 2014 328.093556\n", + " 10 NaN FIN 1998 2015 5.816958\n", + " 11 NaN SWE 2010 2015 333.464256\n", + " 12 NaN FIN 1998 2016 5.816958\n", + " 13 NaN SWE 2010 2016 337.107278\n", + " 14 NaN FIN 1998 2017 5.816958\n", + " 15 NaN SWE 2010 2017 342.347540\n", + " 16 NaN FIN 1998 2018 6.292789\n", + " 17 NaN SWE 2010 2018 356.680113\n", + " 18 NaN FIN 1998 2019 6.292789\n", + " 19 NaN SWE 2010 2019 367.614525\n", + " 20 NaN FIN 1998 2020 6.292789\n", + " 21 NaN SWE 2010 2020 371.770743\n", + " 22 NaN FIN 1998 2021 6.292789\n", + " 23 NaN SWE 2010 2021 375.968359\n", + " 24 NaN FIN 1998 2022 6.478374\n", + " 25 NaN SWE 2010 2022 381.615302\n", + " 26 NaN FIN 1998 2023 6.478374\n", + " 27 NaN SWE 2010 2023 384.840507\n", + " 28 NaN FIN 1998 2024 6.479216\n", + " 29 NaN SWE 2010 2024 384.840507,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2000 2010 688.547845\n", + " 1 1.0 SWE 2000 2010 1827.454070\n", + " 2 NaN FIN 2000 2011 688.547845\n", + " 3 NaN SWE 2000 2011 1830.083464\n", + " 4 NaN FIN 2000 2012 688.547845\n", + " 5 NaN SWE 2000 2012 1834.740262\n", + " 6 NaN FIN 2000 2013 688.547845\n", + " 
7 NaN NOR None 2013 127.243390\n", + " 8 NaN SWE 2000 2013 1836.190954\n", + " 9 NaN FIN 2000 2014 688.547845\n", + " 10 NaN NOR None 2014 127.243390\n", + " 11 NaN SWE 2000 2014 1841.403356\n", + " 12 NaN FIN 2000 2015 688.547845\n", + " 13 NaN NOR None 2015 127.243390\n", + " 14 NaN SWE 2000 2015 1847.939853\n", + " 15 NaN FIN 2000 2016 688.547845\n", + " 16 NaN NOR None 2016 127.243390\n", + " 17 NaN SWE 2000 2016 1862.586046\n", + " 18 NaN FIN 2000 2017 688.547845\n", + " 19 NaN NOR None 2017 127.243390\n", + " 20 NaN SWE 2000 2017 1900.079234\n", + " 21 NaN FIN 2000 2018 688.547845\n", + " 22 NaN NOR None 2018 127.243390\n", + " 23 NaN SWE 2000 2018 1915.357958\n", + " 24 NaN FIN 2000 2019 688.547845\n", + " 25 NaN NOR None 2019 127.243390\n", + " 26 NaN SWE 2000 2019 1937.629840\n", + " 27 NaN FIN 2000 2020 688.547845\n", + " 28 NaN NOR None 2020 127.243390\n", + " 29 NaN SWE 2000 2020 1940.116740\n", + " 30 NaN FIN 2000 2021 688.547845\n", + " 31 NaN NOR None 2021 127.243390\n", + " 32 NaN SWE 2000 2021 1954.573582\n", + " 33 NaN FIN 2000 2022 688.547845\n", + " 34 NaN NOR None 2022 127.243390\n", + " 35 NaN SWE 2000 2022 1956.911922\n", + " 36 NaN FIN 2000 2023 688.547845\n", + " 37 NaN NOR None 2023 127.243390\n", + " 38 NaN SWE 2000 2023 1961.311488\n", + " 39 NaN FIN 2000 2024 688.547845\n", + " 40 NaN NOR None 2024 127.243390\n", + " 41 NaN SWE 2000 2024 1961.311488,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2009 2010 594.514712\n", + " 1 1.0 NOR 2007 2010 26475.129588\n", + " 2 2.0 SWE 1993 2010 37478.635557\n", + " 3 NaN FIN 2009 2011 594.514712\n", + " 4 NaN NOR 2007 2011 26704.852178\n", + " 5 NaN SWE 1993 2011 37511.444962\n", + " 6 NaN FIN 2009 2012 594.514712\n", + " 7 NaN NOR 2007 2012 26892.462064\n", + " 8 NaN SWE 1993 2012 37548.953036\n", + " 9 NaN FIN 2009 2013 594.514712\n", + " 10 NaN NOR 2007 2013 27353.691773\n", + " 11 NaN SWE 1993 2013 37579.385383\n", + " 12 NaN FIN 2009 2014 594.514712\n", + " 13 NaN NOR 2007 2014 
27354.685074\n", + " 14 NaN SWE 1993 2014 37755.892833\n", + " 15 NaN FIN 2009 2015 594.514712\n", + " 16 NaN NOR 2007 2015 27356.951444\n", + " 17 NaN SWE 1993 2015 37758.108024\n", + " 18 NaN FIN 2009 2016 594.514712\n", + " 19 NaN NOR 2007 2016 27360.427403\n", + " 20 NaN SWE 1993 2016 37797.877548\n", + " 21 NaN FIN 2009 2017 594.514712\n", + " 22 NaN NOR 2007 2017 27819.813906\n", + " 23 NaN SWE 1993 2017 38184.717736\n", + " 24 NaN FIN 2009 2018 594.514712\n", + " 25 NaN NOR 2007 2018 27967.951394\n", + " 26 NaN SWE 1993 2018 38486.907940\n", + " 27 NaN FIN 2009 2019 594.514712\n", + " 28 NaN NOR 2007 2019 28010.872664\n", + " 29 NaN SWE 1993 2019 38564.038318\n", + " 30 NaN FIN 2009 2020 594.514712\n", + " 31 NaN NOR 2007 2020 28154.929119\n", + " 32 NaN SWE 1993 2020 38572.296390\n", + " 33 NaN FIN 2009 2021 594.514712\n", + " 34 NaN NOR 2007 2021 28200.742277\n", + " 35 NaN SWE 1993 2021 38579.875363\n", + " 36 NaN FIN 2009 2022 594.514712\n", + " 37 NaN NOR 2007 2022 28239.871731\n", + " 38 NaN SWE 1993 2022 38601.553770\n", + " 39 NaN FIN 2009 2023 594.514712\n", + " 40 NaN NOR 2007 2023 28327.232387\n", + " 41 NaN SWE 1993 2023 38692.053721\n", + " 42 NaN FIN 2009 2024 594.514712\n", + " 43 NaN NOR 2007 2024 28327.232387\n", + " 44 NaN SWE 1993 2024 38692.053721,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NOR 1985 2010 3942.929444\n", + " 1 NaN NOR 1985 2011 3942.929444\n", + " 2 NaN NOR 1985 2012 3942.929444\n", + " 3 NaN NOR 1985 2013 3942.929444\n", + " 4 NaN NOR 1985 2014 17025.858063\n", + " 5 NaN NOR 1985 2015 17025.858063\n", + " 6 NaN NOR 1985 2016 17025.858063\n", + " 7 NaN NOR 1985 2017 17025.858063\n", + " 8 NaN NOR 1985 2018 17025.858063\n", + " 9 NaN NOR 1985 2019 17031.223937\n", + " 10 NaN NOR 1985 2020 17031.223937\n", + " 11 NaN NOR 1985 2021 20108.588267\n", + " 12 NaN NOR 1985 2022 20108.588267\n", + " 13 NaN NOR 1985 2023 20108.588267\n", + " 14 NaN NOR 1985 2024 20108.588267,\n", + " index iso_3 STATUS_YR year area\n", + 
" 0 0.0 BWA 1931 2010 14725.286220\n", + " 1 1.0 LSO 1997 2010 9879.824311\n", + " 2 2.0 NAM 1931 2010 14483.858306\n", + " 3 3.0 ZAF 1999 2010 47561.604745\n", + " 4 NaN BWA 1931 2011 14725.286220\n", + " 5 NaN LSO 1997 2011 10135.602753\n", + " 6 NaN NAM 1931 2011 14483.858306\n", + " 7 NaN ZAF 1999 2011 47561.604745\n", + " 8 NaN BWA 1931 2012 14725.286220\n", + " 9 NaN LSO 1997 2012 10135.602753\n", + " 10 NaN NAM 1931 2012 14483.858306\n", + " 11 NaN ZAF 1999 2012 47871.953471\n", + " 12 NaN BWA 1931 2013 14725.286220\n", + " 13 NaN LSO 1997 2013 10135.602753\n", + " 14 NaN NAM 1931 2013 14483.858306\n", + " 15 NaN ZAF 1999 2013 48058.200525\n", + " 16 NaN BWA 1931 2014 14725.286220\n", + " 17 NaN LSO 1997 2014 10135.602753\n", + " 18 NaN NAM 1931 2014 14483.858306\n", + " 19 NaN ZAF 1999 2014 48692.520904\n", + " 20 NaN BWA 1931 2015 14725.286220\n", + " 21 NaN LSO 1997 2015 10135.602753\n", + " 22 NaN NAM 1931 2015 14483.858306\n", + " 23 NaN ZAF 1999 2015 49129.595726\n", + " 24 NaN BWA 1931 2016 14725.286220\n", + " 25 NaN LSO 1997 2016 10135.602753\n", + " 26 NaN NAM 1931 2016 14483.858306\n", + " 27 NaN ZAF 1999 2016 58402.360431\n", + " 28 NaN BWA 1931 2017 14725.286220\n", + " 29 NaN LSO 1997 2017 10135.602753\n", + " 30 NaN NAM 1931 2017 14483.858306\n", + " 31 NaN ZAF 1999 2017 58499.911806\n", + " 32 NaN BWA 1931 2018 14725.286220\n", + " 33 NaN LSO 1997 2018 10135.602753\n", + " 34 NaN NAM 1931 2018 14483.858306\n", + " 35 NaN ZAF 1999 2018 60270.654903\n", + " 36 NaN BWA 1931 2019 14725.286220\n", + " 37 NaN LSO 1997 2019 10135.602753\n", + " 38 NaN NAM 1931 2019 14483.858306\n", + " 39 NaN ZAF 1999 2019 60435.076995\n", + " 40 NaN BWA 1931 2020 14725.286220\n", + " 41 NaN LSO 1997 2020 10135.602753\n", + " 42 NaN NAM 1931 2020 14483.858306\n", + " 43 NaN ZAF 1999 2020 61867.460818\n", + " 44 NaN BWA 1931 2021 14725.286220\n", + " 45 NaN LSO 1997 2021 10135.602753\n", + " 46 NaN NAM 1931 2021 14483.858306\n", + " 47 NaN ZAF 1999 2021 
62062.478764\n", + " 48 NaN BWA 1931 2022 14725.286220\n", + " 49 NaN LSO 1997 2022 10135.602753\n", + " 50 NaN NAM 1931 2022 14483.858306\n", + " 51 NaN ZAF 1999 2022 62333.072761\n", + " 52 NaN BWA 1931 2023 14725.286220\n", + " 53 NaN LSO 1997 2023 10135.602753\n", + " 54 NaN NAM 1931 2023 14483.858306\n", + " 55 NaN ZAF 1999 2023 62487.643368\n", + " 56 NaN BWA 1931 2024 14725.286220\n", + " 57 NaN LSO 1997 2024 10135.602753\n", + " 58 NaN NAM 1931 2024 14483.858306\n", + " 59 NaN ZAF 1999 2024 62492.337598,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AGO 2007 2010 37235.704817\n", + " 1 1.0 BWA 1965 2010 221507.812007\n", + " 2 2.0 NAM 1931 2010 166474.964547\n", + " 3 3.0 ZAF 1957 2010 32484.751350\n", + " 4 4.0 ZMB 2002 2010 85491.257314\n", + " .. ... ... ... ... ...\n", + " 85 NaN BWA 1965 2024 231423.323800\n", + " 86 NaN NAM 1931 2024 200310.889666\n", + " 87 NaN ZAF 1957 2024 34036.831081\n", + " 88 NaN ZMB 2002 2024 132717.037532\n", + " 89 NaN ZWE 1949 2024 84917.587774\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AGO 1971 2010 31739.084658\n", + " 1 1.0 COD 1964 2010 57555.947577\n", + " 2 2.0 MOZ 1980 2010 1440.051150\n", + " 3 3.0 NAM 1971 2010 17172.255126\n", + " 4 4.0 TZA 1980 2010 1327.919258\n", + " .. ... ... ... ... ...\n", + " 100 NaN MOZ 1980 2024 1440.051150\n", + " 101 NaN NAM 1971 2024 17172.255126\n", + " 102 NaN TZA 1980 2024 1327.919258\n", + " 103 NaN ZMB 1971 2024 130364.463969\n", + " 104 NaN ZWE 1983 2024 2815.093860\n", + " \n", + " [105 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BDI 1954 2010 1670.210014\n", + " 1 1.0 CAF 1974 2010 19989.692116\n", + " 2 2.0 COD 1905 2010 180585.187100\n", + " 3 3.0 RWA 1934 2010 9722.025698\n", + " 4 4.0 SSD 1951 2010 30145.470619\n", + " .. ... ... ... ... 
...\n", + " 100 NaN COD 1905 2024 190527.406548\n", + " 101 NaN RWA 1934 2024 9759.595349\n", + " 102 NaN SSD 1951 2024 30145.470619\n", + " 103 NaN TZA 0 2024 345.654364\n", + " 104 NaN UGA 1946 2024 11257.434258\n", + " \n", + " [105 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CAF 1974 2010 104646.982408\n", + " 1 1.0 COD 1951 2010 24575.228815\n", + " 2 2.0 SDN 1986 2010 6928.049389\n", + " 3 3.0 SSD 1951 2010 63512.136350\n", + " 4 4.0 TCD 1933 2010 43131.917560\n", + " .. ... ... ... ... ...\n", + " 70 NaN CAF 1974 2024 124442.647274\n", + " 71 NaN COD 1951 2024 24575.228815\n", + " 72 NaN SDN 1986 2024 6928.049389\n", + " 73 NaN SSD 1951 2024 63512.136350\n", + " 74 NaN TCD 1933 2024 43131.917560\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EGY 2007 2010 43530.319570\n", + " 1 1.0 LBY 2007 2010 43530.319570\n", + " 2 2.0 SDN 2007 2010 43530.319570\n", + " 3 3.0 TCD 1969 2010 15449.535198\n", + " 4 NaN EGY 2007 2011 43530.319570\n", + " 5 NaN LBY 2007 2011 43530.319570\n", + " 6 NaN SDN 2007 2011 43530.319570\n", + " 7 NaN TCD 1969 2011 15449.535198\n", + " 8 NaN EGY 2007 2012 43530.319570\n", + " 9 NaN LBY 2007 2012 43530.319570\n", + " 10 NaN SDN 2007 2012 43530.319570\n", + " 11 NaN TCD 1969 2012 16080.380218\n", + " 12 NaN EGY 2007 2013 43530.319570\n", + " 13 NaN LBY 2007 2013 43530.319570\n", + " 14 NaN SDN 2007 2013 43530.319570\n", + " 15 NaN TCD 1969 2013 16080.380218\n", + " 16 NaN EGY 2007 2014 43530.319570\n", + " 17 NaN LBY 2007 2014 43530.319570\n", + " 18 NaN SDN 2007 2014 43530.319570\n", + " 19 NaN TCD 1969 2014 16080.380218\n", + " 20 NaN EGY 2007 2015 43530.319570\n", + " 21 NaN LBY 2007 2015 43530.319570\n", + " 22 NaN SDN 2007 2015 43530.319570\n", + " 23 NaN TCD 1969 2015 16080.380218\n", + " 24 NaN EGY 2007 2016 43530.319570\n", + " 25 NaN LBY 2007 2016 43530.319570\n", + " 26 NaN SDN 2007 2016 43530.319570\n", + " 27 NaN TCD 1969 2016 38848.088838\n", + " 28 
NaN EGY 2007 2017 43530.319570\n", + " 29 NaN LBY 2007 2017 43530.319570\n", + " 30 NaN SDN 2007 2017 43530.319570\n", + " 31 NaN TCD 1969 2017 38848.088838\n", + " 32 NaN EGY 2007 2018 43530.319570\n", + " 33 NaN LBY 2007 2018 43530.319570\n", + " 34 NaN SDN 2007 2018 43530.319570\n", + " 35 NaN TCD 1969 2018 38848.088838\n", + " 36 NaN EGY 2007 2019 43530.319570\n", + " 37 NaN LBY 2007 2019 43530.319570\n", + " 38 NaN SDN 2007 2019 43530.319570\n", + " 39 NaN TCD 1969 2019 38848.088838\n", + " 40 NaN EGY 2007 2020 43530.319570\n", + " 41 NaN LBY 2007 2020 43530.319570\n", + " 42 NaN SDN 2007 2020 43530.319570\n", + " 43 NaN TCD 1969 2020 38848.088838\n", + " 44 NaN EGY 2007 2021 43530.319570\n", + " 45 NaN LBY 2007 2021 43530.319570\n", + " 46 NaN SDN 2007 2021 43530.319570\n", + " 47 NaN TCD 1969 2021 38848.088838\n", + " 48 NaN EGY 2007 2022 43530.319570\n", + " 49 NaN LBY 2007 2022 43530.319570\n", + " 50 NaN SDN 2007 2022 43530.319570\n", + " 51 NaN TCD 1969 2022 38848.088838\n", + " 52 NaN EGY 2007 2023 43530.319570\n", + " 53 NaN LBY 2007 2023 43530.319570\n", + " 54 NaN SDN 2007 2023 43530.319570\n", + " 55 NaN TCD 1969 2023 38848.088838\n", + " 56 NaN EGY 2007 2024 43530.319570\n", + " 57 NaN LBY 2007 2024 43530.319570\n", + " 58 NaN SDN 2007 2024 43530.319570\n", + " 59 NaN TCD 1969 2024 38848.088838,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EGY 2007 2010 42065.557994\n", + " 1 1.0 LBY 2007 2010 16708.233661\n", + " 2 2.0 SDN 2007 2010 8996.911688\n", + " 3 NaN EGY 2007 2011 42065.557994\n", + " 4 NaN LBY 2007 2011 16708.233661\n", + " 5 NaN SDN 2007 2011 8996.911688\n", + " 6 NaN EGY 2007 2012 42065.557994\n", + " 7 NaN LBY 2007 2012 16708.233661\n", + " 8 NaN SDN 2007 2012 8996.911688\n", + " 9 NaN EGY 2007 2013 42065.557994\n", + " 10 NaN LBY 2007 2013 16708.233661\n", + " 11 NaN SDN 2007 2013 8996.911688\n", + " 12 NaN EGY 2007 2014 42065.557994\n", + " 13 NaN LBY 2007 2014 16708.233661\n", + " 14 NaN SDN 2007 2014 8996.911688\n", + " 15 
NaN EGY 2007 2015 42065.557994\n", + " 16 NaN LBY 2007 2015 16708.233661\n", + " 17 NaN SDN 2007 2015 8996.911688\n", + " 18 NaN EGY 2007 2016 42065.557994\n", + " 19 NaN LBY 2007 2016 16708.233661\n", + " 20 NaN SDN 2007 2016 8996.911688\n", + " 21 NaN EGY 2007 2017 42065.557994\n", + " 22 NaN LBY 2007 2017 16708.233661\n", + " 23 NaN SDN 2007 2017 8996.911688\n", + " 24 NaN EGY 2007 2018 42065.557994\n", + " 25 NaN LBY 2007 2018 16708.233661\n", + " 26 NaN SDN 2007 2018 8996.911688\n", + " 27 NaN EGY 2007 2019 42065.557994\n", + " 28 NaN LBY 2007 2019 16708.233661\n", + " 29 NaN SDN 2007 2019 8996.911688\n", + " 30 NaN EGY 2007 2020 42065.557994\n", + " 31 NaN LBY 2007 2020 16708.233661\n", + " 32 NaN SDN 2007 2020 8996.911688\n", + " 33 NaN EGY 2007 2021 42065.557994\n", + " 34 NaN LBY 2007 2021 16708.233661\n", + " 35 NaN SDN 2007 2021 8996.911688\n", + " 36 NaN EGY 2007 2022 42065.557994\n", + " 37 NaN LBY 2007 2022 16708.233661\n", + " 38 NaN SDN 2007 2022 8996.911688\n", + " 39 NaN EGY 2007 2023 42065.557994\n", + " 40 NaN LBY 2007 2023 16708.233661\n", + " 41 NaN SDN 2007 2023 8996.911688\n", + " 42 NaN EGY 2007 2024 42065.557994\n", + " 43 NaN LBY 2007 2024 16708.233661\n", + " 44 NaN SDN 2007 2024 8996.911688,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2002 2010 8811.985475\n", + " 1 1.0 AUT 2007 2010 143.570949\n", + " 2 2.0 BEL 2007 2010 143.570949\n", + " 3 3.0 BGR 2009 2010 43210.443184\n", + " 4 4.0 BIH 2007 2010 143.570949\n", + " .. ... ... ... ... ...\n", + " 355 NaN SVK 2007 2024 143.570949\n", + " 356 NaN SVN 2007 2024 143.570949\n", + " 357 NaN TUR 1988 2024 5115.597711\n", + " 358 NaN UKR 2007 2024 486.354333\n", + " 359 NaN XKO 0 2024 4938.012571\n", + " \n", + " [360 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 523.175531\n", + " 1 1.0 AUT 2007 2010 523.175531\n", + " 2 2.0 BEL 2007 2010 523.175531\n", + " 3 3.0 BGR 2007 2010 523.175531\n", + " 4 4.0 BIH 2007 2010 523.175531\n", + " .. 
... ... ... ... ...\n", + " 280 NaN ROU 1994 2024 32621.247361\n", + " 281 NaN SRB 0 2024 3094.300891\n", + " 282 NaN SVK 2010 2024 15896.004311\n", + " 283 NaN SVN 2007 2024 523.175531\n", + " 284 NaN UKR 2007 2024 16219.625181\n", + " \n", + " [285 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 3.835749\n", + " 1 1.0 AUT 2007 2010 3.835749\n", + " 2 2.0 BEL 2007 2010 3.835749\n", + " 3 3.0 BGR 2007 2010 3.835749\n", + " 4 4.0 BIH 2007 2010 3.835749\n", + " .. ... ... ... ... ...\n", + " 295 NaN ROU 2007 2024 3.835749\n", + " 296 NaN RUS 1998 2024 2527.107384\n", + " 297 NaN SVK 2007 2024 1264.633537\n", + " 298 NaN SVN 2007 2024 3.835749\n", + " 299 NaN UKR 2007 2024 7668.622393\n", + " \n", + " [300 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ALB 2007 2010 11.554105\n", + " 1 1.0 AUT 2007 2010 11.554105\n", + " 2 2.0 BEL 2007 2010 11.554105\n", + " 3 3.0 BGR 2007 2010 11.554105\n", + " 4 4.0 BIH 2007 2010 11.554105\n", + " .. ... ... ... ... 
...\n", + " 265 NaN POL 2007 2024 11.554105\n", + " 266 NaN ROU 2007 2024 23217.557215\n", + " 267 NaN SVK 2007 2024 11.554105\n", + " 268 NaN SVN 2007 2024 11.554105\n", + " 269 NaN UKR 2007 2024 15759.599606\n", + " \n", + " [270 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BLR 0 2010 12271.065680\n", + " 1 1.0 LTU 0 2010 400.958444\n", + " 2 2.0 RUS 1979 2010 18.489498\n", + " 3 3.0 UKR 0 2010 14971.748403\n", + " 4 NaN BLR 0 2011 12271.065680\n", + " 5 NaN LTU 0 2011 400.958444\n", + " 6 NaN RUS 1979 2011 18.489498\n", + " 7 NaN UKR 0 2011 14971.748403\n", + " 8 NaN BLR 0 2012 12720.757104\n", + " 9 NaN LTU 0 2012 401.026313\n", + " 10 NaN RUS 1979 2012 18.489498\n", + " 11 NaN UKR 0 2012 14998.837234\n", + " 12 NaN BLR 0 2013 12952.735349\n", + " 13 NaN LTU 0 2013 401.026313\n", + " 14 NaN RUS 1979 2013 18.489498\n", + " 15 NaN UKR 0 2013 15000.800053\n", + " 16 NaN BLR 0 2014 12952.735349\n", + " 17 NaN LTU 0 2014 401.123772\n", + " 18 NaN RUS 1979 2014 18.489498\n", + " 19 NaN UKR 0 2014 15000.800053\n", + " 20 NaN BLR 0 2015 13222.457296\n", + " 21 NaN LTU 0 2015 401.123772\n", + " 22 NaN RUS 1979 2015 18.489498\n", + " 23 NaN UKR 0 2015 15000.800053\n", + " 24 NaN BLR 0 2016 13235.843945\n", + " 25 NaN LTU 0 2016 401.123772\n", + " 26 NaN RUS 1979 2016 18.489498\n", + " 27 NaN UKR 0 2016 15013.739802\n", + " 28 NaN BLR 0 2017 13235.843945\n", + " 29 NaN LTU 0 2017 401.123772\n", + " 30 NaN RUS 1979 2017 18.489498\n", + " 31 NaN UKR 0 2017 15013.739802\n", + " 32 NaN BLR 0 2018 13235.843945\n", + " 33 NaN LTU 0 2018 401.123772\n", + " 34 NaN RUS 1979 2018 18.489498\n", + " 35 NaN UKR 0 2018 15013.739802\n", + " 36 NaN BLR 0 2019 13235.843945\n", + " 37 NaN LTU 0 2019 401.123772\n", + " 38 NaN RUS 1979 2019 18.489498\n", + " 39 NaN UKR 0 2019 15013.739802\n", + " 40 NaN BLR 0 2020 13235.843945\n", + " 41 NaN LTU 0 2020 401.191570\n", + " 42 NaN RUS 1979 2020 18.489498\n", + " 43 NaN UKR 0 2020 15013.739802\n", + " 44 NaN BLR 0 
2021 13235.898354\n", + " 45 NaN LTU 0 2021 401.733425\n", + " 46 NaN RUS 1979 2021 18.489498\n", + " 47 NaN UKR 0 2021 15013.739802\n", + " 48 NaN BLR 0 2022 13235.898354\n", + " 49 NaN LTU 0 2022 401.733425\n", + " 50 NaN RUS 1979 2022 18.489498\n", + " 51 NaN UKR 0 2022 15013.739802\n", + " 52 NaN BLR 0 2023 13235.898354\n", + " 53 NaN LTU 0 2023 401.733425\n", + " 54 NaN RUS 1979 2023 18.489498\n", + " 55 NaN UKR 0 2023 15013.739802\n", + " 56 NaN BLR 0 2024 13235.898354\n", + " 57 NaN LTU 0 2024 401.733425\n", + " 58 NaN RUS 1979 2024 18.489498\n", + " 59 NaN UKR 0 2024 15013.739802,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 LTU 2005 2010 2373.003434\n", + " 1 1.0 LVA 1992 2010 493.441265\n", + " 2 2.0 RUS 1985 2010 630.300522\n", + " 3 NaN LTU 2005 2011 2374.011789\n", + " 4 NaN LVA 1992 2011 493.441265\n", + " 5 NaN RUS 1985 2011 630.300522\n", + " 6 NaN LTU 2005 2012 2432.940112\n", + " 7 NaN LVA 1992 2012 493.441265\n", + " 8 NaN RUS 1985 2012 849.369106\n", + " 9 NaN LTU 2005 2013 2432.940112\n", + " 10 NaN LVA 1992 2013 493.441265\n", + " 11 NaN RUS 1985 2013 849.369106\n", + " 12 NaN LTU 2005 2014 2434.251866\n", + " 13 NaN LVA 1992 2014 493.441265\n", + " 14 NaN RUS 1985 2014 849.369106\n", + " 15 NaN LTU 2005 2015 2435.746300\n", + " 16 NaN LVA 1992 2015 493.441265\n", + " 17 NaN RUS 1985 2015 849.369106\n", + " 18 NaN LTU 2005 2016 2435.746300\n", + " 19 NaN LVA 1992 2016 493.441265\n", + " 20 NaN RUS 1985 2016 849.369106\n", + " 21 NaN LTU 2005 2017 2435.746300\n", + " 22 NaN LVA 1992 2017 493.441265\n", + " 23 NaN RUS 1985 2017 849.369106\n", + " 24 NaN LTU 2005 2018 2435.981844\n", + " 25 NaN LVA 1992 2018 493.441799\n", + " 26 NaN RUS 1985 2018 849.388122\n", + " 27 NaN LTU 2005 2019 2455.347466\n", + " 28 NaN LVA 1992 2019 493.441799\n", + " 29 NaN RUS 1985 2019 849.388122\n", + " 30 NaN LTU 2005 2020 2457.212227\n", + " 31 NaN LVA 1992 2020 493.441799\n", + " 32 NaN RUS 1985 2020 849.505675\n", + " 33 NaN LTU 2005 2021 
2457.944908\n", + " 34 NaN LVA 1992 2021 493.441799\n", + " 35 NaN RUS 1985 2021 849.505675\n", + " 36 NaN LTU 2005 2022 2464.713576\n", + " 37 NaN LVA 1992 2022 493.441801\n", + " 38 NaN RUS 1985 2022 853.208866\n", + " 39 NaN LTU 2005 2023 2464.713576\n", + " 40 NaN LVA 1992 2023 525.375167\n", + " 41 NaN RUS 1985 2023 853.208866\n", + " 42 NaN LTU 2005 2024 2464.713576\n", + " 43 NaN LVA 1992 2024 525.375167\n", + " 44 NaN RUS 1985 2024 853.208866,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2007 2010 353.458166\n", + " 1 1.0 LVA 2002 2010 497.048455\n", + " 2 NaN EST 2007 2011 354.445048\n", + " 3 NaN LVA 2002 2011 497.048455\n", + " 4 NaN EST 2007 2012 354.543238\n", + " 5 NaN LVA 2002 2012 497.048455\n", + " 6 NaN EST 2007 2013 364.912369\n", + " 7 NaN LVA 2002 2013 497.048455\n", + " 8 NaN EST 2007 2014 367.395348\n", + " 9 NaN LVA 2002 2014 497.048455\n", + " 10 NaN EST 2007 2015 367.541469\n", + " 11 NaN LVA 2002 2015 497.048455\n", + " 12 NaN EST 2007 2016 367.541469\n", + " 13 NaN LVA 2002 2016 497.048455\n", + " 14 NaN EST 2007 2017 367.873975\n", + " 15 NaN LVA 2002 2017 497.048455\n", + " 16 NaN EST 2007 2018 369.423677\n", + " 17 NaN LVA 2002 2018 497.048455\n", + " 18 NaN EST 2007 2019 374.410513\n", + " 19 NaN LVA 2002 2019 497.048455\n", + " 20 NaN EST 2007 2020 379.285046\n", + " 21 NaN LVA 2002 2020 497.048455\n", + " 22 NaN EST 2007 2021 386.049417\n", + " 23 NaN LVA 2002 2021 497.048455\n", + " 24 NaN EST 2007 2022 387.509145\n", + " 25 NaN LVA 2002 2022 497.502754\n", + " 26 NaN EST 2007 2023 396.777057\n", + " 27 NaN LVA 2002 2023 508.854376\n", + " 28 NaN EST 2007 2024 396.777057\n", + " 29 NaN LVA 2002 2024 508.854376,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 LTU 1992 2010 3906.453803\n", + " 1 1.0 LVA 1987 2010 1034.957130\n", + " 2 2.0 RUS 1988 2010 181.157467\n", + " 3 NaN LTU 1992 2011 3907.206324\n", + " 4 NaN LVA 1987 2011 1034.957130\n", + " 5 NaN RUS 1988 2011 181.157467\n", + " 6 NaN LTU 1992 2012 
3908.218115\n", + " 7 NaN LVA 1987 2012 1034.957130\n", + " 8 NaN RUS 1988 2012 181.157467\n", + " 9 NaN LTU 1992 2013 3908.218115\n", + " 10 NaN LVA 1987 2013 1034.957130\n", + " 11 NaN RUS 1988 2013 181.157467\n", + " 12 NaN LTU 1992 2014 3917.166786\n", + " 13 NaN LVA 1987 2014 1034.957130\n", + " 14 NaN RUS 1988 2014 181.157467\n", + " 15 NaN LTU 1992 2015 3919.822942\n", + " 16 NaN LVA 1987 2015 1034.957130\n", + " 17 NaN RUS 1988 2015 181.157467\n", + " 18 NaN LTU 1992 2016 3919.822942\n", + " 19 NaN LVA 1987 2016 1034.957130\n", + " 20 NaN RUS 1988 2016 181.157467\n", + " 21 NaN LTU 1992 2017 3919.822942\n", + " 22 NaN LVA 1987 2017 1034.957130\n", + " 23 NaN RUS 1988 2017 181.157467\n", + " 24 NaN LTU 1992 2018 3927.289942\n", + " 25 NaN LVA 1987 2018 1034.957130\n", + " 26 NaN RUS 1988 2018 181.208561\n", + " 27 NaN LTU 1992 2019 3932.729385\n", + " 28 NaN LVA 1987 2019 1041.722082\n", + " 29 NaN RUS 1988 2019 181.208561\n", + " 30 NaN LTU 1992 2020 3942.908973\n", + " 31 NaN LVA 1987 2020 1041.722082\n", + " 32 NaN RUS 1988 2020 181.248157\n", + " 33 NaN LTU 1992 2021 3962.974304\n", + " 34 NaN LVA 1987 2021 1041.722082\n", + " 35 NaN RUS 1988 2021 181.248157\n", + " 36 NaN LTU 1992 2022 3974.953947\n", + " 37 NaN LVA 1987 2022 1042.594219\n", + " 38 NaN RUS 1988 2022 186.182720\n", + " 39 NaN LTU 1992 2023 3974.953947\n", + " 40 NaN LVA 1987 2023 1074.992600\n", + " 41 NaN RUS 1988 2023 186.182720\n", + " 42 NaN LTU 1992 2024 3974.953947\n", + " 43 NaN LVA 1987 2024 1074.992600\n", + " 44 NaN RUS 1988 2024 186.182720,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2006 2010 63.650925\n", + " 1 1.0 LVA 2004 2010 223.342047\n", + " 2 NaN EST 2006 2011 63.889356\n", + " 3 NaN LVA 2004 2011 223.342047\n", + " 4 NaN EST 2006 2012 63.899597\n", + " 5 NaN LVA 2004 2012 223.342047\n", + " 6 NaN EST 2006 2013 63.899597\n", + " 7 NaN LVA 2004 2013 223.342047\n", + " 8 NaN EST 2006 2014 63.899597\n", + " 9 NaN LVA 2004 2014 223.342047\n", + " 10 NaN EST 
2006 2015 63.899597\n", + " 11 NaN LVA 2004 2015 223.342047\n", + " 12 NaN EST 2006 2016 63.899597\n", + " 13 NaN LVA 2004 2016 223.342047\n", + " 14 NaN EST 2006 2017 63.934778\n", + " 15 NaN LVA 2004 2017 223.342047\n", + " 16 NaN EST 2006 2018 64.330402\n", + " 17 NaN LVA 2004 2018 223.342047\n", + " 18 NaN EST 2006 2019 64.372856\n", + " 19 NaN LVA 2004 2019 223.342047\n", + " 20 NaN EST 2006 2020 64.644675\n", + " 21 NaN LVA 2004 2020 223.342047\n", + " 22 NaN EST 2006 2021 64.747777\n", + " 23 NaN LVA 2004 2021 223.342047\n", + " 24 NaN EST 2006 2022 64.892633\n", + " 25 NaN LVA 2004 2022 223.342047\n", + " 26 NaN EST 2006 2023 65.023945\n", + " 27 NaN LVA 2004 2023 230.814166\n", + " 28 NaN EST 2006 2024 65.023945\n", + " 29 NaN LVA 2004 2024 230.814166,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2003 2010 749.816709\n", + " 1 NaN EST 2003 2011 750.689213\n", + " 2 NaN EST 2003 2012 751.043503\n", + " 3 NaN EST 2003 2013 751.464028\n", + " 4 NaN EST 2003 2014 751.536088\n", + " 5 NaN EST 2003 2015 751.537515\n", + " 6 NaN EST 2003 2016 751.538533\n", + " 7 NaN EST 2003 2017 757.493734\n", + " 8 NaN EST 2003 2018 761.313144\n", + " 9 NaN EST 2003 2019 764.152056\n", + " 10 NaN EST 2003 2020 769.180162\n", + " 11 NaN EST 2003 2021 771.460644\n", + " 12 NaN EST 2003 2022 773.720188\n", + " 13 NaN EST 2003 2023 778.964003\n", + " 14 NaN EST 2003 2024 778.964003,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2003 2010 2666.623056\n", + " 1 1.0 LVA 1962 2010 2850.838509\n", + " 2 NaN EST 2003 2011 2667.587236\n", + " 3 NaN LVA 1962 2011 2850.838509\n", + " 4 NaN EST 2003 2012 2667.721352\n", + " 5 NaN LVA 1962 2012 2850.838509\n", + " 6 NaN EST 2003 2013 2667.721352\n", + " 7 NaN LVA 1962 2013 2851.361421\n", + " 8 NaN EST 2003 2014 2667.779252\n", + " 9 NaN LVA 1962 2014 2851.361421\n", + " 10 NaN EST 2003 2015 2667.779252\n", + " 11 NaN LVA 1962 2015 2851.361421\n", + " 12 NaN EST 2003 2016 2667.779252\n", + " 13 NaN LVA 1962 2016 
2851.361421\n", + " 14 NaN EST 2003 2017 2670.636946\n", + " 15 NaN LVA 1962 2017 2851.361421\n", + " 16 NaN EST 2003 2018 2672.495369\n", + " 17 NaN LVA 1962 2018 2851.379266\n", + " 18 NaN EST 2003 2019 2674.766122\n", + " 19 NaN LVA 1962 2019 2851.410158\n", + " 20 NaN EST 2003 2020 2677.103423\n", + " 21 NaN LVA 1962 2020 2851.410158\n", + " 22 NaN EST 2003 2021 2679.276008\n", + " 23 NaN LVA 1962 2021 2851.410158\n", + " 24 NaN EST 2003 2022 2681.472694\n", + " 25 NaN LVA 1962 2022 2851.437264\n", + " 26 NaN EST 2003 2023 2682.870213\n", + " 27 NaN LVA 1962 2023 2851.437264\n", + " 28 NaN EST 2003 2024 2682.870213\n", + " 29 NaN LVA 1962 2024 2851.437264,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2009 2010 1597.382691\n", + " 1 NaN EST 2009 2011 1598.528721\n", + " 2 NaN EST 2009 2012 1598.745914\n", + " 3 NaN EST 2009 2013 1601.533728\n", + " 4 NaN EST 2009 2014 1655.183660\n", + " 5 NaN EST 2009 2015 1655.235038\n", + " 6 NaN EST 2009 2016 1655.292087\n", + " 7 NaN EST 2009 2017 1661.141295\n", + " 8 NaN EST 2009 2018 1665.911352\n", + " 9 NaN EST 2009 2019 1715.510348\n", + " 10 NaN EST 2009 2020 1722.524378\n", + " 11 NaN EST 2009 2021 1727.535212\n", + " 12 NaN EST 2009 2022 1735.382550\n", + " 13 NaN EST 2009 2023 1776.608390\n", + " 14 NaN EST 2009 2024 1776.608390,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 1992 2010 494.314060\n", + " 1 NaN FIN 1992 2011 497.433270\n", + " 2 NaN FIN 1992 2012 500.104887\n", + " 3 NaN FIN 1992 2013 501.362004\n", + " 4 NaN FIN 1992 2014 533.084818\n", + " 5 NaN FIN 1992 2015 647.322709\n", + " 6 NaN FIN 1992 2016 655.607221\n", + " 7 NaN FIN 1992 2017 657.448959\n", + " 8 NaN FIN 1992 2018 660.754328\n", + " 9 NaN FIN 1992 2019 662.780570\n", + " 10 NaN FIN 1992 2020 668.585688\n", + " 11 NaN FIN 1992 2021 672.270561\n", + " 12 NaN FIN 1992 2022 677.714011\n", + " 13 NaN FIN 1992 2023 680.862416\n", + " 14 NaN FIN 1992 2024 680.888666,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 
FIN 2006 2010 604.678669\n", + " 1 1.0 SWE 2004 2010 427.596002\n", + " 2 NaN FIN 2006 2011 605.294408\n", + " 3 NaN SWE 2004 2011 427.846931\n", + " 4 NaN FIN 2006 2012 606.505481\n", + " 5 NaN SWE 2004 2012 428.727228\n", + " 6 NaN FIN 2006 2013 606.628441\n", + " 7 NaN NOR None 2013 3.293372\n", + " 8 NaN SWE 2004 2013 429.282073\n", + " 9 NaN FIN 2006 2014 606.856909\n", + " 10 NaN NOR None 2014 3.293372\n", + " 11 NaN SWE 2004 2014 429.568301\n", + " 12 NaN FIN 2006 2015 640.449662\n", + " 13 NaN NOR None 2015 3.293372\n", + " 14 NaN SWE 2004 2015 430.426911\n", + " 15 NaN FIN 2006 2016 641.780413\n", + " 16 NaN NOR None 2016 3.293372\n", + " 17 NaN SWE 2004 2016 435.690477\n", + " 18 NaN FIN 2006 2017 642.766140\n", + " 19 NaN NOR None 2017 3.293372\n", + " 20 NaN SWE 2004 2017 439.942644\n", + " 21 NaN FIN 2006 2018 643.645326\n", + " 22 NaN NOR None 2018 3.293372\n", + " 23 NaN SWE 2004 2018 441.143028\n", + " 24 NaN FIN 2006 2019 644.095369\n", + " 25 NaN NOR None 2019 3.293372\n", + " 26 NaN SWE 2004 2019 442.655414\n", + " 27 NaN FIN 2006 2020 644.708200\n", + " 28 NaN NOR None 2020 3.293372\n", + " 29 NaN SWE 2004 2020 443.031906\n", + " 30 NaN FIN 2006 2021 646.116636\n", + " 31 NaN NOR None 2021 3.293372\n", + " 32 NaN SWE 2004 2021 443.046481\n", + " 33 NaN FIN 2006 2022 646.603876\n", + " 34 NaN NOR None 2022 3.293372\n", + " 35 NaN SWE 2004 2022 445.100833\n", + " 36 NaN FIN 2006 2023 647.425181\n", + " 37 NaN NOR None 2023 3.293372\n", + " 38 NaN SWE 2004 2023 445.287300\n", + " 39 NaN FIN 2006 2024 647.436336\n", + " 40 NaN NOR None 2024 3.293372\n", + " 41 NaN SWE 2004 2024 445.287300,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2004 2010 4.504134\n", + " 1 1.0 FIN 1992 2010 722.294075\n", + " 2 NaN EST 2004 2011 4.504134\n", + " 3 NaN FIN 1992 2011 730.435555\n", + " 4 NaN EST 2004 2012 4.504134\n", + " 5 NaN FIN 1992 2012 738.993027\n", + " 6 NaN EST 2004 2013 4.504134\n", + " 7 NaN FIN 1992 2013 746.230610\n", + " 8 NaN EST 2004 
2014 4.504134\n", + " 9 NaN FIN 1992 2014 832.824785\n", + " 10 NaN EST 2004 2015 4.504134\n", + " 11 NaN FIN 1992 2015 1219.645357\n", + " 12 NaN EST 2004 2016 4.504638\n", + " 13 NaN FIN 1992 2016 1226.213050\n", + " 14 NaN EST 2004 2017 4.504638\n", + " 15 NaN FIN 1992 2017 1236.089827\n", + " 16 NaN EST 2004 2018 4.507786\n", + " 17 NaN FIN 1992 2018 1249.222360\n", + " 18 NaN EST 2004 2019 4.507786\n", + " 19 NaN FIN 1992 2019 1255.307349\n", + " 20 NaN EST 2004 2020 4.507786\n", + " 21 NaN FIN 1992 2020 1267.821940\n", + " 22 NaN EST 2004 2021 4.507786\n", + " 23 NaN FIN 1992 2021 1303.750919\n", + " 24 NaN EST 2004 2022 4.507786\n", + " 25 NaN FIN 1992 2022 1317.989513\n", + " 26 NaN EST 2004 2023 4.508625\n", + " 27 NaN FIN 1992 2023 1327.994577\n", + " 28 NaN EST 2004 2024 4.508625\n", + " 29 NaN FIN 1992 2024 1327.994577,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2007 2010 583.708961\n", + " 1 NaN FIN 2007 2011 586.133300\n", + " 2 NaN FIN 2007 2012 590.480548\n", + " 3 NaN FIN 2007 2013 598.311271\n", + " 4 NaN FIN 2007 2014 599.954934\n", + " 5 NaN FIN 2007 2015 890.769645\n", + " 6 NaN FIN 2007 2016 893.703618\n", + " 7 NaN FIN 2007 2017 904.513702\n", + " 8 NaN FIN 2007 2018 908.626349\n", + " 9 NaN FIN 2007 2019 912.451947\n", + " 10 NaN FIN 2007 2020 916.208092\n", + " 11 NaN FIN 2007 2021 926.632227\n", + " 12 NaN FIN 2007 2022 970.264846\n", + " 13 NaN FIN 2007 2023 978.398020\n", + " 14 NaN FIN 2007 2024 978.754754,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BLR 0 2010 1890.156198\n", + " 1 1.0 LTU 1992 2010 3015.824608\n", + " 2 2.0 LVA 1977 2010 3676.741651\n", + " 3 NaN BLR 0 2011 1893.706497\n", + " 4 NaN LTU 1992 2011 3017.250940\n", + " 5 NaN LVA 1977 2011 3676.741651\n", + " 6 NaN BLR 0 2012 1893.706497\n", + " 7 NaN LTU 1992 2012 3018.038915\n", + " 8 NaN LVA 1977 2012 3676.741651\n", + " 9 NaN BLR 0 2013 1893.706497\n", + " 10 NaN LTU 1992 2013 3018.038915\n", + " 11 NaN LVA 1977 2013 3676.741651\n", + " 12 NaN 
BLR 0 2014 2033.115251\n", + " 13 NaN LTU 1992 2014 3103.118123\n", + " 14 NaN LVA 1977 2014 3676.741651\n", + " 15 NaN BLR 0 2015 2033.115251\n", + " 16 NaN LTU 1992 2015 3103.188360\n", + " 17 NaN LVA 1977 2015 3676.741651\n", + " 18 NaN BLR 0 2016 2033.115251\n", + " 19 NaN LTU 1992 2016 3103.188360\n", + " 20 NaN LVA 1977 2016 3676.741651\n", + " 21 NaN BLR 0 2017 2033.115251\n", + " 22 NaN LTU 1992 2017 3103.188360\n", + " 23 NaN LVA 1977 2017 3676.741651\n", + " 24 NaN BLR 0 2018 2033.115251\n", + " 25 NaN LTU 1992 2018 3106.913243\n", + " 26 NaN LVA 1977 2018 3676.741651\n", + " 27 NaN BLR 0 2019 2033.115251\n", + " 28 NaN LTU 1992 2019 3108.462588\n", + " 29 NaN LVA 1977 2019 3676.982277\n", + " 30 NaN BLR 0 2020 2033.208615\n", + " 31 NaN LTU 1992 2020 3110.443985\n", + " 32 NaN LVA 1977 2020 3676.982277\n", + " 33 NaN BLR 0 2021 2033.208615\n", + " 34 NaN LTU 1992 2021 3117.832745\n", + " 35 NaN LVA 1977 2021 3676.982277\n", + " 36 NaN BLR 0 2022 2033.208615\n", + " 37 NaN LTU 1992 2022 3147.460199\n", + " 38 NaN LVA 1977 2022 3691.441651\n", + " 39 NaN BLR 0 2023 2033.208615\n", + " 40 NaN LTU 1992 2023 3147.460199\n", + " 41 NaN LVA 1977 2023 3760.230829\n", + " 42 NaN BLR 0 2024 2033.208615\n", + " 43 NaN LTU 1992 2024 3147.460199\n", + " 44 NaN LVA 1977 2024 3760.230829,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2001 2010 2912.528698\n", + " 1 1.0 LVA 1987 2010 3276.976599\n", + " 2 NaN EST 2001 2011 2914.276118\n", + " 3 NaN LVA 1987 2011 3277.183565\n", + " 4 NaN EST 2001 2012 2914.438337\n", + " 5 NaN LVA 1987 2012 3277.183565\n", + " 6 NaN EST 2001 2013 2914.737812\n", + " 7 NaN LVA 1987 2013 3277.183565\n", + " 8 NaN EST 2001 2014 2916.082350\n", + " 9 NaN LVA 1987 2014 3277.183565\n", + " 10 NaN EST 2001 2015 2916.583217\n", + " 11 NaN LVA 1987 2015 3277.183565\n", + " 12 NaN EST 2001 2016 2918.657836\n", + " 13 NaN LVA 1987 2016 3277.189343\n", + " 14 NaN EST 2001 2017 2924.475545\n", + " 15 NaN LVA 1987 2017 3277.189343\n", + " 
16 NaN EST 2001 2018 2926.824088\n", + " 17 NaN LVA 1987 2018 3277.215427\n", + " 18 NaN EST 2001 2019 2977.812197\n", + " 19 NaN LVA 1987 2019 3282.347763\n", + " 20 NaN EST 2001 2020 2982.267681\n", + " 21 NaN LVA 1987 2020 3282.403065\n", + " 22 NaN EST 2001 2021 2988.405903\n", + " 23 NaN LVA 1987 2021 3282.408977\n", + " 24 NaN EST 2001 2022 2992.726284\n", + " 25 NaN LVA 1987 2022 3282.458531\n", + " 26 NaN EST 2001 2023 2996.677736\n", + " 27 NaN LVA 1987 2023 3285.523770\n", + " 28 NaN EST 2001 2024 2996.677736\n", + " 29 NaN LVA 1987 2024 3285.523770,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2003 2010 2000.623487\n", + " 1 NaN EST 2003 2011 2002.836246\n", + " 2 NaN EST 2003 2012 2003.160130\n", + " 3 NaN EST 2003 2013 2008.470034\n", + " 4 NaN EST 2003 2014 2010.851901\n", + " 5 NaN EST 2003 2015 2011.197646\n", + " 6 NaN EST 2003 2016 2011.199420\n", + " 7 NaN EST 2003 2017 2017.505036\n", + " 8 NaN EST 2003 2018 2025.678814\n", + " 9 NaN EST 2003 2019 2093.005875\n", + " 10 NaN EST 2003 2020 2100.254719\n", + " 11 NaN EST 2003 2021 2108.616004\n", + " 12 NaN EST 2003 2022 2129.514251\n", + " 13 NaN EST 2003 2023 2139.281861\n", + " 14 NaN EST 2003 2024 2139.281861,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2005 2010 1121.537975\n", + " 1 1.0 LVA 2009 2010 416.408455\n", + " 2 2.0 RUS 2006 2010 67.791211\n", + " 3 NaN EST 2005 2011 1122.321353\n", + " 4 NaN LVA 2009 2011 416.410248\n", + " 5 NaN RUS 2006 2011 67.791211\n", + " 6 NaN EST 2005 2012 1122.827105\n", + " 7 NaN LVA 2009 2012 416.410248\n", + " 8 NaN RUS 2006 2012 67.791211\n", + " 9 NaN EST 2005 2013 1122.827514\n", + " 10 NaN LVA 2009 2013 416.410248\n", + " 11 NaN RUS 2006 2013 67.791211\n", + " 12 NaN EST 2005 2014 1122.916737\n", + " 13 NaN LVA 2009 2014 416.410248\n", + " 14 NaN RUS 2006 2014 67.791211\n", + " 15 NaN EST 2005 2015 1124.167230\n", + " 16 NaN LVA 2009 2015 416.410248\n", + " 17 NaN RUS 2006 2015 67.791211\n", + " 18 NaN EST 2005 2016 
1124.245768\n", + " 19 NaN LVA 2009 2016 416.425936\n", + " 20 NaN RUS 2006 2016 67.791211\n", + " 21 NaN EST 2005 2017 1127.114010\n", + " 22 NaN LVA 2009 2017 416.443352\n", + " 23 NaN RUS 2006 2017 67.791211\n", + " 24 NaN EST 2005 2018 1141.613492\n", + " 25 NaN LVA 2009 2018 416.443353\n", + " 26 NaN RUS 2006 2018 67.791211\n", + " 27 NaN EST 2005 2019 1185.773533\n", + " 28 NaN LVA 2009 2019 443.901683\n", + " 29 NaN RUS 2006 2019 67.791211\n", + " 30 NaN EST 2005 2020 1190.696702\n", + " 31 NaN LVA 2009 2020 443.901683\n", + " 32 NaN RUS 2006 2020 67.791211\n", + " 33 NaN EST 2005 2021 1195.533024\n", + " 34 NaN LVA 2009 2021 443.939607\n", + " 35 NaN RUS 2006 2021 67.791211\n", + " 36 NaN EST 2005 2022 1201.735682\n", + " 37 NaN LVA 2009 2022 444.907606\n", + " 38 NaN RUS 2006 2022 68.314713\n", + " 39 NaN EST 2005 2023 1204.789886\n", + " 40 NaN LVA 2009 2023 456.472521\n", + " 41 NaN RUS 2006 2023 68.314713\n", + " 42 NaN EST 2005 2024 1204.789886\n", + " 43 NaN LVA 2009 2024 456.472521\n", + " 44 NaN RUS 2006 2024 68.314713,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2004 2010 1466.823235\n", + " 1 1.0 RUS 1997 2010 549.252130\n", + " 2 NaN EST 2004 2011 1471.484049\n", + " 3 NaN RUS 1997 2011 549.252130\n", + " 4 NaN EST 2004 2012 1472.275089\n", + " 5 NaN RUS 1997 2012 549.252130\n", + " 6 NaN EST 2004 2013 1477.454527\n", + " 7 NaN RUS 1997 2013 549.252130\n", + " 8 NaN EST 2004 2014 1478.864450\n", + " 9 NaN RUS 1997 2014 549.252130\n", + " 10 NaN EST 2004 2015 1481.623947\n", + " 11 NaN RUS 1997 2015 549.252130\n", + " 12 NaN EST 2004 2016 1481.663000\n", + " 13 NaN RUS 1997 2016 549.252130\n", + " 14 NaN EST 2004 2017 1500.486155\n", + " 15 NaN RUS 1997 2017 549.252130\n", + " 16 NaN EST 2004 2018 1516.244070\n", + " 17 NaN RUS 1997 2018 549.254065\n", + " 18 NaN EST 2004 2019 1582.989286\n", + " 19 NaN RUS 1997 2019 549.254065\n", + " 20 NaN EST 2004 2020 1591.715758\n", + " 21 NaN RUS 1997 2020 549.254065\n", + " 22 NaN EST 2004 
2021 1603.242163\n", + " 23 NaN RUS 1997 2021 740.833133\n", + " 24 NaN EST 2004 2022 1612.812851\n", + " 25 NaN RUS 1997 2022 740.833133\n", + " 26 NaN EST 2004 2023 1616.612888\n", + " 27 NaN RUS 1997 2023 740.833133\n", + " 28 NaN EST 2004 2024 1616.612888\n", + " 29 NaN RUS 1997 2024 740.833133,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BLR 1979 2010 2889.797244\n", + " 1 1.0 LVA 2004 2010 1156.589401\n", + " 2 2.0 RUS 2002 2010 1994.848518\n", + " 3 NaN BLR 1979 2011 2889.797244\n", + " 4 NaN LVA 2004 2011 1156.589401\n", + " 5 NaN RUS 2002 2011 1994.848518\n", + " 6 NaN BLR 1979 2012 2889.797244\n", + " 7 NaN LVA 2004 2012 1156.589401\n", + " 8 NaN RUS 2002 2012 1994.848518\n", + " 9 NaN BLR 1979 2013 2936.895870\n", + " 10 NaN LVA 2004 2013 1156.589401\n", + " 11 NaN RUS 2002 2013 1994.848518\n", + " 12 NaN BLR 1979 2014 3158.570239\n", + " 13 NaN LVA 2004 2014 1156.589401\n", + " 14 NaN RUS 2002 2014 1994.848518\n", + " 15 NaN BLR 1979 2015 3158.570239\n", + " 16 NaN LVA 2004 2015 1156.589401\n", + " 17 NaN RUS 2002 2015 1994.848518\n", + " 18 NaN BLR 1979 2016 3158.570239\n", + " 19 NaN LVA 2004 2016 1156.589401\n", + " 20 NaN RUS 2002 2016 1994.848518\n", + " 21 NaN BLR 1979 2017 3158.570239\n", + " 22 NaN LVA 2004 2017 1156.589401\n", + " 23 NaN RUS 2002 2017 1994.848518\n", + " 24 NaN BLR 1979 2018 3158.570239\n", + " 25 NaN LVA 2004 2018 1156.589401\n", + " 26 NaN RUS 2002 2018 1994.848518\n", + " 27 NaN BLR 1979 2019 3158.570239\n", + " 28 NaN LVA 2004 2019 1156.589401\n", + " 29 NaN RUS 2002 2019 1994.848518\n", + " 30 NaN BLR 1979 2020 3158.570239\n", + " 31 NaN LVA 2004 2020 1156.589401\n", + " 32 NaN RUS 2002 2020 1994.848518\n", + " 33 NaN BLR 1979 2021 3158.570239\n", + " 34 NaN LVA 2004 2021 1156.589401\n", + " 35 NaN RUS 2002 2021 1994.848518\n", + " 36 NaN BLR 1979 2022 3158.570239\n", + " 37 NaN LVA 2004 2022 1156.589401\n", + " 38 NaN RUS 2002 2022 1994.848518\n", + " 39 NaN BLR 1979 2023 3158.570239\n", + " 40 NaN LVA 2004 2023 
1171.052075\n", + " 41 NaN RUS 2002 2023 2005.002321\n", + " 42 NaN BLR 1979 2024 3158.570239\n", + " 43 NaN LVA 2004 2024 1171.052075\n", + " 44 NaN RUS 2002 2024 2005.002321,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2003 2010 1693.109923\n", + " 1 1.0 LVA 2004 2010 98.079618\n", + " 2 2.0 RUS 1997 2010 2618.655286\n", + " 3 NaN EST 2003 2011 1693.447186\n", + " 4 NaN LVA 2004 2011 98.079618\n", + " 5 NaN RUS 1997 2011 2618.972075\n", + " 6 NaN EST 2003 2012 1693.485467\n", + " 7 NaN LVA 2004 2012 98.079618\n", + " 8 NaN RUS 1997 2012 2618.972075\n", + " 9 NaN EST 2003 2013 1693.485467\n", + " 10 NaN LVA 2004 2013 98.079618\n", + " 11 NaN RUS 1997 2013 2618.972075\n", + " 12 NaN EST 2003 2014 1693.482343\n", + " 13 NaN LVA 2004 2014 98.079618\n", + " 14 NaN RUS 1997 2014 2618.972075\n", + " 15 NaN EST 2003 2015 1693.482343\n", + " 16 NaN LVA 2004 2015 98.079618\n", + " 17 NaN RUS 1997 2015 2618.972075\n", + " 18 NaN EST 2003 2016 1693.482343\n", + " 19 NaN LVA 2004 2016 98.079618\n", + " 20 NaN RUS 1997 2016 2618.972075\n", + " 21 NaN EST 2003 2017 1693.483927\n", + " 22 NaN LVA 2004 2017 98.079618\n", + " 23 NaN RUS 1997 2017 2618.972075\n", + " 24 NaN EST 2003 2018 1694.984135\n", + " 25 NaN LVA 2004 2018 98.079618\n", + " 26 NaN RUS 1997 2018 2618.972079\n", + " 27 NaN EST 2003 2019 1697.978558\n", + " 28 NaN LVA 2004 2019 98.079618\n", + " 29 NaN RUS 1997 2019 2618.972079\n", + " 30 NaN EST 2003 2020 1698.864295\n", + " 31 NaN LVA 2004 2020 98.079618\n", + " 32 NaN RUS 1997 2020 2618.972079\n", + " 33 NaN EST 2003 2021 1724.531223\n", + " 34 NaN LVA 2004 2021 98.079618\n", + " 35 NaN RUS 1997 2021 2734.335694\n", + " 36 NaN EST 2003 2022 1726.397160\n", + " 37 NaN LVA 2004 2022 98.079618\n", + " 38 NaN RUS 1997 2022 2735.387314\n", + " 39 NaN EST 2003 2023 1727.436793\n", + " 40 NaN LVA 2004 2023 100.406117\n", + " 41 NaN RUS 1997 2023 2735.387314\n", + " 42 NaN EST 2003 2024 1727.436793\n", + " 43 NaN LVA 2004 2024 100.406117\n", + " 44 NaN 
RUS 1997 2024 2735.387314,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EST 2007 2010 85.115374\n", + " 1 1.0 FIN 2000 2010 745.446881\n", + " 2 NaN EST 2007 2011 85.115374\n", + " 3 NaN FIN 2000 2011 772.038599\n", + " 4 NaN EST 2007 2012 85.115374\n", + " 5 NaN FIN 2000 2012 780.063869\n", + " 6 NaN EST 2007 2013 85.115374\n", + " 7 NaN FIN 2000 2013 787.792553\n", + " 8 NaN EST 2007 2014 85.115374\n", + " 9 NaN FIN 2000 2014 793.548482\n", + " 10 NaN EST 2007 2015 85.115404\n", + " 11 NaN FIN 2000 2015 1165.288171\n", + " 12 NaN EST 2007 2016 85.115404\n", + " 13 NaN FIN 2000 2016 1168.596209\n", + " 14 NaN EST 2007 2017 85.115404\n", + " 15 NaN FIN 2000 2017 1179.855015\n", + " 16 NaN RUS None 2017 5.393189\n", + " 17 NaN EST 2007 2018 85.388358\n", + " 18 NaN FIN 2000 2018 1185.051894\n", + " 19 NaN RUS None 2018 5.393189\n", + " 20 NaN EST 2007 2019 85.388358\n", + " 21 NaN FIN 2000 2019 1202.057114\n", + " 22 NaN RUS None 2019 5.393189\n", + " 23 NaN EST 2007 2020 85.388358\n", + " 24 NaN FIN 2000 2020 1211.891681\n", + " 25 NaN RUS None 2020 5.393189\n", + " 26 NaN EST 2007 2021 85.388358\n", + " 27 NaN FIN 2000 2021 1223.110022\n", + " 28 NaN RUS None 2021 5.393189\n", + " 29 NaN EST 2007 2022 85.388358\n", + " 30 NaN FIN 2000 2022 1232.709845\n", + " 31 NaN RUS None 2022 5.393189\n", + " 32 NaN EST 2007 2023 85.388358\n", + " 33 NaN FIN 2000 2023 1239.204771\n", + " 34 NaN RUS None 2023 5.393189\n", + " 35 NaN EST 2007 2024 85.388358\n", + " 36 NaN FIN 2000 2024 1239.224821\n", + " 37 NaN RUS None 2024 5.393189,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 1998 2010 975.783744\n", + " 1 NaN FIN 1998 2011 978.916718\n", + " 2 NaN FIN 1998 2012 985.040006\n", + " 3 NaN FIN 1998 2013 994.517602\n", + " 4 NaN FIN 1998 2014 1006.273873\n", + " 5 NaN FIN 1998 2015 1273.414467\n", + " 6 NaN FIN 1998 2016 1275.960365\n", + " 7 NaN FIN 1998 2017 1315.171636\n", + " 8 NaN FIN 1998 2018 1319.181946\n", + " 9 NaN FIN 1998 2019 1330.879910\n", + 
" 10 NaN FIN 1998 2020 1336.134892\n", + " 11 NaN FIN 1998 2021 1345.939665\n", + " 12 NaN FIN 1998 2022 1356.192528\n", + " 13 NaN FIN 1998 2023 1358.711679\n", + " 14 NaN FIN 1998 2024 1358.863480,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2000 2010 231.446452\n", + " 1 1.0 RUS 1976 2010 895.331309\n", + " 2 NaN FIN 2000 2011 237.338580\n", + " 3 NaN RUS 1976 2011 910.138606\n", + " 4 NaN FIN 2000 2012 241.275493\n", + " 5 NaN RUS 1976 2012 936.553795\n", + " 6 NaN FIN 2000 2013 244.192603\n", + " 7 NaN RUS 1976 2013 938.817574\n", + " 8 NaN FIN 2000 2014 333.331922\n", + " 9 NaN RUS 1976 2014 938.817574\n", + " 10 NaN FIN 2000 2015 1711.612084\n", + " 11 NaN RUS 1976 2015 1077.839856\n", + " 12 NaN FIN 2000 2016 1713.134273\n", + " 13 NaN RUS 1976 2016 1098.383594\n", + " 14 NaN FIN 2000 2017 1720.538103\n", + " 15 NaN RUS 1976 2017 1221.418993\n", + " 16 NaN FIN 2000 2018 1723.078495\n", + " 17 NaN RUS 1976 2018 1221.418993\n", + " 18 NaN FIN 2000 2019 1750.669160\n", + " 19 NaN RUS 1976 2019 1221.418993\n", + " 20 NaN FIN 2000 2020 1755.509374\n", + " 21 NaN RUS 1976 2020 1221.418993\n", + " 22 NaN FIN 2000 2021 1763.468156\n", + " 23 NaN RUS 1976 2021 1221.418993\n", + " 24 NaN FIN 2000 2022 1766.955712\n", + " 25 NaN RUS 1976 2022 1221.418993\n", + " 26 NaN FIN 2000 2023 1769.096300\n", + " 27 NaN RUS 1976 2023 1221.418993\n", + " 28 NaN FIN 2000 2024 1769.096300\n", + " 29 NaN RUS 1976 2024 1221.418993,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2007 2010 925.749510\n", + " 1 1.0 RUS 1990 2010 0.086241\n", + " 2 NaN FIN 2007 2011 933.109015\n", + " 3 NaN RUS 1990 2011 0.086241\n", + " 4 NaN FIN 2007 2012 938.324461\n", + " 5 NaN RUS 1990 2012 0.086241\n", + " 6 NaN FIN 2007 2013 952.916096\n", + " 7 NaN RUS 1990 2013 0.086241\n", + " 8 NaN FIN 2007 2014 1024.222783\n", + " 9 NaN RUS 1990 2014 0.086241\n", + " 10 NaN FIN 2007 2015 1878.447220\n", + " 11 NaN RUS 1990 2015 9.345584\n", + " 12 NaN FIN 2007 2016 1880.498447\n", + " 
13 NaN RUS 1990 2016 9.345584\n", + " 14 NaN FIN 2007 2017 1884.050278\n", + " 15 NaN RUS 1990 2017 9.345584\n", + " 16 NaN FIN 2007 2018 1889.028661\n", + " 17 NaN RUS 1990 2018 9.345584\n", + " 18 NaN FIN 2007 2019 1901.725124\n", + " 19 NaN RUS 1990 2019 9.345584\n", + " 20 NaN FIN 2007 2020 1910.962989\n", + " 21 NaN RUS 1990 2020 9.345584\n", + " 22 NaN FIN 2007 2021 1975.389687\n", + " 23 NaN RUS 1990 2021 9.345584\n", + " 24 NaN FIN 2007 2022 1990.494102\n", + " 25 NaN RUS 1990 2022 9.345584\n", + " 26 NaN FIN 2007 2023 1994.859146\n", + " 27 NaN RUS 1990 2023 9.345584\n", + " 28 NaN FIN 2007 2024 1994.859146\n", + " 29 NaN RUS 1990 2024 9.345584,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2009 2010 35203.503375\n", + " 1 1.0 NOR 1996 2010 25083.256578\n", + " 2 2.0 RUS 2006 2010 1794.400204\n", + " 3 3.0 SWE 2006 2010 13886.791061\n", + " 4 NaN FIN 2009 2011 35211.236999\n", + " 5 NaN NOR 1996 2011 25186.945925\n", + " 6 NaN RUS 2006 2011 2105.436538\n", + " 7 NaN SWE 2006 2011 13962.776224\n", + " 8 NaN FIN 2009 2012 35221.154158\n", + " 9 NaN NOR 1996 2012 25186.945925\n", + " 10 NaN RUS 2006 2012 2105.436538\n", + " 11 NaN SWE 2006 2012 14030.416678\n", + " 12 NaN FIN 2009 2013 35239.342197\n", + " 13 NaN NOR 1996 2013 25186.945925\n", + " 14 NaN RUS 2006 2013 2105.436538\n", + " 15 NaN SWE 2006 2013 14048.648233\n", + " 16 NaN FIN 2009 2014 35550.150888\n", + " 17 NaN NOR 1996 2014 25186.945925\n", + " 18 NaN RUS 2006 2014 2166.725539\n", + " 19 NaN SWE 2006 2014 14255.954419\n", + " 20 NaN FIN 2009 2015 38628.687473\n", + " 21 NaN NOR 1996 2015 25533.521372\n", + " 22 NaN RUS 2006 2015 2291.799148\n", + " 23 NaN SWE 2006 2015 14551.108713\n", + " 24 NaN FIN 2009 2016 38638.709088\n", + " 25 NaN NOR 1996 2016 25533.521372\n", + " 26 NaN RUS 2006 2016 2291.799148\n", + " 27 NaN SWE 2006 2016 14571.672679\n", + " 28 NaN FIN 2009 2017 39000.360916\n", + " 29 NaN NOR 1996 2017 25533.521372\n", + " 30 NaN RUS 2006 2017 2460.292583\n", + " 31 
NaN SWE 2006 2017 14582.024189\n", + " 32 NaN FIN 2009 2018 39007.791354\n", + " 33 NaN NOR 1996 2018 25547.307979\n", + " 34 NaN RUS 2006 2018 2460.292583\n", + " 35 NaN SWE 2006 2018 14592.832534\n", + " 36 NaN FIN 2009 2019 39011.170265\n", + " 37 NaN NOR 1996 2019 25547.307979\n", + " 38 NaN RUS 2006 2019 2460.292583\n", + " 39 NaN SWE 2006 2019 14657.654417\n", + " 40 NaN FIN 2009 2020 39029.236826\n", + " 41 NaN NOR 1996 2020 25547.307979\n", + " 42 NaN RUS 2006 2020 2460.292583\n", + " 43 NaN SWE 2006 2020 14730.279281\n", + " 44 NaN FIN 2009 2021 39057.385436\n", + " 45 NaN NOR 1996 2021 25547.307979\n", + " 46 NaN RUS 2006 2021 2460.632132\n", + " 47 NaN SWE 2006 2021 14815.431598\n", + " 48 NaN FIN 2009 2022 39091.019126\n", + " 49 NaN NOR 1996 2022 25547.307979\n", + " 50 NaN RUS 2006 2022 2460.632132\n", + " 51 NaN SWE 2006 2022 14820.743225\n", + " 52 NaN FIN 2009 2023 39110.866378\n", + " 53 NaN NOR 1996 2023 25547.307979\n", + " 54 NaN RUS 2006 2023 2460.632132\n", + " 55 NaN SWE 2006 2023 14848.790468\n", + " 56 NaN FIN 2009 2024 39112.791145\n", + " 57 NaN NOR 1996 2024 25547.307979\n", + " 58 NaN RUS 2006 2024 2460.632132\n", + " 59 NaN SWE 2006 2024 14848.790468,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN NOR None 2014 19661.127887\n", + " 1 NaN NOR None 2015 19661.127887\n", + " 2 NaN NOR None 2016 19661.127887\n", + " 3 NaN NOR None 2017 19661.127887\n", + " 4 NaN NOR None 2018 19661.127887\n", + " 5 NaN NOR None 2019 19661.127887\n", + " 6 NaN NOR None 2020 19661.127887\n", + " 7 NaN NOR None 2021 19661.127887\n", + " 8 NaN NOR None 2022 19661.127887\n", + " 9 NaN NOR None 2023 19661.127887\n", + " 10 NaN NOR None 2024 19661.127887,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MOZ 1999 2010 3177.138035\n", + " 1 1.0 SWZ 1972 2010 1255.232360\n", + " 2 2.0 ZAF 1971 2010 7484.031696\n", + " 3 3.0 ZWE 1926 2010 244.988045\n", + " 4 NaN MOZ 1999 2011 3177.138035\n", + " 5 NaN SWZ 1972 2011 1255.232360\n", + " 6 NaN ZAF 1971 
2011 7621.240584\n", + " 7 NaN ZWE 1926 2011 244.988045\n", + " 8 NaN MOZ 1999 2012 3182.865812\n", + " 9 NaN SWZ 1972 2012 1255.232360\n", + " 10 NaN ZAF 1971 2012 7760.184405\n", + " 11 NaN ZWE 1926 2012 244.988045\n", + " 12 NaN MOZ 1999 2013 3182.865812\n", + " 13 NaN SWZ 1972 2013 1280.924433\n", + " 14 NaN ZAF 1971 2013 7910.545205\n", + " 15 NaN ZWE 1926 2013 244.988045\n", + " 16 NaN MOZ 1999 2014 3182.865812\n", + " 17 NaN SWZ 1972 2014 1280.924433\n", + " 18 NaN ZAF 1971 2014 8614.381022\n", + " 19 NaN ZWE 1926 2014 244.988045\n", + " 20 NaN MOZ 1999 2015 3182.865812\n", + " 21 NaN SWZ 1972 2015 1304.736362\n", + " 22 NaN ZAF 1971 2015 8778.993235\n", + " 23 NaN ZWE 1926 2015 244.988045\n", + " 24 NaN MOZ 1999 2016 3182.865812\n", + " 25 NaN SWZ 1972 2016 1304.736362\n", + " 26 NaN ZAF 1971 2016 8778.993235\n", + " 27 NaN ZWE 1926 2016 244.988045\n", + " 28 NaN MOZ 1999 2017 3182.865812\n", + " 29 NaN SWZ 1972 2017 1304.736362\n", + " 30 NaN ZAF 1971 2017 9034.399435\n", + " 31 NaN ZWE 1926 2017 244.988045\n", + " 32 NaN MOZ 1999 2018 3182.865812\n", + " 33 NaN SWZ 1972 2018 1895.605924\n", + " 34 NaN ZAF 1971 2018 9408.145225\n", + " 35 NaN ZWE 1926 2018 244.988045\n", + " 36 NaN MOZ 1999 2019 4631.392967\n", + " 37 NaN SWZ 1972 2019 1985.870755\n", + " 38 NaN ZAF 1971 2019 11184.879008\n", + " 39 NaN ZWE 1926 2019 244.988045\n", + " 40 NaN MOZ 1999 2020 4631.392967\n", + " 41 NaN SWZ 1972 2020 1985.870755\n", + " 42 NaN ZAF 1971 2020 11184.879008\n", + " 43 NaN ZWE 1926 2020 244.988045\n", + " 44 NaN MOZ 1999 2021 4631.392967\n", + " 45 NaN SWZ 1972 2021 1985.870755\n", + " 46 NaN ZAF 1971 2021 11575.614033\n", + " 47 NaN ZWE 1926 2021 244.988045\n", + " 48 NaN MOZ 1999 2022 4631.392967\n", + " 49 NaN SWZ 1972 2022 1985.870755\n", + " 50 NaN ZAF 1971 2022 11611.905630\n", + " 51 NaN ZWE 1926 2022 244.988045\n", + " 52 NaN MOZ 1999 2023 4631.392967\n", + " 53 NaN SWZ 1972 2023 1985.870755\n", + " 54 NaN ZAF 1971 2023 12063.112790\n", + " 55 NaN ZWE 1926 
2023 244.988045\n", + " 56 NaN MOZ 1999 2024 4631.392967\n", + " 57 NaN SWZ 1972 2024 1985.870755\n", + " 58 NaN ZAF 1971 2024 12063.112790\n", + " 59 NaN ZWE 1926 2024 244.988045,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MOZ 1926 2010 142971.261701\n", + " 1 1.0 MWI 1931 2010 3748.433973\n", + " 2 2.0 ZAF 1969 2010 38272.709356\n", + " 3 3.0 ZMB 1964 2010 19621.228796\n", + " 4 4.0 ZWE 1926 2010 70137.627210\n", + " .. ... ... ... ... ...\n", + " 70 NaN MOZ 1926 2024 150312.994233\n", + " 71 NaN MWI 1931 2024 4359.148035\n", + " 72 NaN ZAF 1969 2024 40170.714041\n", + " 73 NaN ZMB 1964 2024 19621.228796\n", + " 74 NaN ZWE 1926 2024 72202.445165\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 COD 1981 2010 6678.774099\n", + " 1 1.0 MOZ 0 2010 106675.030501\n", + " 2 2.0 MWI 1924 2010 53314.524093\n", + " 3 3.0 TZA 1998 2010 283360.691651\n", + " 4 4.0 ZMB 0 2010 115687.493096\n", + " .. ... ... ... ... ...\n", + " 85 NaN MOZ 0 2024 116400.376342\n", + " 86 NaN MWI 1924 2024 53314.524093\n", + " 87 NaN TZA 1998 2024 288410.737214\n", + " 88 NaN ZMB 0 2024 116148.100481\n", + " 89 NaN ZWE 0 2024 19066.854658\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BDI 1980 2010 750.611471\n", + " 1 1.0 COD 1996 2010 2696.414391\n", + " 2 2.0 ETH 1974 2010 2085.562173\n", + " 3 3.0 KEN 1961 2010 96323.520528\n", + " 4 4.0 RWA 0 2010 1225.106614\n", + " .. ... ... ... ... ...\n", + " 115 NaN KEN 1961 2024 127625.022542\n", + " 116 NaN RWA 0 2024 6250.367548\n", + " 117 NaN SSD 1954 2024 5899.827929\n", + " 118 NaN TZA 0 2024 145506.796111\n", + " 119 NaN UGA 1967 2024 39176.037948\n", + " \n", + " [120 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 COD 1986 2010 0.262390\n", + " 1 1.0 ERI 0 2010 2993.690761\n", + " 2 2.0 ETH 0 2010 117115.368325\n", + " 3 3.0 KEN 0 2010 11004.382659\n", + " 4 4.0 SDN 2006 2010 11099.204330\n", + " .. ... ... ... ... 
...\n", + " 85 NaN ERI 0 2024 2993.690761\n", + " 86 NaN ETH 0 2024 143660.111914\n", + " 87 NaN KEN 0 2024 20285.179723\n", + " 88 NaN SDN 2006 2024 11099.204330\n", + " 89 NaN SSD 1973 2024 96738.726105\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EGY 1996 2010 56386.898432\n", + " 1 1.0 ERI 0 2010 12759.628180\n", + " 2 2.0 ETH 0 2010 2653.186540\n", + " 3 3.0 SAU 1990 2010 1464.513727\n", + " 4 4.0 SDN 0 2010 49166.604405\n", + " .. ... ... ... ... ...\n", + " 70 NaN EGY 1996 2024 56386.898432\n", + " 71 NaN ERI 0 2024 12759.628180\n", + " 72 NaN ETH 0 2024 2653.186540\n", + " 73 NaN SAU 1990 2024 1464.513727\n", + " 74 NaN SDN 0 2024 49627.813204\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 EGY 2007 2010 39308.328543\n", + " 1 1.0 ISR 2002 2010 5313.910350\n", + " 2 2.0 JOR 1979 2010 6199.923445\n", + " 3 3.0 LBN 1970 2010 184.757862\n", + " 4 4.0 PSE 2009 2010 963.537974\n", + " .. ... ... ... ... ...\n", + " 94 NaN JOR 1979 2024 119676.584813\n", + " 95 NaN LBN 1970 2024 210.024846\n", + " 96 NaN PSE 2009 2024 1450.519531\n", + " 97 NaN SAU 1983 2024 155373.056593\n", + " 98 NaN XAD None 2024 16.601905\n", + " \n", + " [99 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CYP 1990 2010 3145.500816\n", + " 1 1.0 GEO 2010 2010 2075.561159\n", + " 2 2.0 RUS 1971 2010 2821.923318\n", + " 3 3.0 TUR 1994 2010 1043.507925\n", + " 4 4.0 UKR 0 2010 550.988627\n", + " .. ... ... ... ... 
...\n", + " 100 NaN RUS 1971 2024 2889.058004\n", + " 101 NaN TUR 1994 2024 1043.507925\n", + " 102 NaN UKR 0 2024 550.988627\n", + " 103 NaN XAD 2003 2024 107.808733\n", + " 104 NaN ZNC 1990 2024 2728.129545\n", + " \n", + " [105 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BLR 0 2010 3344.092770\n", + " 1 1.0 MDA 0 2010 647.439044\n", + " 2 2.0 RUS 1991 2010 21707.656891\n", + " 3 3.0 UKR 0 2010 66898.379527\n", + " 4 NaN BLR 0 2011 3344.092770\n", + " 5 NaN MDA 0 2011 647.439044\n", + " 6 NaN RUS 1991 2011 21707.725119\n", + " 7 NaN UKR 0 2011 66898.379527\n", + " 8 NaN BLR 0 2012 3344.092770\n", + " 9 NaN MDA 0 2012 647.439044\n", + " 10 NaN RUS 1991 2012 21722.496189\n", + " 11 NaN UKR 0 2012 66898.379527\n", + " 12 NaN BLR 0 2013 3344.092770\n", + " 13 NaN MDA 0 2013 647.439044\n", + " 14 NaN RUS 1991 2013 21803.479362\n", + " 15 NaN UKR 0 2013 66902.623613\n", + " 16 NaN BLR 0 2014 3634.178413\n", + " 17 NaN MDA 0 2014 647.439044\n", + " 18 NaN RUS 1991 2014 21807.449203\n", + " 19 NaN UKR 0 2014 67192.709255\n", + " 20 NaN BLR 0 2015 3669.066160\n", + " 21 NaN MDA 0 2015 647.439044\n", + " 22 NaN RUS 1991 2015 21808.448689\n", + " 23 NaN UKR 0 2015 67192.709255\n", + " 24 NaN BLR 0 2016 3677.855285\n", + " 25 NaN MDA 0 2016 647.439044\n", + " 26 NaN RUS 1991 2016 21809.755125\n", + " 27 NaN UKR 0 2016 67196.211403\n", + " 28 NaN BLR 0 2017 3677.855285\n", + " 29 NaN MDA 0 2017 647.439044\n", + " 30 NaN RUS 1991 2017 21847.781149\n", + " 31 NaN UKR 0 2017 67196.211403\n", + " 32 NaN BLR 0 2018 3677.855285\n", + " 33 NaN MDA 0 2018 647.439044\n", + " 34 NaN RUS 1991 2018 21849.751192\n", + " 35 NaN UKR 0 2018 67196.211403\n", + " 36 NaN BLR 0 2019 3677.855285\n", + " 37 NaN MDA 0 2019 647.439044\n", + " 38 NaN RUS 1991 2019 21893.734645\n", + " 39 NaN UKR 0 2019 67196.211403\n", + " 40 NaN BLR 0 2020 3677.855285\n", + " 41 NaN MDA 0 2020 647.439044\n", + " 42 NaN RUS 1991 2020 21908.905095\n", + " 43 NaN UKR 0 2020 67196.211403\n", 
+ " 44 NaN BLR 0 2021 3677.855285\n", + " 45 NaN MDA 0 2021 647.439044\n", + " 46 NaN RUS 1991 2021 21908.905095\n", + " 47 NaN UKR 0 2021 67196.211403\n", + " 48 NaN BLR 0 2022 3677.855285\n", + " 49 NaN MDA 0 2022 647.439044\n", + " 50 NaN RUS 1991 2022 21908.905095\n", + " 51 NaN UKR 0 2022 67196.211403\n", + " 52 NaN BLR 0 2023 3677.855285\n", + " 53 NaN MDA 0 2023 647.439044\n", + " 54 NaN RUS 1991 2023 21908.905095\n", + " 55 NaN UKR 0 2023 67196.211403\n", + " 56 NaN BLR 0 2024 3677.855285\n", + " 57 NaN MDA 0 2024 647.439044\n", + " 58 NaN RUS 1991 2024 21908.905095\n", + " 59 NaN UKR 0 2024 67196.211403,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BLR 2001 2010 453.262627\n", + " 1 1.0 FIN 2006 2010 620.826504\n", + " 2 2.0 RUS 1991 2010 35799.762177\n", + " 3 NaN BLR 2001 2011 453.262627\n", + " 4 NaN FIN 2006 2011 624.648785\n", + " 5 NaN RUS 1991 2011 35831.290289\n", + " 6 NaN BLR 2001 2012 453.262627\n", + " 7 NaN FIN 2006 2012 629.966605\n", + " 8 NaN RUS 1991 2012 36603.260378\n", + " 9 NaN BLR 2001 2013 453.262627\n", + " 10 NaN FIN 2006 2013 631.702073\n", + " 11 NaN RUS 1991 2013 36721.254343\n", + " 12 NaN BLR 2001 2014 453.262627\n", + " 13 NaN FIN 2006 2014 633.182487\n", + " 14 NaN RUS 1991 2014 36893.519773\n", + " 15 NaN BLR 2001 2015 453.262627\n", + " 16 NaN FIN 2006 2015 904.257837\n", + " 17 NaN RUS 1991 2015 37051.844545\n", + " 18 NaN BLR 2001 2016 453.262627\n", + " 19 NaN FIN 2006 2016 905.025802\n", + " 20 NaN RUS 1991 2016 37090.098844\n", + " 21 NaN BLR 2001 2017 453.262627\n", + " 22 NaN FIN 2006 2017 908.901864\n", + " 23 NaN RUS 1991 2017 38216.811781\n", + " 24 NaN BLR 2001 2018 453.262627\n", + " 25 NaN FIN 2006 2018 911.236709\n", + " 26 NaN RUS 1991 2018 38406.954234\n", + " 27 NaN BLR 2001 2019 453.262627\n", + " 28 NaN FIN 2006 2019 911.949143\n", + " 29 NaN RUS 1991 2019 38575.807182\n", + " 30 NaN BLR 2001 2020 453.262627\n", + " 31 NaN FIN 2006 2020 919.846948\n", + " 32 NaN RUS 1991 2020 38578.721490\n", + 
" 33 NaN BLR 2001 2021 453.262627\n", + " 34 NaN FIN 2006 2021 995.114737\n", + " 35 NaN RUS 1991 2021 38588.532251\n", + " 36 NaN BLR 2001 2022 453.262627\n", + " 37 NaN FIN 2006 2022 996.174824\n", + " 38 NaN RUS 1991 2022 38588.532251\n", + " 39 NaN BLR 2001 2023 453.262627\n", + " 40 NaN FIN 2006 2023 996.206991\n", + " 41 NaN RUS 1991 2023 38588.532251\n", + " 42 NaN BLR 2001 2024 453.262627\n", + " 43 NaN FIN 2006 2024 996.206991\n", + " 44 NaN RUS 1991 2024 38588.532251,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FIN 2005 2010 1077.539474\n", + " 1 1.0 NOR 1961 2010 1088.731022\n", + " 2 2.0 RUS 1996 2010 12743.063658\n", + " 3 NaN FIN 2005 2011 1077.539474\n", + " 4 NaN NOR 1961 2011 1088.731022\n", + " 5 NaN RUS 1996 2011 14159.459500\n", + " 6 NaN FIN 2005 2012 1077.670050\n", + " 7 NaN NOR 1961 2012 1088.731022\n", + " 8 NaN RUS 1996 2012 14159.459500\n", + " 9 NaN FIN 2005 2013 1077.670050\n", + " 10 NaN NOR 1961 2013 1088.731022\n", + " 11 NaN RUS 1996 2013 15976.011132\n", + " 12 NaN FIN 2005 2014 1114.156343\n", + " 13 NaN NOR 1961 2014 1088.731022\n", + " 14 NaN RUS 1996 2014 18301.007760\n", + " 15 NaN FIN 2005 2015 1114.685705\n", + " 16 NaN NOR 1961 2015 1088.731022\n", + " 17 NaN RUS 1996 2015 18659.551946\n", + " 18 NaN FIN 2005 2016 1114.685705\n", + " 19 NaN NOR 1961 2016 1088.731022\n", + " 20 NaN RUS 1996 2016 18659.551946\n", + " 21 NaN FIN 2005 2017 1114.685705\n", + " 22 NaN NOR 1961 2017 1088.731022\n", + " 23 NaN RUS 1996 2017 18659.551946\n", + " 24 NaN FIN 2005 2018 1114.685705\n", + " 25 NaN NOR 1961 2018 1088.731022\n", + " 26 NaN RUS 1996 2018 19557.120102\n", + " 27 NaN FIN 2005 2019 1114.685705\n", + " 28 NaN NOR 1961 2019 1088.731022\n", + " 29 NaN RUS 1996 2019 19557.120102\n", + " 30 NaN FIN 2005 2020 1114.685705\n", + " 31 NaN NOR 1961 2020 1088.731022\n", + " 32 NaN RUS 1996 2020 19557.120102\n", + " 33 NaN FIN 2005 2021 1115.738293\n", + " 34 NaN NOR 1961 2021 1088.731022\n", + " 35 NaN RUS 1996 2021 
19557.120102\n", + " 36 NaN FIN 2005 2022 1115.738293\n", + " 37 NaN NOR 1961 2022 1088.731022\n", + " 38 NaN RUS 1996 2022 19557.120102\n", + " 39 NaN FIN 2005 2023 1115.738293\n", + " 40 NaN NOR 1961 2023 1088.731022\n", + " 41 NaN RUS 1996 2023 19557.120102\n", + " 42 NaN FIN 2005 2024 1115.738293\n", + " 43 NaN NOR 1961 2024 1088.731022\n", + " 44 NaN RUS 1996 2024 19557.120102,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN NOR None 2014 673.538246\n", + " 1 NaN NOR None 2015 673.538246\n", + " 2 NaN NOR None 2016 673.538246\n", + " 3 NaN NOR None 2017 673.538246\n", + " 4 NaN NOR None 2018 673.538246\n", + " 5 NaN NOR None 2019 673.538246\n", + " 6 NaN NOR None 2020 673.538246\n", + " 7 NaN NOR None 2021 673.538246\n", + " 8 NaN NOR None 2022 673.538246\n", + " 9 NaN NOR None 2023 673.538246\n", + " 10 NaN NOR None 2024 673.538246,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MDG 1962 2010 63.09996\n", + " 1 NaN MDG 1962 2011 63.09996\n", + " 2 NaN MDG 1962 2012 63.09996\n", + " 3 NaN MDG 1962 2013 63.09996\n", + " 4 NaN MDG 1962 2014 63.09996\n", + " 5 NaN MDG 1962 2015 63.09996\n", + " 6 NaN MDG 1962 2016 63.09996\n", + " 7 NaN MDG 1962 2017 63.09996\n", + " 8 NaN MDG 1962 2018 63.09996\n", + " 9 NaN MDG 1962 2019 63.09996\n", + " 10 NaN MDG 1962 2020 63.09996\n", + " 11 NaN MDG 1962 2021 63.09996\n", + " 12 NaN MDG 1962 2022 63.09996\n", + " 13 NaN MDG 1962 2023 63.09996\n", + " 14 NaN MDG 1962 2024 63.09996,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MDG 2006 2010 64867.878909\n", + " 1 NaN MDG 2006 2011 64867.910002\n", + " 2 NaN MDG 2006 2012 65019.718565\n", + " 3 NaN MOZ None 2012 28.785323\n", + " 4 NaN MDG 2006 2013 65019.718565\n", + " 5 NaN MOZ None 2013 28.785323\n", + " 6 NaN MDG 2006 2014 65019.718565\n", + " 7 NaN MOZ None 2014 28.785323\n", + " 8 NaN MDG 2006 2015 65035.408090\n", + " 9 NaN MOZ None 2015 28.785323\n", + " 10 NaN MDG 2006 2016 65035.408090\n", + " 11 NaN MOZ None 2016 28.785323\n", + " 12 NaN MDG 
2006 2017 65650.590024\n", + " 13 NaN MOZ None 2017 28.785323\n", + " 14 NaN MDG 2006 2018 65650.590024\n", + " 15 NaN MOZ None 2018 28.785323\n", + " 16 NaN MDG 2006 2019 65650.590024\n", + " 17 NaN MOZ None 2019 28.785323\n", + " 18 NaN MDG 2006 2020 65650.590024\n", + " 19 NaN MOZ None 2020 28.785323\n", + " 20 NaN MDG 2006 2021 65650.590024\n", + " 21 NaN MOZ None 2021 28.785323\n", + " 22 NaN MDG 2006 2022 65650.590024\n", + " 23 NaN MOZ None 2022 28.785323\n", + " 24 NaN MDG 2006 2023 65650.590024\n", + " 25 NaN MOZ None 2023 28.785323\n", + " 26 NaN MDG 2006 2024 65650.590024\n", + " 27 NaN MOZ None 2024 28.785323,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 COM 1995 2010 550.739576\n", + " 1 1.0 FRA 1997 2010 22.814663\n", + " 2 2.0 MDG 2007 2010 19820.998646\n", + " 3 3.0 MOZ 0 2010 6459.442913\n", + " 4 4.0 SYC 1982 2010 164.896769\n", + " .. ... ... ... ... ...\n", + " 85 NaN FRA 1997 2024 56.366128\n", + " 86 NaN MDG 2007 2024 19837.973574\n", + " 87 NaN MOZ 0 2024 6459.442913\n", + " 88 NaN SYC 1982 2024 164.896769\n", + " 89 NaN TZA 0 2024 320.269896\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ETH 1989 2010 1964.799743\n", + " 1 1.0 KEN 1979 2010 4020.795869\n", + " 2 NaN ETH 1989 2011 1964.799743\n", + " 3 NaN KEN 1979 2011 4020.795869\n", + " 4 NaN ETH 1989 2012 1964.799743\n", + " 5 NaN KEN 1979 2012 4020.795869\n", + " 6 NaN ETH 1989 2013 1964.799743\n", + " 7 NaN KEN 1979 2013 4020.795869\n", + " 8 NaN ETH 1989 2014 1964.799743\n", + " 9 NaN KEN 1979 2014 4020.795869\n", + " 10 NaN ETH 1989 2015 1964.799743\n", + " 11 NaN KEN 1979 2015 4020.795869\n", + " 12 NaN ETH 1989 2016 1964.799743\n", + " 13 NaN KEN 1979 2016 4020.795869\n", + " 14 NaN ETH 1989 2017 5254.902045\n", + " 15 NaN KEN 1979 2017 8389.677184\n", + " 16 NaN ETH 1989 2018 5254.902045\n", + " 17 NaN KEN 1979 2018 8389.677184\n", + " 18 NaN ETH 1989 2019 5254.902045\n", + " 19 NaN KEN 1979 2019 8389.677184\n", + " 20 NaN ETH 
1989 2020 5254.902045\n", + " 21 NaN KEN 1979 2020 8389.677184\n", + " 22 NaN ETH 1989 2021 5254.902045\n", + " 23 NaN KEN 1979 2021 8389.677184\n", + " 24 NaN ETH 1989 2022 5254.902045\n", + " 25 NaN KEN 1979 2022 8389.677184\n", + " 26 NaN ETH 1989 2023 5254.902045\n", + " 27 NaN KEN 1979 2023 8389.677184\n", + " 28 NaN ETH 1989 2024 5254.902045\n", + " 29 NaN KEN 1979 2024 8389.677184,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 DJI 2004 2010 92.468887\n", + " 1 1.0 ERI 0 2010 394.801098\n", + " 2 2.0 ETH 0 2010 75295.766764\n", + " 3 3.0 YEM 2008 2010 643.787321\n", + " 4 NaN DJI 2004 2011 92.468887\n", + " .. ... ... ... ... ...\n", + " 75 NaN ERI 0 2024 394.801098\n", + " 76 NaN ETH 0 2024 78460.515084\n", + " 77 NaN KEN None 2024 1300.021804\n", + " 78 NaN SOM None 2024 249.753799\n", + " 79 NaN YEM 2008 2024 1548.608904\n", + " \n", + " [80 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ERI 0 2010 3470.128756\n", + " 1 1.0 SAU 1990 2010 44728.630430\n", + " 2 2.0 YEM 2005 2010 2627.459541\n", + " 3 NaN ERI 0 2011 3470.128756\n", + " 4 NaN SAU 1990 2011 44728.630430\n", + " 5 NaN YEM 2005 2011 2627.459541\n", + " 6 NaN ERI 0 2012 3470.128756\n", + " 7 NaN SAU 1990 2012 44728.630430\n", + " 8 NaN YEM 2005 2012 2627.459541\n", + " 9 NaN ERI 0 2013 3470.128756\n", + " 10 NaN SAU 1990 2013 44728.630430\n", + " 11 NaN YEM 2005 2013 2627.459541\n", + " 12 NaN ERI 0 2014 3470.128756\n", + " 13 NaN SAU 1990 2014 44728.630430\n", + " 14 NaN YEM 2005 2014 2627.459541\n", + " 15 NaN ERI 0 2015 3470.128756\n", + " 16 NaN SAU 1990 2015 44728.630430\n", + " 17 NaN YEM 2005 2015 2627.459541\n", + " 18 NaN ERI 0 2016 3470.128756\n", + " 19 NaN SAU 1990 2016 44728.630430\n", + " 20 NaN YEM 2005 2016 2627.459541\n", + " 21 NaN ERI 0 2017 3470.128756\n", + " 22 NaN SAU 1990 2017 44728.630430\n", + " 23 NaN YEM 2005 2017 2627.459541\n", + " 24 NaN ERI 0 2018 3470.128756\n", + " 25 NaN SAU 1990 2018 44728.630430\n", + " 26 NaN YEM 2005 2018 
2627.459541\n", + " 27 NaN ERI 0 2019 3470.128756\n", + " 28 NaN SAU 1990 2019 44857.989662\n", + " 29 NaN YEM 2005 2019 2627.459541\n", + " 30 NaN ERI 0 2020 3470.128756\n", + " 31 NaN SAU 1990 2020 44857.989662\n", + " 32 NaN YEM 2005 2020 2627.459541\n", + " 33 NaN ERI 0 2021 3470.128756\n", + " 34 NaN SAU 1990 2021 44857.989662\n", + " 35 NaN YEM 2005 2021 2627.459541\n", + " 36 NaN ERI 0 2022 3470.128756\n", + " 37 NaN SAU 1990 2022 44857.989662\n", + " 38 NaN YEM 2005 2022 2627.459541\n", + " 39 NaN ERI 0 2023 3470.128756\n", + " 40 NaN SAU 1990 2023 64307.048668\n", + " 41 NaN YEM 2005 2023 30130.533535\n", + " 42 NaN ERI 0 2024 3470.128756\n", + " 43 NaN SAU 1990 2024 64307.048668\n", + " 44 NaN YEM 2005 2024 30130.533535,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IRN 1975 2010 8945.800730\n", + " 1 1.0 IRQ 2007 2010 1374.830198\n", + " 2 2.0 KWT 2010 2010 1683.804605\n", + " 3 3.0 QAT 1990 2010 1343.934802\n", + " 4 4.0 SAU 2005 2010 14639.755878\n", + " .. ... ... ... ... ...\n", + " 77 NaN IRQ 2007 2024 16943.112592\n", + " 78 NaN JOR None 2024 20943.802682\n", + " 79 NaN KWT 2010 2024 3779.409610\n", + " 80 NaN QAT 1990 2024 1343.934802\n", + " 81 NaN SAU 2005 2024 163960.572040\n", + " \n", + " [82 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARM 0 2010 15473.390162\n", + " 1 1.0 AZE 1973 2010 31287.693829\n", + " 2 2.0 GEO 0 2010 29232.004525\n", + " 3 3.0 IRN 1976 2010 15481.122962\n", + " 4 4.0 IRQ 2000 2010 1385.826692\n", + " .. ... ... ... ... 
...\n", + " 115 NaN IRN 1976 2024 15829.029610\n", + " 116 NaN IRQ 2000 2024 2640.500204\n", + " 117 NaN RUS 0 2024 32512.570327\n", + " 118 NaN TUR 0 2024 1854.501387\n", + " 119 NaN XCA 2002 2024 3406.417816\n", + " \n", + " [120 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KAZ 1993 2010 12063.773830\n", + " 1 1.0 RUS 1987 2010 35995.533907\n", + " 2 2.0 UKR 0 2010 78.814164\n", + " 3 3.0 XCA 1976 2010 10621.817003\n", + " 4 NaN KAZ 1993 2011 12063.773830\n", + " 5 NaN RUS 1987 2011 36069.771707\n", + " 6 NaN UKR 0 2011 78.814164\n", + " 7 NaN XCA 1976 2011 10621.817003\n", + " 8 NaN KAZ 1993 2012 12063.773830\n", + " 9 NaN RUS 1987 2012 36090.083132\n", + " 10 NaN UKR 0 2012 78.814164\n", + " 11 NaN XCA 1976 2012 10621.817003\n", + " 12 NaN KAZ 1993 2013 12063.773830\n", + " 13 NaN RUS 1987 2013 36208.523553\n", + " 14 NaN UKR 0 2013 78.814164\n", + " 15 NaN XCA 1976 2013 10621.817003\n", + " 16 NaN KAZ 1993 2014 12063.773830\n", + " 17 NaN RUS 1987 2014 36236.076569\n", + " 18 NaN UKR 0 2014 78.814164\n", + " 19 NaN XCA 1976 2014 10621.817003\n", + " 20 NaN KAZ 1993 2015 12063.773830\n", + " 21 NaN RUS 1987 2015 36279.281059\n", + " 22 NaN UKR 0 2015 78.814164\n", + " 23 NaN XCA 1976 2015 10621.817003\n", + " 24 NaN KAZ 1993 2016 12063.773830\n", + " 25 NaN RUS 1987 2016 36279.281059\n", + " 26 NaN UKR 0 2016 78.814164\n", + " 27 NaN XCA 1976 2016 10621.817003\n", + " 28 NaN KAZ 1993 2017 12063.773830\n", + " 29 NaN RUS 1987 2017 36362.842456\n", + " 30 NaN UKR 0 2017 78.814164\n", + " 31 NaN XCA 1976 2017 10621.817003\n", + " 32 NaN KAZ 1993 2018 12063.773830\n", + " 33 NaN RUS 1987 2018 36362.842456\n", + " 34 NaN UKR 0 2018 78.814164\n", + " 35 NaN XCA 1976 2018 10621.817003\n", + " 36 NaN KAZ 1993 2019 12063.773830\n", + " 37 NaN RUS 1987 2019 36362.842456\n", + " 38 NaN UKR 0 2019 78.814164\n", + " 39 NaN XCA 1976 2019 10621.817003\n", + " 40 NaN KAZ 1993 2020 12063.773830\n", + " 41 NaN RUS 1987 2020 36386.989624\n", + " 42 NaN 
UKR 0 2020 78.814164\n", + " 43 NaN XCA 1976 2020 10621.817003\n", + " 44 NaN KAZ 1993 2021 12063.773830\n", + " 45 NaN RUS 1987 2021 36386.989624\n", + " 46 NaN UKR 0 2021 78.814164\n", + " 47 NaN XCA 1976 2021 10621.817003\n", + " 48 NaN KAZ 1993 2022 12063.773830\n", + " 49 NaN RUS 1987 2022 36386.989624\n", + " 50 NaN UKR 0 2022 78.814164\n", + " 51 NaN XCA 1976 2022 10621.817003\n", + " 52 NaN KAZ 1993 2023 12063.773830\n", + " 53 NaN RUS 1987 2023 36386.989624\n", + " 54 NaN UKR 0 2023 78.814164\n", + " 55 NaN XCA 1976 2023 10621.817003\n", + " 56 NaN KAZ 1993 2024 12063.773830\n", + " 57 NaN RUS 1987 2024 36386.989624\n", + " 58 NaN UKR 0 2024 78.814164\n", + " 59 NaN XCA 1976 2024 10621.817003,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2008 2010 29915.104832\n", + " 1 NaN RUS 2008 2011 29987.409436\n", + " 2 NaN RUS 2008 2012 30043.565637\n", + " 3 NaN RUS 2008 2013 30147.153742\n", + " 4 NaN RUS 2008 2014 30578.598204\n", + " 5 NaN RUS 2008 2015 31360.182915\n", + " 6 NaN RUS 2008 2016 31360.491411\n", + " 7 NaN RUS 2008 2017 31445.332317\n", + " 8 NaN RUS 2008 2018 31566.397315\n", + " 9 NaN RUS 2008 2019 35075.380953\n", + " 10 NaN RUS 2008 2020 35075.380953\n", + " 11 NaN RUS 2008 2021 35075.380953\n", + " 12 NaN RUS 2008 2022 35075.380953\n", + " 13 NaN RUS 2008 2023 35075.380953\n", + " 14 NaN RUS 2008 2024 35075.380953,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1998 2010 8691.733068\n", + " 1 NaN RUS 1998 2011 8739.875529\n", + " 2 NaN RUS 1998 2012 8739.875529\n", + " 3 NaN RUS 1998 2013 8739.875529\n", + " 4 NaN RUS 1998 2014 8739.875529\n", + " 5 NaN RUS 1998 2015 8739.875529\n", + " 6 NaN RUS 1998 2016 8739.875529\n", + " 7 NaN RUS 1998 2017 13764.411167\n", + " 8 NaN RUS 1998 2018 13764.411167\n", + " 9 NaN RUS 1998 2019 13764.411167\n", + " 10 NaN RUS 1998 2020 13764.411167\n", + " 11 NaN RUS 1998 2021 13764.411167\n", + " 12 NaN RUS 1998 2022 13764.411167\n", + " 13 NaN RUS 1998 2023 13764.411167\n", + " 14 NaN 
RUS 1998 2024 13764.411167,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2009 2010 3011.242073\n", + " 1 NaN RUS 2009 2011 3011.242073\n", + " 2 NaN RUS 2009 2012 3011.242073\n", + " 3 NaN RUS 2009 2013 3011.242073\n", + " 4 NaN RUS 2009 2014 3011.242073\n", + " 5 NaN RUS 2009 2015 3011.242073\n", + " 6 NaN RUS 2009 2016 3011.242073\n", + " 7 NaN RUS 2009 2017 3011.242073\n", + " 8 NaN RUS 2009 2018 3011.242073\n", + " 9 NaN RUS 2009 2019 3011.242073\n", + " 10 NaN RUS 2009 2020 3011.242073\n", + " 11 NaN RUS 2009 2021 3011.242073\n", + " 12 NaN RUS 2009 2022 3011.242073\n", + " 13 NaN RUS 2009 2023 3011.242073\n", + " 14 NaN RUS 2009 2024 3011.242073,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2008 2010 363.169222\n", + " 1 NaN FRA 2008 2011 363.169222\n", + " 2 NaN FRA 2008 2012 363.169222\n", + " 3 NaN FRA 2008 2013 363.169222\n", + " 4 NaN FRA 2008 2014 363.169222\n", + " 5 NaN FRA 2008 2015 363.169222\n", + " 6 NaN FRA 2008 2016 363.169222\n", + " 7 NaN FRA 2008 2017 363.169222\n", + " 8 NaN FRA 2008 2018 363.169222\n", + " 9 NaN FRA 2008 2019 363.169222\n", + " 10 NaN FRA 2008 2020 363.169222\n", + " 11 NaN FRA 2008 2021 363.169222\n", + " 12 NaN FRA 2008 2022 363.169222\n", + " 13 NaN FRA 2008 2023 363.169222\n", + " 14 NaN FRA 2008 2024 363.169222,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 1987 2010 1712.183093\n", + " 1 1.0 MDG 2007 2010 1625.634495\n", + " 2 2.0 MUS 1994 2010 74.353935\n", + " 3 NaN FRA 1987 2011 1712.183266\n", + " 4 NaN MDG 2007 2011 1625.634495\n", + " 5 NaN MUS 1994 2011 78.297017\n", + " 6 NaN FRA 1987 2012 1712.183266\n", + " 7 NaN MDG 2007 2012 1625.634495\n", + " 8 NaN MUS 1994 2012 78.297017\n", + " 9 NaN FRA 1987 2013 1712.183266\n", + " 10 NaN MDG 2007 2013 1625.634495\n", + " 11 NaN MUS 1994 2013 78.297017\n", + " 12 NaN FRA 1987 2014 1712.186620\n", + " 13 NaN MDG 2007 2014 1625.634495\n", + " 14 NaN MUS 1994 2014 78.297017\n", + " 15 NaN FRA 1987 2015 1712.186620\n", + " 16 NaN MDG 
2007 2015 1625.634495\n", + " 17 NaN MUS 1994 2015 78.297017\n", + " 18 NaN FRA 1987 2016 1712.668584\n", + " 19 NaN MDG 2007 2016 1625.634495\n", + " 20 NaN MUS 1994 2016 78.297017\n", + " 21 NaN FRA 1987 2017 1712.668584\n", + " 22 NaN MDG 2007 2017 1625.634495\n", + " 23 NaN MUS 1994 2017 78.297017\n", + " 24 NaN FRA 1987 2018 1712.668584\n", + " 25 NaN MDG 2007 2018 1625.634495\n", + " 26 NaN MUS 1994 2018 78.297017\n", + " 27 NaN FRA 1987 2019 1712.918211\n", + " 28 NaN MDG 2007 2019 1625.634495\n", + " 29 NaN MUS 1994 2019 78.297017\n", + " 30 NaN FRA 1987 2020 1712.918703\n", + " 31 NaN MDG 2007 2020 1625.634495\n", + " 32 NaN MUS 1994 2020 78.297017\n", + " 33 NaN FRA 1987 2021 1712.918703\n", + " 34 NaN MDG 2007 2021 1625.634495\n", + " 35 NaN MUS 1994 2021 78.297017\n", + " 36 NaN FRA 1987 2022 1712.918703\n", + " 37 NaN MDG 2007 2022 1625.634495\n", + " 38 NaN MUS 1994 2022 78.297017\n", + " 39 NaN FRA 1987 2023 1712.918703\n", + " 40 NaN MDG 2007 2023 1625.634495\n", + " 41 NaN MUS 1994 2023 78.297017\n", + " 42 NaN FRA 1987 2024 1712.918703\n", + " 43 NaN MDG 2007 2024 1625.634495\n", + " 44 NaN MUS 1994 2024 78.297017,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MDG 2007 2010 1122.397658\n", + " 1 NaN MDG 2007 2011 1122.397658\n", + " 2 NaN MDG 2007 2012 1122.397658\n", + " 3 NaN MDG 2007 2013 1122.397658\n", + " 4 NaN MDG 2007 2014 1122.397658\n", + " 5 NaN MDG 2007 2015 1122.397658\n", + " 6 NaN MDG 2007 2016 1122.397658\n", + " 7 NaN MDG 2007 2017 1122.397658\n", + " 8 NaN MDG 2007 2018 1122.397658\n", + " 9 NaN MDG 2007 2019 1122.397658\n", + " 10 NaN MDG 2007 2020 1122.397658\n", + " 11 NaN MDG 2007 2021 1122.397658\n", + " 12 NaN MDG 2007 2022 1122.397658\n", + " 13 NaN MDG 2007 2023 1122.397658\n", + " 14 NaN MDG 2007 2024 1122.397658,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 SYC 0 2010 58.911705\n", + " 1 NaN SYC 0 2011 58.911705\n", + " 2 NaN SYC 0 2012 58.911705\n", + " 3 NaN SYC 0 2013 58.911705\n", + " 4 NaN SYC 0 
2014 58.911705\n", + " 5 NaN SYC 0 2015 58.911705\n", + " 6 NaN SYC 0 2016 58.911705\n", + " 7 NaN SYC 0 2017 58.911705\n", + " 8 NaN SYC 0 2018 58.911705\n", + " 9 NaN SYC 0 2019 58.911705\n", + " 10 NaN SYC 0 2020 58.911705\n", + " 11 NaN SYC 0 2021 58.911705\n", + " 12 NaN SYC 0 2022 58.911705\n", + " 13 NaN SYC 0 2023 58.911705\n", + " 14 NaN SYC 0 2024 58.911705,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 YEM 2008 2010 3794.335441\n", + " 1 NaN YEM 2008 2011 3794.335441\n", + " 2 NaN YEM 2008 2012 3794.335441\n", + " 3 NaN YEM 2008 2013 3794.335441\n", + " 4 NaN YEM 2008 2014 3794.335441\n", + " 5 NaN YEM 2008 2015 3794.335441\n", + " 6 NaN YEM 2008 2016 3794.335441\n", + " 7 NaN YEM 2008 2017 3794.335441\n", + " 8 NaN YEM 2008 2018 3794.335441\n", + " 9 NaN YEM 2008 2019 3794.335441\n", + " 10 NaN YEM 2008 2020 3794.335441\n", + " 11 NaN YEM 2008 2021 3794.335441\n", + " 12 NaN YEM 2008 2022 3794.335441\n", + " 13 NaN YEM 2008 2023 3794.335441\n", + " 14 NaN YEM 2008 2024 3794.335441,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARE 2001 2010 1057.677191\n", + " 1 1.0 OMN 2005 2010 66858.412225\n", + " 2 2.0 QAT 1990 2010 935.663641\n", + " 3 3.0 SAU 1990 2010 58964.674645\n", + " 4 4.0 YEM 2005 2010 295.813829\n", + " .. ... ... ... ... ...\n", + " 70 NaN ARE 2001 2024 13334.353617\n", + " 71 NaN OMN 2005 2024 124996.470746\n", + " 72 NaN QAT 1990 2024 935.663641\n", + " 73 NaN SAU 1990 2024 58964.674645\n", + " 74 NaN YEM 2005 2024 583.557920\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ARE 2007 2010 615.583167\n", + " 1 1.0 IRN 2001 2010 47154.860010\n", + " 2 2.0 QAT 1990 2010 5643.199907\n", + " 3 3.0 SAU 1990 2010 5345.106292\n", + " 4 NaN ARE 2007 2011 615.583167\n", + " .. ... ... ... ... 
...\n", + " 58 NaN ARE 2007 2024 1654.158313\n", + " 59 NaN IRN 2001 2024 69828.702698\n", + " 60 NaN OMN None 2024 1203.221823\n", + " 61 NaN QAT 1990 2024 5643.199907\n", + " 62 NaN SAU 1990 2024 5345.106292\n", + " \n", + " [63 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AZE 1990 2010 1091.737709\n", + " 1 1.0 IRN 1982 2010 37896.008639\n", + " 2 2.0 KAZ 2001 2010 17764.612740\n", + " 3 3.0 TKM 1932 2010 19893.479144\n", + " 4 4.0 UZB 1979 2010 8107.743412\n", + " .. ... ... ... ... ...\n", + " 85 NaN IRN 1982 2024 37896.008639\n", + " 86 NaN KAZ 2001 2024 43119.566145\n", + " 87 NaN TKM 1932 2024 39749.981473\n", + " 88 NaN UZB 1979 2024 28979.622727\n", + " 89 NaN XCA 1975 2024 4621.228585\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KAZ 1939 2010 4089.586240\n", + " 1 1.0 RUS 1998 2010 9613.714217\n", + " 2 2.0 XCA 2001 2010 2025.360005\n", + " 3 NaN KAZ 1939 2011 4089.586240\n", + " 4 NaN RUS 1998 2011 9616.372397\n", + " 5 NaN XCA 2001 2011 2025.360005\n", + " 6 NaN KAZ 1939 2012 4089.586240\n", + " 7 NaN RUS 1998 2012 9616.372397\n", + " 8 NaN XCA 2001 2012 2025.360005\n", + " 9 NaN KAZ 1939 2013 4089.586240\n", + " 10 NaN RUS 1998 2013 9620.441891\n", + " 11 NaN XCA 2001 2013 2025.360005\n", + " 12 NaN KAZ 1939 2014 4089.586240\n", + " 13 NaN RUS 1998 2014 9687.395527\n", + " 14 NaN XCA 2001 2014 2025.360005\n", + " 15 NaN KAZ 1939 2015 4089.586240\n", + " 16 NaN RUS 1998 2015 9687.395527\n", + " 17 NaN XCA 2001 2015 2025.360005\n", + " 18 NaN KAZ 1939 2016 10073.025285\n", + " 19 NaN RUS 1998 2016 9700.404021\n", + " 20 NaN UZB None 2016 5983.439045\n", + " 21 NaN XCA 2001 2016 2025.360005\n", + " 22 NaN KAZ 1939 2017 10073.025285\n", + " 23 NaN RUS 1998 2017 9700.404021\n", + " 24 NaN UZB None 2017 5983.439045\n", + " 25 NaN XCA 2001 2017 2025.360005\n", + " 26 NaN KAZ 1939 2018 11128.276122\n", + " 27 NaN RUS 1998 2018 11161.240981\n", + " 28 NaN UZB None 2018 5983.439045\n", 
+ " 29 NaN XCA 2001 2018 2025.360005\n", + " 30 NaN KAZ 1939 2019 11128.276122\n", + " 31 NaN RUS 1998 2019 11338.979760\n", + " 32 NaN UZB None 2019 5983.439045\n", + " 33 NaN XCA 2001 2019 2025.360005\n", + " 34 NaN KAZ 1939 2020 11128.276122\n", + " 35 NaN RUS 1998 2020 11338.979760\n", + " 36 NaN UZB None 2020 5983.439045\n", + " 37 NaN XCA 2001 2020 2025.360005\n", + " 38 NaN KAZ 1939 2021 11128.276122\n", + " 39 NaN RUS 1998 2021 11338.979760\n", + " 40 NaN UZB None 2021 5983.439045\n", + " 41 NaN XCA 2001 2021 2025.360005\n", + " 42 NaN KAZ 1939 2022 11128.276122\n", + " 43 NaN RUS 1998 2022 11338.979760\n", + " 44 NaN UZB None 2022 5983.439045\n", + " 45 NaN XCA 2001 2022 2025.360005\n", + " 46 NaN KAZ 1939 2023 11271.902370\n", + " 47 NaN RUS 1998 2023 11338.979760\n", + " 48 NaN TKM None 2023 6325.657796\n", + " 49 NaN UZB None 2023 6341.499284\n", + " 50 NaN XCA 2001 2023 2025.360005\n", + " 51 NaN KAZ 1939 2024 11271.902370\n", + " 52 NaN RUS 1998 2024 11338.979760\n", + " 53 NaN TKM None 2024 6325.657796\n", + " 54 NaN UZB None 2024 6341.499284\n", + " 55 NaN XCA 2001 2024 2025.360005,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1991 2010 60127.141915\n", + " 1 NaN RUS 1991 2011 60129.618852\n", + " 2 NaN RUS 1991 2012 60129.692191\n", + " 3 NaN RUS 1991 2013 61431.802468\n", + " 4 NaN RUS 1991 2014 61431.802468\n", + " 5 NaN RUS 1991 2015 61431.802468\n", + " 6 NaN RUS 1991 2016 61431.802468\n", + " 7 NaN RUS 1991 2017 61446.996676\n", + " 8 NaN RUS 1991 2018 61446.996676\n", + " 9 NaN RUS 1991 2019 62210.338140\n", + " 10 NaN RUS 1991 2020 62210.338140\n", + " 11 NaN RUS 1991 2021 62210.338140\n", + " 12 NaN RUS 1991 2022 62210.338140\n", + " 13 NaN RUS 1991 2023 62210.338140\n", + " 14 NaN RUS 1991 2024 62210.338140,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1995 2010 21497.600323\n", + " 1 NaN RUS 1995 2011 21497.600323\n", + " 2 NaN RUS 1995 2012 21497.600323\n", + " 3 NaN RUS 1995 2013 21497.600323\n", + " 4 NaN RUS 
1995 2014 21497.600323\n", + " 5 NaN RUS 1995 2015 21497.600323\n", + " 6 NaN RUS 1995 2016 21497.600323\n", + " 7 NaN RUS 1995 2017 22561.900496\n", + " 8 NaN RUS 1995 2018 22561.900496\n", + " 9 NaN RUS 1995 2019 22561.900496\n", + " 10 NaN RUS 1995 2020 22561.900496\n", + " 11 NaN RUS 1995 2021 22561.900496\n", + " 12 NaN RUS 1995 2022 22561.900496\n", + " 13 NaN RUS 1995 2023 22561.900496\n", + " 14 NaN RUS 1995 2024 22561.900496,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2009 2010 8128.303337\n", + " 1 NaN RUS 2009 2011 8128.303337\n", + " 2 NaN RUS 2009 2012 8128.303337\n", + " 3 NaN RUS 2009 2013 8128.303337\n", + " 4 NaN RUS 2009 2014 8128.303337\n", + " 5 NaN RUS 2009 2015 8128.303337\n", + " 6 NaN RUS 2009 2016 8128.303337\n", + " 7 NaN RUS 2009 2017 8128.303337\n", + " 8 NaN RUS 2009 2018 8128.303337\n", + " 9 NaN RUS 2009 2019 8128.303337\n", + " 10 NaN RUS 2009 2020 8128.303337\n", + " 11 NaN RUS 2009 2021 8128.303337\n", + " 12 NaN RUS 2009 2022 8128.303337\n", + " 13 NaN RUS 2009 2023 8128.303337\n", + " 14 NaN RUS 2009 2024 8128.303337,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2015 8.110072\n", + " 1 NaN ATA None 2016 8.110072\n", + " 2 NaN ATA None 2017 8.110072\n", + " 3 NaN ATA None 2018 8.110072\n", + " 4 NaN ATA None 2019 8.110072\n", + " 5 NaN ATA None 2020 8.110072\n", + " 6 NaN ATA None 2021 8.110072\n", + " 7 NaN ATA None 2022 8.110072\n", + " 8 NaN ATA None 2023 8.110072\n", + " 9 NaN ATA None 2024 8.110072,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2008 2010 5729.376267\n", + " 1 NaN FRA 2008 2011 5729.376267\n", + " 2 NaN FRA 2008 2012 5729.376267\n", + " 3 NaN FRA 2008 2013 5729.376267\n", + " 4 NaN FRA 2008 2014 5729.376267\n", + " 5 NaN FRA 2008 2015 5729.376267\n", + " 6 NaN FRA 2008 2016 5729.376267\n", + " 7 NaN FRA 2008 2017 5729.376267\n", + " 8 NaN FRA 2008 2018 5729.376267\n", + " 9 NaN FRA 2008 2019 5729.376267\n", + " 10 NaN FRA 2008 2020 5729.376267\n", + " 11 NaN FRA 
2008 2021 5729.376267\n", + " 12 NaN FRA 2008 2022 5729.376267\n", + " 13 NaN FRA 2008 2023 5729.376267\n", + " 14 NaN FRA 2008 2024 5729.376267,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IND 1980 2010 3956.214463\n", + " 1 1.0 PAK 1977 2010 5452.806935\n", + " 2 NaN IND 1980 2011 3956.214463\n", + " 3 NaN PAK 1977 2011 5452.806935\n", + " 4 NaN IND 1980 2012 3956.214463\n", + " 5 NaN PAK 1977 2012 5452.806935\n", + " 6 NaN IND 1980 2013 3956.214463\n", + " 7 NaN PAK 1977 2013 5452.806935\n", + " 8 NaN IND 1980 2014 3956.214463\n", + " 9 NaN PAK 1977 2014 5452.806935\n", + " 10 NaN IND 1980 2015 3956.214463\n", + " 11 NaN PAK 1977 2015 5452.806935\n", + " 12 NaN IND 1980 2016 3956.214463\n", + " 13 NaN PAK 1977 2016 5452.806935\n", + " 14 NaN IND 1980 2017 3956.214463\n", + " 15 NaN PAK 1977 2017 5452.806935\n", + " 16 NaN IND 1980 2018 3956.214463\n", + " 17 NaN PAK 1977 2018 5452.806935\n", + " 18 NaN IND 1980 2019 3956.214463\n", + " 19 NaN PAK 1977 2019 5452.806935\n", + " 20 NaN IND 1980 2020 3956.214463\n", + " 21 NaN PAK 1977 2020 5452.806935\n", + " 22 NaN IND 1980 2021 3956.214463\n", + " 23 NaN PAK 1977 2021 5452.806935\n", + " 24 NaN IND 1980 2022 3956.214463\n", + " 25 NaN PAK 1977 2022 5452.806935\n", + " 26 NaN IND 1980 2023 3956.214463\n", + " 27 NaN PAK 1977 2023 5452.806935\n", + " 28 NaN IND 1980 2024 3956.214463\n", + " 29 NaN PAK 1977 2024 5452.806935,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AFG 0 2010 26345.572192\n", + " 1 1.0 IND 1980 2010 9788.490440\n", + " 2 2.0 IRN 0 2010 9149.236722\n", + " 3 3.0 PAK 1977 2010 50105.112359\n", + " 4 NaN AFG 0 2011 26345.572192\n", + " 5 NaN IND 1980 2011 9788.490440\n", + " 6 NaN IRN 0 2011 9149.236722\n", + " 7 NaN PAK 1977 2011 50105.112359\n", + " 8 NaN AFG 0 2012 26345.572192\n", + " 9 NaN IND 1980 2012 9788.490440\n", + " 10 NaN IRN 0 2012 9149.236722\n", + " 11 NaN PAK 1977 2012 50105.112359\n", + " 12 NaN AFG 0 2013 26345.572192\n", + " 13 NaN IND 1980 2013 
9788.490440\n", + " 14 NaN IRN 0 2013 9149.236722\n", + " 15 NaN PAK 1977 2013 52058.046768\n", + " 16 NaN AFG 0 2014 26345.572192\n", + " 17 NaN IND 1980 2014 9788.490440\n", + " 18 NaN IRN 0 2014 9149.236722\n", + " 19 NaN PAK 1977 2014 52058.046768\n", + " 20 NaN AFG 0 2015 26345.572192\n", + " 21 NaN IND 1980 2015 9788.490440\n", + " 22 NaN IRN 0 2015 9149.236722\n", + " 23 NaN PAK 1977 2015 52058.046768\n", + " 24 NaN AFG 0 2016 26345.572192\n", + " 25 NaN IND 1980 2016 9788.490440\n", + " 26 NaN IRN 0 2016 9149.236722\n", + " 27 NaN PAK 1977 2016 52058.046768\n", + " 28 NaN AFG 0 2017 26347.234546\n", + " 29 NaN IND 1980 2017 9788.490440\n", + " 30 NaN IRN 0 2017 9149.236722\n", + " 31 NaN PAK 1977 2017 52300.314888\n", + " 32 NaN AFG 0 2018 26347.234546\n", + " 33 NaN IND 1980 2018 9788.490440\n", + " 34 NaN IRN 0 2018 9149.236722\n", + " 35 NaN PAK 1977 2018 52300.314888\n", + " 36 NaN AFG 0 2019 26347.234546\n", + " 37 NaN IND 1980 2019 9788.490440\n", + " 38 NaN IRN 0 2019 9149.236722\n", + " 39 NaN PAK 1977 2019 52300.314888\n", + " 40 NaN AFG 0 2020 26722.580143\n", + " 41 NaN IND 1980 2020 9788.490440\n", + " 42 NaN IRN 0 2020 9149.236722\n", + " 43 NaN PAK 1977 2020 52300.314888\n", + " 44 NaN AFG 0 2021 26722.580143\n", + " 45 NaN IND 1980 2021 9788.490440\n", + " 46 NaN IRN 0 2021 9149.236722\n", + " 47 NaN PAK 1977 2021 53781.048815\n", + " 48 NaN AFG 0 2022 26722.580143\n", + " 49 NaN IND 1980 2022 9788.490440\n", + " 50 NaN IRN 0 2022 9149.236722\n", + " 51 NaN PAK 1977 2022 53781.048815\n", + " 52 NaN AFG 0 2023 26722.580143\n", + " 53 NaN IND 1980 2023 9788.490440\n", + " 54 NaN IRN 0 2023 9149.236722\n", + " 55 NaN PAK 1977 2023 53781.048815\n", + " 56 NaN AFG 0 2024 26722.580143\n", + " 57 NaN IND 1980 2024 9788.490440\n", + " 58 NaN IRN 0 2024 9149.236722\n", + " 59 NaN PAK 1977 2024 53781.048815,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AFG 0 2010 9832.571494\n", + " 1 1.0 IRN 0 2010 10608.023619\n", + " 2 2.0 KAZ 2001 2010 
37250.708167\n", + " 3 3.0 KGZ 1990 2010 453.717954\n", + " 4 4.0 TJK 1938 2010 3164.465852\n", + " .. ... ... ... ... ...\n", + " 100 NaN KAZ 2001 2024 40903.639828\n", + " 101 NaN KGZ 1990 2024 894.843500\n", + " 102 NaN TJK 1938 2024 4634.711198\n", + " 103 NaN TKM 0 2024 14665.597690\n", + " 104 NaN UZB 1986 2024 46313.148971\n", + " \n", + " [105 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KAZ 2005 2010 64904.432174\n", + " 1 1.0 RUS 1998 2010 3695.223049\n", + " 2 2.0 UZB 2005 2010 29203.092633\n", + " 3 NaN KAZ 2005 2011 64904.432174\n", + " 4 NaN RUS 1998 2011 3705.562018\n", + " 5 NaN UZB 2005 2011 29203.092633\n", + " 6 NaN KAZ 2005 2012 69787.271981\n", + " 7 NaN RUS 1998 2012 3705.562018\n", + " 8 NaN UZB 2005 2012 29203.092633\n", + " 9 NaN KAZ 2005 2013 69787.271981\n", + " 10 NaN RUS 1998 2013 3705.562018\n", + " 11 NaN UZB 2005 2013 29203.092633\n", + " 12 NaN KAZ 2005 2014 69787.271981\n", + " 13 NaN RUS 1998 2014 3705.562018\n", + " 14 NaN UZB 2005 2014 29203.092633\n", + " 15 NaN KAZ 2005 2015 69787.271981\n", + " 16 NaN RUS 1998 2015 3705.562018\n", + " 17 NaN UZB 2005 2015 29203.092633\n", + " 18 NaN KAZ 2005 2016 69787.271981\n", + " 19 NaN RUS 1998 2016 3705.562018\n", + " 20 NaN UZB 2005 2016 29203.092633\n", + " 21 NaN KAZ 2005 2017 69787.271981\n", + " 22 NaN RUS 1998 2017 3705.562018\n", + " 23 NaN UZB 2005 2017 29203.092633\n", + " 24 NaN KAZ 2005 2018 69787.271981\n", + " 25 NaN RUS 1998 2018 3705.562018\n", + " 26 NaN UZB 2005 2018 29203.092633\n", + " 27 NaN KAZ 2005 2019 69787.271981\n", + " 28 NaN RUS 1998 2019 3705.562018\n", + " 29 NaN UZB 2005 2019 29203.092633\n", + " 30 NaN KAZ 2005 2020 69787.271981\n", + " 31 NaN RUS 1998 2020 3705.562018\n", + " 32 NaN UZB 2005 2020 29203.092633\n", + " 33 NaN KAZ 2005 2021 69787.271981\n", + " 34 NaN RUS 1998 2021 3705.562018\n", + " 35 NaN UZB 2005 2021 29203.092633\n", + " 36 NaN KAZ 2005 2022 69787.271981\n", + " 37 NaN RUS 1998 2022 3705.562018\n", + " 38 
NaN UZB 2005 2022 29203.092633\n", + " 39 NaN KAZ 2005 2023 70963.588974\n", + " 40 NaN RUS 1998 2023 3705.562018\n", + " 41 NaN TKM None 2023 1262.552974\n", + " 42 NaN UZB 2005 2023 30465.645607\n", + " 43 NaN KAZ 2005 2024 70963.588974\n", + " 44 NaN RUS 1998 2024 3705.562018\n", + " 45 NaN TKM None 2024 1262.552974\n", + " 46 NaN UZB 2005 2024 30465.645607,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KAZ 2001 2010 11745.679197\n", + " 1 1.0 RUS 1920 2010 46521.777212\n", + " 2 NaN KAZ 2001 2011 11745.679197\n", + " 3 NaN RUS 1920 2011 46521.777212\n", + " 4 NaN KAZ 2001 2012 11745.679197\n", + " 5 NaN RUS 1920 2012 46640.671436\n", + " 6 NaN KAZ 2001 2013 11745.679197\n", + " 7 NaN RUS 1920 2013 46796.752336\n", + " 8 NaN KAZ 2001 2014 11745.679197\n", + " 9 NaN RUS 1920 2014 46796.752336\n", + " 10 NaN KAZ 2001 2015 11745.679197\n", + " 11 NaN RUS 1920 2015 46797.601381\n", + " 12 NaN KAZ 2001 2016 11745.679197\n", + " 13 NaN RUS 1920 2016 46900.052142\n", + " 14 NaN KAZ 2001 2017 11745.679197\n", + " 15 NaN RUS 1920 2017 46900.618225\n", + " 16 NaN KAZ 2001 2018 11745.679197\n", + " 17 NaN RUS 1920 2018 46912.950789\n", + " 18 NaN KAZ 2001 2019 11745.679197\n", + " 19 NaN RUS 1920 2019 46913.108777\n", + " 20 NaN KAZ 2001 2020 11745.679197\n", + " 21 NaN RUS 1920 2020 46913.108777\n", + " 22 NaN KAZ 2001 2021 11745.679197\n", + " 23 NaN RUS 1920 2021 46913.108777\n", + " 24 NaN KAZ 2001 2022 11745.679197\n", + " 25 NaN RUS 1920 2022 46913.108777\n", + " 26 NaN KAZ 2001 2023 11745.679197\n", + " 27 NaN RUS 1920 2023 46913.108777\n", + " 28 NaN KAZ 2001 2024 11745.679197\n", + " 29 NaN RUS 1920 2024 46913.108777,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1995 2010 29131.573164\n", + " 1 NaN RUS 1995 2011 29131.573164\n", + " 2 NaN RUS 1995 2012 29131.573164\n", + " 3 NaN RUS 1995 2013 29131.573164\n", + " 4 NaN RUS 1995 2014 32207.627293\n", + " 5 NaN RUS 1995 2015 32207.627293\n", + " 6 NaN RUS 1995 2016 32207.627293\n", + " 7 NaN RUS 
1995 2017 35102.542194\n", + " 8 NaN RUS 1995 2018 38387.077289\n", + " 9 NaN RUS 1995 2019 38387.077289\n", + " 10 NaN RUS 1995 2020 38387.077289\n", + " 11 NaN RUS 1995 2021 38387.077289\n", + " 12 NaN RUS 1995 2022 38387.077289\n", + " 13 NaN RUS 1995 2023 38387.077289\n", + " 14 NaN RUS 1995 2024 38387.077289,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2009 2010 10629.933442\n", + " 1 NaN RUS 2009 2011 10629.933442\n", + " 2 NaN RUS 2009 2012 10629.933442\n", + " 3 NaN RUS 2009 2013 10629.933442\n", + " 4 NaN RUS 2009 2014 10629.933442\n", + " 5 NaN RUS 2009 2015 10629.933442\n", + " 6 NaN RUS 2009 2016 10629.933442\n", + " 7 NaN RUS 2009 2017 10629.933442\n", + " 8 NaN RUS 2009 2018 10629.933442\n", + " 9 NaN RUS 2009 2019 10629.933442\n", + " 10 NaN RUS 2009 2020 10629.933442\n", + " 11 NaN RUS 2009 2021 10629.933442\n", + " 12 NaN RUS 2009 2022 10629.933442\n", + " 13 NaN RUS 2009 2023 10629.933442\n", + " 14 NaN RUS 2009 2024 10629.933442,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ATA 2008 2010 101.983765\n", + " 1 NaN ATA 2008 2011 101.983765\n", + " 2 NaN ATA 2008 2012 101.983765\n", + " 3 NaN ATA 2008 2013 101.983765\n", + " 4 NaN ATA 2008 2014 151.508118\n", + " 5 NaN ATA 2008 2015 151.508118\n", + " 6 NaN ATA 2008 2016 151.508118\n", + " 7 NaN ATA 2008 2017 151.508118\n", + " 8 NaN ATA 2008 2018 151.508118\n", + " 9 NaN ATA 2008 2019 151.508118\n", + " 10 NaN ATA 2008 2020 151.508118\n", + " 11 NaN ATA 2008 2021 151.508118\n", + " 12 NaN ATA 2008 2022 151.508118\n", + " 13 NaN ATA 2008 2023 151.508118\n", + " 14 NaN ATA 2008 2024 151.508118,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2008 2010 1543.957569\n", + " 1 NaN FRA 2008 2011 1543.957569\n", + " 2 NaN FRA 2008 2012 1543.957569\n", + " 3 NaN FRA 2008 2013 1543.957569\n", + " 4 NaN FRA 2008 2014 1543.957569\n", + " 5 NaN FRA 2008 2015 1543.957569\n", + " 6 NaN FRA 2008 2016 1543.957569\n", + " 7 NaN FRA 2008 2017 1543.957569\n", + " 8 NaN FRA 2008 2018 
1543.957569\n", + " 9 NaN FRA 2008 2019 1543.957569\n", + " 10 NaN FRA 2008 2020 1543.957569\n", + " 11 NaN FRA 2008 2021 1543.957569\n", + " 12 NaN FRA 2008 2022 1543.957569\n", + " 13 NaN FRA 2008 2023 1543.957569\n", + " 14 NaN FRA 2008 2024 1543.957569,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2008 2010 64.280415\n", + " 1 NaN FRA 2008 2011 64.280415\n", + " 2 NaN FRA 2008 2012 64.280415\n", + " 3 NaN FRA 2008 2013 64.280415\n", + " 4 NaN FRA 2008 2014 64.280415\n", + " 5 NaN FRA 2008 2015 64.280415\n", + " 6 NaN FRA 2008 2016 64.280415\n", + " 7 NaN FRA 2008 2017 64.280415\n", + " 8 NaN FRA 2008 2018 64.280415\n", + " 9 NaN FRA 2008 2019 64.280415\n", + " 10 NaN FRA 2008 2020 64.280415\n", + " 11 NaN FRA 2008 2021 64.280415\n", + " 12 NaN FRA 2008 2022 64.280415\n", + " 13 NaN FRA 2008 2023 64.280415\n", + " 14 NaN FRA 2008 2024 64.280415,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MDV 2004 2010 1.180256\n", + " 1 NaN MDV 2004 2011 1.180256\n", + " 2 NaN MDV 2004 2012 1.180256\n", + " 3 NaN MDV 2004 2013 1.180256\n", + " 4 NaN MDV 2004 2014 1.180256\n", + " 5 NaN MDV 2004 2015 1.180256\n", + " 6 NaN MDV 2004 2016 1.180256\n", + " 7 NaN MDV 2004 2017 1.180256\n", + " 8 NaN MDV 2004 2018 1.180256\n", + " 9 NaN MDV 2004 2019 1.180256\n", + " 10 NaN MDV 2004 2020 2.055685\n", + " 11 NaN MDV 2004 2021 2.055685\n", + " 12 NaN MDV 2004 2022 2.055685\n", + " 13 NaN MDV 2004 2023 2.055685\n", + " 14 NaN MDV 2004 2024 2.055685,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 LKA 1985 2010 1008.442785\n", + " 1 NaN LKA 1985 2011 1008.442785\n", + " 2 NaN MDV None 2011 0.203672\n", + " 3 NaN IND None 2012 6959.536742\n", + " 4 NaN LKA 1985 2012 1065.082824\n", + " 5 NaN MDV None 2012 0.203672\n", + " 6 NaN IND None 2013 6959.536742\n", + " 7 NaN LKA 1985 2013 1074.337897\n", + " 8 NaN MDV None 2013 0.203672\n", + " 9 NaN IND None 2014 6959.536742\n", + " 10 NaN LKA 1985 2014 1074.337897\n", + " 11 NaN MDV None 2014 0.203672\n", + " 12 NaN 
IND None 2015 6959.536742\n", + " 13 NaN LKA 1985 2015 1074.337897\n", + " 14 NaN MDV None 2015 0.203672\n", + " 15 NaN IND None 2016 6959.536742\n", + " 16 NaN LKA 1985 2016 1074.337897\n", + " 17 NaN MDV None 2016 0.203672\n", + " 18 NaN IND None 2017 6959.536742\n", + " 19 NaN LKA 1985 2017 1074.337897\n", + " 20 NaN MDV None 2017 0.203672\n", + " 21 NaN IND None 2018 6959.536742\n", + " 22 NaN LKA 1985 2018 1074.337897\n", + " 23 NaN MDV None 2018 2.532950\n", + " 24 NaN IND None 2019 6959.536742\n", + " 25 NaN LKA 1985 2019 1074.337897\n", + " 26 NaN MDV None 2019 3.052767\n", + " 27 NaN IND None 2020 6959.536742\n", + " 28 NaN LKA 1985 2020 1074.337897\n", + " 29 NaN MDV None 2020 3.052767\n", + " 30 NaN IND None 2021 6959.536742\n", + " 31 NaN LKA 1985 2021 1074.337897\n", + " 32 NaN MDV None 2021 3.052767\n", + " 33 NaN IND None 2022 6959.536742\n", + " 34 NaN LKA 1985 2022 1074.337897\n", + " 35 NaN MDV None 2022 3.052767\n", + " 36 NaN IND None 2023 6959.536742\n", + " 37 NaN LKA 1985 2023 1074.337897\n", + " 38 NaN MDV None 2023 3.052767\n", + " 39 NaN IND None 2024 6959.536742\n", + " 40 NaN LKA 1985 2024 1074.337897\n", + " 41 NaN MDV None 2024 3.052767,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IND 1980 2010 3350.924089\n", + " 1 1.0 PAK 1980 2010 3350.924089\n", + " 2 NaN IND 1980 2011 3350.924089\n", + " 3 NaN PAK 1980 2011 3350.924089\n", + " 4 NaN IND 1980 2012 4538.318332\n", + " 5 NaN PAK 1980 2012 3350.924089\n", + " 6 NaN IND 1980 2013 4538.318332\n", + " 7 NaN PAK 1980 2013 3350.924089\n", + " 8 NaN IND 1980 2014 4538.318332\n", + " 9 NaN PAK 1980 2014 3350.924089\n", + " 10 NaN IND 1980 2015 4538.318332\n", + " 11 NaN PAK 1980 2015 3350.924089\n", + " 12 NaN IND 1980 2016 4538.318332\n", + " 13 NaN PAK 1980 2016 3350.924089\n", + " 14 NaN IND 1980 2017 4538.318332\n", + " 15 NaN PAK 1980 2017 3350.924089\n", + " 16 NaN IND 1980 2018 4538.318332\n", + " 17 NaN PAK 1980 2018 3350.924089\n", + " 18 NaN IND 1980 2019 4538.318332\n", 
+ " 19 NaN PAK 1980 2019 3350.924089\n", + " 20 NaN IND 1980 2020 4538.318332\n", + " 21 NaN PAK 1980 2020 3350.924089\n", + " 22 NaN IND 1980 2021 4538.318332\n", + " 23 NaN PAK 1980 2021 3350.924089\n", + " 24 NaN IND 1980 2022 4538.318332\n", + " 25 NaN PAK 1980 2022 3350.924089\n", + " 26 NaN IND 1980 2023 4538.318332\n", + " 27 NaN PAK 1980 2023 3350.924089\n", + " 28 NaN IND 1980 2024 4538.318332\n", + " 29 NaN PAK 1980 2024 3350.924089,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IND 1980 2010 23642.375432\n", + " 1 1.0 PAK 1980 2010 24902.092485\n", + " 2 NaN IND 1980 2011 23642.375432\n", + " 3 NaN PAK 1980 2011 24902.092485\n", + " 4 NaN IND 1980 2012 23642.375432\n", + " 5 NaN PAK 1980 2012 24902.092485\n", + " 6 NaN IND 1980 2013 23642.375432\n", + " 7 NaN PAK 1980 2013 24902.092485\n", + " 8 NaN IND 1980 2014 24465.387207\n", + " 9 NaN PAK 1980 2014 24902.092485\n", + " 10 NaN IND 1980 2015 24465.387207\n", + " 11 NaN PAK 1980 2015 24902.092485\n", + " 12 NaN IND 1980 2016 24465.387207\n", + " 13 NaN PAK 1980 2016 24902.092485\n", + " 14 NaN IND 1980 2017 24465.387207\n", + " 15 NaN PAK 1980 2017 24902.092485\n", + " 16 NaN IND 1980 2018 24465.387207\n", + " 17 NaN PAK 1980 2018 24902.092485\n", + " 18 NaN IND 1980 2019 24465.387207\n", + " 19 NaN PAK 1980 2019 24902.092485\n", + " 20 NaN IND 1980 2020 24465.387207\n", + " 21 NaN PAK 1980 2020 24902.092485\n", + " 22 NaN IND 1980 2021 24465.387207\n", + " 23 NaN PAK 1980 2021 24989.678237\n", + " 24 NaN IND 1980 2022 24465.387207\n", + " 25 NaN PAK 1980 2022 39261.617216\n", + " 26 NaN IND 1980 2023 24465.387207\n", + " 27 NaN PAK 1980 2023 39792.086276\n", + " 28 NaN IND 1980 2024 24465.387207\n", + " 29 NaN PAK 1980 2024 39792.086276,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AFG 1975 2010 1861.822377\n", + " 1 1.0 CHN 1993 2010 3983.924515\n", + " 2 2.0 KAZ 2001 2010 50214.301818\n", + " 3 3.0 KGZ 1983 2010 44972.791313\n", + " 4 4.0 PAK 1977 2010 7375.500500\n", + " .. ... ... 
... ... ...\n", + " 102 NaN KGZ 1983 2024 50726.027813\n", + " 103 NaN PAK 1977 2024 19017.964645\n", + " 104 NaN TJK 1972 2024 39987.996641\n", + " 105 NaN TKM None 2024 562.077632\n", + " 106 NaN UZB 1977 2024 19739.984330\n", + " \n", + " [107 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2010 2010 335.985329\n", + " 1 1.0 KAZ 2005 2010 47220.371607\n", + " 2 2.0 RUS 2003 2010 13454.314427\n", + " 3 3.0 UZB 2005 2010 8647.374929\n", + " 4 NaN CHN 2010 2011 335.985329\n", + " 5 NaN KAZ 2005 2011 48067.982429\n", + " 6 NaN RUS 2003 2011 13454.314427\n", + " 7 NaN UZB 2005 2011 8647.374929\n", + " 8 NaN CHN 2010 2012 335.985329\n", + " 9 NaN KAZ 2005 2012 48067.982429\n", + " 10 NaN RUS 2003 2012 13454.846414\n", + " 11 NaN UZB 2005 2012 8647.374929\n", + " 12 NaN CHN 2010 2013 335.985329\n", + " 13 NaN KAZ 2005 2013 48067.982429\n", + " 14 NaN RUS 2003 2013 13454.846414\n", + " 15 NaN UZB 2005 2013 8647.374929\n", + " 16 NaN CHN 2010 2014 335.985329\n", + " 17 NaN KAZ 2005 2014 48067.982429\n", + " 18 NaN RUS 2003 2014 13455.404238\n", + " 19 NaN UZB 2005 2014 8647.374929\n", + " 20 NaN CHN 2010 2015 335.985329\n", + " 21 NaN KAZ 2005 2015 48067.982429\n", + " 22 NaN RUS 2003 2015 13569.074675\n", + " 23 NaN UZB 2005 2015 8647.374929\n", + " 24 NaN CHN 2010 2016 335.985329\n", + " 25 NaN KAZ 2005 2016 48067.982429\n", + " 26 NaN RUS 2003 2016 13569.074675\n", + " 27 NaN UZB 2005 2016 8647.374929\n", + " 28 NaN CHN 2010 2017 335.985329\n", + " 29 NaN KAZ 2005 2017 48067.982429\n", + " 30 NaN RUS 2003 2017 13884.257093\n", + " 31 NaN UZB 2005 2017 8647.374929\n", + " 32 NaN CHN 2010 2018 335.985329\n", + " 33 NaN KAZ 2005 2018 49251.243545\n", + " 34 NaN RUS 2003 2018 13884.257093\n", + " 35 NaN UZB 2005 2018 8647.374929\n", + " 36 NaN CHN 2010 2019 335.985329\n", + " 37 NaN KAZ 2005 2019 49251.243545\n", + " 38 NaN RUS 2003 2019 13885.231161\n", + " 39 NaN UZB 2005 2019 8647.374929\n", + " 40 NaN CHN 2010 2020 335.985329\n", + " 41 
NaN KAZ 2005 2020 49251.243545\n", + " 42 NaN RUS 2003 2020 13885.231161\n", + " 43 NaN UZB 2005 2020 8647.374929\n", + " 44 NaN CHN 2010 2021 335.985329\n", + " 45 NaN KAZ 2005 2021 49251.243545\n", + " 46 NaN RUS 2003 2021 13885.231161\n", + " 47 NaN UZB 2005 2021 8647.374929\n", + " 48 NaN CHN 2010 2022 335.985329\n", + " 49 NaN KAZ 2005 2022 49251.243545\n", + " 50 NaN RUS 2003 2022 13885.231161\n", + " 51 NaN UZB 2005 2022 8647.374929\n", + " 52 NaN CHN 2010 2023 335.985329\n", + " 53 NaN KAZ 2005 2023 49251.243545\n", + " 54 NaN RUS 2003 2023 13885.231161\n", + " 55 NaN UZB 2005 2023 8647.374929\n", + " 56 NaN CHN 2010 2024 335.985329\n", + " 57 NaN KAZ 2005 2024 49251.243545\n", + " 58 NaN RUS 2003 2024 13885.231161\n", + " 59 NaN UZB 2005 2024 8647.374929,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KAZ 2001 2010 3189.678650\n", + " 1 1.0 RUS 1987 2010 33268.392711\n", + " 2 NaN KAZ 2001 2011 3189.678650\n", + " 3 NaN RUS 1987 2011 33268.392711\n", + " 4 NaN KAZ 2001 2012 3189.678650\n", + " 5 NaN RUS 1987 2012 33367.456364\n", + " 6 NaN KAZ 2001 2013 3189.678650\n", + " 7 NaN RUS 1987 2013 33434.849118\n", + " 8 NaN KAZ 2001 2014 3189.678650\n", + " 9 NaN RUS 1987 2014 33434.849118\n", + " 10 NaN KAZ 2001 2015 3189.678650\n", + " 11 NaN RUS 1987 2015 33437.998339\n", + " 12 NaN KAZ 2001 2016 3189.678650\n", + " 13 NaN RUS 1987 2016 33437.998339\n", + " 14 NaN KAZ 2001 2017 3189.678650\n", + " 15 NaN RUS 1987 2017 34562.828184\n", + " 16 NaN KAZ 2001 2018 3189.678650\n", + " 17 NaN RUS 1987 2018 34562.828184\n", + " 18 NaN KAZ 2001 2019 3189.678650\n", + " 19 NaN RUS 1987 2019 34562.828184\n", + " 20 NaN KAZ 2001 2020 3189.678650\n", + " 21 NaN RUS 1987 2020 34562.828184\n", + " 22 NaN KAZ 2001 2021 3189.678650\n", + " 23 NaN RUS 1987 2021 34562.828184\n", + " 24 NaN KAZ 2001 2022 3189.678650\n", + " 25 NaN RUS 1987 2022 34562.828184\n", + " 26 NaN KAZ 2001 2023 3189.678650\n", + " 27 NaN RUS 1987 2023 34562.828184\n", + " 28 NaN KAZ 2001 2024 
3189.678650\n", + " 29 NaN RUS 1987 2024 34562.828184,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1994 2010 30955.143966\n", + " 1 NaN RUS 1994 2011 30955.143966\n", + " 2 NaN RUS 1994 2012 30955.143966\n", + " 3 NaN RUS 1994 2013 30955.143966\n", + " 4 NaN RUS 1994 2014 30955.143998\n", + " 5 NaN RUS 1994 2015 30955.143998\n", + " 6 NaN RUS 1994 2016 30955.143998\n", + " 7 NaN RUS 1994 2017 30955.143998\n", + " 8 NaN RUS 1994 2018 30955.143998\n", + " 9 NaN RUS 1994 2019 30955.143998\n", + " 10 NaN RUS 1994 2020 30955.143998\n", + " 11 NaN RUS 1994 2021 30955.143998\n", + " 12 NaN RUS 1994 2022 30955.143998\n", + " 13 NaN RUS 1994 2023 30955.143998\n", + " 14 NaN RUS 1994 2024 30955.143998,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 LKA 1980 2010 15736.920972\n", + " 1 NaN LKA 1980 2011 15759.194315\n", + " 2 NaN LKA 1980 2012 18436.326118\n", + " 3 NaN LKA 1980 2013 19738.902404\n", + " 4 NaN LKA 1980 2014 19738.902404\n", + " 5 NaN LKA 1980 2015 19738.902404\n", + " 6 NaN LKA 1980 2016 19738.902404\n", + " 7 NaN LKA 1980 2017 19738.902404\n", + " 8 NaN LKA 1980 2018 19738.902404\n", + " 9 NaN LKA 1980 2019 19738.902404\n", + " 10 NaN LKA 1980 2020 19738.902404\n", + " 11 NaN LKA 1980 2021 19738.902404\n", + " 12 NaN LKA 1980 2022 19738.902404\n", + " 13 NaN LKA 1980 2023 19738.902404\n", + " 14 NaN LKA 1980 2024 19738.902404,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BGD 1997 2010 4268.152498\n", + " 1 1.0 IND 1981 2010 2089.493126\n", + " 2 NaN BGD 1997 2011 4268.152498\n", + " 3 NaN IND 1981 2011 2089.493126\n", + " 4 NaN BGD 1997 2012 4269.070544\n", + " 5 NaN IND 1981 2012 2089.493126\n", + " 6 NaN BGD 1997 2013 4274.741884\n", + " 7 NaN IND 1981 2013 2089.493126\n", + " 8 NaN BGD 1997 2014 4274.741884\n", + " 9 NaN IND 1981 2014 2089.493126\n", + " 10 NaN BGD 1997 2015 4274.741884\n", + " 11 NaN IND 1981 2015 2089.493126\n", + " 12 NaN BGD 1997 2016 4274.741884\n", + " 13 NaN IND 1981 2016 2089.493126\n", + " 14 NaN BGD 
1997 2017 4339.667684\n", + " 15 NaN IND 1981 2017 2089.493126\n", + " 16 NaN BGD 1997 2018 4339.667684\n", + " 17 NaN IND 1981 2018 2089.493126\n", + " 18 NaN BGD 1997 2019 4339.667684\n", + " 19 NaN IND 1981 2019 2089.493126\n", + " 20 NaN BGD 1997 2020 4340.214546\n", + " 21 NaN IND 1981 2020 2089.493126\n", + " 22 NaN BGD 1997 2021 4340.214546\n", + " 23 NaN IND 1981 2021 2089.493126\n", + " 24 NaN BGD 1997 2022 4340.214546\n", + " 25 NaN IND 1981 2022 2089.493126\n", + " 26 NaN BGD 1997 2023 4340.214546\n", + " 27 NaN IND 1981 2023 2089.493126\n", + " 28 NaN BGD 1997 2024 4340.214546\n", + " 29 NaN IND 1981 2024 2089.493126,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BGD 2010 2010 10.740733\n", + " 1 1.0 BTN 1993 2010 5265.847645\n", + " 2 2.0 CHN 1993 2010 30784.274935\n", + " 3 3.0 IND 1993 2010 9365.637841\n", + " 4 4.0 NPL 2004 2010 34870.529483\n", + " .. ... ... ... ... ...\n", + " 70 NaN BGD 2010 2024 21.442818\n", + " 71 NaN BTN 1993 2024 5265.847645\n", + " 72 NaN CHN 1993 2024 43316.318607\n", + " 73 NaN IND 1993 2024 11114.148767\n", + " 74 NaN NPL 2004 2024 36870.699741\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KAZ 2001 2010 196.705690\n", + " 1 NaN KAZ 2001 2011 196.705690\n", + " 2 NaN KAZ 2001 2012 196.705690\n", + " 3 NaN CHN None 2013 5690.244440\n", + " 4 NaN KAZ 2001 2013 5886.950130\n", + " 5 NaN KGZ None 2013 5690.244440\n", + " 6 NaN CHN None 2014 5690.244440\n", + " 7 NaN KAZ 2001 2014 5886.950130\n", + " 8 NaN KGZ None 2014 5690.244440\n", + " 9 NaN CHN None 2015 5690.244440\n", + " 10 NaN KAZ 2001 2015 5886.950130\n", + " 11 NaN KGZ None 2015 5690.244440\n", + " 12 NaN CHN None 2016 5690.244440\n", + " 13 NaN KAZ 2001 2016 5886.950130\n", + " 14 NaN KGZ None 2016 5690.244440\n", + " 15 NaN CHN None 2017 7759.530242\n", + " 16 NaN KAZ 2001 2017 5886.950130\n", + " 17 NaN KGZ None 2017 5690.244440\n", + " 18 NaN CHN None 2018 7759.530242\n", + " 19 NaN KAZ 2001 2018 
5886.950130\n", + " 20 NaN KGZ None 2018 5690.244440\n", + " 21 NaN CHN None 2019 7759.530242\n", + " 22 NaN KAZ 2001 2019 5886.950130\n", + " 23 NaN KGZ None 2019 5690.244440\n", + " 24 NaN CHN None 2020 7759.530242\n", + " 25 NaN KAZ 2001 2020 5886.950130\n", + " 26 NaN KGZ None 2020 5690.244440\n", + " 27 NaN CHN None 2021 7759.530242\n", + " 28 NaN KAZ 2001 2021 5886.950130\n", + " 29 NaN KGZ None 2021 5690.244440\n", + " 30 NaN CHN None 2022 7759.530242\n", + " 31 NaN KAZ 2001 2022 5886.950130\n", + " 32 NaN KGZ None 2022 5690.244440\n", + " 33 NaN CHN None 2023 7759.530242\n", + " 34 NaN KAZ 2001 2023 5886.950130\n", + " 35 NaN KGZ None 2023 5690.244440\n", + " 36 NaN CHN None 2024 7759.530242\n", + " 37 NaN KAZ 2001 2024 5886.950130\n", + " 38 NaN KGZ None 2024 5690.244440,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2010 2010 18805.949047\n", + " 1 1.0 KAZ 2001 2010 41890.281598\n", + " 2 2.0 MNG 1996 2010 26313.298861\n", + " 3 3.0 RUS 2003 2010 63648.159908\n", + " 4 NaN CHN 2010 2011 18805.949047\n", + " 5 NaN KAZ 2001 2011 41890.281598\n", + " 6 NaN MNG 1996 2011 26313.298861\n", + " 7 NaN RUS 2003 2011 67601.273217\n", + " 8 NaN CHN 2010 2012 20776.633828\n", + " 9 NaN KAZ 2001 2012 43860.966379\n", + " 10 NaN MNG 1996 2012 26313.298861\n", + " 11 NaN RUS 2003 2012 67613.917769\n", + " 12 NaN CHN 2010 2013 20776.633828\n", + " 13 NaN KAZ 2001 2013 43860.966379\n", + " 14 NaN MNG 1996 2013 26313.298861\n", + " 15 NaN RUS 2003 2013 69201.730313\n", + " 16 NaN CHN 2010 2014 20776.633828\n", + " 17 NaN KAZ 2001 2014 43860.966379\n", + " 18 NaN MNG 1996 2014 26313.298861\n", + " 19 NaN RUS 2003 2014 69404.202509\n", + " 20 NaN CHN 2010 2015 20776.633828\n", + " 21 NaN KAZ 2001 2015 43899.406557\n", + " 22 NaN MNG 1996 2015 26313.298861\n", + " 23 NaN RUS 2003 2015 69927.420234\n", + " 24 NaN CHN 2010 2016 20776.633828\n", + " 25 NaN KAZ 2001 2016 43899.406557\n", + " 26 NaN MNG 1996 2016 26313.298861\n", + " 27 NaN RUS 2003 2016 
69964.912790\n", + " 28 NaN CHN 2010 2017 20776.633828\n", + " 29 NaN KAZ 2001 2017 43899.406557\n", + " 30 NaN MNG 1996 2017 26313.298861\n", + " 31 NaN RUS 2003 2017 70034.301766\n", + " 32 NaN CHN 2010 2018 22785.678157\n", + " 33 NaN KAZ 2001 2018 45908.450886\n", + " 34 NaN MNG 1996 2018 26313.298861\n", + " 35 NaN RUS 2003 2018 70403.998710\n", + " 36 NaN CHN 2010 2019 22785.678157\n", + " 37 NaN KAZ 2001 2019 45908.450886\n", + " 38 NaN MNG 1996 2019 26313.298861\n", + " 39 NaN RUS 2003 2019 70403.998710\n", + " 40 NaN CHN 2010 2020 22785.678157\n", + " 41 NaN KAZ 2001 2020 45908.450886\n", + " 42 NaN MNG 1996 2020 26313.298861\n", + " 43 NaN RUS 2003 2020 71238.249825\n", + " 44 NaN CHN 2010 2021 22785.678157\n", + " 45 NaN KAZ 2001 2021 45908.450886\n", + " 46 NaN MNG 1996 2021 26313.298861\n", + " 47 NaN RUS 2003 2021 71238.249825\n", + " 48 NaN CHN 2010 2022 22785.678157\n", + " 49 NaN KAZ 2001 2022 45908.450886\n", + " 50 NaN MNG 1996 2022 26313.298861\n", + " 51 NaN RUS 2003 2022 71238.249825\n", + " 52 NaN CHN 2010 2023 22785.678157\n", + " 53 NaN KAZ 2001 2023 45908.450886\n", + " 54 NaN MNG 1996 2023 26313.298861\n", + " 55 NaN RUS 2003 2023 71238.249825\n", + " 56 NaN CHN 2010 2024 22785.678157\n", + " 57 NaN KAZ 2001 2024 45908.450886\n", + " 58 NaN MNG 1996 2024 26313.298861\n", + " 59 NaN RUS 2003 2024 71238.249825,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1993 2010 33949.815392\n", + " 1 NaN RUS 1993 2011 34752.748437\n", + " 2 NaN RUS 1993 2012 34803.917495\n", + " 3 NaN RUS 1993 2013 34811.878703\n", + " 4 NaN RUS 1993 2014 34900.074900\n", + " 5 NaN RUS 1993 2015 34928.593462\n", + " 6 NaN RUS 1993 2016 35108.406839\n", + " 7 NaN RUS 1993 2017 39363.313216\n", + " 8 NaN RUS 1993 2018 39486.178563\n", + " 9 NaN RUS 1993 2019 39540.826829\n", + " 10 NaN RUS 1993 2020 39542.260957\n", + " 11 NaN RUS 1993 2021 39542.260957\n", + " 12 NaN RUS 1993 2022 39542.260957\n", + " 13 NaN RUS 1993 2023 39542.260957\n", + " 14 NaN RUS 1993 
2024 39542.260957,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 35559.831659\n", + " 1 NaN RUS 1996 2011 35559.831659\n", + " 2 NaN RUS 1996 2012 35559.831659\n", + " 3 NaN RUS 1996 2013 35559.831659\n", + " 4 NaN RUS 1996 2014 35559.831659\n", + " 5 NaN RUS 1996 2015 35559.831659\n", + " 6 NaN RUS 1996 2016 35559.831659\n", + " 7 NaN RUS 1996 2017 35559.831659\n", + " 8 NaN RUS 1996 2018 35559.831659\n", + " 9 NaN RUS 1996 2019 35559.831659\n", + " 10 NaN RUS 1996 2020 35559.831659\n", + " 11 NaN RUS 1996 2021 35559.831659\n", + " 12 NaN RUS 1996 2022 35559.831659\n", + " 13 NaN RUS 1996 2023 35559.831659\n", + " 14 NaN RUS 1996 2024 35559.831659,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 15100.141646\n", + " 1 NaN IDN 0 2011 15100.141646\n", + " 2 NaN IDN 0 2012 15100.141646\n", + " 3 NaN IDN 0 2013 15100.141646\n", + " 4 NaN IDN 0 2014 15100.141646\n", + " 5 NaN IDN 0 2015 15100.141646\n", + " 6 NaN IDN 0 2016 15100.141646\n", + " 7 NaN IDN 0 2017 15100.141646\n", + " 8 NaN IDN 0 2018 15100.141646\n", + " 9 NaN IDN 0 2019 15100.141646\n", + " 10 NaN IDN 0 2020 15100.141646\n", + " 11 NaN IDN 0 2021 15100.141646\n", + " 12 NaN IDN 0 2022 15100.141646\n", + " 13 NaN IDN 0 2023 15100.141646\n", + " 14 NaN IDN 0 2024 15100.141646,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 296.543753\n", + " 1 1.0 MMR 1982 2010 8366.928017\n", + " 2 2.0 MYS 2010 2010 0.212560\n", + " 3 3.0 THA 1980 2010 19512.618168\n", + " 4 NaN IDN 0 2011 296.543753\n", + " 5 NaN MMR 1982 2011 8366.928017\n", + " 6 NaN MYS 2010 2011 0.212560\n", + " 7 NaN THA 1980 2011 19512.618168\n", + " 8 NaN IDN 0 2012 296.543753\n", + " 9 NaN MMR 1982 2012 8366.928017\n", + " 10 NaN MYS 2010 2012 0.212560\n", + " 11 NaN THA 1980 2012 19512.618168\n", + " 12 NaN IDN 0 2013 296.543786\n", + " 13 NaN MMR 1982 2013 8366.928017\n", + " 14 NaN MYS 2010 2013 0.212560\n", + " 15 NaN THA 1980 2013 19627.819536\n", + " 16 NaN IDN 0 2014 296.543786\n", 
+ " 17 NaN MMR 1982 2014 8366.928017\n", + " 18 NaN MYS 2010 2014 0.212560\n", + " 19 NaN THA 1980 2014 19627.819536\n", + " 20 NaN IDN 0 2015 296.543786\n", + " 21 NaN MMR 1982 2015 8366.928017\n", + " 22 NaN MYS 2010 2015 0.212560\n", + " 23 NaN THA 1980 2015 19815.917444\n", + " 24 NaN IDN 0 2016 296.543786\n", + " 25 NaN MMR 1982 2016 8366.928017\n", + " 26 NaN MYS 2010 2016 0.212560\n", + " 27 NaN THA 1980 2016 20218.553555\n", + " 28 NaN IDN 0 2017 296.543786\n", + " 29 NaN MMR 1982 2017 8366.928017\n", + " 30 NaN MYS 2010 2017 0.212560\n", + " 31 NaN THA 1980 2017 20218.553555\n", + " 32 NaN IDN 0 2018 296.543786\n", + " 33 NaN MMR 1982 2018 8366.928017\n", + " 34 NaN MYS 2010 2018 0.212560\n", + " 35 NaN THA 1980 2018 20219.176098\n", + " 36 NaN IDN 0 2019 296.543786\n", + " 37 NaN MMR 1982 2019 8366.928017\n", + " 38 NaN MYS 2010 2019 0.212560\n", + " 39 NaN THA 1980 2019 20219.176098\n", + " 40 NaN IDN 0 2020 296.543786\n", + " 41 NaN MMR 1982 2020 8366.928017\n", + " 42 NaN MYS 2010 2020 0.212560\n", + " 43 NaN THA 1980 2020 20219.176098\n", + " 44 NaN IDN 0 2021 296.543786\n", + " 45 NaN MMR 1982 2021 8366.928017\n", + " 46 NaN MYS 2010 2021 0.212560\n", + " 47 NaN THA 1980 2021 20554.881643\n", + " 48 NaN IDN 0 2022 296.543786\n", + " 49 NaN MMR 1982 2022 8366.928017\n", + " 50 NaN MYS 2010 2022 0.212560\n", + " 51 NaN THA 1980 2022 20554.881643\n", + " 52 NaN IDN 0 2023 296.543786\n", + " 53 NaN MMR 1982 2023 8366.928017\n", + " 54 NaN MYS 2010 2023 0.212560\n", + " 55 NaN THA 1980 2023 20554.881643\n", + " 56 NaN IDN 0 2024 296.543786\n", + " 57 NaN MMR 1982 2024 8366.928017\n", + " 58 NaN MYS 2010 2024 0.212560\n", + " 59 NaN THA 1980 2024 20554.881643,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BGD 2009 2010 1212.425393\n", + " 1 1.0 IND 1983 2010 693.870182\n", + " 2 2.0 MMR 2005 2010 26017.215750\n", + " 3 3.0 THA 2005 2010 46099.759435\n", + " 4 NaN BGD 2009 2011 1227.742930\n", + " 5 NaN IND 1983 2011 693.870182\n", + " 6 NaN MMR 
2005 2011 26017.215750\n", + " 7 NaN THA 2005 2011 46099.759435\n", + " 8 NaN BGD 2009 2012 1227.742930\n", + " 9 NaN IND 1983 2012 693.870182\n", + " 10 NaN MMR 2005 2012 26017.215750\n", + " 11 NaN THA 2005 2012 46099.759435\n", + " 12 NaN BGD 2009 2013 1227.742930\n", + " 13 NaN IND 1983 2013 693.870182\n", + " 14 NaN MMR 2005 2013 26148.397150\n", + " 15 NaN THA 2005 2013 46099.759435\n", + " 16 NaN BGD 2009 2014 1227.742930\n", + " 17 NaN IND 1983 2014 693.870182\n", + " 18 NaN MMR 2005 2014 27141.221590\n", + " 19 NaN THA 2005 2014 46099.759435\n", + " 20 NaN BGD 2009 2015 1227.816285\n", + " 21 NaN IND 1983 2015 693.870182\n", + " 22 NaN MMR 2005 2015 27141.221590\n", + " 23 NaN THA 2005 2015 46099.759435\n", + " 24 NaN BGD 2009 2016 1227.816285\n", + " 25 NaN IND 1983 2016 693.870182\n", + " 26 NaN MMR 2005 2016 27141.221590\n", + " 27 NaN THA 2005 2016 46099.759435\n", + " 28 NaN BGD 2009 2017 1227.816285\n", + " 29 NaN IND 1983 2017 693.870182\n", + " 30 NaN MMR 2005 2017 27457.059533\n", + " 31 NaN THA 2005 2017 46099.759435\n", + " 32 NaN BGD 2009 2018 1228.597071\n", + " 33 NaN IND 1983 2018 693.870182\n", + " 34 NaN MMR 2005 2018 27725.334249\n", + " 35 NaN THA 2005 2018 46099.759435\n", + " 36 NaN BGD 2009 2019 1305.317812\n", + " 37 NaN IND 1983 2019 693.870182\n", + " 38 NaN MMR 2005 2019 27935.851591\n", + " 39 NaN THA 2005 2019 46099.759435\n", + " 40 NaN BGD 2009 2020 1422.066674\n", + " 41 NaN IND 1983 2020 693.870182\n", + " 42 NaN MMR 2005 2020 27935.851591\n", + " 43 NaN THA 2005 2020 46099.759435\n", + " 44 NaN BGD 2009 2021 1422.066674\n", + " 45 NaN IND 1983 2021 693.870182\n", + " 46 NaN MMR 2005 2021 27935.851591\n", + " 47 NaN THA 2005 2021 46099.759435\n", + " 48 NaN BGD 2009 2022 1422.066674\n", + " 49 NaN IND 1983 2022 693.870182\n", + " 50 NaN MMR 2005 2022 27935.851591\n", + " 51 NaN THA 2005 2022 46099.759435\n", + " 52 NaN BGD 2009 2023 1422.066674\n", + " 53 NaN IND 1983 2023 693.870182\n", + " 54 NaN MMR 2005 2023 
27935.851591\n", + " 55 NaN THA 2005 2023 46099.759435\n", + " 56 NaN BGD 2009 2024 1422.066674\n", + " 57 NaN IND 1983 2024 693.870182\n", + " 58 NaN MMR 2005 2024 27935.851591\n", + " 59 NaN THA 2005 2024 46099.759435,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BGD 2001 2010 92.389089\n", + " 1 1.0 BTN 1999 2010 14960.402484\n", + " 2 2.0 CHN 2008 2010 30502.369775\n", + " 3 3.0 IND 1990 2010 23528.620839\n", + " 4 4.0 MMR 2004 2010 49583.119220\n", + " .. ... ... ... ... ...\n", + " 70 NaN BGD 2001 2024 94.806419\n", + " 71 NaN BTN 1999 2024 15186.432507\n", + " 72 NaN CHN 2008 2024 45460.358826\n", + " 73 NaN IND 1990 2024 23744.824689\n", + " 74 NaN MMR 2004 2024 51692.191330\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2004 2010 46884.063437\n", + " 1 1.0 MNG 1975 2010 49369.563089\n", + " 2 NaN CHN 2004 2011 46884.063437\n", + " 3 NaN MNG 1975 2011 49369.563089\n", + " 4 NaN CHN 2004 2012 46884.063437\n", + " 5 NaN MNG 1975 2012 49369.563089\n", + " 6 NaN CHN 2004 2013 46884.063437\n", + " 7 NaN MNG 1975 2013 49369.563089\n", + " 8 NaN CHN 2004 2014 46884.063437\n", + " 9 NaN MNG 1975 2014 49369.563089\n", + " 10 NaN CHN 2004 2015 47180.864010\n", + " 11 NaN MNG 1975 2015 49369.563089\n", + " 12 NaN CHN 2004 2016 47180.864010\n", + " 13 NaN MNG 1975 2016 51890.288599\n", + " 14 NaN CHN 2004 2017 87738.743273\n", + " 15 NaN MNG 1975 2017 51890.288599\n", + " 16 NaN CHN 2004 2018 88036.827334\n", + " 17 NaN MNG 1975 2018 51890.288599\n", + " 18 NaN CHN 2004 2019 88036.827334\n", + " 19 NaN MNG 1975 2019 51890.288599\n", + " 20 NaN CHN 2004 2020 88036.827334\n", + " 21 NaN MNG 1975 2020 51890.288599\n", + " 22 NaN CHN 2004 2021 88036.827334\n", + " 23 NaN MNG 1975 2021 51890.288599\n", + " 24 NaN CHN 2004 2022 88036.827334\n", + " 25 NaN MNG 1975 2022 51890.288599\n", + " 26 NaN CHN 2004 2023 88036.827334\n", + " 27 NaN MNG 1975 2023 51890.288599\n", + " 28 NaN CHN 2004 2024 88036.827334\n", + " 29 
NaN MNG 1975 2024 51890.288599,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1975 2010 20564.339797\n", + " 1 1.0 MNG 1975 2010 79530.437669\n", + " 2 2.0 RUS 2007 2010 34576.162623\n", + " 3 NaN CHN 1975 2011 20564.339797\n", + " 4 NaN MNG 1975 2011 93146.306692\n", + " 5 NaN RUS 2007 2011 44047.290474\n", + " 6 NaN CHN 1975 2012 22184.602107\n", + " 7 NaN MNG 1975 2012 98128.737637\n", + " 8 NaN RUS 2007 2012 44047.290474\n", + " 9 NaN CHN 1975 2013 22184.602107\n", + " 10 NaN MNG 1975 2013 98128.737637\n", + " 11 NaN RUS 2007 2013 44047.290474\n", + " 12 NaN CHN 1975 2014 22184.602107\n", + " 13 NaN MNG 1975 2014 98128.737637\n", + " 14 NaN RUS 2007 2014 44063.067592\n", + " 15 NaN CHN 1975 2015 22184.602107\n", + " 16 NaN MNG 1975 2015 98128.737637\n", + " 17 NaN RUS 2007 2015 44319.455331\n", + " 18 NaN CHN 1975 2016 22184.602107\n", + " 19 NaN MNG 1975 2016 98128.737637\n", + " 20 NaN RUS 2007 2016 44319.455331\n", + " 21 NaN CHN 1975 2017 22184.602107\n", + " 22 NaN MNG 1975 2017 98128.737637\n", + " 23 NaN RUS 2007 2017 44324.168577\n", + " 24 NaN CHN 1975 2018 22184.602107\n", + " 25 NaN MNG 1975 2018 98128.737637\n", + " 26 NaN RUS 2007 2018 44324.773126\n", + " 27 NaN CHN 1975 2019 22184.602107\n", + " 28 NaN MNG 1975 2019 101795.235866\n", + " 29 NaN RUS 2007 2019 44873.262428\n", + " 30 NaN CHN 1975 2020 22184.602107\n", + " 31 NaN MNG 1975 2020 101795.235866\n", + " 32 NaN RUS 2007 2020 44873.262428\n", + " 33 NaN CHN 1975 2021 22184.602107\n", + " 34 NaN MNG 1975 2021 101795.235866\n", + " 35 NaN RUS 2007 2021 44873.262428\n", + " 36 NaN CHN 1975 2022 22184.602107\n", + " 37 NaN MNG 1975 2022 101795.235866\n", + " 38 NaN RUS 2007 2022 44873.262428\n", + " 39 NaN CHN 1975 2023 22184.602107\n", + " 40 NaN MNG 1975 2023 101795.235866\n", + " 41 NaN RUS 2007 2023 44873.262428\n", + " 42 NaN CHN 1975 2024 22184.602107\n", + " 43 NaN MNG 1975 2024 101795.235866\n", + " 44 NaN RUS 2007 2024 44873.262428,\n", + " index iso_3 STATUS_YR year 
area\n", + " 0 0.0 RUS 1993 2010 15408.339395\n", + " 1 NaN RUS 1993 2011 15819.148419\n", + " 2 NaN RUS 1993 2012 16088.484216\n", + " 3 NaN RUS 1993 2013 16568.729761\n", + " 4 NaN RUS 1993 2014 16568.729761\n", + " 5 NaN RUS 1993 2015 16632.971110\n", + " 6 NaN RUS 1993 2016 16632.971110\n", + " 7 NaN RUS 1993 2017 16635.285196\n", + " 8 NaN RUS 1993 2018 16635.285196\n", + " 9 NaN RUS 1993 2019 16667.256274\n", + " 10 NaN RUS 1993 2020 16668.592541\n", + " 11 NaN RUS 1993 2021 16668.592541\n", + " 12 NaN RUS 1993 2022 16668.592541\n", + " 13 NaN RUS 1993 2023 16668.592541\n", + " 14 NaN RUS 1993 2024 16668.592541,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1993 2010 30719.449481\n", + " 1 NaN RUS 1993 2011 30719.449481\n", + " 2 NaN RUS 1993 2012 30719.449481\n", + " 3 NaN RUS 1993 2013 30719.449481\n", + " 4 NaN RUS 1993 2014 30719.449481\n", + " 5 NaN RUS 1993 2015 30719.449481\n", + " 6 NaN RUS 1993 2016 30719.449481\n", + " 7 NaN RUS 1993 2017 30719.449481\n", + " 8 NaN RUS 1993 2018 30719.449481\n", + " 9 NaN RUS 1993 2019 30719.449481\n", + " 10 NaN RUS 1993 2020 30719.449481\n", + " 11 NaN RUS 1993 2021 30719.449481\n", + " 12 NaN RUS 1993 2022 30719.449481\n", + " 13 NaN RUS 1993 2023 30719.449481\n", + " 14 NaN RUS 1993 2024 30719.449481,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1979 2010 3081.149832\n", + " 1 NaN RUS 1979 2011 3081.149832\n", + " 2 NaN RUS 1979 2012 3081.149832\n", + " 3 NaN RUS 1979 2013 3081.149832\n", + " 4 NaN RUS 1979 2014 3081.149832\n", + " 5 NaN RUS 1979 2015 3081.149832\n", + " 6 NaN RUS 1979 2016 3081.149832\n", + " 7 NaN RUS 1979 2017 3081.149832\n", + " 8 NaN RUS 1979 2018 3081.149832\n", + " 9 NaN RUS 1979 2019 3081.149832\n", + " 10 NaN RUS 1979 2020 3081.149832\n", + " 11 NaN RUS 1979 2021 3081.149832\n", + " 12 NaN RUS 1979 2022 3081.149832\n", + " 13 NaN RUS 1979 2023 3081.149832\n", + " 14 NaN RUS 1979 2024 3081.149832,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1989 
2010 87.183663\n", + " 1 1.0 IDN 0 2010 3884.674868\n", + " 2 NaN AUS 1989 2011 87.183663\n", + " 3 NaN IDN 0 2011 3884.674868\n", + " 4 NaN AUS 1989 2012 87.183663\n", + " 5 NaN IDN 0 2012 3890.077258\n", + " 6 NaN AUS 1989 2013 87.183663\n", + " 7 NaN IDN 0 2013 3890.077258\n", + " 8 NaN AUS 1989 2014 87.183663\n", + " 9 NaN IDN 0 2014 3890.077258\n", + " 10 NaN AUS 1989 2015 87.183663\n", + " 11 NaN IDN 0 2015 3890.077258\n", + " 12 NaN AUS 1989 2016 87.183663\n", + " 13 NaN IDN 0 2016 3890.077258\n", + " 14 NaN AUS 1989 2017 87.183663\n", + " 15 NaN IDN 0 2017 3890.077258\n", + " 16 NaN AUS 1989 2018 87.183663\n", + " 17 NaN IDN 0 2018 3890.077258\n", + " 18 NaN AUS 1989 2019 87.183663\n", + " 19 NaN IDN 0 2019 3890.077258\n", + " 20 NaN AUS 1989 2020 87.183663\n", + " 21 NaN IDN 0 2020 3890.077258\n", + " 22 NaN AUS 1989 2021 87.183663\n", + " 23 NaN IDN 0 2021 3890.077258\n", + " 24 NaN AUS 1989 2022 87.183663\n", + " 25 NaN IDN 0 2022 3890.077258\n", + " 26 NaN AUS 1989 2023 87.183663\n", + " 27 NaN IDN 0 2023 3890.077258\n", + " 28 NaN AUS 1989 2024 87.183663\n", + " 29 NaN IDN 0 2024 3890.077258,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 38152.415683\n", + " 1 1.0 MYS 2004 2010 11701.126152\n", + " 2 2.0 SGP 1990 2010 33.498889\n", + " 3 NaN IDN 0 2011 38152.415683\n", + " 4 NaN MYS 2004 2011 11701.126152\n", + " 5 NaN SGP 1990 2011 33.498889\n", + " 6 NaN IDN 0 2012 38152.415683\n", + " 7 NaN MYS 2004 2012 11701.126152\n", + " 8 NaN SGP 1990 2012 33.498889\n", + " 9 NaN IDN 0 2013 38152.415683\n", + " 10 NaN MYS 2004 2013 11701.126152\n", + " 11 NaN SGP 1990 2013 33.498889\n", + " 12 NaN IDN 0 2014 38152.415683\n", + " 13 NaN MYS 2004 2014 11701.126152\n", + " 14 NaN SGP 1990 2014 33.498889\n", + " 15 NaN IDN 0 2015 38152.415683\n", + " 16 NaN MYS 2004 2015 11701.126152\n", + " 17 NaN SGP 1990 2015 33.498889\n", + " 18 NaN IDN 0 2016 38152.415683\n", + " 19 NaN MYS 2004 2016 11706.859609\n", + " 20 NaN SGP 1990 2016 33.498889\n", + 
" 21 NaN IDN 0 2017 38152.415683\n", + " 22 NaN MYS 2004 2017 11706.859609\n", + " 23 NaN SGP 1990 2017 33.498889\n", + " 24 NaN IDN 0 2018 38152.415683\n", + " 25 NaN MYS 2004 2018 11706.859609\n", + " 26 NaN SGP 1990 2018 33.498889\n", + " 27 NaN IDN 0 2019 38152.415683\n", + " 28 NaN MYS 2004 2019 11706.859609\n", + " 29 NaN SGP 1990 2019 33.498889\n", + " 30 NaN IDN 0 2020 38152.415683\n", + " 31 NaN MYS 2004 2020 11706.859609\n", + " 32 NaN SGP 1990 2020 33.498889\n", + " 33 NaN IDN 0 2021 38152.415683\n", + " 34 NaN MYS 2004 2021 11706.859609\n", + " 35 NaN SGP 1990 2021 33.498889\n", + " 36 NaN IDN 0 2022 38152.415683\n", + " 37 NaN MYS 2004 2022 11706.859609\n", + " 38 NaN SGP 1990 2022 33.498889\n", + " 39 NaN IDN 0 2023 38152.415683\n", + " 40 NaN MYS 2004 2023 11706.859609\n", + " 41 NaN SGP 1990 2023 33.498889\n", + " 42 NaN IDN 0 2024 38152.415683\n", + " 43 NaN MYS 2004 2024 11706.859609\n", + " 44 NaN SGP 1990 2024 33.498889,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 KHM 1993 2010 35998.290915\n", + " 1 1.0 LAO 1993 2010 3964.173151\n", + " 2 2.0 MYS 2002 2010 8184.360199\n", + " 3 3.0 THA 2007 2010 20322.120736\n", + " 4 4.0 VNM 2002 2010 14365.918833\n", + " .. ... ... ... ... ...\n", + " 70 NaN KHM 1993 2024 84370.176684\n", + " 71 NaN LAO 1993 2024 16533.788894\n", + " 72 NaN MYS 2002 2024 8318.239391\n", + " 73 NaN THA 2007 2024 29090.763634\n", + " 74 NaN VNM 2002 2024 31072.461745\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1993 2010 2437.507096\n", + " 1 1.0 KHM 2005 2010 2093.714479\n", + " 2 2.0 LAO 0 2010 32473.734603\n", + " 3 3.0 MMR 1996 2010 105.372532\n", + " 4 4.0 THA 2005 2010 30206.512317\n", + " .. ... ... ... ... 
...\n", + " 85 NaN KHM 2005 2024 2937.404091\n", + " 86 NaN LAO 0 2024 53035.291062\n", + " 87 NaN MMR 1996 2024 105.372532\n", + " 88 NaN THA 2005 2024 30206.512317\n", + " 89 NaN VNM 2001 2024 35969.354765\n", + " \n", + " [90 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2010 2010 17664.101516\n", + " 1 1.0 MMR 2003 2010 2333.565320\n", + " 2 NaN CHN 2010 2011 19699.648440\n", + " 3 NaN MMR 2003 2011 2333.565320\n", + " 4 NaN CHN 2010 2012 19704.541470\n", + " 5 NaN MMR 2003 2012 2333.565320\n", + " 6 NaN CHN 2010 2013 19711.020045\n", + " 7 NaN MMR 2003 2013 2333.565320\n", + " 8 NaN CHN 2010 2014 19711.020045\n", + " 9 NaN MMR 2003 2014 2333.565320\n", + " 10 NaN CHN 2010 2015 19711.020045\n", + " 11 NaN MMR 2003 2015 2333.565320\n", + " 12 NaN CHN 2010 2016 19711.020045\n", + " 13 NaN MMR 2003 2016 2333.565320\n", + " 14 NaN CHN 2010 2017 19711.020045\n", + " 15 NaN MMR 2003 2017 2333.565320\n", + " 16 NaN CHN 2010 2018 20114.319282\n", + " 17 NaN MMR 2003 2018 2333.565320\n", + " 18 NaN CHN 2010 2019 20114.319282\n", + " 19 NaN MMR 2003 2019 2333.565320\n", + " 20 NaN CHN 2010 2020 20114.319282\n", + " 21 NaN MMR 2003 2020 2333.565320\n", + " 22 NaN CHN 2010 2021 20114.319282\n", + " 23 NaN MMR 2003 2021 2333.565320\n", + " 24 NaN CHN 2010 2022 20114.319282\n", + " 25 NaN MMR 2003 2022 2333.565320\n", + " 26 NaN CHN 2010 2023 20114.319282\n", + " 27 NaN MMR 2003 2023 2333.565320\n", + " 28 NaN CHN 2010 2024 20114.319282\n", + " 29 NaN MMR 2003 2024 2333.565320,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1996 2010 18484.866816\n", + " 1 1.0 MNG 1996 2010 43454.655486\n", + " 2 NaN CHN 1996 2011 18505.384207\n", + " 3 NaN MNG 1996 2011 43710.598822\n", + " 4 NaN CHN 1996 2012 18505.384207\n", + " 5 NaN MNG 1996 2012 43710.598822\n", + " 6 NaN CHN 1996 2013 18505.384207\n", + " 7 NaN MNG 1996 2013 43710.598822\n", + " 8 NaN CHN 1996 2014 18505.384207\n", + " 9 NaN MNG 1996 2014 43710.598822\n", + " 10 NaN CHN 1996 
2015 18620.152611\n", + " 11 NaN MNG 1996 2015 43710.598822\n", + " 12 NaN CHN 1996 2016 18620.152611\n", + " 13 NaN MNG 1996 2016 50118.236817\n", + " 14 NaN CHN 1996 2017 18620.152611\n", + " 15 NaN MNG 1996 2017 50118.236817\n", + " 16 NaN CHN 1996 2018 18620.152611\n", + " 17 NaN MNG 1996 2018 50118.236817\n", + " 18 NaN CHN 1996 2019 18620.152611\n", + " 19 NaN MNG 1996 2019 50118.236817\n", + " 20 NaN CHN 1996 2020 18620.152611\n", + " 21 NaN MNG 1996 2020 50118.236817\n", + " 22 NaN CHN 1996 2021 18620.152611\n", + " 23 NaN MNG 1996 2021 50118.236817\n", + " 24 NaN CHN 1996 2022 18620.152611\n", + " 25 NaN MNG 1996 2022 50118.236817\n", + " 26 NaN CHN 1996 2023 18620.152611\n", + " 27 NaN MNG 1996 2023 50118.236817\n", + " 28 NaN CHN 1996 2024 18620.152611\n", + " 29 NaN MNG 1996 2024 50118.236817,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MNG 2008 2010 52719.717516\n", + " 1 1.0 RUS 1980 2010 102282.490390\n", + " 2 NaN MNG 2008 2011 59407.246340\n", + " 3 NaN RUS 1980 2011 108308.077108\n", + " 4 NaN MNG 2008 2012 59582.667620\n", + " 5 NaN RUS 1980 2012 108308.077108\n", + " 6 NaN MNG 2008 2013 59582.667620\n", + " 7 NaN RUS 1980 2013 108308.077108\n", + " 8 NaN MNG 2008 2014 59582.667620\n", + " 9 NaN RUS 1980 2014 111357.071972\n", + " 10 NaN MNG 2008 2015 59582.667620\n", + " 11 NaN RUS 1980 2015 111357.071972\n", + " 12 NaN MNG 2008 2016 59582.667620\n", + " 13 NaN RUS 1980 2016 111357.071972\n", + " 14 NaN MNG 2008 2017 59700.740926\n", + " 15 NaN RUS 1980 2017 111357.071972\n", + " 16 NaN MNG 2008 2018 59700.740926\n", + " 17 NaN RUS 1980 2018 111357.168273\n", + " 18 NaN MNG 2008 2019 59700.740926\n", + " 19 NaN RUS 1980 2019 111357.168273\n", + " 20 NaN MNG 2008 2020 59700.740926\n", + " 21 NaN RUS 1980 2020 111357.168273\n", + " 22 NaN MNG 2008 2021 59700.740926\n", + " 23 NaN RUS 1980 2021 111357.168273\n", + " 24 NaN MNG 2008 2022 59700.740926\n", + " 25 NaN RUS 1980 2022 111357.168273\n", + " 26 NaN MNG 2008 2023 59700.740926\n", + 
" 27 NaN RUS 1980 2023 111357.168273\n", + " 28 NaN MNG 2008 2024 59700.740926\n", + " 29 NaN RUS 1980 2024 111357.168273,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 32581.765845\n", + " 1 NaN RUS 1996 2011 32581.765845\n", + " 2 NaN RUS 1996 2012 32581.765845\n", + " 3 NaN RUS 1996 2013 32581.765845\n", + " 4 NaN RUS 1996 2014 34702.379215\n", + " 5 NaN RUS 1996 2015 34702.379215\n", + " 6 NaN RUS 1996 2016 34702.379215\n", + " 7 NaN RUS 1996 2017 34702.379215\n", + " 8 NaN RUS 1996 2018 34702.379215\n", + " 9 NaN RUS 1996 2019 34702.379215\n", + " 10 NaN RUS 1996 2020 34702.379215\n", + " 11 NaN RUS 1996 2021 34702.379215\n", + " 12 NaN RUS 1996 2022 34702.379215\n", + " 13 NaN RUS 1996 2023 34702.379215\n", + " 14 NaN RUS 1996 2024 34702.379215,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2000 2010 53305.142966\n", + " 1 NaN RUS 2000 2011 53305.142966\n", + " 2 NaN RUS 2000 2012 53305.142966\n", + " 3 NaN RUS 2000 2013 53305.142966\n", + " 4 NaN RUS 2000 2014 53305.142966\n", + " 5 NaN RUS 2000 2015 53305.142966\n", + " 6 NaN RUS 2000 2016 53305.142966\n", + " 7 NaN RUS 2000 2017 53305.142966\n", + " 8 NaN RUS 2000 2018 53305.142966\n", + " 9 NaN RUS 2000 2019 53305.142966\n", + " 10 NaN RUS 2000 2020 53305.142966\n", + " 11 NaN RUS 2000 2021 53305.142966\n", + " 12 NaN RUS 2000 2022 53305.142966\n", + " 13 NaN RUS 2000 2023 53305.142966\n", + " 14 NaN RUS 2000 2024 53305.142966,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 1252.165023\n", + " 1 NaN RUS 1996 2011 1252.165023\n", + " 2 NaN RUS 1996 2012 1252.165023\n", + " 3 NaN RUS 1996 2013 1252.165023\n", + " 4 NaN RUS 1996 2014 1252.165023\n", + " 5 NaN RUS 1996 2015 1252.165023\n", + " 6 NaN RUS 1996 2016 1252.165023\n", + " 7 NaN RUS 1996 2017 1252.165023\n", + " 8 NaN RUS 1996 2018 1252.165023\n", + " 9 NaN RUS 1996 2019 1252.165023\n", + " 10 NaN RUS 1996 2020 1252.165023\n", + " 11 NaN RUS 1996 2021 1252.165023\n", + " 12 NaN RUS 1996 2022 
1252.165023\n", + " 13 NaN RUS 1996 2023 1252.165023\n", + " 14 NaN RUS 1996 2024 1252.165023,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 ATA 2000 2010 8.849486\n", + " 1 NaN ATA 2000 2011 8.849486\n", + " 2 NaN ATA 2000 2012 8.849486\n", + " 3 NaN ATA 2000 2013 9.128316\n", + " 4 NaN ATA 2000 2014 9.128316\n", + " 5 NaN ATA 2000 2015 9.128316\n", + " 6 NaN ATA 2000 2016 9.128316\n", + " 7 NaN ATA 2000 2017 9.128316\n", + " 8 NaN ATA 2000 2018 9.128316\n", + " 9 NaN ATA 2000 2019 9.128316\n", + " 10 NaN ATA 2000 2020 9.128316\n", + " 11 NaN ATA 2000 2021 9.128316\n", + " 12 NaN ATA 2000 2022 9.128316\n", + " 13 NaN ATA 2000 2023 9.128316\n", + " 14 NaN ATA 2000 2024 9.128316,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2001 2010 70657.273004\n", + " 1 NaN AUS 2001 2011 71838.325287\n", + " 2 NaN AUS 2001 2012 71867.367753\n", + " 3 NaN AUS 2001 2013 73062.659637\n", + " 4 NaN AUS 2001 2014 73087.604392\n", + " 5 NaN AUS 2001 2015 73105.473962\n", + " 6 NaN AUS 2001 2016 73125.538753\n", + " 7 NaN AUS 2001 2017 73142.098475\n", + " 8 NaN AUS 2001 2018 73155.489205\n", + " 9 NaN AUS 2001 2019 73171.136662\n", + " 10 NaN AUS 2001 2020 73231.255445\n", + " 11 NaN AUS 2001 2021 76969.118995\n", + " 12 NaN AUS 2001 2022 77716.537940\n", + " 13 NaN AUS 2001 2023 77716.537940\n", + " 14 NaN AUS 2001 2024 77716.537940,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1991 2010 31993.411446\n", + " 1 NaN AUS 1991 2011 32696.697038\n", + " 2 NaN AUS 1991 2012 32696.697038\n", + " 3 NaN AUS 1991 2013 32745.439011\n", + " 4 NaN AUS 1991 2014 32745.439011\n", + " 5 NaN AUS 1991 2015 32745.439011\n", + " 6 NaN AUS 1991 2016 32749.527113\n", + " 7 NaN AUS 1991 2017 32749.527113\n", + " 8 NaN AUS 1991 2018 32749.527113\n", + " 9 NaN AUS 1991 2019 32751.516505\n", + " 10 NaN AUS 1991 2020 33019.468140\n", + " 11 NaN AUS 1991 2021 33526.312539\n", + " 12 NaN AUS 1991 2022 33526.312539\n", + " 13 NaN AUS 1991 2023 33526.312539\n", + " 14 NaN AUS 1991 
2024 33526.312539,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 5573.731475\n", + " 1 NaN IDN 0 2011 5573.731475\n", + " 2 NaN IDN 0 2012 5573.784589\n", + " 3 NaN IDN 0 2013 5573.784589\n", + " 4 NaN IDN 0 2014 5576.191382\n", + " 5 NaN IDN 0 2015 5576.191382\n", + " 6 NaN IDN 0 2016 5576.476100\n", + " 7 NaN IDN 0 2017 5576.476100\n", + " 8 NaN IDN 0 2018 5576.476100\n", + " 9 NaN IDN 0 2019 5576.476100\n", + " 10 NaN IDN 0 2020 5576.476100\n", + " 11 NaN IDN 0 2021 5576.476100\n", + " 12 NaN IDN 0 2022 5576.476100\n", + " 13 NaN IDN 0 2023 5576.476100\n", + " 14 NaN IDN 0 2024 5576.476100,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRN 1947 2010 1662.668477\n", + " 1 1.0 IDN 0 2010 52082.834235\n", + " 2 2.0 MYS 2005 2010 24669.283383\n", + " 3 NaN BRN 1947 2011 2082.128677\n", + " 4 NaN IDN 0 2011 52082.834235\n", + " 5 NaN MYS 2005 2011 25100.038953\n", + " 6 NaN BRN 1947 2012 2082.128677\n", + " 7 NaN IDN 0 2012 52149.492198\n", + " 8 NaN MYS 2005 2012 25185.705048\n", + " 9 NaN BRN 1947 2013 2082.128677\n", + " 10 NaN IDN 0 2013 53779.887908\n", + " 11 NaN MYS 2005 2013 27853.014736\n", + " 12 NaN BRN 1947 2014 2082.128677\n", + " 13 NaN IDN 0 2014 53779.887908\n", + " 14 NaN MYS 2005 2014 28092.213819\n", + " 15 NaN BRN 1947 2015 2082.128677\n", + " 16 NaN IDN 0 2015 53895.628792\n", + " 17 NaN MYS 2005 2015 28680.008381\n", + " 18 NaN BRN 1947 2016 2082.128677\n", + " 19 NaN IDN 0 2016 54137.527744\n", + " 20 NaN MYS 2005 2016 28680.008381\n", + " 21 NaN BRN 1947 2017 2082.128677\n", + " 22 NaN IDN 0 2017 54137.527744\n", + " 23 NaN MYS 2005 2017 28680.008381\n", + " 24 NaN BRN 1947 2018 2082.128677\n", + " 25 NaN IDN 0 2018 54137.527744\n", + " 26 NaN MYS 2005 2018 28680.008381\n", + " 27 NaN BRN 1947 2019 2082.128677\n", + " 28 NaN IDN 0 2019 54137.527744\n", + " 29 NaN MYS 2005 2019 28680.008381\n", + " 30 NaN BRN 1947 2020 2082.128677\n", + " 31 NaN IDN 0 2020 54137.527744\n", + " 32 NaN MYS 2005 2020 28680.008381\n", + " 
33 NaN BRN 1947 2021 2082.128677\n", + " 34 NaN IDN 0 2021 54137.527744\n", + " 35 NaN MYS 2005 2021 28680.008381\n", + " 36 NaN BRN 1947 2022 2082.128677\n", + " 37 NaN IDN 0 2022 54137.527744\n", + " 38 NaN MYS 2005 2022 28680.008381\n", + " 39 NaN BRN 1947 2023 2082.128677\n", + " 40 NaN IDN 0 2023 54137.527744\n", + " 41 NaN MYS 2005 2023 28680.008381\n", + " 42 NaN BRN 1947 2024 2082.128677\n", + " 43 NaN IDN 0 2024 54137.527744\n", + " 44 NaN MYS 2005 2024 28680.008381,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 BRN 1950 2010 232.800224\n", + " 1 1.0 MYS 1984 2010 7856.075900\n", + " 2 2.0 PHL 2009 2010 13170.451446\n", + " 3 NaN BRN 1950 2011 232.800224\n", + " 4 NaN MYS 1984 2011 7856.075900\n", + " 5 NaN PHL 2009 2011 13170.451446\n", + " 6 NaN BRN 1950 2012 253.422907\n", + " 7 NaN MYS 1984 2012 8677.734147\n", + " 8 NaN PHL 2009 2012 13170.451446\n", + " 9 NaN BRN 1950 2013 253.422907\n", + " 10 NaN MYS 1984 2013 10425.512061\n", + " 11 NaN PHL 2009 2013 13170.451446\n", + " 12 NaN BRN 1950 2014 253.422907\n", + " 13 NaN MYS 1984 2014 14325.801262\n", + " 14 NaN PHL 2009 2014 13170.451446\n", + " 15 NaN BRN 1950 2015 253.422907\n", + " 16 NaN MYS 1984 2015 16508.746818\n", + " 17 NaN PHL 2009 2015 13170.451446\n", + " 18 NaN BRN 1950 2016 253.422907\n", + " 19 NaN MYS 1984 2016 16508.762006\n", + " 20 NaN PHL 2009 2016 13170.451446\n", + " 21 NaN BRN 1950 2017 253.422907\n", + " 22 NaN MYS 1984 2017 16508.762006\n", + " 23 NaN PHL 2009 2017 13170.451446\n", + " 24 NaN BRN 1950 2018 253.422907\n", + " 25 NaN MYS 1984 2018 16508.762006\n", + " 26 NaN PHL 2009 2018 13170.451446\n", + " 27 NaN BRN 1950 2019 253.422907\n", + " 28 NaN MYS 1984 2019 16508.762006\n", + " 29 NaN PHL 2009 2019 13170.451446\n", + " 30 NaN BRN 1950 2020 253.422907\n", + " 31 NaN MYS 1984 2020 16508.762006\n", + " 32 NaN PHL 2009 2020 13170.451446\n", + " 33 NaN BRN 1950 2021 253.422907\n", + " 34 NaN MYS 1984 2021 16508.762006\n", + " 35 NaN PHL 2009 2021 13170.451446\n", 
+ " 36 NaN BRN 1950 2022 253.422907\n", + " 37 NaN MYS 1984 2022 16508.762006\n", + " 38 NaN PHL 2009 2022 13170.451446\n", + " 39 NaN BRN 1950 2023 253.422907\n", + " 40 NaN MYS 1984 2023 16508.762006\n", + " 41 NaN PHL 2009 2023 13170.451446\n", + " 42 NaN BRN 1950 2024 253.422907\n", + " 43 NaN MYS 1984 2024 16508.762006\n", + " 44 NaN PHL 2009 2024 13170.451446,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1979 2010 429.027641\n", + " 1 1.0 PHL 1980 2010 0.230241\n", + " 2 2.0 TWN 1995 2010 42.315967\n", + " 3 NaN CHN 1979 2011 429.027641\n", + " 4 NaN PHL 1980 2011 0.230241\n", + " 5 NaN TWN 1995 2011 42.315967\n", + " 6 NaN CHN 1979 2012 429.027641\n", + " 7 NaN PHL 1980 2012 0.230241\n", + " 8 NaN TWN 1995 2012 42.315967\n", + " 9 NaN CHN 1979 2013 429.027641\n", + " 10 NaN PHL 1980 2013 0.230241\n", + " 11 NaN TWN 1995 2013 42.315967\n", + " 12 NaN CHN 1979 2014 429.027641\n", + " 13 NaN PHL 1980 2014 0.230241\n", + " 14 NaN TWN 1995 2014 42.315967\n", + " 15 NaN CHN 1979 2015 429.027641\n", + " 16 NaN PHL 1980 2015 0.230241\n", + " 17 NaN TWN 1995 2015 42.315967\n", + " 18 NaN CHN 1979 2016 429.027641\n", + " 19 NaN PHL 1980 2016 0.230241\n", + " 20 NaN TWN 1995 2016 42.315967\n", + " 21 NaN CHN 1979 2017 429.027641\n", + " 22 NaN PHL 1980 2017 0.230241\n", + " 23 NaN TWN 1995 2017 42.315967\n", + " 24 NaN CHN 1979 2018 429.027641\n", + " 25 NaN PHL 1980 2018 9.338150\n", + " 26 NaN TWN 1995 2018 42.315967\n", + " 27 NaN CHN 1979 2019 429.027641\n", + " 28 NaN PHL 1980 2019 9.338150\n", + " 29 NaN TWN 1995 2019 42.315967\n", + " 30 NaN CHN 1979 2020 429.027641\n", + " 31 NaN PHL 1980 2020 9.338150\n", + " 32 NaN TWN 1995 2020 42.315967\n", + " 33 NaN CHN 1979 2021 429.027641\n", + " 34 NaN PHL 1980 2021 9.338150\n", + " 35 NaN TWN 1995 2021 42.315967\n", + " 36 NaN CHN 1979 2022 429.027641\n", + " 37 NaN PHL 1980 2022 9.338150\n", + " 38 NaN TWN 1995 2022 42.315967\n", + " 39 NaN CHN 1979 2023 429.027641\n", + " 40 NaN PHL 1980 2023 
9.338150\n", + " 41 NaN TWN 1995 2023 42.315967\n", + " 42 NaN CHN 1979 2024 429.027641\n", + " 43 NaN PHL 1980 2024 9.338150\n", + " 44 NaN TWN 1995 2024 42.315967,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2010 2010 6249.129008\n", + " 1 1.0 TWN 2000 2010 0.156168\n", + " 2 NaN CHN 2010 2011 6249.129008\n", + " 3 NaN TWN 2000 2011 0.156168\n", + " 4 NaN CHN 2010 2012 6249.129008\n", + " 5 NaN TWN 2000 2012 0.156168\n", + " 6 NaN CHN 2010 2013 6451.761888\n", + " 7 NaN TWN 2000 2013 0.156168\n", + " 8 NaN CHN 2010 2014 6451.761888\n", + " 9 NaN TWN 2000 2014 0.156168\n", + " 10 NaN CHN 2010 2015 6785.766988\n", + " 11 NaN TWN 2000 2015 0.156168\n", + " 12 NaN CHN 2010 2016 7518.777709\n", + " 13 NaN TWN 2000 2016 0.156168\n", + " 14 NaN CHN 2010 2017 7518.777709\n", + " 15 NaN TWN 2000 2017 0.156168\n", + " 16 NaN CHN 2010 2018 7762.180620\n", + " 17 NaN TWN 2000 2018 0.156168\n", + " 18 NaN CHN 2010 2019 7762.180620\n", + " 19 NaN TWN 2000 2019 0.156168\n", + " 20 NaN CHN 2010 2020 7762.180620\n", + " 21 NaN TWN 2000 2020 0.156168\n", + " 22 NaN CHN 2010 2021 7762.180620\n", + " 23 NaN TWN 2000 2021 0.156168\n", + " 24 NaN CHN 2010 2022 7762.180620\n", + " 25 NaN TWN 2000 2022 0.156168\n", + " 26 NaN CHN 2010 2023 7762.180620\n", + " 27 NaN TWN 2000 2023 0.156168\n", + " 28 NaN CHN 2010 2024 7762.180620\n", + " 29 NaN TWN 2000 2024 0.156168,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1987 2010 269.567346\n", + " 1 1.0 MNG 2010 2010 359.204398\n", + " 2 NaN CHN 1987 2011 269.567346\n", + " 3 NaN MNG 2010 2011 359.204398\n", + " 4 NaN CHN 1987 2012 269.567346\n", + " 5 NaN MNG 2010 2012 359.204398\n", + " 6 NaN CHN 1987 2013 672.723575\n", + " 7 NaN MNG 2010 2013 359.204398\n", + " 8 NaN CHN 1987 2014 672.723575\n", + " 9 NaN MNG 2010 2014 359.204398\n", + " 10 NaN CHN 1987 2015 672.723575\n", + " 11 NaN MNG 2010 2015 359.204398\n", + " 12 NaN CHN 1987 2016 672.723575\n", + " 13 NaN MNG 2010 2016 359.204398\n", + " 14 NaN CHN 1987 
2017 672.723575\n", + " 15 NaN MNG 2010 2017 359.204398\n", + " 16 NaN CHN 1987 2018 1142.555288\n", + " 17 NaN MNG 2010 2018 359.204398\n", + " 18 NaN CHN 1987 2019 1142.555288\n", + " 19 NaN MNG 2010 2019 359.204398\n", + " 20 NaN CHN 1987 2020 1142.555288\n", + " 21 NaN MNG 2010 2020 359.204398\n", + " 22 NaN CHN 1987 2021 1142.555288\n", + " 23 NaN MNG 2010 2021 359.204398\n", + " 24 NaN CHN 1987 2022 1142.555288\n", + " 25 NaN MNG 2010 2022 359.204398\n", + " 26 NaN CHN 1987 2023 1142.555288\n", + " 27 NaN MNG 2010 2023 359.204398\n", + " 28 NaN CHN 1987 2024 1142.555288\n", + " 29 NaN MNG 2010 2024 359.204398,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1964 2010 16523.291369\n", + " 1 1.0 MNG 1998 2010 32171.674819\n", + " 2 2.0 RUS 2003 2010 10954.779093\n", + " 3 NaN CHN 1964 2011 16629.739166\n", + " 4 NaN MNG 1998 2011 32270.014446\n", + " 5 NaN RUS 2003 2011 11535.348350\n", + " 6 NaN CHN 1964 2012 16629.739166\n", + " 7 NaN MNG 1998 2012 35711.541302\n", + " 8 NaN RUS 2003 2012 11535.348350\n", + " 9 NaN CHN 1964 2013 16629.739166\n", + " 10 NaN MNG 1998 2013 35711.541302\n", + " 11 NaN RUS 2003 2013 11535.916442\n", + " 12 NaN CHN 1964 2014 16629.739166\n", + " 13 NaN MNG 1998 2014 35711.541302\n", + " 14 NaN RUS 2003 2014 17244.381947\n", + " 15 NaN CHN 1964 2015 16732.974782\n", + " 16 NaN MNG 1998 2015 35711.541302\n", + " 17 NaN RUS 2003 2015 17347.617564\n", + " 18 NaN CHN 1964 2016 16732.974782\n", + " 19 NaN MNG 1998 2016 35711.541302\n", + " 20 NaN RUS 2003 2016 17347.617564\n", + " 21 NaN CHN 1964 2017 17783.421551\n", + " 22 NaN MNG 1998 2017 41379.101540\n", + " 23 NaN RUS 2003 2017 24883.073657\n", + " 24 NaN CHN 1964 2018 17783.421551\n", + " 25 NaN MNG 1998 2018 41379.101540\n", + " 26 NaN RUS 2003 2018 26076.922848\n", + " 27 NaN CHN 1964 2019 23482.674615\n", + " 28 NaN MNG 1998 2019 47078.354604\n", + " 29 NaN RUS 2003 2019 28591.515387\n", + " 30 NaN CHN 1964 2020 23482.674615\n", + " 31 NaN MNG 1998 2020 
47078.354604\n", + " 32 NaN RUS 2003 2020 28592.378973\n", + " 33 NaN CHN 1964 2021 23482.674615\n", + " 34 NaN MNG 1998 2021 47078.354604\n", + " 35 NaN RUS 2003 2021 28592.378973\n", + " 36 NaN CHN 1964 2022 23482.674615\n", + " 37 NaN MNG 1998 2022 47078.354604\n", + " 38 NaN RUS 2003 2022 28592.378973\n", + " 39 NaN CHN 1964 2023 23482.674615\n", + " 40 NaN MNG 1998 2023 47078.354604\n", + " 41 NaN RUS 2003 2023 28592.378973\n", + " 42 NaN CHN 1964 2024 23482.674615\n", + " 43 NaN MNG 1998 2024 47078.354604\n", + " 44 NaN RUS 2003 2024 28592.378973,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 57676.082631\n", + " 1 NaN RUS 1996 2011 57676.082631\n", + " 2 NaN RUS 1996 2012 57676.082631\n", + " 3 NaN RUS 1996 2013 57676.082631\n", + " 4 NaN RUS 1996 2014 57676.082631\n", + " 5 NaN RUS 1996 2015 57676.082631\n", + " 6 NaN RUS 1996 2016 57676.082631\n", + " 7 NaN RUS 1996 2017 57676.082631\n", + " 8 NaN RUS 1996 2018 62587.349104\n", + " 9 NaN RUS 1996 2019 62587.349104\n", + " 10 NaN RUS 1996 2020 62587.349104\n", + " 11 NaN RUS 1996 2021 62587.349104\n", + " 12 NaN RUS 1996 2022 62587.349104\n", + " 13 NaN RUS 1996 2023 62587.349104\n", + " 14 NaN RUS 1996 2024 62587.349104,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2000 2010 158966.532373\n", + " 1 NaN RUS 2000 2011 158966.532373\n", + " 2 NaN RUS 2000 2012 158966.532373\n", + " 3 NaN RUS 2000 2013 158966.532373\n", + " 4 NaN RUS 2000 2014 158966.532373\n", + " 5 NaN RUS 2000 2015 158966.532373\n", + " 6 NaN RUS 2000 2016 158966.532373\n", + " 7 NaN RUS 2000 2017 158966.532373\n", + " 8 NaN RUS 2000 2018 158966.532373\n", + " 9 NaN RUS 2000 2019 158966.532373\n", + " 10 NaN RUS 2000 2020 158966.532373\n", + " 11 NaN RUS 2000 2021 158966.532373\n", + " 12 NaN RUS 2000 2022 158966.532373\n", + " 13 NaN RUS 2000 2023 158966.532373\n", + " 14 NaN RUS 2000 2024 158966.532373,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1995 2010 3670.732088\n", + " 1 NaN RUS 1995 
2011 3670.732088\n", + " 2 NaN RUS 1995 2012 3670.732088\n", + " 3 NaN RUS 1995 2013 3670.732088\n", + " 4 NaN RUS 1995 2014 3670.732088\n", + " 5 NaN RUS 1995 2015 3670.732088\n", + " 6 NaN RUS 1995 2016 3670.732088\n", + " 7 NaN RUS 1995 2017 3670.732088\n", + " 8 NaN RUS 1995 2018 3670.732088\n", + " 9 NaN RUS 1995 2019 3670.732088\n", + " 10 NaN RUS 1995 2020 3670.732088\n", + " 11 NaN RUS 1995 2021 3670.732088\n", + " 12 NaN RUS 1995 2022 3670.732088\n", + " 13 NaN RUS 1995 2023 3670.732088\n", + " 14 NaN RUS 1995 2024 3670.732088,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1973 2010 125528.814155\n", + " 1 NaN AUS 1973 2011 125547.683663\n", + " 2 NaN AUS 1973 2012 125552.195660\n", + " 3 NaN AUS 1973 2013 128422.255918\n", + " 4 NaN AUS 1973 2014 131505.660555\n", + " 5 NaN AUS 1973 2015 144015.484178\n", + " 6 NaN AUS 1973 2016 144015.484178\n", + " 7 NaN AUS 1973 2017 144015.484178\n", + " 8 NaN AUS 1973 2018 144015.484178\n", + " 9 NaN AUS 1973 2019 144015.484178\n", + " 10 NaN AUS 1973 2020 212149.446498\n", + " 11 NaN AUS 1973 2021 309528.610828\n", + " 12 NaN AUS 1973 2022 309528.610828\n", + " 13 NaN AUS 1973 2023 309528.610828\n", + " 14 NaN AUS 1973 2024 309528.610828,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1977 2010 137417.367620\n", + " 1 NaN AUS 1977 2011 140640.231389\n", + " 2 NaN AUS 1977 2012 167018.654750\n", + " 3 NaN AUS 1977 2013 266514.055775\n", + " 4 NaN AUS 1977 2014 335350.937187\n", + " 5 NaN AUS 1977 2015 377175.824027\n", + " 6 NaN AUS 1977 2016 377196.006270\n", + " 7 NaN AUS 1977 2017 378288.574790\n", + " 8 NaN AUS 1977 2018 378331.903769\n", + " 9 NaN AUS 1977 2019 378918.607900\n", + " 10 NaN AUS 1977 2020 412479.978357\n", + " 11 NaN AUS 1977 2021 412639.193666\n", + " 12 NaN AUS 1977 2022 434616.343038\n", + " 13 NaN AUS 1977 2023 434616.343038\n", + " 14 NaN AUS 1977 2024 434616.343038,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1977 2010 7874.250911\n", + " 1 1.0 IDN 0 2010 
2299.598534\n", + " 2 2.0 TLS 2000 2010 1920.712610\n", + " 3 NaN AUS 1977 2011 10396.666011\n", + " 4 NaN IDN 0 2011 2322.545101\n", + " 5 NaN TLS 2000 2011 1920.712610\n", + " 6 NaN AUS 1977 2012 10396.666011\n", + " 7 NaN IDN 0 2012 2322.545101\n", + " 8 NaN TLS 2000 2012 1920.712610\n", + " 9 NaN AUS 1977 2013 23080.870020\n", + " 10 NaN IDN 0 2013 2322.545101\n", + " 11 NaN TLS 2000 2013 1920.712610\n", + " 12 NaN AUS 1977 2014 23884.623304\n", + " 13 NaN IDN 0 2014 2322.779749\n", + " 14 NaN TLS 2000 2014 1920.712610\n", + " 15 NaN AUS 1977 2015 23884.623304\n", + " 16 NaN IDN 0 2015 2322.779749\n", + " 17 NaN TLS 2000 2015 2013.284458\n", + " 18 NaN AUS 1977 2016 31427.236102\n", + " 19 NaN IDN 0 2016 2408.995938\n", + " 20 NaN TLS 2000 2016 2400.208704\n", + " 21 NaN AUS 1977 2017 31427.236102\n", + " 22 NaN IDN 0 2017 2408.995938\n", + " 23 NaN TLS 2000 2017 2400.208704\n", + " 24 NaN AUS 1977 2018 31427.236102\n", + " 25 NaN IDN 0 2018 2408.995938\n", + " 26 NaN TLS 2000 2018 2400.208704\n", + " 27 NaN AUS 1977 2019 31458.876846\n", + " 28 NaN IDN 0 2019 2408.995938\n", + " 29 NaN TLS 2000 2019 2400.208704\n", + " 30 NaN AUS 1977 2020 31458.876846\n", + " 31 NaN IDN 0 2020 2408.995938\n", + " 32 NaN TLS 2000 2020 2400.208704\n", + " 33 NaN AUS 1977 2021 31458.876846\n", + " 34 NaN IDN 0 2021 2408.995938\n", + " 35 NaN TLS 2000 2021 2400.208704\n", + " 36 NaN AUS 1977 2022 31458.876846\n", + " 37 NaN IDN 0 2022 2408.995938\n", + " 38 NaN TLS 2000 2022 2400.208704\n", + " 39 NaN AUS 1977 2023 31458.876846\n", + " 40 NaN IDN 0 2023 2408.995938\n", + " 41 NaN TLS 2000 2023 2400.208704\n", + " 42 NaN AUS 1977 2024 31458.876846\n", + " 43 NaN IDN 0 2024 2408.995938\n", + " 44 NaN TLS 2000 2024 2400.208704,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 20323.637408\n", + " 1 NaN IDN 0 2011 20323.637408\n", + " 2 NaN IDN 0 2012 20329.769904\n", + " 3 NaN IDN 0 2013 20329.769904\n", + " 4 NaN IDN 0 2014 20329.769904\n", + " 5 NaN IDN 0 2015 
20329.769904\n", + " 6 NaN IDN 0 2016 20329.769904\n", + " 7 NaN IDN 0 2017 20329.769904\n", + " 8 NaN IDN 0 2018 20329.769904\n", + " 9 NaN IDN 0 2019 20329.769904\n", + " 10 NaN IDN 0 2020 20329.769904\n", + " 11 NaN IDN 0 2021 20329.769904\n", + " 12 NaN IDN 0 2022 20329.769904\n", + " 13 NaN IDN 0 2023 20329.769904\n", + " 14 NaN IDN 0 2024 20329.769904,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 PHL 1984 2010 11308.012211\n", + " 1 NaN PHL 1984 2011 11834.820278\n", + " 2 NaN PHL 1984 2012 11834.820278\n", + " 3 NaN PHL 1984 2013 11834.820278\n", + " 4 NaN PHL 1984 2014 11994.235708\n", + " 5 NaN PHL 1984 2015 12016.358811\n", + " 6 NaN PHL 1984 2016 12016.358811\n", + " 7 NaN PHL 1984 2017 12016.358811\n", + " 8 NaN PHL 1984 2018 20599.210965\n", + " 9 NaN PHL 1984 2019 20599.210965\n", + " 10 NaN PHL 1984 2020 20599.210965\n", + " 11 NaN PHL 1984 2021 20599.210965\n", + " 12 NaN PHL 1984 2022 20599.210965\n", + " 13 NaN PHL 1984 2023 20599.210965\n", + " 14 NaN PHL 1984 2024 20599.210965,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 1991 2010 238.631355\n", + " 1 1.0 PHL 1968 2010 10183.403985\n", + " 2 2.0 TWN 1994 2010 6633.618409\n", + " 3 NaN JPN 1991 2011 238.998532\n", + " 4 NaN PHL 1968 2011 10183.403985\n", + " 5 NaN TWN 1994 2011 6633.618409\n", + " 6 NaN JPN 1991 2012 238.998532\n", + " 7 NaN PHL 1968 2012 10183.403985\n", + " 8 NaN TWN 1994 2012 6640.994095\n", + " 9 NaN JPN 1991 2013 238.998532\n", + " 10 NaN PHL 1968 2013 10183.403985\n", + " 11 NaN TWN 1994 2013 6640.994095\n", + " 12 NaN JPN 1991 2014 238.998532\n", + " 13 NaN PHL 1968 2014 10183.403985\n", + " 14 NaN TWN 1994 2014 6640.994095\n", + " 15 NaN JPN 1991 2015 238.998532\n", + " 16 NaN PHL 1968 2015 10183.403985\n", + " 17 NaN TWN 1994 2015 6640.994095\n", + " 18 NaN JPN 1991 2016 238.998532\n", + " 19 NaN PHL 1968 2016 10183.403985\n", + " 20 NaN TWN 1994 2016 6640.994095\n", + " 21 NaN JPN 1991 2017 238.998532\n", + " 22 NaN PHL 1968 2017 10183.403985\n", 
+ " 23 NaN TWN 1994 2017 6640.994095\n", + " 24 NaN JPN 1991 2018 239.038701\n", + " 25 NaN PHL 1968 2018 12946.019426\n", + " 26 NaN TWN 1994 2018 6640.994095\n", + " 27 NaN JPN 1991 2019 239.038701\n", + " 28 NaN PHL 1968 2019 12946.019426\n", + " 29 NaN TWN 1994 2019 6640.994095\n", + " 30 NaN JPN 1991 2020 423.548328\n", + " 31 NaN PHL 1968 2020 12946.019426\n", + " 32 NaN TWN 1994 2020 6640.994095\n", + " 33 NaN JPN 1991 2021 423.643789\n", + " 34 NaN PHL 1968 2021 12946.019426\n", + " 35 NaN TWN 1994 2021 6640.994095\n", + " 36 NaN JPN 1991 2022 423.643789\n", + " 37 NaN PHL 1968 2022 12946.019426\n", + " 38 NaN TWN 1994 2022 6640.994095\n", + " 39 NaN JPN 1991 2023 423.643789\n", + " 40 NaN PHL 1968 2023 12946.019426\n", + " 41 NaN TWN 1994 2023 6640.994095\n", + " 42 NaN JPN 1991 2024 423.643789\n", + " 43 NaN PHL 1968 2024 12946.019426\n", + " 44 NaN TWN 1994 2024 6640.994095,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2009 2010 2489.696630\n", + " 1 1.0 JPN 1995 2010 990.514928\n", + " 2 2.0 KOR 2005 2010 943.415823\n", + " 3 3.0 PRK 2009 2010 22.686007\n", + " 4 4.0 TWN 2000 2010 462.603523\n", + " .. ... ... ... ... 
...\n", + " 70 NaN CHN 2009 2024 2489.696630\n", + " 71 NaN JPN 1995 2024 1869.146480\n", + " 72 NaN KOR 2005 2024 1005.936173\n", + " 73 NaN PRK 2009 2024 601.559205\n", + " 74 NaN TWN 2000 2024 475.782970\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2004 2010 810.451316\n", + " 1 1.0 JPN 1968 2010 27.090999\n", + " 2 2.0 KOR 2006 2010 15021.537169\n", + " 3 3.0 PRK 2009 2010 1772.515015\n", + " 4 NaN CHN 2004 2011 810.451316\n", + " 5 NaN JPN 1968 2011 27.090999\n", + " 6 NaN KOR 2006 2011 15035.786135\n", + " 7 NaN PRK 2009 2011 1772.515015\n", + " 8 NaN CHN 2004 2012 810.451316\n", + " 9 NaN JPN 1968 2012 27.090999\n", + " 10 NaN KOR 2006 2012 15044.718691\n", + " 11 NaN PRK 2009 2012 1772.515015\n", + " 12 NaN CHN 2004 2013 810.451316\n", + " 13 NaN JPN 1968 2013 27.090999\n", + " 14 NaN KOR 2006 2013 15115.803718\n", + " 15 NaN PRK 2009 2013 1772.515015\n", + " 16 NaN CHN 2004 2014 810.451316\n", + " 17 NaN JPN 1968 2014 27.090999\n", + " 18 NaN KOR 2006 2014 15115.965070\n", + " 19 NaN PRK 2009 2014 1772.515015\n", + " 20 NaN CHN 2004 2015 810.451316\n", + " 21 NaN JPN 1968 2015 30.404378\n", + " 22 NaN KOR 2006 2015 15121.489987\n", + " 23 NaN PRK 2009 2015 1772.515015\n", + " 24 NaN CHN 2004 2016 810.451316\n", + " 25 NaN JPN 1968 2016 32.386569\n", + " 26 NaN KOR 2006 2016 15138.644654\n", + " 27 NaN PRK 2009 2016 1772.515015\n", + " 28 NaN CHN 2004 2017 810.451316\n", + " 29 NaN JPN 1968 2017 32.386569\n", + " 30 NaN KOR 2006 2017 15141.119964\n", + " 31 NaN PRK 2009 2017 1772.515015\n", + " 32 NaN CHN 2004 2018 846.104517\n", + " 33 NaN JPN 1968 2018 32.386569\n", + " 34 NaN KOR 2006 2018 15141.505874\n", + " 35 NaN PRK 2009 2018 1772.515015\n", + " 36 NaN CHN 2004 2019 846.104517\n", + " 37 NaN JPN 1968 2019 33.341097\n", + " 38 NaN KOR 2006 2019 16217.434213\n", + " 39 NaN PRK 2009 2019 7546.626241\n", + " 40 NaN CHN 2004 2020 846.104517\n", + " 41 NaN JPN 1968 2020 33.341097\n", + " 42 NaN KOR 2006 
2020 16318.047587\n", + " 43 NaN PRK 2009 2020 7546.626241\n", + " 44 NaN CHN 2004 2021 846.104517\n", + " 45 NaN JPN 1968 2021 33.341097\n", + " 46 NaN KOR 2006 2021 16319.478024\n", + " 47 NaN PRK 2009 2021 7546.626241\n", + " 48 NaN CHN 2004 2022 846.104517\n", + " 49 NaN JPN 1968 2022 33.341097\n", + " 50 NaN KOR 2006 2022 16326.605361\n", + " 51 NaN PRK 2009 2022 7546.626241\n", + " 52 NaN CHN 2004 2023 846.104517\n", + " 53 NaN JPN 1968 2023 33.341097\n", + " 54 NaN KOR 2006 2023 16330.535373\n", + " 55 NaN PRK 2009 2023 7546.626241\n", + " 56 NaN CHN 2004 2024 846.104517\n", + " 57 NaN JPN 1968 2024 33.341097\n", + " 58 NaN KOR 2006 2024 16330.535373\n", + " 59 NaN PRK 2009 2024 7546.626241,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1992 2010 5877.217063\n", + " 1 1.0 RUS 1978 2010 18937.170896\n", + " 2 NaN CHN 1992 2011 8660.486873\n", + " 3 NaN RUS 1978 2011 19132.186000\n", + " 4 NaN CHN 1992 2012 8660.486873\n", + " 5 NaN RUS 1978 2012 19132.186000\n", + " 6 NaN CHN 1992 2013 8660.486873\n", + " 7 NaN RUS 1978 2013 19132.186000\n", + " 8 NaN CHN 1992 2014 8660.486873\n", + " 9 NaN RUS 1978 2014 19132.186000\n", + " 10 NaN CHN 1992 2015 10941.720388\n", + " 11 NaN RUS 1978 2015 21515.134096\n", + " 12 NaN CHN 1992 2016 10941.720388\n", + " 13 NaN RUS 1978 2016 21515.134096\n", + " 14 NaN CHN 1992 2017 11943.107250\n", + " 15 NaN RUS 1978 2017 22516.520958\n", + " 16 NaN CHN 1992 2018 13618.885349\n", + " 17 NaN RUS 1978 2018 22516.520958\n", + " 18 NaN CHN 1992 2019 13618.885349\n", + " 19 NaN RUS 1978 2019 22516.520958\n", + " 20 NaN CHN 1992 2020 13618.885349\n", + " 21 NaN RUS 1978 2020 22553.052549\n", + " 22 NaN CHN 1992 2021 13618.885349\n", + " 23 NaN RUS 1978 2021 22553.052549\n", + " 24 NaN CHN 1992 2022 13618.885349\n", + " 25 NaN RUS 1978 2022 22553.052549\n", + " 26 NaN CHN 1992 2023 13618.885349\n", + " 27 NaN RUS 1978 2023 22553.052549\n", + " 28 NaN CHN 1992 2024 13618.885349\n", + " 29 NaN RUS 1978 2024 22553.052549,\n", + 
" index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1999 2010 122647.111935\n", + " 1 NaN RUS 1999 2011 122647.111935\n", + " 2 NaN RUS 1999 2012 134523.040462\n", + " 3 NaN RUS 1999 2013 134523.040462\n", + " 4 NaN RUS 1999 2014 134523.040462\n", + " 5 NaN RUS 1999 2015 134523.040462\n", + " 6 NaN RUS 1999 2016 134523.040462\n", + " 7 NaN RUS 1999 2017 134523.040462\n", + " 8 NaN RUS 1999 2018 134524.568624\n", + " 9 NaN RUS 1999 2019 134524.568624\n", + " 10 NaN RUS 1999 2020 134524.568624\n", + " 11 NaN RUS 1999 2021 134524.568624\n", + " 12 NaN RUS 1999 2022 134524.568624\n", + " 13 NaN RUS 1999 2023 134524.568624\n", + " 14 NaN RUS 1999 2024 134524.568624,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2000 2010 101691.695728\n", + " 1 NaN RUS 2000 2011 101691.695728\n", + " 2 NaN RUS 2000 2012 101691.695728\n", + " 3 NaN RUS 2000 2013 101691.695728\n", + " 4 NaN RUS 2000 2014 101691.695728\n", + " 5 NaN RUS 2000 2015 101691.695728\n", + " 6 NaN RUS 2000 2016 101691.695728\n", + " 7 NaN RUS 2000 2017 101691.695728\n", + " 8 NaN RUS 2000 2018 101691.695728\n", + " 9 NaN RUS 2000 2019 101691.695728\n", + " 10 NaN RUS 2000 2020 101691.695728\n", + " 11 NaN RUS 2000 2021 101691.695728\n", + " 12 NaN RUS 2000 2022 101691.695728\n", + " 13 NaN RUS 2000 2023 101691.695728\n", + " 14 NaN RUS 2000 2024 101691.695728,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2007 2010 3077.377168\n", + " 1 NaN AUS 2007 2011 3083.286100\n", + " 2 NaN AUS 2007 2012 3083.985631\n", + " 3 NaN AUS 2007 2013 3086.912814\n", + " 4 NaN AUS 2007 2014 3457.133817\n", + " 5 NaN AUS 2007 2015 3472.969273\n", + " 6 NaN AUS 2007 2016 3473.745815\n", + " 7 NaN AUS 2007 2017 3477.725945\n", + " 8 NaN AUS 2007 2018 3488.217923\n", + " 9 NaN AUS 2007 2019 3488.217923\n", + " 10 NaN AUS 2007 2020 3488.539587\n", + " 11 NaN AUS 2007 2021 3537.635436\n", + " 12 NaN AUS 2007 2022 3537.794507\n", + " 13 NaN AUS 2007 2023 3537.794507\n", + " 14 NaN AUS 2007 2024 3537.794507,\n", + " 
index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1985 2010 150969.444063\n", + " 1 NaN AUS 1985 2011 174648.230819\n", + " 2 NaN AUS 1985 2012 175948.472957\n", + " 3 NaN AUS 1985 2013 176279.774863\n", + " 4 NaN AUS 1985 2014 195853.564670\n", + " 5 NaN AUS 1985 2015 205880.542815\n", + " 6 NaN AUS 1985 2016 208101.637964\n", + " 7 NaN AUS 1985 2017 208205.330819\n", + " 8 NaN AUS 1985 2018 208692.067312\n", + " 9 NaN AUS 1985 2019 208698.780303\n", + " 10 NaN AUS 1985 2020 208831.685945\n", + " 11 NaN AUS 1985 2021 249052.432073\n", + " 12 NaN AUS 1985 2022 249056.610215\n", + " 13 NaN AUS 1985 2023 249056.610215\n", + " 14 NaN AUS 1985 2024 249056.610215,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1994 2010 71851.913863\n", + " 1 NaN AUS 1994 2011 80813.203441\n", + " 2 NaN AUS 1994 2012 165018.927131\n", + " 3 NaN AUS 1994 2013 166254.583893\n", + " 4 NaN AUS 1994 2014 169487.910322\n", + " 5 NaN AUS 1994 2015 200319.228971\n", + " 6 NaN AUS 1994 2016 202868.921788\n", + " 7 NaN AUS 1994 2017 202868.921788\n", + " 8 NaN AUS 1994 2018 202868.921788\n", + " 9 NaN AUS 1994 2019 202868.921788\n", + " 10 NaN AUS 1994 2020 202868.921788\n", + " 11 NaN AUS 1994 2021 203006.873845\n", + " 12 NaN AUS 1994 2022 203006.873845\n", + " 13 NaN AUS 1994 2023 203006.873845\n", + " 14 NaN AUS 1994 2024 203006.873845,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2009 2010 57911.056243\n", + " 1 1.0 IDN 0 2010 9685.113302\n", + " 2 NaN AUS 2009 2011 57987.405293\n", + " 3 NaN IDN 0 2011 9685.113302\n", + " 4 NaN AUS 2009 2012 64203.836953\n", + " 5 NaN IDN 0 2012 9685.113302\n", + " 6 NaN AUS 2009 2013 65630.695588\n", + " 7 NaN IDN 0 2013 9685.113302\n", + " 8 NaN AUS 2009 2014 67856.037928\n", + " 9 NaN IDN 0 2014 9685.113302\n", + " 10 NaN AUS 2009 2015 67856.037928\n", + " 11 NaN IDN 0 2015 9685.113302\n", + " 12 NaN AUS 2009 2016 89318.923410\n", + " 13 NaN IDN 0 2016 9826.551272\n", + " 14 NaN AUS 2009 2017 89318.923410\n", + " 15 NaN IDN 0 2017 
9826.551272\n", + " 16 NaN AUS 2009 2018 89318.923410\n", + " 17 NaN IDN 0 2018 9826.551272\n", + " 18 NaN AUS 2009 2019 89318.923410\n", + " 19 NaN IDN 0 2019 9826.551272\n", + " 20 NaN AUS 2009 2020 89318.923410\n", + " 21 NaN IDN 0 2020 9826.551272\n", + " 22 NaN AUS 2009 2021 89318.923410\n", + " 23 NaN IDN 0 2021 9826.551272\n", + " 24 NaN AUS 2009 2022 89598.038546\n", + " 25 NaN IDN 0 2022 9826.551272\n", + " 26 NaN AUS 2009 2023 89598.038546\n", + " 27 NaN IDN 0 2023 9826.551272\n", + " 28 NaN AUS 2009 2024 89598.038546\n", + " 29 NaN IDN 0 2024 9826.551272,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 66629.529063\n", + " 1 NaN IDN 0 2011 66629.529063\n", + " 2 NaN IDN 0 2012 66629.600413\n", + " 3 NaN IDN 0 2013 66629.600413\n", + " 4 NaN IDN 0 2014 66629.600413\n", + " 5 NaN IDN 0 2015 66679.642379\n", + " 6 NaN IDN 0 2016 66679.642379\n", + " 7 NaN IDN 0 2017 66689.641083\n", + " 8 NaN IDN 0 2018 66689.641083\n", + " 9 NaN IDN 0 2019 66689.641083\n", + " 10 NaN IDN 0 2020 66689.641083\n", + " 11 NaN IDN 0 2021 66689.641083\n", + " 12 NaN IDN 0 2022 66689.641083\n", + " 13 NaN IDN 0 2023 66689.641083\n", + " 14 NaN IDN 0 2024 66689.641083,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 PLW 0 2010 49.819595\n", + " 1 NaN PLW 0 2011 49.819595\n", + " 2 NaN PLW 0 2012 49.819595\n", + " 3 NaN PLW 0 2013 49.819595\n", + " 4 NaN PLW 0 2014 49.819595\n", + " 5 NaN PLW 0 2015 49.819595\n", + " 6 NaN PLW 0 2016 49.819595\n", + " 7 NaN PLW 0 2017 49.819595\n", + " 8 NaN PLW 0 2018 49.819595\n", + " 9 NaN PLW 0 2019 49.819595\n", + " 10 NaN PLW 0 2020 49.819595\n", + " 11 NaN PLW 0 2021 49.819595\n", + " 12 NaN PLW 0 2022 49.819595\n", + " 13 NaN PLW 0 2023 49.819595\n", + " 14 NaN PLW 0 2024 49.819595,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 2004 2010 10722.468244\n", + " 1 NaN JPN 2004 2011 10919.471872\n", + " 2 NaN JPN 2004 2012 11448.190617\n", + " 3 NaN JPN 2004 2013 11828.218864\n", + " 4 NaN JPN 2004 2014 
12327.433574\n", + " 5 NaN JPN 2004 2015 12613.544219\n", + " 6 NaN JPN 2004 2016 12938.150377\n", + " 7 NaN JPN 2004 2017 13172.547904\n", + " 8 NaN JPN 2004 2018 13891.188774\n", + " 9 NaN JPN 2004 2019 14038.902218\n", + " 10 NaN JPN 2004 2020 14802.842061\n", + " 11 NaN JPN 2004 2021 14840.008919\n", + " 12 NaN JPN 2004 2022 14984.609571\n", + " 13 NaN JPN 2004 2023 14984.609571\n", + " 14 NaN JPN 2004 2024 14984.609571,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 2008 2010 3366.313918\n", + " 1 1.0 JPN 1974 2010 25724.856726\n", + " 2 2.0 KOR 2009 2010 23.713762\n", + " 3 3.0 PRK 2009 2010 15.138745\n", + " 4 4.0 RUS 1984 2010 7197.332878\n", + " .. ... ... ... ... ...\n", + " 70 NaN CHN 2008 2024 4415.002339\n", + " 71 NaN JPN 1974 2024 34907.102553\n", + " 72 NaN KOR 2009 2024 41.502045\n", + " 73 NaN PRK 2009 2024 22.614207\n", + " 74 NaN RUS 1984 2024 8245.518749\n", + " \n", + " [75 rows x 5 columns],\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 CHN 1987 2010 7572.080034\n", + " 1 1.0 RUS 1986 2010 66369.355363\n", + " 2 NaN CHN 1987 2011 7772.284818\n", + " 3 NaN RUS 1986 2011 66369.355363\n", + " 4 NaN CHN 1987 2012 8493.816352\n", + " 5 NaN RUS 1986 2012 67090.886897\n", + " 6 NaN CHN 1987 2013 8808.047158\n", + " 7 NaN RUS 1986 2013 67548.271837\n", + " 8 NaN CHN 1987 2014 8808.047158\n", + " 9 NaN RUS 1986 2014 68332.896722\n", + " 10 NaN CHN 1987 2015 8808.047158\n", + " 11 NaN RUS 1986 2015 81118.999310\n", + " 12 NaN CHN 1987 2016 8853.551049\n", + " 13 NaN RUS 1986 2016 81164.503201\n", + " 14 NaN CHN 1987 2017 8853.551049\n", + " 15 NaN RUS 1986 2017 82109.600187\n", + " 16 NaN CHN 1987 2018 8853.551049\n", + " 17 NaN RUS 1986 2018 82204.404927\n", + " 18 NaN CHN 1987 2019 9811.417333\n", + " 19 NaN RUS 1986 2019 83162.271211\n", + " 20 NaN CHN 1987 2020 9811.417333\n", + " 21 NaN RUS 1986 2020 83162.271211\n", + " 22 NaN CHN 1987 2021 9811.417333\n", + " 23 NaN RUS 1986 2021 83162.271211\n", + " 24 NaN CHN 1987 2022 
9811.417333\n", + " 25 NaN RUS 1986 2022 83162.271211\n", + " 26 NaN CHN 1987 2023 9811.417333\n", + " 27 NaN RUS 1986 2023 83162.271211\n", + " 28 NaN CHN 1987 2024 9811.417333\n", + " 29 NaN RUS 1986 2024 83162.271211,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1990 2010 58122.027336\n", + " 1 NaN RUS 1990 2011 58122.027336\n", + " 2 NaN RUS 1990 2012 58122.027336\n", + " 3 NaN RUS 1990 2013 60359.690164\n", + " 4 NaN RUS 1990 2014 60359.690164\n", + " 5 NaN RUS 1990 2015 60359.690164\n", + " 6 NaN RUS 1990 2016 60359.690164\n", + " 7 NaN RUS 1990 2017 60359.690164\n", + " 8 NaN RUS 1990 2018 60359.690164\n", + " 9 NaN RUS 1990 2019 60359.690164\n", + " 10 NaN RUS 1990 2020 60359.690164\n", + " 11 NaN RUS 1990 2021 60359.690164\n", + " 12 NaN RUS 1990 2022 60359.690164\n", + " 13 NaN RUS 1990 2023 60359.690164\n", + " 14 NaN RUS 1990 2024 60359.690164,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 75798.363858\n", + " 1 NaN RUS 1996 2011 75798.363858\n", + " 2 NaN RUS 1996 2012 75798.363858\n", + " 3 NaN RUS 1996 2013 75798.363858\n", + " 4 NaN RUS 1996 2014 75798.363858\n", + " 5 NaN RUS 1996 2015 75798.363858\n", + " 6 NaN RUS 1996 2016 75798.363858\n", + " 7 NaN RUS 1996 2017 75798.363858\n", + " 8 NaN RUS 1996 2018 75798.363858\n", + " 9 NaN RUS 1996 2019 75798.363858\n", + " 10 NaN RUS 1996 2020 75798.363858\n", + " 11 NaN RUS 1996 2021 75798.363858\n", + " 12 NaN RUS 1996 2022 75798.363858\n", + " 13 NaN RUS 1996 2023 75798.363858\n", + " 14 NaN RUS 1996 2024 75798.363858,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 10725.784664\n", + " 1 NaN RUS 1996 2011 10725.784664\n", + " 2 NaN RUS 1996 2012 10725.784664\n", + " 3 NaN RUS 1996 2013 10725.784664\n", + " 4 NaN RUS 1996 2014 10725.784664\n", + " 5 NaN RUS 1996 2015 10725.784664\n", + " 6 NaN RUS 1996 2016 10725.784664\n", + " 7 NaN RUS 1996 2017 10725.784664\n", + " 8 NaN RUS 1996 2018 10725.784664\n", + " 9 NaN RUS 1996 2019 10725.784664\n", + 
" 10 NaN RUS 1996 2020 10725.784664\n", + " 11 NaN RUS 1996 2021 10725.784664\n", + " 12 NaN RUS 1996 2022 10725.784664\n", + " 13 NaN RUS 1996 2023 10725.784664\n", + " 14 NaN RUS 1996 2024 10725.784664,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2014 0.355296\n", + " 1 NaN ATA None 2015 0.355296\n", + " 2 NaN ATA None 2016 0.355296\n", + " 3 NaN ATA None 2017 0.355296\n", + " 4 NaN ATA None 2018 0.355296\n", + " 5 NaN ATA None 2019 0.355296\n", + " 6 NaN ATA None 2020 0.355296\n", + " 7 NaN ATA None 2021 0.355296\n", + " 8 NaN ATA None 2022 0.355296\n", + " 9 NaN ATA None 2023 0.355296\n", + " 10 NaN ATA None 2024 0.355296,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1999 2010 56760.633803\n", + " 1 NaN AUS 1999 2011 59310.402250\n", + " 2 NaN AUS 1999 2012 62169.383134\n", + " 3 NaN AUS 1999 2013 65119.609013\n", + " 4 NaN AUS 1999 2014 66592.951800\n", + " 5 NaN AUS 1999 2015 67985.359594\n", + " 6 NaN AUS 1999 2016 68524.132291\n", + " 7 NaN AUS 1999 2017 68609.471597\n", + " 8 NaN AUS 1999 2018 71337.360720\n", + " 9 NaN AUS 1999 2019 71487.514741\n", + " 10 NaN AUS 1999 2020 71595.688432\n", + " 11 NaN AUS 1999 2021 78476.557470\n", + " 12 NaN AUS 1999 2022 78479.283039\n", + " 13 NaN AUS 1999 2023 78479.283039\n", + " 14 NaN AUS 1999 2024 78479.283039,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1992 2010 48540.865377\n", + " 1 NaN AUS 1992 2011 55042.862936\n", + " 2 NaN AUS 1992 2012 56037.827923\n", + " 3 NaN AUS 1992 2013 56204.505355\n", + " 4 NaN AUS 1992 2014 56675.813615\n", + " 5 NaN AUS 1992 2015 60652.651102\n", + " 6 NaN AUS 1992 2016 63970.018420\n", + " 7 NaN AUS 1992 2017 66579.358041\n", + " 8 NaN AUS 1992 2018 66676.062715\n", + " 9 NaN AUS 1992 2019 67633.711372\n", + " 10 NaN AUS 1992 2020 71563.304097\n", + " 11 NaN AUS 1992 2021 80137.397997\n", + " 12 NaN AUS 1992 2022 80286.371048\n", + " 13 NaN AUS 1992 2023 80286.371048\n", + " 14 NaN AUS 1992 2024 80286.371048,\n", + " index iso_3 
STATUS_YR year area\n", + " 0 0.0 AUS 2005 2010 41268.867069\n", + " 1 NaN AUS 2005 2011 42789.291584\n", + " 2 NaN AUS 2005 2012 43029.534771\n", + " 3 NaN AUS 2005 2013 45046.299294\n", + " 4 NaN AUS 2005 2014 47819.428161\n", + " 5 NaN AUS 2005 2015 48882.390394\n", + " 6 NaN AUS 2005 2016 52624.336196\n", + " 7 NaN AUS 2005 2017 59279.429330\n", + " 8 NaN AUS 2005 2018 62176.190435\n", + " 9 NaN AUS 2005 2019 63086.780100\n", + " 10 NaN AUS 2005 2020 66785.584340\n", + " 11 NaN AUS 2005 2021 69549.401664\n", + " 12 NaN AUS 2005 2022 69647.153070\n", + " 13 NaN AUS 2005 2023 69647.153070\n", + " 14 NaN AUS 2005 2024 69647.153070,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2008 2010 10244.065325\n", + " 1 1.0 IDN 2006 2010 6250.326588\n", + " 2 2.0 PNG 1998 2010 15572.611923\n", + " 3 NaN AUS 2008 2011 15656.436847\n", + " 4 NaN IDN 2006 2011 6250.326588\n", + " 5 NaN PNG 1998 2011 15572.611923\n", + " 6 NaN AUS 2008 2012 21628.616041\n", + " 7 NaN IDN 2006 2012 6250.326588\n", + " 8 NaN PNG 1998 2012 15572.611923\n", + " 9 NaN AUS 2008 2013 26576.961875\n", + " 10 NaN IDN 2006 2013 6250.326588\n", + " 11 NaN PNG 1998 2013 15572.611923\n", + " 12 NaN AUS 2008 2014 27313.506610\n", + " 13 NaN IDN 2006 2014 6250.326588\n", + " 14 NaN PNG 1998 2014 15572.611923\n", + " 15 NaN AUS 2008 2015 27313.506610\n", + " 16 NaN IDN 2006 2015 6250.326588\n", + " 17 NaN PNG 1998 2015 15572.611923\n", + " 18 NaN AUS 2008 2016 32166.143623\n", + " 19 NaN IDN 2006 2016 6250.326588\n", + " 20 NaN PNG 1998 2016 15572.611923\n", + " 21 NaN AUS 2008 2017 35019.811621\n", + " 22 NaN IDN 2006 2017 6250.326588\n", + " 23 NaN PNG 1998 2017 17714.768781\n", + " 24 NaN AUS 2008 2018 37143.844630\n", + " 25 NaN IDN 2006 2018 6250.326588\n", + " 26 NaN PNG 1998 2018 17714.768781\n", + " 27 NaN AUS 2008 2019 37143.844630\n", + " 28 NaN IDN 2006 2019 6250.326588\n", + " 29 NaN PNG 1998 2019 17714.768781\n", + " 30 NaN AUS 2008 2020 37143.844630\n", + " 31 NaN IDN 2006 2020 
6250.326588\n", + " 32 NaN PNG 1998 2020 17714.768781\n", + " 33 NaN AUS 2008 2021 37143.938320\n", + " 34 NaN IDN 2006 2021 6250.326588\n", + " 35 NaN PNG 1998 2021 17714.768781\n", + " 36 NaN AUS 2008 2022 39698.728647\n", + " 37 NaN IDN 2006 2022 6250.326588\n", + " 38 NaN PNG 1998 2022 17714.768781\n", + " 39 NaN AUS 2008 2023 42061.815669\n", + " 40 NaN IDN 2006 2023 6250.326588\n", + " 41 NaN PNG 1998 2023 17714.768781\n", + " 42 NaN AUS 2008 2024 42061.815669\n", + " 43 NaN IDN 2006 2024 6250.326588\n", + " 44 NaN PNG 1998 2024 17714.768781,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 IDN 0 2010 6247.929808\n", + " 1 1.0 PNG 1976 2010 3005.927400\n", + " 2 NaN IDN 0 2011 6247.929808\n", + " 3 NaN PNG 1976 2011 3005.927400\n", + " 4 NaN IDN 0 2012 6247.929808\n", + " 5 NaN PNG 1976 2012 3005.927400\n", + " 6 NaN IDN 0 2013 6247.929808\n", + " 7 NaN PNG 1976 2013 3005.927400\n", + " 8 NaN IDN 0 2014 6247.929808\n", + " 9 NaN PNG 1976 2014 3005.927400\n", + " 10 NaN IDN 0 2015 6247.929808\n", + " 11 NaN PNG 1976 2015 3005.927400\n", + " 12 NaN IDN 0 2016 6247.929808\n", + " 13 NaN PNG 1976 2016 3005.927400\n", + " 14 NaN IDN 0 2017 6247.929808\n", + " 15 NaN PNG 1976 2017 3005.927400\n", + " 16 NaN IDN 0 2018 6247.929808\n", + " 17 NaN PNG 1976 2018 3005.927400\n", + " 18 NaN IDN 0 2019 6247.929808\n", + " 19 NaN PNG 1976 2019 3005.927400\n", + " 20 NaN IDN 0 2020 6247.929808\n", + " 21 NaN PNG 1976 2020 3005.927400\n", + " 22 NaN IDN 0 2021 6247.929808\n", + " 23 NaN PNG 1976 2021 3005.927400\n", + " 24 NaN IDN 0 2022 6247.929808\n", + " 25 NaN PNG 1976 2022 3005.927400\n", + " 26 NaN IDN 0 2023 6247.929808\n", + " 27 NaN PNG 1976 2023 3005.927400\n", + " 28 NaN IDN 0 2024 6247.929808\n", + " 29 NaN PNG 1976 2024 3005.927400,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 0 2010 22.960559\n", + " 1 NaN USA 0 2011 22.960559\n", + " 2 NaN USA 0 2012 22.960559\n", + " 3 NaN USA 0 2013 22.960559\n", + " 4 NaN USA 0 2014 22.960559\n", + " 5 NaN 
USA 0 2015 22.960559\n", + " 6 NaN USA 0 2016 22.960559\n", + " 7 NaN USA 0 2017 22.960559\n", + " 8 NaN USA 0 2018 22.960559\n", + " 9 NaN USA 0 2019 22.960559\n", + " 10 NaN USA 0 2020 22.960559\n", + " 11 NaN USA 0 2021 22.960559\n", + " 12 NaN USA 0 2022 22.960559\n", + " 13 NaN USA 0 2023 22.960559\n", + " 14 NaN USA 0 2024 22.960559,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 1983 2010 3.451504\n", + " 1 1.0 USA 2000 2010 1.041859\n", + " 2 NaN JPN 1983 2011 3.456137\n", + " 3 NaN USA 2000 2011 1.041859\n", + " 4 NaN JPN 1983 2012 3.456137\n", + " 5 NaN USA 2000 2012 1.041859\n", + " 6 NaN JPN 1983 2013 3.456137\n", + " 7 NaN USA 2000 2013 1.041859\n", + " 8 NaN JPN 1983 2014 3.456137\n", + " 9 NaN USA 2000 2014 1.041859\n", + " 10 NaN JPN 1983 2015 3.456137\n", + " 11 NaN USA 2000 2015 1.041859\n", + " 12 NaN JPN 1983 2016 3.456137\n", + " 13 NaN USA 2000 2016 1.041859\n", + " 14 NaN JPN 1983 2017 3.456137\n", + " 15 NaN USA 2000 2017 1.041859\n", + " 16 NaN JPN 1983 2018 3.456137\n", + " 17 NaN USA 2000 2018 1.041859\n", + " 18 NaN JPN 1983 2019 3.456137\n", + " 19 NaN USA 2000 2019 1.041859\n", + " 20 NaN JPN 1983 2020 3.456137\n", + " 21 NaN USA 2000 2020 1.041859\n", + " 22 NaN JPN 1983 2021 3.456137\n", + " 23 NaN USA 2000 2021 1.041859\n", + " 24 NaN JPN 1983 2022 3.456137\n", + " 25 NaN USA 2000 2022 1.041859\n", + " 26 NaN JPN 1983 2023 3.456137\n", + " 27 NaN USA 2000 2023 1.041859\n", + " 28 NaN JPN 1983 2024 3.456137\n", + " 29 NaN USA 2000 2024 1.041859,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 2009 2010 23.079001\n", + " 1 NaN JPN 2009 2011 28.046599\n", + " 2 NaN JPN 2009 2012 28.046599\n", + " 3 NaN JPN 2009 2013 28.046599\n", + " 4 NaN JPN 2009 2014 28.046599\n", + " 5 NaN JPN 2009 2015 28.046599\n", + " 6 NaN JPN 2009 2016 28.046599\n", + " 7 NaN JPN 2009 2017 28.046599\n", + " 8 NaN JPN 2009 2018 28.046599\n", + " 9 NaN JPN 2009 2019 33.615978\n", + " 10 NaN JPN 2009 2020 33.615978\n", + " 11 NaN JPN 2009 
2021 33.615978\n", + " 12 NaN JPN 2009 2022 33.615978\n", + " 13 NaN JPN 2009 2023 33.615978\n", + " 14 NaN JPN 2009 2024 33.615978,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 1935 2010 18894.918233\n", + " 1 1.0 RUS 1997 2010 1135.315721\n", + " 2 NaN JPN 1935 2011 19136.820078\n", + " 3 NaN RUS 1997 2011 1135.315721\n", + " 4 NaN JPN 1935 2012 19551.884371\n", + " 5 NaN RUS 1997 2012 1135.315721\n", + " 6 NaN JPN 1935 2013 20140.241484\n", + " 7 NaN RUS 1997 2013 1135.315721\n", + " 8 NaN JPN 1935 2014 20533.452206\n", + " 9 NaN RUS 1997 2014 1135.315721\n", + " 10 NaN JPN 1935 2015 20824.123034\n", + " 11 NaN RUS 1997 2015 1135.315721\n", + " 12 NaN JPN 1935 2016 21006.512739\n", + " 13 NaN RUS 1997 2016 1135.315721\n", + " 14 NaN JPN 1935 2017 22268.408619\n", + " 15 NaN RUS 1997 2017 1135.315721\n", + " 16 NaN JPN 1935 2018 23671.588260\n", + " 17 NaN RUS 1997 2018 1135.315721\n", + " 18 NaN JPN 1935 2019 23767.487300\n", + " 19 NaN RUS 1997 2019 1135.315721\n", + " 20 NaN JPN 1935 2020 23770.238612\n", + " 21 NaN RUS 1997 2020 1135.315721\n", + " 22 NaN JPN 1935 2021 23983.634040\n", + " 23 NaN RUS 1997 2021 1135.315721\n", + " 24 NaN JPN 1935 2022 23983.634040\n", + " 25 NaN RUS 1997 2022 1135.315721\n", + " 26 NaN JPN 1935 2023 23983.634040\n", + " 27 NaN RUS 1997 2023 1135.315721\n", + " 28 NaN JPN 1935 2024 23983.634040\n", + " 29 NaN RUS 1997 2024 1135.315721,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 2007 2010 509.868647\n", + " 1 1.0 RUS 1988 2010 8858.191906\n", + " 2 NaN JPN 2007 2011 510.851169\n", + " 3 NaN RUS 1988 2011 8858.191906\n", + " 4 NaN JPN 2007 2012 515.127264\n", + " 5 NaN RUS 1988 2012 8858.191906\n", + " 6 NaN JPN 2007 2013 523.932264\n", + " 7 NaN RUS 1988 2013 8858.191906\n", + " 8 NaN JPN 2007 2014 532.473654\n", + " 9 NaN RUS 1988 2014 8858.191906\n", + " 10 NaN JPN 2007 2015 626.857855\n", + " 11 NaN RUS 1988 2015 8858.191906\n", + " 12 NaN JPN 2007 2016 709.291937\n", + " 13 NaN RUS 1988 2016 
8858.191906\n", + " 14 NaN JPN 2007 2017 709.291937\n", + " 15 NaN RUS 1988 2017 9011.830378\n", + " 16 NaN JPN 2007 2018 710.475568\n", + " 17 NaN RUS 1988 2018 9011.830378\n", + " 18 NaN JPN 2007 2019 710.475568\n", + " 19 NaN RUS 1988 2019 9011.830378\n", + " 20 NaN JPN 2007 2020 710.475568\n", + " 21 NaN RUS 1988 2020 9011.830378\n", + " 22 NaN JPN 2007 2021 710.475568\n", + " 23 NaN RUS 1988 2021 9011.830378\n", + " 24 NaN JPN 2007 2022 710.475568\n", + " 25 NaN RUS 1988 2022 9011.830378\n", + " 26 NaN JPN 2007 2023 710.475568\n", + " 27 NaN RUS 1988 2023 9011.830378\n", + " 28 NaN JPN 2007 2024 710.475568\n", + " 29 NaN RUS 1988 2024 9011.830378,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1983 2010 29285.478366\n", + " 1 NaN RUS 1983 2011 29285.478366\n", + " 2 NaN RUS 1983 2012 29285.478366\n", + " 3 NaN RUS 1983 2013 29285.478366\n", + " 4 NaN RUS 1983 2014 29285.478366\n", + " 5 NaN RUS 1983 2015 29285.478366\n", + " 6 NaN RUS 1983 2016 29285.478366\n", + " 7 NaN RUS 1983 2017 29285.478366\n", + " 8 NaN RUS 1983 2018 29285.950224\n", + " 9 NaN RUS 1983 2019 29285.950224\n", + " 10 NaN RUS 1983 2020 29285.950224\n", + " 11 NaN RUS 1983 2021 29285.950224\n", + " 12 NaN RUS 1983 2022 29285.950224\n", + " 13 NaN RUS 1983 2023 29285.950224\n", + " 14 NaN RUS 1983 2024 29285.950224,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2002 2010 108695.544174\n", + " 1 NaN RUS 2002 2011 108695.544174\n", + " 2 NaN RUS 2002 2012 108695.544174\n", + " 3 NaN RUS 2002 2013 108695.544174\n", + " 4 NaN RUS 2002 2014 108695.544174\n", + " 5 NaN RUS 2002 2015 108695.544174\n", + " 6 NaN RUS 2002 2016 108695.544174\n", + " 7 NaN RUS 2002 2017 108695.544174\n", + " 8 NaN RUS 2002 2018 108695.544174\n", + " 9 NaN RUS 2002 2019 108695.544174\n", + " 10 NaN RUS 2002 2020 108695.544174\n", + " 11 NaN RUS 2002 2021 108695.544174\n", + " 12 NaN RUS 2002 2022 108695.544174\n", + " 13 NaN RUS 2002 2023 108695.544174\n", + " 14 NaN RUS 2002 2024 
108695.544174,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 18147.299578\n", + " 1 NaN RUS 1996 2011 18147.299578\n", + " 2 NaN RUS 1996 2012 18147.299578\n", + " 3 NaN RUS 1996 2013 18147.299578\n", + " 4 NaN RUS 1996 2014 18147.299578\n", + " 5 NaN RUS 1996 2015 18147.299578\n", + " 6 NaN RUS 1996 2016 18147.299578\n", + " 7 NaN RUS 1996 2017 18147.299578\n", + " 8 NaN RUS 1996 2018 18416.250469\n", + " 9 NaN RUS 1996 2019 18416.250469\n", + " 10 NaN RUS 1996 2020 18416.250469\n", + " 11 NaN RUS 1996 2021 18416.250469\n", + " 12 NaN RUS 1996 2022 18416.250469\n", + " 13 NaN RUS 1996 2023 18416.250469\n", + " 14 NaN RUS 1996 2024 18416.250469,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 140.649268\n", + " 1 NaN ATA None 2018 140.649268\n", + " 2 NaN ATA None 2019 140.649268\n", + " 3 NaN ATA None 2020 140.649268\n", + " 4 NaN ATA None 2021 140.649268\n", + " 5 NaN ATA None 2022 140.649268\n", + " 6 NaN ATA None 2023 140.649268\n", + " 7 NaN ATA None 2024 140.649268,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 6.720745e-12\n", + " 1 NaN ATA None 2018 6.720745e-12\n", + " 2 NaN ATA None 2019 6.720745e-12\n", + " 3 NaN ATA None 2020 6.720745e-12\n", + " 4 NaN ATA None 2021 6.720745e-12\n", + " 5 NaN ATA None 2022 6.720745e-12\n", + " 6 NaN ATA None 2023 6.720745e-12\n", + " 7 NaN ATA None 2024 6.720745e-12,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2000 2010 123.44873\n", + " 1 NaN AUS 2000 2011 123.44873\n", + " 2 NaN AUS 2000 2012 123.44873\n", + " 3 NaN AUS 2000 2013 123.44873\n", + " 4 NaN AUS 2000 2014 123.44873\n", + " 5 NaN AUS 2000 2015 123.44873\n", + " 6 NaN AUS 2000 2016 123.44873\n", + " 7 NaN AUS 2000 2017 123.44873\n", + " 8 NaN AUS 2000 2018 123.44873\n", + " 9 NaN AUS 2000 2019 123.44873\n", + " 10 NaN AUS 2000 2020 123.44873\n", + " 11 NaN AUS 2000 2021 123.44873\n", + " 12 NaN AUS 2000 2022 123.44873\n", + " 13 NaN AUS 2000 2023 123.44873\n", + " 14 NaN AUS 2000 
2024 123.44873,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2004 2010 269.650069\n", + " 1 NaN AUS 2004 2011 290.894514\n", + " 2 NaN AUS 2004 2012 290.894514\n", + " 3 NaN AUS 2004 2013 290.894514\n", + " 4 NaN AUS 2004 2014 307.719670\n", + " 5 NaN AUS 2004 2015 330.746791\n", + " 6 NaN AUS 2004 2016 353.352826\n", + " 7 NaN AUS 2004 2017 353.352826\n", + " 8 NaN AUS 2004 2018 475.563277\n", + " 9 NaN AUS 2004 2019 475.563277\n", + " 10 NaN AUS 2004 2020 475.563277\n", + " 11 NaN AUS 2004 2021 566.256145\n", + " 12 NaN AUS 2004 2022 566.256145\n", + " 13 NaN AUS 2004 2023 566.256145\n", + " 14 NaN AUS 2004 2024 566.256145,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 2001 2010 26832.875684\n", + " 1 NaN AUS 2001 2011 27939.771119\n", + " 2 NaN AUS 2001 2012 28360.172100\n", + " 3 NaN AUS 2001 2013 28580.766564\n", + " 4 NaN AUS 2001 2014 29061.187066\n", + " 5 NaN AUS 2001 2015 29476.079061\n", + " 6 NaN AUS 2001 2016 32424.653934\n", + " 7 NaN AUS 2001 2017 32628.543399\n", + " 8 NaN AUS 2001 2018 34081.352236\n", + " 9 NaN AUS 2001 2019 35870.539294\n", + " 10 NaN AUS 2001 2020 36943.504773\n", + " 11 NaN AUS 2001 2021 40420.801396\n", + " 12 NaN AUS 2001 2022 40791.615915\n", + " 13 NaN AUS 2001 2023 40791.615915\n", + " 14 NaN AUS 2001 2024 40791.615915,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1999 2010 3632.830773\n", + " 1 NaN AUS 1999 2011 3674.025933\n", + " 2 NaN AUS 1999 2012 3676.170427\n", + " 3 NaN AUS 1999 2013 3789.743653\n", + " 4 NaN AUS 1999 2014 4239.628923\n", + " 5 NaN AUS 1999 2015 4593.036664\n", + " 6 NaN AUS 1999 2016 5109.525836\n", + " 7 NaN AUS 1999 2017 5177.536618\n", + " 8 NaN AUS 1999 2018 5178.999538\n", + " 9 NaN AUS 1999 2019 5326.614837\n", + " 10 NaN AUS 1999 2020 5389.049250\n", + " 11 NaN AUS 1999 2021 5565.241606\n", + " 12 NaN AUS 1999 2022 5565.241606\n", + " 13 NaN AUS 1999 2023 5565.241606\n", + " 14 NaN AUS 1999 2024 5565.241606,\n", + " index iso_3 STATUS_YR year area\n", + 
" 0 0.0 PNG 1977 2010 424.916139\n", + " 1 1.0 SLB 2002 2010 325.918356\n", + " 2 NaN PNG 1977 2011 424.916139\n", + " 3 NaN SLB 2002 2011 327.856125\n", + " 4 NaN PNG 1977 2012 424.916139\n", + " 5 NaN SLB 2002 2012 327.856125\n", + " 6 NaN PNG 1977 2013 424.916139\n", + " 7 NaN SLB 2002 2013 327.856125\n", + " 8 NaN PNG 1977 2014 424.916139\n", + " 9 NaN SLB 2002 2014 327.856125\n", + " 10 NaN PNG 1977 2015 424.916139\n", + " 11 NaN SLB 2002 2015 327.856125\n", + " 12 NaN PNG 1977 2016 424.916139\n", + " 13 NaN SLB 2002 2016 327.856125\n", + " 14 NaN PNG 1977 2017 424.916139\n", + " 15 NaN SLB 2002 2017 327.856125\n", + " 16 NaN PNG 1977 2018 424.916139\n", + " 17 NaN SLB 2002 2018 327.856125\n", + " 18 NaN PNG 1977 2019 424.916139\n", + " 19 NaN SLB 2002 2019 343.262057\n", + " 20 NaN PNG 1977 2020 424.916139\n", + " 21 NaN SLB 2002 2020 343.262057\n", + " 22 NaN PNG 1977 2021 424.916139\n", + " 23 NaN SLB 2002 2021 392.014425\n", + " 24 NaN PNG 1977 2022 424.916139\n", + " 25 NaN SLB 2002 2022 397.764390\n", + " 26 NaN PNG 1977 2023 424.916139\n", + " 27 NaN SLB 2002 2023 397.764390\n", + " 28 NaN PNG 1977 2024 424.916139\n", + " 29 NaN SLB 2002 2024 397.764390,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 PNG 1976 2010 260.348914\n", + " 1 NaN PNG 1976 2011 260.348914\n", + " 2 NaN PNG 1976 2012 260.348914\n", + " 3 NaN PNG 1976 2013 260.348914\n", + " 4 NaN PNG 1976 2014 260.348914\n", + " 5 NaN PNG 1976 2015 260.348914\n", + " 6 NaN PNG 1976 2016 260.348914\n", + " 7 NaN PNG 1976 2017 260.348914\n", + " 8 NaN PNG 1976 2018 260.348914\n", + " 9 NaN PNG 1976 2019 260.348914\n", + " 10 NaN PNG 1976 2020 260.348914\n", + " 11 NaN PNG 1976 2021 260.348914\n", + " 12 NaN PNG 1976 2022 260.348914\n", + " 13 NaN PNG 1976 2023 260.348914\n", + " 14 NaN PNG 1976 2024 260.348914,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 JPN 2009 2010 1.436843\n", + " 1 NaN JPN 2009 2011 1.436843\n", + " 2 NaN JPN 2009 2012 1.436843\n", + " 3 NaN JPN 2009 2013 
1.436843\n", + " 4 NaN JPN 2009 2014 1.436843\n", + " 5 NaN JPN 2009 2015 1.436843\n", + " 6 NaN JPN 2009 2016 1.436843\n", + " 7 NaN JPN 2009 2017 1.436843\n", + " 8 NaN JPN 2009 2018 1.436843\n", + " 9 NaN JPN 2009 2019 1.436843\n", + " 10 NaN JPN 2009 2020 1.436843\n", + " 11 NaN JPN 2009 2021 1.436843\n", + " 12 NaN JPN 2009 2022 1.436843\n", + " 13 NaN JPN 2009 2023 1.436843\n", + " 14 NaN JPN 2009 2024 1.436843,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1987 2010 18521.652422\n", + " 1 NaN RUS 1987 2011 18521.652422\n", + " 2 NaN RUS 1987 2012 18521.652422\n", + " 3 NaN RUS 1987 2013 18521.652422\n", + " 4 NaN RUS 1987 2014 18521.652422\n", + " 5 NaN RUS 1987 2015 18521.652422\n", + " 6 NaN RUS 1987 2016 18521.652422\n", + " 7 NaN RUS 1987 2017 18521.652422\n", + " 8 NaN RUS 1987 2018 18521.652422\n", + " 9 NaN RUS 1987 2019 18521.652422\n", + " 10 NaN RUS 1987 2020 18521.652422\n", + " 11 NaN RUS 1987 2021 18521.652422\n", + " 12 NaN RUS 1987 2022 18521.652422\n", + " 13 NaN RUS 1987 2023 18521.652422\n", + " 14 NaN RUS 1987 2024 18521.652422,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 27391.891049\n", + " 1 NaN RUS 1996 2011 27391.891049\n", + " 2 NaN RUS 1996 2012 27397.964129\n", + " 3 NaN RUS 1996 2013 27397.964129\n", + " 4 NaN RUS 1996 2014 27397.964129\n", + " 5 NaN RUS 1996 2015 27397.964129\n", + " 6 NaN RUS 1996 2016 27397.964129\n", + " 7 NaN RUS 1996 2017 27397.964129\n", + " 8 NaN RUS 1996 2018 27397.964129\n", + " 9 NaN RUS 1996 2019 27398.765917\n", + " 10 NaN RUS 1996 2020 27398.765917\n", + " 11 NaN RUS 1996 2021 27398.765917\n", + " 12 NaN RUS 1996 2022 27398.765917\n", + " 13 NaN RUS 1996 2023 27398.765917\n", + " 14 NaN RUS 1996 2024 27398.765917,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 2002 2010 66837.806815\n", + " 1 NaN RUS 2002 2011 66837.806815\n", + " 2 NaN RUS 2002 2012 66837.806815\n", + " 3 NaN RUS 2002 2013 66837.806815\n", + " 4 NaN RUS 2002 2014 66837.806815\n", + " 5 
NaN RUS 2002 2015 66837.806815\n", + " 6 NaN RUS 2002 2016 66837.806815\n", + " 7 NaN RUS 2002 2017 66837.806815\n", + " 8 NaN RUS 2002 2018 66837.806815\n", + " 9 NaN RUS 2002 2019 66837.806815\n", + " 10 NaN RUS 2002 2020 66838.403210\n", + " 11 NaN RUS 2002 2021 66838.403210\n", + " 12 NaN RUS 2002 2022 66838.403210\n", + " 13 NaN RUS 2002 2023 66838.403210\n", + " 14 NaN RUS 2002 2024 66838.403210,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 755.917572\n", + " 1 NaN RUS 1996 2011 755.917572\n", + " 2 NaN RUS 1996 2012 755.917572\n", + " 3 NaN RUS 1996 2013 755.917572\n", + " 4 NaN RUS 1996 2014 755.917572\n", + " 5 NaN RUS 1996 2015 755.917572\n", + " 6 NaN RUS 1996 2016 755.917572\n", + " 7 NaN RUS 1996 2017 755.917572\n", + " 8 NaN RUS 1996 2018 779.516505\n", + " 9 NaN RUS 1996 2019 779.516505\n", + " 10 NaN RUS 1996 2020 779.516505\n", + " 11 NaN RUS 1996 2021 779.516505\n", + " 12 NaN RUS 1996 2022 779.516505\n", + " 13 NaN RUS 1996 2023 779.516505\n", + " 14 NaN RUS 1996 2024 779.516505,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 4020.571082\n", + " 1 NaN ATA None 2018 4020.571082\n", + " 2 NaN ATA None 2019 4020.571082\n", + " 3 NaN ATA None 2020 4020.571082\n", + " 4 NaN ATA None 2021 4020.571082\n", + " 5 NaN ATA None 2022 4020.571082\n", + " 6 NaN ATA None 2023 4020.571082\n", + " 7 NaN ATA None 2024 4020.571082,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 2551.699309\n", + " 1 NaN ATA None 2018 2551.699309\n", + " 2 NaN ATA None 2019 2551.699309\n", + " 3 NaN ATA None 2020 2551.699309\n", + " 4 NaN ATA None 2021 2551.699309\n", + " 5 NaN ATA None 2022 2551.699309\n", + " 6 NaN ATA None 2023 2551.699309\n", + " 7 NaN ATA None 2024 2551.699309,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1954 2010 11929.864672\n", + " 1 NaN NZL 1954 2011 11944.078209\n", + " 2 NaN NZL 1954 2012 11946.696258\n", + " 3 NaN NZL 1954 2013 11982.167637\n", + " 4 NaN NZL 1954 2014 
11985.247726\n", + " 5 NaN NZL 1954 2015 11986.740184\n", + " 6 NaN NZL 1954 2016 11991.175583\n", + " 7 NaN NZL 1954 2017 12011.686909\n", + " 8 NaN NZL 1954 2018 12017.709874\n", + " 9 NaN NZL 1954 2019 12031.241735\n", + " 10 NaN NZL 1954 2020 12031.397692\n", + " 11 NaN NZL 1954 2021 12031.397692\n", + " 12 NaN NZL 1954 2022 12031.397692\n", + " 13 NaN NZL 1954 2023 12031.397692\n", + " 14 NaN NZL 1954 2024 12031.397692,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1978 2010 21213.139246\n", + " 1 NaN NZL 1978 2011 21254.388354\n", + " 2 NaN NZL 1978 2012 21293.324251\n", + " 3 NaN NZL 1978 2013 21372.971529\n", + " 4 NaN NZL 1978 2014 21374.955864\n", + " 5 NaN NZL 1978 2015 21912.360108\n", + " 6 NaN NZL 1978 2016 21912.812550\n", + " 7 NaN NZL 1978 2017 21921.775953\n", + " 8 NaN NZL 1978 2018 22062.667429\n", + " 9 NaN NZL 1978 2019 22064.814191\n", + " 10 NaN NZL 1978 2020 22065.180300\n", + " 11 NaN NZL 1978 2021 22065.180300\n", + " 12 NaN NZL 1978 2022 22065.180300\n", + " 13 NaN NZL 1978 2023 22065.180300\n", + " 14 NaN NZL 1978 2024 22065.180300,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 AUS 1996 2010 6.901103\n", + " 1 NaN AUS 1996 2011 6.901103\n", + " 2 NaN AUS 1996 2012 6.901103\n", + " 3 NaN AUS 1996 2013 6.901103\n", + " 4 NaN AUS 1996 2014 6.901103\n", + " 5 NaN AUS 1996 2015 6.901103\n", + " 6 NaN AUS 1996 2016 6.901103\n", + " 7 NaN AUS 1996 2017 6.901103\n", + " 8 NaN AUS 1996 2018 6.901103\n", + " 9 NaN AUS 1996 2019 6.901103\n", + " 10 NaN AUS 1996 2020 6.901103\n", + " 11 NaN AUS 1996 2021 6.901103\n", + " 12 NaN AUS 1996 2022 6.901103\n", + " 13 NaN AUS 1996 2023 6.901103\n", + " 14 NaN AUS 1996 2024 6.901103,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FRA 2009 2010 2422.666191\n", + " 1 1.0 VUT 1995 2010 402.868009\n", + " 2 NaN FRA 2009 2011 2422.666191\n", + " 3 NaN VUT 1995 2011 402.868009\n", + " 4 NaN FRA 2009 2012 2511.165081\n", + " 5 NaN VUT 1995 2012 402.868009\n", + " 6 NaN FRA 2009 2013 
2511.165081\n", + " 7 NaN VUT 1995 2013 402.868009\n", + " 8 NaN FRA 2009 2014 2691.432523\n", + " 9 NaN VUT 1995 2014 402.868009\n", + " 10 NaN FRA 2009 2015 2691.432523\n", + " 11 NaN VUT 1995 2015 402.868009\n", + " 12 NaN FRA 2009 2016 2691.432523\n", + " 13 NaN VUT 1995 2016 402.868009\n", + " 14 NaN FRA 2009 2017 2691.432523\n", + " 15 NaN VUT 1995 2017 402.868009\n", + " 16 NaN FRA 2009 2018 2691.432523\n", + " 17 NaN VUT 1995 2018 402.868009\n", + " 18 NaN FRA 2009 2019 3513.757641\n", + " 19 NaN VUT 1995 2019 402.868009\n", + " 20 NaN FRA 2009 2020 3513.757641\n", + " 21 NaN VUT 1995 2020 402.868009\n", + " 22 NaN FRA 2009 2021 3513.757641\n", + " 23 NaN VUT 1995 2021 402.868009\n", + " 24 NaN FRA 2009 2022 3513.757641\n", + " 25 NaN VUT 1995 2022 402.868009\n", + " 26 NaN FRA 2009 2023 3513.757641\n", + " 27 NaN VUT 1995 2023 402.868009\n", + " 28 NaN FRA 2009 2024 3513.757641\n", + " 29 NaN VUT 1995 2024 402.868009,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 SLB 1998 2010 347.614259\n", + " 1 1.0 VUT 1994 2010 53.441800\n", + " 2 NaN SLB 1998 2011 347.614259\n", + " 3 NaN VUT 1994 2011 53.441800\n", + " 4 NaN SLB 1998 2012 347.614259\n", + " 5 NaN VUT 1994 2012 53.441800\n", + " 6 NaN SLB 1998 2013 347.614259\n", + " 7 NaN VUT 1994 2013 53.441800\n", + " 8 NaN SLB 1998 2014 347.614259\n", + " 9 NaN VUT 1994 2014 53.441800\n", + " 10 NaN SLB 1998 2015 347.614259\n", + " 11 NaN VUT 1994 2015 53.441800\n", + " 12 NaN SLB 1998 2016 347.614259\n", + " 13 NaN VUT 1994 2016 53.441800\n", + " 14 NaN SLB 1998 2017 347.614259\n", + " 15 NaN VUT 1994 2017 53.441800\n", + " 16 NaN SLB 1998 2018 347.614259\n", + " 17 NaN VUT 1994 2018 53.441800\n", + " 18 NaN SLB 1998 2019 347.614259\n", + " 19 NaN VUT 1994 2019 53.441800\n", + " 20 NaN SLB 1998 2020 347.614259\n", + " 21 NaN VUT 1994 2020 53.441800\n", + " 22 NaN SLB 1998 2021 347.614259\n", + " 23 NaN VUT 1994 2021 53.441800\n", + " 24 NaN SLB 1998 2022 347.614259\n", + " 25 NaN VUT 1994 2022 
53.441800\n", + " 26 NaN SLB 1998 2023 347.614259\n", + " 27 NaN VUT 1994 2023 53.441800\n", + " 28 NaN SLB 1998 2024 347.614259\n", + " 29 NaN VUT 1994 2024 53.441800,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 MHL 0 2010 0.085735\n", + " 1 NaN MHL 0 2011 0.085735\n", + " 2 NaN MHL 0 2012 4.935246\n", + " 3 NaN MHL 0 2013 4.935246\n", + " 4 NaN MHL 0 2014 4.935246\n", + " 5 NaN MHL 0 2015 4.935246\n", + " 6 NaN MHL 0 2016 4.935246\n", + " 7 NaN MHL 0 2017 4.935246\n", + " 8 NaN MHL 0 2018 4.935246\n", + " 9 NaN MHL 0 2019 4.935246\n", + " 10 NaN MHL 0 2020 4.935246\n", + " 11 NaN MHL 0 2021 4.935246\n", + " 12 NaN MHL 0 2022 4.935246\n", + " 13 NaN MHL 0 2023 4.935246\n", + " 14 NaN MHL 0 2024 4.935246,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 1674.279961\n", + " 1 NaN RUS 1996 2011 1674.279961\n", + " 2 NaN RUS 1996 2012 1674.279961\n", + " 3 NaN RUS 1996 2013 1674.279961\n", + " 4 NaN RUS 1996 2014 1674.279961\n", + " 5 NaN RUS 1996 2015 1674.279961\n", + " 6 NaN RUS 1996 2016 1674.279961\n", + " 7 NaN RUS 1996 2017 1674.279961\n", + " 8 NaN RUS 1996 2018 1674.279961\n", + " 9 NaN RUS 1996 2019 1674.279961\n", + " 10 NaN RUS 1996 2020 1674.279961\n", + " 11 NaN RUS 1996 2021 1674.279961\n", + " 12 NaN RUS 1996 2022 1674.279961\n", + " 13 NaN RUS 1996 2023 1674.279961\n", + " 14 NaN RUS 1996 2024 1674.279961,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1996 2010 37836.220963\n", + " 1 NaN RUS 1996 2011 37836.220963\n", + " 2 NaN RUS 1996 2012 37836.220963\n", + " 3 NaN RUS 1996 2013 37836.220963\n", + " 4 NaN RUS 1996 2014 37836.220963\n", + " 5 NaN RUS 1996 2015 37836.220963\n", + " 6 NaN RUS 1996 2016 37836.220963\n", + " 7 NaN RUS 1996 2017 37836.220963\n", + " 8 NaN RUS 1996 2018 37836.220963\n", + " 9 NaN RUS 1996 2019 37836.348432\n", + " 10 NaN RUS 1996 2020 37836.348432\n", + " 11 NaN RUS 1996 2021 37836.348432\n", + " 12 NaN RUS 1996 2022 37836.348432\n", + " 13 NaN RUS 1996 2023 37836.348432\n", + " 14 
NaN RUS 1996 2024 37836.348432,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1983 2010 3723.799612\n", + " 1 NaN RUS 1983 2011 3723.799612\n", + " 2 NaN RUS 1983 2012 3723.799612\n", + " 3 NaN RUS 1983 2013 3723.799612\n", + " 4 NaN RUS 1983 2014 3723.799612\n", + " 5 NaN RUS 1983 2015 3723.799612\n", + " 6 NaN RUS 1983 2016 3723.799612\n", + " 7 NaN RUS 1983 2017 3723.799612\n", + " 8 NaN RUS 1983 2018 3723.799612\n", + " 9 NaN RUS 1983 2019 3723.799612\n", + " 10 NaN RUS 1983 2020 3858.612339\n", + " 11 NaN RUS 1983 2021 3858.612339\n", + " 12 NaN RUS 1983 2022 3858.612339\n", + " 13 NaN RUS 1983 2023 3858.612339\n", + " 14 NaN RUS 1983 2024 3858.612339,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 3.240399\n", + " 1 NaN ATA None 2018 3.240399\n", + " 2 NaN ATA None 2019 3.240399\n", + " 3 NaN ATA None 2020 3.240399\n", + " 4 NaN ATA None 2021 3.240399\n", + " 5 NaN ATA None 2022 3.240399\n", + " 6 NaN ATA None 2023 3.240399\n", + " 7 NaN ATA None 2024 3.240399,\n", + " index iso_3 STATUS_YR year area\n", + " 0 NaN ATA None 2017 105.031405\n", + " 1 NaN ATA None 2018 105.031405\n", + " 2 NaN ATA None 2019 105.031405\n", + " 3 NaN ATA None 2020 105.031405\n", + " 4 NaN ATA None 2021 105.031405\n", + " 5 NaN ATA None 2022 105.031405\n", + " 6 NaN ATA None 2023 105.031405\n", + " 7 NaN ATA None 2024 105.031405,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1961 2010 229.909040\n", + " 1 NaN NZL 1961 2011 230.627407\n", + " 2 NaN NZL 1961 2012 230.841033\n", + " 3 NaN NZL 1961 2013 230.937009\n", + " 4 NaN NZL 1961 2014 233.622078\n", + " 5 NaN NZL 1961 2015 233.691310\n", + " 6 NaN NZL 1961 2016 235.607023\n", + " 7 NaN NZL 1961 2017 235.740879\n", + " 8 NaN NZL 1961 2018 235.827818\n", + " 9 NaN NZL 1961 2019 235.985887\n", + " 10 NaN NZL 1961 2020 235.985887\n", + " 11 NaN NZL 1961 2021 235.985887\n", + " 12 NaN NZL 1961 2022 235.985887\n", + " 13 NaN NZL 1961 2023 235.985887\n", + " 14 NaN NZL 1961 2024 
235.985887,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1989 2010 34221.938171\n", + " 1 NaN NZL 1989 2011 34317.349803\n", + " 2 NaN NZL 1989 2012 34356.351286\n", + " 3 NaN NZL 1989 2013 34376.968862\n", + " 4 NaN NZL 1989 2014 34474.258149\n", + " 5 NaN NZL 1989 2015 34639.578157\n", + " 6 NaN NZL 1989 2016 34675.855405\n", + " 7 NaN NZL 1989 2017 34776.471971\n", + " 8 NaN NZL 1989 2018 34825.385277\n", + " 9 NaN NZL 1989 2019 34827.541784\n", + " 10 NaN NZL 1989 2020 34827.855471\n", + " 11 NaN NZL 1989 2021 34827.855471\n", + " 12 NaN NZL 1989 2022 34827.855471\n", + " 13 NaN NZL 1989 2023 34827.855471\n", + " 14 NaN NZL 1989 2024 34827.855471,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 2004 2010 3762.053163\n", + " 1 NaN NZL 2004 2011 3768.581078\n", + " 2 NaN NZL 2004 2012 3782.502275\n", + " 3 NaN NZL 2004 2013 3798.571534\n", + " 4 NaN NZL 2004 2014 3822.229804\n", + " 5 NaN NZL 2004 2015 3845.510703\n", + " 6 NaN NZL 2004 2016 3848.049509\n", + " 7 NaN NZL 2004 2017 3855.346294\n", + " 8 NaN NZL 2004 2018 3859.749216\n", + " 9 NaN NZL 2004 2019 3868.286138\n", + " 10 NaN NZL 2004 2020 3868.829482\n", + " 11 NaN NZL 2004 2021 3868.829482\n", + " 12 NaN NZL 2004 2022 3868.829482\n", + " 13 NaN NZL 2004 2023 3868.829482\n", + " 14 NaN NZL 2004 2024 3868.829482,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 1987 2010 1792.841469\n", + " 1 NaN NZL 1987 2011 1795.386144\n", + " 2 NaN NZL 1987 2012 1799.883349\n", + " 3 NaN NZL 1987 2013 1802.806730\n", + " 4 NaN NZL 1987 2014 1804.135323\n", + " 5 NaN NZL 1987 2015 1804.351505\n", + " 6 NaN NZL 1987 2016 1805.762571\n", + " 7 NaN NZL 1987 2017 1809.872855\n", + " 8 NaN NZL 1987 2018 1810.105042\n", + " 9 NaN NZL 1987 2019 1810.873635\n", + " 10 NaN NZL 1987 2020 1810.879637\n", + " 11 NaN NZL 1987 2021 1810.879637\n", + " 12 NaN NZL 1987 2022 1810.879637\n", + " 13 NaN NZL 1987 2023 1810.879637\n", + " 14 NaN NZL 1987 2024 1810.879637,\n", + " index iso_3 STATUS_YR year 
area\n", + " 0 0.0 NZL 1976 2010 11252.256730\n", + " 1 NaN NZL 1976 2011 11262.201556\n", + " 2 NaN NZL 1976 2012 11830.638326\n", + " 3 NaN NZL 1976 2013 11837.901574\n", + " 4 NaN NZL 1976 2014 13704.416026\n", + " 5 NaN NZL 1976 2015 13827.969391\n", + " 6 NaN NZL 1976 2016 13839.207868\n", + " 7 NaN NZL 1976 2017 13845.458649\n", + " 8 NaN NZL 1976 2018 13849.503722\n", + " 9 NaN NZL 1976 2019 13864.549294\n", + " 10 NaN NZL 1976 2020 13864.633076\n", + " 11 NaN NZL 1976 2021 13864.633076\n", + " 12 NaN NZL 1976 2022 13864.633076\n", + " 13 NaN NZL 1976 2023 13864.633076\n", + " 14 NaN NZL 1976 2024 13864.633076,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 NZL 2001 2010 760.949814\n", + " 1 NaN NZL 2001 2011 762.327052\n", + " 2 NaN NZL 2001 2012 762.868834\n", + " 3 NaN NZL 2001 2013 763.489051\n", + " 4 NaN NZL 2001 2014 766.352066\n", + " 5 NaN NZL 2001 2015 822.068145\n", + " 6 NaN NZL 2001 2016 822.068145\n", + " 7 NaN NZL 2001 2017 822.747391\n", + " 8 NaN NZL 2001 2018 823.644393\n", + " 9 NaN NZL 2001 2019 824.503231\n", + " 10 NaN NZL 2001 2020 824.503231\n", + " 11 NaN NZL 2001 2021 824.503231\n", + " 12 NaN NZL 2001 2022 824.503231\n", + " 13 NaN NZL 2001 2023 824.503231\n", + " 14 NaN NZL 2001 2024 824.503231,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 FJI 2006 2010 431.262651\n", + " 1 NaN FJI 2006 2011 431.262651\n", + " 2 NaN FJI 2006 2012 431.262651\n", + " 3 NaN FJI 2006 2013 431.262651\n", + " 4 NaN FJI 2006 2014 431.262651\n", + " 5 NaN FJI 2006 2015 431.262651\n", + " 6 NaN FJI 2006 2016 431.262651\n", + " 7 NaN FJI 2006 2017 431.262651\n", + " 8 NaN FJI 2006 2018 431.262651\n", + " 9 NaN FJI 2006 2019 431.262651\n", + " 10 NaN FJI 2006 2020 431.262651\n", + " 11 NaN FJI 2006 2021 431.262651\n", + " 12 NaN FJI 2006 2022 431.262651\n", + " 13 NaN FJI 2006 2023 431.262651\n", + " 14 NaN FJI 2006 2024 431.262651,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 TUV 2003 2010 0.22623\n", + " 1 NaN TUV 2003 2011 
0.22623\n", + " 2 NaN TUV 2003 2012 0.22623\n", + " 3 NaN TUV 2003 2013 0.22623\n", + " 4 NaN TUV 2003 2014 0.22623\n", + " 5 NaN TUV 2003 2015 0.22623\n", + " 6 NaN TUV 2003 2016 0.22623\n", + " 7 NaN TUV 2003 2017 0.22623\n", + " 8 NaN TUV 2003 2018 0.22623\n", + " 9 NaN TUV 2003 2019 0.22623\n", + " 10 NaN TUV 2003 2020 0.22623\n", + " 11 NaN TUV 2003 2021 0.22623\n", + " 12 NaN TUV 2003 2022 0.22623\n", + " 13 NaN TUV 2003 2023 0.22623\n", + " 14 NaN TUV 2003 2024 0.22623,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 USA 1985 2010 2055.059082\n", + " 1 NaN USA 1985 2011 2055.059082\n", + " 2 NaN USA 1985 2012 2055.059082\n", + " 3 NaN USA 1985 2013 2055.059082\n", + " 4 NaN USA 1985 2014 2055.059082\n", + " 5 NaN USA 1985 2015 2055.059082\n", + " 6 NaN USA 1985 2016 2055.059082\n", + " 7 NaN USA 1985 2017 2055.059082\n", + " 8 NaN USA 1985 2018 2055.059082\n", + " 9 NaN USA 1985 2019 2055.059519\n", + " 10 NaN USA 1985 2020 2055.059519\n", + " 11 NaN USA 1985 2021 2055.059519\n", + " 12 NaN USA 1985 2022 2055.059519\n", + " 13 NaN USA 1985 2023 2055.059519\n", + " 14 NaN USA 1985 2024 2055.059519,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1971 2010 2840.773979\n", + " 1 NaN RUS 1971 2011 2840.773979\n", + " 2 NaN RUS 1971 2012 2840.773979\n", + " 3 NaN RUS 1971 2013 2840.773979\n", + " 4 NaN RUS 1971 2014 2840.773979\n", + " 5 NaN RUS 1971 2015 2840.773979\n", + " 6 NaN RUS 1971 2016 2840.773979\n", + " 7 NaN RUS 1971 2017 2840.773979\n", + " 8 NaN RUS 1971 2018 2840.773979\n", + " 9 NaN RUS 1971 2019 2840.773979\n", + " 10 NaN RUS 1971 2020 2840.773979\n", + " 11 NaN RUS 1971 2021 2840.773979\n", + " 12 NaN RUS 1971 2022 2840.773979\n", + " 13 NaN RUS 1971 2023 2840.773979\n", + " 14 NaN RUS 1971 2024 2840.773979,\n", + " index iso_3 STATUS_YR year area\n", + " 0 0.0 RUS 1975 2010 9612.744312\n", + " 1 NaN RUS 1975 2011 9612.744312\n", + " 2 NaN RUS 1975 2012 9612.744312\n", + " 3 NaN RUS 1975 2013 9612.744312\n", + " 4 NaN RUS 1975 
2014 9612.744312\n", + " 5 NaN RUS 1975 2015 12872.077226\n", + " 6 NaN RUS 1975 2016 12872.077226\n", + " 7 NaN RUS 1975 2017 12872.077226\n", + " 8 NaN RUS 1975 2018 12872.077226\n", + " 9 NaN RUS 1975 2019 12872.077226\n", + " 10 NaN RUS 1975 2020 12872.077226\n", + " 11 NaN RUS 1975 2021 12872.077226\n", + " 12 NaN RUS 1975 2022 12872.077226\n", + " 13 NaN RUS 1975 2023 12872.077226\n", + " 14 NaN RUS 1975 2024 12872.077226]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, { "cell_type": "code", "execution_count": 107, @@ -317,7 +10709,7 @@ " .agg({\"PA_DEF\": \"count\"})\n", " .rename(columns={\"PA_DEF\": \"count\"})\n", " .reset_index()\n", - " .pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\")\n", + " .pivot(index=\"GID_0\", columns=\"PA_DEF\", values=\"count\")\n", " .fillna(0)\n", " .reset_index()\n", " .rename(columns={\"0\": \"oecm\", \"1\": \"pa\"})\n", @@ -521,7 +10913,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/data/src/pipelines/processors.py b/data/src/pipelines/processors.py index 2d1db64e..9b7fff42 100644 --- a/data/src/pipelines/processors.py +++ b/data/src/pipelines/processors.py @@ -243,11 +243,8 @@ def clean_geometries(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: def collection_to_multipolygon(geometry_collection): """Convert collection of polygons to multipolygon.""" - print(type(geometry_collection.geoms)) geom_list = [ - geom - for geom in geometry_collection.geoms - if geom.geom_type == "Polygon" or geom.geom_type == "MultiPolygon" + geom for geom in geometry_collection.geoms if geom.geom_type in ("Polygon", "MultiPolygon") ] return unary_union(geom_list) From 40bc2612f8e3ae86621ec8cc63002b47a3fb222b Mon Sep 17 00:00:00 2001 From: sofia Date: Tue, 1 Oct 2024 18:12:28 +0200 Subject: [PATCH 09/16] processing of terrestrial coverage data 
--- data/notebooks/pipes_mock/intermediate.ipynb | 239 +- data/notebooks/pipes_mock/locations.ipynb | 112 +- data/notebooks/pipes_mock/precalc_sofia.ipynb | 2927 +++++++++++++---- 3 files changed, 2445 insertions(+), 833 deletions(-) diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index acb5bd28..101d0277 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -311,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -323,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -372,6 +372,7 @@ "output_type": "stream", "text": [ "Processing: /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/temp_preprocess/gadm_410-levels\n", + "Removed existing folder: /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/temp_preprocess/gadm_410-levels\n", "Unpacked /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/gadm_410-levels.zip to /home/sofia/dev/skytruth-30x30/data/data/gadm/raw/temp_preprocess/gadm_410-levels\n" ] } @@ -394,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -424,87 +425,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ - "dependency_to_parent = {\n", - " \"ABW\": (\"NLD\", \"Netherlands\"),\n", - " \"AIA\": (\"GBR\", \"United Kingdom\"),\n", - " \"ALA\": (\"FIN\", 
\"Finland\"),\n", - " \"ASM\": (\"USA\", \"United States\"),\n", - " \"ATF\": (\"FRA\", \"France\"),\n", - " \"BES\": (\"NLD\", \"Netherlands\"),\n", - " \"BLM\": (\"FRA\", \"France\"),\n", - " \"BMU\": (\"GBR\", \"United Kingdom\"),\n", - " \"BVT\": (\"NOR\", \"Norway\"),\n", - " \"CCK\": (\"AUS\", \"Australia\"),\n", - " \"COK\": (\"NZL\", \"New Zealand\"),\n", - " \"CUW\": (\"NLD\", \"Netherlands\"),\n", - " \"CXR\": (\"AUS\", \"Australia\"),\n", - " \"CYM\": (\"GBR\", \"United Kingdom\"),\n", - " \"FLK\": (\"GBR\", \"United Kingdom\"),\n", - " \"FRO\": (\"DNK\", \"Denmark\"),\n", - " \"GGY\": (\"GBR\", \"United Kingdom\"),\n", - " \"GLP\": (\"FRA\", \"France\"),\n", - " \"GRL\": (\"DNK\", \"Denmark\"),\n", - " \"GUF\": (\"FRA\", \"France\"),\n", - " \"GUM\": (\"USA\", \"United States\"),\n", - " \"HMD\": (\"AUS\", \"Australia\"),\n", - " \"IMN\": (\"GBR\", \"United Kingdom\"),\n", - " \"IOT\": (\"GBR\", \"United Kingdom\"),\n", - " \"JEY\": (\"GBR\", \"United Kingdom\"),\n", - " \"KGZ\": (\"KGZ\", \"Kyrgyzstan\"),\n", - " \"LAO\": (\"LAO\", \"Laos\"),\n", - " \"LIE\": (\"LIE\", \"Liechtenstein\"),\n", - " \"LSO\": (\"LSO\", \"Lesotho\"),\n", - " \"LUX\": (\"LUX\", \"Luxembourg\"),\n", - " \"MAF\": (\"FRA\", \"France\"),\n", - " \"MDA\": (\"MDA\", \"Moldova\"),\n", - " \"MKD\": (\"MKD\", \"North Macedonia\"),\n", - " \"MLI\": (\"MLI\", \"Mali\"),\n", - " \"MNG\": (\"MNG\", \"Mongolia\"),\n", - " \"MNP\": (\"USA\", \"United States\"),\n", - " \"MSR\": (\"GBR\", \"United Kingdom\"),\n", - " \"MTQ\": (\"FRA\", \"France\"),\n", - " \"MWI\": (\"MWI\", \"Malawi\"),\n", - " \"MYT\": (\"FRA\", \"France\"),\n", - " \"NCL\": (\"FRA\", \"France\"),\n", - " \"NER\": (\"NER\", \"Niger\"),\n", - " \"NFK\": (\"AUS\", \"Australia\"),\n", - " \"NIU\": (\"NZL\", \"New Zealand\"),\n", - " \"NPL\": (\"NPL\", \"Nepal\"),\n", - " \"PCN\": (\"GBR\", \"United Kingdom\"),\n", - " \"PRI\": (\"USA\", \"United States\"),\n", - " \"PRY\": (\"PRY\", \"Paraguay\"),\n", - " \"PYF\": (\"FRA\", 
\"France\"),\n", - " \"REU\": (\"FRA\", \"France\"),\n", - " \"RWA\": (\"RWA\", \"Rwanda\"),\n", - " \"SGS\": (\"GBR\", \"United Kingdom\"),\n", - " \"SHN\": (\"GBR\", \"United Kingdom\"),\n", - " \"SJM\": (\"NOR\", \"Norway\"),\n", - " \"SMR\": (\"SMR\", \"San Marino\"),\n", - " \"SPM\": (\"FRA\", \"France\"),\n", - " \"SRB\": (\"SRB\", \"Serbia\"),\n", - " \"SSD\": (\"SSD\", \"South Sudan\"),\n", - " \"SVK\": (\"SVK\", \"Slovakia\"),\n", - " \"SWZ\": (\"SWZ\", \"Eswatini\"),\n", - " \"SXM\": (\"NLD\", \"Netherlands\"),\n", - " \"TCA\": (\"GBR\", \"United Kingdom\"),\n", - " \"TCD\": (\"TCD\", \"Chad\"),\n", - " \"TJK\": (\"TJK\", \"Tajikistan\"),\n", - " \"TKL\": (\"NZL\", \"New Zealand\"),\n", - " \"UGA\": (\"UGA\", \"Uganda\"),\n", - " \"UMI\": (\"USA\", \"United States\"),\n", - " \"UZB\": (\"UZB\", \"Uzbekistan\"),\n", - " \"VAT\": (\"VAT\", \"Vatican City\"),\n", - " \"VGB\": (\"GBR\", \"United Kingdom\"),\n", - " \"VIR\": (\"USA\", \"United States\"),\n", - " \"WLF\": (\"FRA\", \"France\"),\n", - " \"ZMB\": (\"ZMB\", \"Zambia\"),\n", - " \"ZWE\": (\"ZWE\", \"Zimbabwe\"),\n", - "}\n", - "\n", "def update_gid_0_and_country(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", " \"\"\"\n", " Updates the GID_0 and COUNTRY values in the GeoDataFrame for dependent territories \n", @@ -516,6 +440,10 @@ " Returns:\n", " gpd.GeoDataFrame: The GeoDataFrame with updated 'GID_0' and 'COUNTRY' values for dependent territories.\n", " \"\"\"\n", + " # Load the dependency_to_parent mapping\n", + " with open(scripts_dir.joinpath('data_commons/data/dependency_to_parent.json'), 'r') as json_file:\n", + " dependency_to_parent = json.load(json_file)\n", + "\n", " # Map GID_0 to the updated values\n", " gdf['GID_0'] = gdf['GID_0'].map(lambda x: dependency_to_parent.get(x, (x, x))[0])\n", " \n", @@ -524,16 +452,24 @@ "\n", " return gdf\n", "\n", - "# Assuming your GeoDataFrame is called `gdf`\n", - "gdf_updated = update_gid_0_and_country(gdf)\n" + "\n", + "def add_translations(df, 
translations_csv_path):\n", + " translations_df = pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[])\n", + " \n", + " df = df.merge(translations_df[['code', 'name_es', 'name_fr']], left_on='GID_0', right_on='code', how='left')\n", + " \n", + " return df" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ + "# Assign territories to their parent countries\n", + "gdf_updated = update_gid_0_and_country(gdf)\n", + "\n", "# Dissolve by country\n", "gdf_updated = gdf_updated.dissolve(by='COUNTRY').reset_index()\n", "\n", @@ -543,14 +479,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# Download country translations\n", "working_folder = FileConventionHandler(pipe)\n", "input_path = working_folder.pipe_raw_path\n", - "input_path\n", "\n", "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", @@ -561,35 +496,14 @@ " blob_name=translations_csv_url,\n", " file=translations_csv_output,\n", " operation=\"r\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "def add_translations(df, translations_csv_path):\n", - " translations_df = pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[])\n", - " \n", - " df = df.merge(translations_df[['code', 'name_es', 'name_fr']], left_on='GID_0', right_on='code', how='left')\n", - " \n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ + ")\n", + "\n", "translations_path = input_path.joinpath('locations_translated.csv')" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -633,15 +547,6 @@ " \n", " \n", " 1\n", - " 
Akrotiri and Dhekelia\n", - " MULTIPOLYGON (((33.00764 34.62583, 33.00708 34...\n", - " XAD\n", - " 233.64\n", - " Akrotiri y Dhekelia\n", - " Akrotiri et Dhekelia\n", - " \n", - " \n", - " 2\n", " Albania\n", " MULTIPOLYGON (((19.27804 40.50524, 19.28189 40...\n", " ALB\n", @@ -650,7 +555,7 @@ " Albanie\n", " \n", " \n", - " 3\n", + " 2\n", " Algeria\n", " MULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7...\n", " DZA\n", @@ -659,7 +564,7 @@ " Algérie\n", " \n", " \n", - " 4\n", + " 3\n", " Andorra\n", " POLYGON ((1.61725 42.62406, 1.63334 42.62553, ...\n", " AND\n", @@ -668,6 +573,15 @@ " Andorre\n", " \n", " \n", + " 4\n", + " Angola\n", + " MULTIPOLYGON (((11.78636 -16.78001, 11.78478 -...\n", + " AGO\n", + " 1251701.39\n", + " Angola\n", + " Angola\n", + " \n", + " \n", " ...\n", " ...\n", " ...\n", @@ -677,7 +591,7 @@ " ...\n", " \n", " \n", - " 201\n", + " 199\n", " Vietnam\n", " MULTIPOLYGON (((103.46895 9.25602, 103.46736 9...\n", " VNM\n", @@ -686,7 +600,7 @@ " Vietnam\n", " \n", " \n", - " 202\n", + " 200\n", " Western Sahara\n", " MULTIPOLYGON (((-16.83569 22.15403, -16.83597 ...\n", " ESH\n", @@ -695,7 +609,7 @@ " Sahara occidental\n", " \n", " \n", - " 203\n", + " 201\n", " Yemen\n", " MULTIPOLYGON (((42.1618 15.03042, 42.16236 15....\n", " YEM\n", @@ -704,7 +618,7 @@ " Yémen\n", " \n", " \n", - " 204\n", + " 202\n", " Zambia\n", " POLYGON ((25.87834 -17.97218, 25.87034 -17.970...\n", " ZMB\n", @@ -713,7 +627,7 @@ " Zambie\n", " \n", " \n", - " 205\n", + " 203\n", " Zimbabwe\n", " POLYGON ((32.70425 -18.96022, 32.70537 -18.965...\n", " ZWE\n", @@ -723,59 +637,60 @@ " \n", " \n", "\n", - "

206 rows × 6 columns

\n", + "

204 rows × 6 columns

\n", "" ], "text/plain": [ - " COUNTRY geometry \\\n", - "0 Afghanistan MULTIPOLYGON (((63.61425 29.46993, 63.60868 29... \n", - "1 Akrotiri and Dhekelia MULTIPOLYGON (((33.00764 34.62583, 33.00708 34... \n", - "2 Albania MULTIPOLYGON (((19.27804 40.50524, 19.28189 40... \n", - "3 Algeria MULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7... \n", - "4 Andorra POLYGON ((1.61725 42.62406, 1.63334 42.62553, ... \n", - ".. ... ... \n", - "201 Vietnam MULTIPOLYGON (((103.46895 9.25602, 103.46736 9... \n", - "202 Western Sahara MULTIPOLYGON (((-16.83569 22.15403, -16.83597 ... \n", - "203 Yemen MULTIPOLYGON (((42.1618 15.03042, 42.16236 15.... \n", - "204 Zambia POLYGON ((25.87834 -17.97218, 25.87034 -17.970... \n", - "205 Zimbabwe POLYGON ((32.70425 -18.96022, 32.70537 -18.965... \n", + " COUNTRY geometry GID_0 \\\n", + "0 Afghanistan MULTIPOLYGON (((63.61425 29.46993, 63.60868 29... AFG \n", + "1 Albania MULTIPOLYGON (((19.27804 40.50524, 19.28189 40... ALB \n", + "2 Algeria MULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7... DZA \n", + "3 Andorra POLYGON ((1.61725 42.62406, 1.63334 42.62553, ... AND \n", + "4 Angola MULTIPOLYGON (((11.78636 -16.78001, 11.78478 -... AGO \n", + ".. ... ... ... \n", + "199 Vietnam MULTIPOLYGON (((103.46895 9.25602, 103.46736 9... VNM \n", + "200 Western Sahara MULTIPOLYGON (((-16.83569 22.15403, -16.83597 ... ESH \n", + "201 Yemen MULTIPOLYGON (((42.1618 15.03042, 42.16236 15.... YEM \n", + "202 Zambia POLYGON ((25.87834 -17.97218, 25.87034 -17.970... ZMB \n", + "203 Zimbabwe POLYGON ((32.70425 -18.96022, 32.70537 -18.965... ZWE \n", "\n", - " GID_0 area_km2 name_es name_fr \n", - "0 AFG 644050.28 Afganistán Afghanistan \n", - "1 XAD 233.64 Akrotiri y Dhekelia Akrotiri et Dhekelia \n", - "2 ALB 28689.62 Albania Albanie \n", - "3 DZA 2311455.23 Argelia Algérie \n", - "4 AND 450.35 Andorra Andorre \n", - ".. ... ... ... ... 
\n", - "201 VNM 330364.87 Vietnam Vietnam \n", - "202 ESH 267892.77 Sahara Occidental Sahara occidental \n", - "203 YEM 453741.18 Yemen Yémen \n", - "204 ZMB 753990.33 Zambia Zambie \n", - "205 ZWE 391234.88 Zimbabue Zimbabwe \n", + " area_km2 name_es name_fr \n", + "0 644050.28 Afganistán Afghanistan \n", + "1 28689.62 Albania Albanie \n", + "2 2311455.23 Argelia Algérie \n", + "3 450.35 Andorra Andorre \n", + "4 1251701.39 Angola Angola \n", + ".. ... ... ... \n", + "199 330364.87 Vietnam Vietnam \n", + "200 267892.77 Sahara Occidental Sahara occidental \n", + "201 453741.18 Yemen Yémen \n", + "202 753990.33 Zambia Zambie \n", + "203 391234.88 Zimbabue Zimbabwe \n", "\n", - "[206 rows x 6 columns]" + "[204 rows x 6 columns]" ] }, - "execution_count": 22, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Add translations for country names\n", "gdf_translated = add_translations(gdf_updated, translations_path).drop(columns=['code'])\n", "gdf_translated" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 206/206 [05:43<00:00, 1.67s/it]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 204/204 [05:58<00:00, 1.75s/it]\n" ] } ], @@ -785,7 +700,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -795,7 +710,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -805,7 +720,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ 
-1356,7 +1271,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Mpas protected planet intermediate terrestrial" + "### Pas protected planet intermediate terrestrial" ] }, { diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb index 5fa31e98..ba957244 100644 --- a/data/notebooks/pipes_mock/locations.ipynb +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -12,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -85,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -96,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -115,7 +124,7 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -137,11 +146,41 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "from typing import List\n", + "# # Download country translations\n", + "working_folder = FileConventionHandler(pipe_gadm)\n", + "input_path = working_folder.pipe_raw_path\n", + "input_path\n", + "\n", + "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", + "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", + "\n", + "# writeReadGCP(\n", + "# 
credentials=mysettings.GCS_KEYFILE_JSON,\n", + "# bucket_name=mysettings.GCS_BUCKET,\n", + "# blob_name=translations_csv_url,\n", + "# file=translations_csv_output,\n", + "# operation=\"r\",\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Union\n", "import pandera as pa\n", "from pandera.typing import Index, Series\n", "import pandas as pd\n", @@ -245,42 +284,53 @@ " pd.DataFrame: The DataFrame with the new combined column.\n", " \"\"\"\n", " df[new_col] = df[col1].combine_first(df[col2])\n", - " return df\n" + " return df\n", + "\n", + "def add_region_iso_2(\n", + " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", + ") -> pd.DataFrame | gpd.GeoDataFrame:\n", + " regions = pd.read_json(scripts_dir.joinpath(\"data_commons/data/regions_data2.json\"))\n", + "\n", + " def find_region_iso(iso: str) -> Union[str, None]:\n", + " filtered_regions = list(filter(lambda x: iso in x[\"country_iso_3s\"], regions.get(\"data\")))\n", + " return filtered_regions[0][\"region_iso\"] if len(filtered_regions) > 0 else None\n", + "\n", + " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))\n", + "\n", + "def add_location_name_2(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame:\n", + " with open(scripts_dir.joinpath('data_commons/data/iso_map2.json'), 'r') as f:\n", + " iso_map = json.load(f)\n", + "\n", + " def get_name(iso):\n", + " test = iso_map.get(iso, np.nan)\n", + " return test\n", + "\n", + " return df.assign(name=df.iso.apply(get_name))\n" ] }, { - "cell_type": "code", - "execution_count": 6, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# # Download country translations\n", - "working_folder = FileConventionHandler(pipe_gadm)\n", - "input_path = working_folder.pipe_raw_path\n", - "input_path\n", + "#### Note: 
gadm includes some extra iso codes that had to be included in the regions_data.json (provided by protected planet) to process the terrestrial stats:\n", "\n", - "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", - "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", + "'XCA': Caspian Sea, included in Asia & Pacific region\n", "\n", - "# writeReadGCP(\n", - "# credentials=mysettings.GCS_KEYFILE_JSON,\n", - "# bucket_name=mysettings.GCS_BUCKET,\n", - "# blob_name=translations_csv_url,\n", - "# file=translations_csv_output,\n", - "# operation=\"r\",\n", - "# )" + "'XKO': Kosovo, included in Europe region\n", + "\n", + "'ZNC': Northern Cyprus, included in Europe region" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3347624/1577571524.py:35: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", + "/tmp/ipykernel_3513234/673975959.py:35: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n" ] } @@ -324,11 +374,11 @@ " gpd.read_file(pipe_gadm_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", " .rename(columns={\"GID_0\": \"iso\", 'area_km2': 'AREA_KM2'})\n", " .pipe(add_envelope)\n", - " .pipe(add_region_iso, 'iso')\n", + " .pipe(add_region_iso_2, 'iso') # add_region_iso_2 is used instead of add_region_iso because gadm includes new iso codes\n", " .pipe(calculate_gadm_area)\n", " .pipe(add_bbox)\n", " .pipe(add_groups_and_members_land)\n", - " .pipe(add_location_name)\n", + " .pipe(add_location_name_2)\n", " .pipe(add_translations, translations_csv_output)\n", " .rename(\n", " columns={\n", @@ -413,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 10, + 
"execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -425,7 +475,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb index 6aececfd..39944b86 100644 --- a/data/notebooks/pipes_mock/precalc_sofia.ipynb +++ b/data/notebooks/pipes_mock/precalc_sofia.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -26,6 +26,17 @@ "import json\n", "import dotenv\n", "import os\n", + "import logging\n", + "from typing import Tuple, List\n", + "import sys\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import asyncio\n", + "from tqdm.asyncio import tqdm\n", + "from itertools import product\n", + "from shapely.geometry import box\n", "\n", "dotenv.load_dotenv()\n", "\n", @@ -78,17 +89,22 @@ " extract_wdpaid_mpaatlas,\n", " simplify_async,\n", " process_tpa_data,\n", + " get_matches,\n", + " repair_geometry, \n", + " arrange_dimensions, \n", ")\n", + "from pipelines.utils import background\n", "\n", "logging.basicConfig(level=logging.DEBUG)\n", "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", - "logging.getLogger(\"fiona\").setLevel(logging.WARNING)" + "logging.getLogger(\"fiona\").setLevel(logging.WARNING)\n", + "logger = logging.getLogger(\"notebook\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 225, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -158,7 +174,7 @@ " \n", " return df\n", "\n", - "def add_pa_oecm_percentages(df):\n", + "def add_mpa_oecm_percentages(df):\n", " # Calculate the 
total protectedAreasCount for each year and iso_3\n", " total_counts = df.groupby(['year', 'iso_3'])['protectedAreasCount'].transform('sum')\n", "\n", @@ -180,13 +196,23 @@ "\n", " return final_df\n", "\n", - "def calculate_coverage_percentage_mpa(df):\n", - " if 'total_marine_area' in df.columns:\n", - " df['coverage'] = (df['protected_area'] / df['total_marine_area']) * 100\n", - " elif 'total_terrestrial_area' in df.columns:\n", - " df['coverage'] = (df['protected_area'] / df['total_terrestrial_area']) * 100\n", - " else:\n", - " df['coverage'] = np.nan\n", + "def calculate_pa_def_percentages(df: pd.DataFrame, iso_col: str = \"iso_3\") -> pd.DataFrame:\n", + " \"\"\"\n", + " Calculate the percentages for each PA_DEF value.\n", + "\n", + " Parameters:\n", + " df (pd.DataFrame): The DataFrame containing the cumulative counts of PA_DEF values.\n", + " iso_col (str): The column name for the iso_3 values. Default is \"iso_3\".\n", + "\n", + " Returns:\n", + " pd.DataFrame: A DataFrame with the percentages of PA_DEF values for each iso_3 and each year.\n", + " \"\"\"\n", + " \n", + " df['protected_areas_count'] = df['0'] + df['1']\n", + " df['oecms'] = (df['0'] / df['protected_areas_count']) * 100\n", + " df['pas'] = (df['1'] / df['protected_areas_count']) * 100\n", + "\n", + " df = df.drop(columns=['0', '1'], errors='ignore')\n", "\n", " return df\n", "\n", @@ -196,9 +222,9 @@ "\n", "def calculate_coverage_percentage_pa(df):\n", " if 'total_marine_area' in df.columns:\n", - " df['coverage'] = (df['area_km2'] / df['total_marine_area']) * 100\n", + " df['coverage'] = (df['protected_area'] / df['total_marine_area']) * 100\n", " elif 'total_terrestrial_area' in df.columns:\n", - " df['coverage'] = (df['area_km2'] / df['total_terrestrial_area']) * 100\n", + " df['coverage'] = (df['protected_area'] / df['total_terrestrial_area']) * 100\n", " else:\n", " df['coverage'] = np.nan\n", "\n", @@ -287,7 +313,11 @@ " pd.DataFrame: The processed DataFrame.\n", " \"\"\"\n", " if 
iso_column:\n", - " locations_code = pd.read_csv(scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"))\n", + " locations_code = pd.read_csv(\n", + " scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"),\n", + " keep_default_na=False,\n", + " na_values=[]\n", + " )\n", " df = df.join(locations_code.set_index(\"code\"), on=iso_column, how=\"left\")\n", " return (\n", " df.replace(rep_d)\n", @@ -304,6 +334,72 @@ ") -> pd.DataFrame | gpd.GeoDataFrame:\n", " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", "\n", + "def calculate_global_area_pa(\n", + " df: pd.DataFrame,\n", + " gby_col: list,\n", + " agg_ops: Dict[str, str] = {\"area\": \"sum\"},\n", + " iso_column=\"iso_3\",\n", + ") -> pd.DataFrame:\n", + " global_area = df.groupby([*gby_col]).agg(agg_ops).reset_index().assign(**{iso_column: \"GLOB\"})\n", + " return pd.concat([global_area, df], ignore_index=True)\n", + "\n", + "\n", + "def cumulative_pa_def_counts(df: pd.DataFrame, year_col: str = \"STATUS_YR\", pa_def_col: str = \"PA_DEF\", iso_col: str = \"iso_3\", start_year: int = 2010) -> pd.DataFrame:\n", + " \"\"\"\n", + " Calculate the cumulative number of PA_DEF values for each iso_3 and each year starting from a given year.\n", + "\n", + " Parameters:\n", + " df (pd.DataFrame): The DataFrame containing the data.\n", + " year_col (str): The column name for the year. Default is \"STATUS_YR\".\n", + " pa_def_col (str): The column name for the PA_DEF values. Default is \"PA_DEF\".\n", + " iso_col (str): The column name for the iso_3 values. Default is \"iso_3\".\n", + " start_year (int): The starting year for cumulative counts. 
Default is 2010.\n", + "\n", + " Returns:\n", + " pd.DataFrame: A DataFrame with cumulative counts of PA_DEF values for each iso_3 and each year.\n", + " \"\"\"\n", + " \n", + " results = []\n", + " years = sorted(df[year_col].unique())\n", + "\n", + " for year in years:\n", + " if year < start_year:\n", + " continue\n", + " cumulative_data = df[df[year_col] <= year]\n", + " pa_def_counts = cumulative_data.groupby([iso_col, pa_def_col]).size().unstack(fill_value=0)\n", + " pa_def_counts['year'] = year\n", + " results.append(pa_def_counts.reset_index())\n", + "\n", + " final_results = pd.concat(results, ignore_index=True)\n", + " final_results = final_results.fillna(0)\n", + " final_results = final_results.groupby([iso_col, 'year']).sum().reset_index()\n", + "\n", + " final_results['protected_areas_count'] = final_results['0'] + final_results['1']\n", + "\n", + " return final_results\n", + "\n", + "def calculate_stats_pa(\n", + " df: pd.DataFrame, gby_col: list, iso_column: str, ops: dict[str, str] = {\"protected_area\": \"sum\"}\n", + ") -> pd.DataFrame:\n", + " # Group by the specified columns and region, then aggregate\n", + " regions = (\n", + " df.groupby([*gby_col, \"region\"])\n", + " .agg(ops)\n", + " .reset_index()\n", + " .rename(columns={\"region\": iso_column})\n", + " )\n", + "\n", + " # Group by the specified columns and iso_column, then aggregate\n", + " countries = df.groupby([*gby_col, iso_column]).agg(ops).reset_index()\n", + "\n", + " # Concatenate the results\n", + " return pd.concat([regions, countries], ignore_index=True)\n", + "\n", + "def calculate_stats_cov_pa(df: pd.DataFrame, gby_col: list, iso_column: str):\n", + " return calculate_stats_pa(df, gby_col, iso_column, {\"protected_area\": \"sum\", \"protected_areas_count\": \"sum\", \"1\": \"sum\", \"0\": \"sum\"})\n", + "\n", + "\n", + "\n", "\n", "class NewProtectedAreaExtentSchema(pa.DataFrameModel):\n", " id: Index[int] = pa.Field(gt=0, coerce=True)\n", @@ -348,6 +444,200 @@ "\n" ] }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Code for pa terrestrial processing\n", + "\n", + "def split_by_year(\n", + " gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", + ") -> List[gpd.GeoDataFrame]:\n", + " \"\"\"Split data by year. relevant for MPA data.(coverage indicator)\"\"\"\n", + " prior_2010 = (\n", + " gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + " .dissolve(\n", + " by=[\"iso_3\"],\n", + " )\n", + " .assign(year=2010)\n", + " .reset_index()\n", + " )\n", + "\n", + " after_2010 = (\n", + " gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + " .rename(columns={\"STATUS_YR\": \"year\"})\n", + " )\n", + " return [prior_2010, after_2010]\n", + "\n", + "\n", + "def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", + " \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", + " minx, miny, maxx, maxy = bounds\n", + " x = np.arange(minx, maxx, cell_size)\n", + " y = np.arange(miny, maxy, cell_size)\n", + " polygons = [\n", + " {\n", + " \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", + " \"cell_id\": f\"{i}_{j}\",\n", + " }\n", + " for i, j in product(x, y)\n", + " ]\n", + " return gpd.GeoDataFrame(polygons)\n", + "\n", + "\n", + "def subdivide_grid(\n", + " grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", + ") -> List:\n", + " subdivided_elements = []\n", + " for grid_element in grid_gdf.geometry:\n", + " candidates = get_matches(grid_element, gdf)\n", + " density = len(candidates)\n", + " if density > max_complexity:\n", + " \n", + " subdivision_cellsize = max_cellsize / 2\n", + " # Subdivide the grid element recursively\n", + " subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", + " subdivided_elements.extend(\n", + " subdivide_grid(subgrid, gdf, subdivision_cellsize, 
max_complexity)\n", + " )\n", + " elif density > 0:\n", + " subdivided_elements.append(grid_element)\n", + "\n", + " return subdivided_elements\n", + "\n", + "\n", + "def create_density_based_grid(\n", + " gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000\n", + ") -> gpd.GeoDataFrame:\n", + " # Get the bounds of the GeoDataFrame\n", + " minx, miny, maxx, maxy = gdf.total_bounds\n", + "\n", + " # Create an initial grid\n", + " grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", + "\n", + " # Subdivide grid elements based on density and complexity\n", + " subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", + "\n", + " return gpd.GeoDataFrame(geometry=subdivided_elements)\n", + "\n", + "\n", + "# TODO: refactor this so old function mantains functionality for marine areas\n", + "\n", + "def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", + " result = []\n", + " gdf[\"already_processed\"] = False\n", + " for geometry in grid_gdf.geometry:\n", + " candidates = get_matches(geometry, gdf)\n", + " subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", + " gdf.loc[subset.index, \"already_processed\"] = True\n", + " if not subset.empty:\n", + " result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", + " return result\n", + "\n", + "\n", + "@background\n", + "def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", + " try:\n", + " bbox = df_large_chunk.total_bounds\n", + "\n", + " candidates = get_matches(box(*bbox), df_small.geometry)\n", + " if len(candidates) > 0:\n", + " subset = df_small.loc[candidates.index].clip(box(*bbox))\n", + "\n", + " result = (\n", + " gpd.overlay(df_large_chunk, subset).reset_index(drop=True)\n", + " .clip(subset.geometry)\n", + " .reset_index(drop=True)\n", + " )\n", + " result.geometry = result.geometry.apply(repair_geometry)\n", + " else:\n", + " result = 
gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", + " return result\n", + " except Exception as e:\n", + " logging.error(e)\n", + " return gpd.GeoDataFrame()\n", + " finally:\n", + " pbar.update(1)\n", + "\n", + "\n", + "async def spatial_join(\n", + " geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"Create spatial join between two GeoDataFrames.\"\"\"\n", + " # we build the spatial index for the larger GeoDataFrame\n", + " smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", + "\n", + " logger.info(f\"Processing {len(larger_dim)} elements\")\n", + "\n", + " grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", + "\n", + " logger.info(f\"grid created with {len(grid)} cells\")\n", + "\n", + " list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", + "\n", + " logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", + "\n", + " with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", + " new_df = await asyncio.gather(\n", + " *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", + " )\n", + "\n", + " return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", + "\n", + "\n", + "@background\n", + "def spatial_dissolve_chunk(geometry, gdf, pbar):\n", + " try:\n", + " logger.info(\"Processing chunk\")\n", + " candidates = get_matches(\n", + " geometry,\n", + " gdf.geometry,\n", + " )\n", + " subset = gdf.loc[candidates.index]\n", + "\n", + " result = pd.concat(\n", + " subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", + " ).copy()\n", + "\n", + " data_chunk = [\n", + " (\n", + " result[result[\"year\"] <= 2010]\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + " ]\n", + " for year in range(2011, 2025):\n", + " data_chunk.append(\n", + " 
result[result[\"year\"] <= year]\n", + " .dissolve(\n", + " by=[\"iso_3\"],\n", + " )\n", + " .assign(year=year)\n", + " .reset_index()\n", + " .pipe(calculate_area, \"area\", None)\n", + " .drop(columns=[\"geometry\"])\n", + " )\n", + "\n", + " return pd.concat(data_chunk, ignore_index=True)\n", + " except Exception as e:\n", + " logging.error(e)\n", + " return gpd.GeoDataFrame()\n", + " finally:\n", + " pbar.update(1)\n", + "\n", + "async def process_grid(gdf):\n", + " grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", + " logger.info(f\"grid created with {grid_gdf.shape[0]} cells\")\n", + "\n", + " with tqdm(total=grid_gdf.shape[0], desc=\"Processing grid elements\") as pbar:\n", + " jobs = [spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values]\n", + " result = await asyncio.gather(*jobs)\n", + " return result" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -372,7 +662,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -408,29 +698,14 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:21<00:00, 1.78s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:22<00:00, 1.78s/it]\n" ] } ], @@ -560,21 +835,446 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", 
"text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:31<00:00, 15.09s/it]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:30<00:00, 15.01s/it]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 281/282 [00:20<00:01, 1.84s/it]" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [10:13<00:00, 104.86s/it]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 0%|▎ | 1/425 [00:02<17:53, 2.53s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 0%|▋ | 2/425 [00:03<09:41, 1.38s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 1%|▉ | 3/425 [00:03<04:52, 1.44it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 1%|█▎ | 4/425 [00:03<03:47, 1.85it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + 
"INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 1%|█▌ | 5/425 [00:04<05:04, 1.38it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 1%|█▉ | 6/425 [00:04<04:24, 1.59it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 2%|██▏ | 7/425 [00:05<03:04, 2.27it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 2%|██▌ | 8/425 [00:05<02:39, 2.62it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 2%|██▊ | 9/425 [00:05<02:56, 2.36it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 3%|███▍ | 11/425 [00:06<02:40, 2.57it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 3%|███▋ | 12/425 [00:07<03:23, 2.03it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 3%|████▎ | 14/425 [00:07<02:46, 2.48it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 4%|████▋ | 15/425 [00:07<02:24, 2.83it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 4%|████▉ | 16/425 [00:08<03:33, 1.92it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 4%|█████▎ | 17/425 [00:09<05:07, 1.33it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 4%|█████▉ | 19/425 [00:10<04:37, 1.46it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 5%|██████▌ | 21/425 [00:11<03:56, 1.71it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + 
"Processing grid elements: 5%|██████▊ | 22/425 [00:12<03:42, 1.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 5%|███████▏ | 23/425 [00:12<03:46, 1.77it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 6%|███████▊ | 25/425 [00:12<02:31, 2.64it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 6%|████████ | 26/425 [00:14<03:54, 1.70it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 6%|████████▍ | 27/425 [00:14<03:23, 1.95it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 7%|█████████ | 29/425 [00:14<02:12, 3.00it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 7%|█████████▎ | 30/425 [00:15<02:33, 2.57it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 7%|█████████▋ | 31/425 [00:16<03:18, 1.98it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 8%|█████████▉ | 32/425 [00:16<03:00, 2.18it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 8%|██████████▏ | 33/425 [00:16<02:10, 3.01it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 8%|██████████▌ | 34/425 [00:17<02:42, 2.40it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 8%|███████████▏ | 36/425 [00:17<01:59, 3.25it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 9%|███████████▍ | 37/425 [00:18<02:17, 2.82it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 9%|███████████▊ | 38/425 [00:19<03:17, 1.96it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 10%|████████████▋ | 41/425 [00:19<01:57, 3.28it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 10%|█████████████ | 42/425 [00:20<03:16, 1.95it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
10%|█████████████▎ | 43/425 [00:21<03:17, 1.93it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 10%|█████████████▋ | 44/425 [00:21<02:43, 2.33it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 11%|█████████████▉ | 45/425 [00:22<02:59, 2.11it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 11%|██████████████▎ | 46/425 [00:22<02:22, 2.65it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 11%|██████████████▌ | 47/425 [00:23<03:19, 1.89it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 11%|██████████████▉ | 48/425 [00:23<03:23, 1.85it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 12%|███████████████▊ | 51/425 [00:25<04:05, 1.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 12%|████████████████▏ | 52/425 [00:27<04:44, 1.31it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 12%|████████████████▍ | 53/425 [00:27<04:51, 1.28it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 13%|████████████████▊ | 54/425 [00:29<06:33, 1.06s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 13%|█████████████████ | 55/425 [00:30<06:12, 1.01s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 13%|█████████████████▍ | 56/425 [00:33<08:23, 1.37s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 13%|█████████████████▋ | 57/425 [00:33<07:22, 1.20s/it]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 14%|██████████████████▎ | 59/425 [00:34<04:41, 1.30it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 14%|██████████████████▉ | 61/425 [00:35<04:23, 1.38it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 15%|███████████████████▎ | 62/425 [00:35<03:41, 1.64it/s]INFO:notebook:Processing 
chunk\n", + "Processing grid elements: 15%|███████████████████▌ | 63/425 [00:37<05:31, 1.09it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 15%|███████████████████▉ | 64/425 [00:37<04:24, 1.37it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 16%|████████████████████▍ | 66/425 [00:38<02:27, 2.43it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 16%|████████████████████▊ | 67/425 [00:38<02:02, 2.91it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 16%|█████████████████████ | 68/425 [00:40<05:02, 1.18it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 16%|█████████████████████▍ | 69/425 [00:40<04:11, 1.41it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 16%|█████████████████████▋ | 70/425 [00:41<04:23, 1.35it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 17%|██████████████████████ | 71/425 [00:42<03:53, 1.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 17%|██████████████████████▎ | 72/425 [00:43<04:17, 1.37it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 17%|██████████████████████▋ | 73/425 [00:44<04:53, 1.20it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 18%|███████████████████████▎ | 75/425 [00:45<03:44, 1.56it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 18%|███████████████████████▌ | 76/425 [00:45<04:05, 1.42it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 18%|███████████████████████▉ | 77/425 [00:46<04:00, 1.45it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 18%|████████████████████████▏ | 78/425 [00:46<03:24, 1.70it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 19%|████████████████████████▌ | 79/425 [00:47<03:03, 1.88it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 19%|████████████████████████▊ | 
80/425 [00:48<04:08, 1.39it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 19%|█████████████████████████▍ | 82/425 [00:48<02:52, 1.99it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 20%|█████████████████████████▊ | 83/425 [00:49<02:19, 2.45it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 20%|██████████████████████████ | 84/425 [00:49<02:46, 2.04it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 20%|██████████████████████████▋ | 86/425 [00:50<02:19, 2.43it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 21%|███████████████████████████▎ | 88/425 [00:51<02:45, 2.03it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 21%|███████████████████████████▋ | 89/425 [00:51<02:42, 2.07it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 21%|███████████████████████████▉ | 90/425 [00:52<02:47, 2.00it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 21%|████████████████████████████▎ | 91/425 [00:52<02:36, 2.13it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 22%|████████████████████████████▌ | 92/425 [00:52<02:09, 2.58it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 22%|████████████████████████████▉ | 93/425 [00:53<03:29, 1.58it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 22%|█████████████████████████████▏ | 94/425 [00:54<03:04, 1.79it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 22%|█████████████████████████████▌ | 95/425 [00:54<02:52, 1.91it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 23%|█████████████████████████████▊ | 96/425 [00:54<02:12, 2.48it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 23%|██████████████████████████████▏ | 97/425 [00:55<02:44, 1.99it/s]INFO:notebook:Processing chunk\n", + 
"Processing grid elements: 23%|██████████████████████████████▍ | 98/425 [00:55<02:24, 2.26it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 23%|██████████████████████████████▋ | 99/425 [00:55<01:41, 3.20it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 24%|██████████████████████████████▊ | 100/425 [00:56<02:39, 2.03it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 24%|███████████████████████████████▏ | 101/425 [00:57<02:22, 2.27it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 24%|███████████████████████████████▋ | 103/425 [00:57<01:40, 3.19it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 25%|████████████████████████████████▎ | 105/425 [00:57<00:49, 6.46it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 25%|████████████████████████████████▎ | 105/425 [00:57<00:49, 6.46it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 25%|████████████████████████████████▉ | 107/425 [00:59<02:59, 1.77it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 25%|█████████████████████████████████▎ | 108/425 [00:59<02:48, 1.88it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 26%|█████████████████████████████████▌ | 109/425 [01:00<03:22, 1.56it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 26%|██████████████████████████████████▏ | 111/425 [01:02<03:40, 1.43it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 27%|██████████████████████████████████▊ | 113/425 [01:02<02:38, 1.97it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 27%|███████████████████████████████████▏ | 114/425 [01:02<01:45, 2.94it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 27%|███████████████████████████████████▏ | 114/425 [01:02<01:45, 2.94it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing 
chunk\n", + "Processing grid elements: 27%|███████████████████████████████████▍ | 115/425 [01:02<01:38, 3.16it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 27%|███████████████████████████████████▊ | 116/425 [01:02<01:33, 3.30it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 28%|████████████████████████████████████ | 117/425 [01:03<01:38, 3.11it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 28%|████████████████████████████████████▎ | 118/425 [01:03<01:26, 3.56it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 28%|████████████████████████████████████▉ | 120/425 [01:04<02:49, 1.80it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 28%|█████████████████████████████████████▎ | 121/425 [01:05<03:31, 1.44it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 29%|█████████████████████████████████████▌ | 122/425 [01:06<03:07, 1.62it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 29%|█████████████████████████████████████▉ | 123/425 [01:06<02:46, 1.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 29%|██████████████████████████████████████▏ | 124/425 [01:06<02:12, 2.27it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 29%|██████████████████████████████████████▌ | 125/425 [01:06<01:45, 2.86it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 30%|███████████████████████████████████████▍ | 128/425 [01:07<01:17, 3.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 30%|███████████████████████████████████████▊ | 129/425 [01:07<01:36, 3.07it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 31%|████████████████████████████████████████▉ | 
133/425 [01:08<00:50, 5.78it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 32%|█████████████████████████████████████████▎ | 134/425 [01:08<01:07, 4.29it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 32%|█████████████████████████████████████████▌ | 135/425 [01:09<01:22, 3.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 32%|█████████████████████████████████████████▉ | 136/425 [01:09<01:15, 3.84it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 32%|██████████████████████████████████████████▏ | 137/425 [01:09<01:33, 3.09it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 33%|██████████████████████████████████████████▊ | 139/425 [01:11<02:09, 2.20it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 33%|███████████████████████████████████████████▏ | 140/425 [01:11<01:56, 2.44it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 33%|███████████████████████████████████████████▍ | 141/425 [01:11<01:43, 2.75it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 33%|███████████████████████████████████████████▊ | 142/425 [01:13<03:30, 1.34it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 34%|████████████████████████████████████████████ | 143/425 [01:14<03:37, 1.29it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 34%|████████████████████████████████████████████▍ | 144/425 [01:14<03:20, 1.40it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 34%|█████████████████████████████████████████████ | 146/425 [01:19<06:39, 1.43s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 35%|█████████████████████████████████████████████▎ | 147/425 [01:21<07:03, 1.52s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 35%|█████████████████████████████████████████████▌ | 148/425 [01:22<06:19, 
1.37s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 35%|█████████████████████████████████████████████▉ | 149/425 [01:27<10:58, 2.39s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 35%|██████████████████████████████████████████████▏ | 150/425 [01:28<08:46, 1.91s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 36%|██████████████████████████████████████████████▌ | 151/425 [01:30<09:01, 1.97s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 36%|██████████████████████████████████████████████▊ | 152/425 [01:32<08:56, 1.96s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 36%|███████████████████████████████████████████████▏ | 153/425 [01:34<09:18, 2.05s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 36%|███████████████████████████████████████████████▍ | 154/425 [01:36<08:56, 1.98s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 36%|███████████████████████████████████████████████▊ | 155/425 [01:37<07:18, 1.62s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 37%|████████████████████████████████████████████████ | 156/425 [01:37<05:47, 1.29s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 37%|████████████████████████████████████████████████▍ | 157/425 [01:38<05:10, 1.16s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 37%|█████████████████████████████████████████████████ | 159/425 [01:38<02:55, 1.52it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 38%|█████████████████████████████████████████████████▎ | 160/425 [01:38<02:26, 1.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 38%|█████████████████████████████████████████████████▋ | 161/425 [01:39<02:29, 1.77it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 38%|█████████████████████████████████████████████████▉ | 162/425 [01:39<01:55, 
2.28it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 38%|██████████████████████████████████████████████████▏ | 163/425 [01:40<02:51, 1.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 39%|██████████████████████████████████████████████████▌ | 164/425 [01:41<02:34, 1.69it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 39%|██████████████████████████████████████████████████▊ | 165/425 [01:41<02:13, 1.95it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 39%|███████████████████████████████████████████████████▏ | 166/425 [01:41<02:09, 2.00it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 39%|███████████████████████████████████████████████████▍ | 167/425 [01:44<05:04, 1.18s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 40%|███████████████████████████████████████████████████▊ | 168/425 [01:45<04:10, 1.02it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 40%|████████████████████████████████████████████████████ | 169/425 [01:46<05:04, 1.19s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 40%|████████████████████████████████████████████████████▍ | 170/425 [01:52<10:47, 2.54s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 40%|████████████████████████████████████████████████████▋ | 171/425 [01:53<08:12, 1.94s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 40%|█████████████████████████████████████████████████████ | 172/425 [01:54<07:48, 1.85s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 41%|█████████████████████████████████████████████████████▎ | 173/425 [01:57<09:01, 2.15s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 41%|█████████████████████████████████████████████████████▋ | 174/425 [02:01<10:54, 2.61s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 41%|█████████████████████████████████████████████████████▉ | 175/425 
[02:04<10:57, 2.63s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 41%|██████████████████████████████████████████████████████▏ | 176/425 [02:07<12:20, 2.97s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 42%|██████████████████████████████████████████████████████▌ | 177/425 [02:11<13:48, 3.34s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 42%|██████████████████████████████████████████████████████▊ | 178/425 [02:12<10:26, 2.53s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 42%|███████████████████████████████████████████████████████▏ | 179/425 [02:16<11:29, 2.80s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 42%|███████████████████████████████████████████████████████▍ | 180/425 [02:17<09:44, 2.38s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 43%|███████████████████████████████████████████████████████▊ | 181/425 [02:21<11:25, 2.81s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 43%|████████████████████████████████████████████████████████ | 182/425 [02:22<09:44, 2.40s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 43%|████████████████████████████████████████████████████████▍ | 183/425 [02:22<06:55, 1.72s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 43%|████████████████████████████████████████████████████████▋ | 184/425 [02:23<05:05, 1.27s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 44%|█████████████████████████████████████████████████████████ | 185/425 [02:24<05:29, 1.37s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 44%|█████████████████████████████████████████████████████████▎ | 186/425 [02:26<06:15, 1.57s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 44%|█████████████████████████████████████████████████████████▋ | 187/425 [02:28<06:31, 1.64s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
44%|█████████████████████████████████████████████████████████▉ | 188/425 [02:29<05:58, 1.51s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 44%|██████████████████████████████████████████████████████████▎ | 189/425 [02:31<05:48, 1.48s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 45%|██████████████████████████████████████████████████████████▌ | 190/425 [02:32<05:23, 1.37s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 45%|██████████████████████████████████████████████████████████▊ | 191/425 [02:32<04:01, 1.03s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 45%|███████████████████████████████████████████████████████████▏ | 192/425 [02:33<04:21, 1.12s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 45%|███████████████████████████████████████████████████████████▍ | 193/425 [02:34<04:16, 1.10s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 46%|███████████████████████████████████████████████████████████▊ | 194/425 [02:36<04:23, 1.14s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 46%|████████████████████████████████████████████████████████████ | 195/425 [02:36<03:32, 1.08it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 46%|████████████████████████████████████████████████████████████▍ | 196/425 [02:36<02:49, 1.35it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 46%|████████████████████████████████████████████████████████████▋ | 197/425 [02:37<02:10, 1.74it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 47%|█████████████████████████████████████████████████████████████ | 198/425 [02:37<02:03, 1.84it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 47%|█████████████████████████████████████████████████████████████▎ | 199/425 [02:38<02:19, 1.62it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 
47%|█████████████████████████████████████████████████████████████▉ | 201/425 [02:39<01:55, 1.94it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 48%|██████████████████████████████████████████████████████████████▌ | 203/425 [02:39<01:19, 2.80it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 48%|██████████████████████████████████████████████████████████████▉ | 204/425 [02:40<01:54, 1.92it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 48%|███████████████████████████████████████████████████████████████▏ | 205/425 [02:43<04:15, 1.16s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 48%|███████████████████████████████████████████████████████████████▍ | 206/425 [02:44<04:00, 1.10s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 49%|███████████████████████████████████████████████████████████████▊ | 207/425 [02:45<04:06, 1.13s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 49%|████████████████████████████████████████████████████████████████ | 208/425 [02:46<03:31, 1.03it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 49%|████████████████████████████████████████████████████████████████▍ | 209/425 [02:46<02:38, 1.36it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 49%|████████████████████████████████████████████████████████████████▋ | 210/425 [02:47<02:37, 1.36it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████ | 211/425 [02:47<02:25, 1.47it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████▎ | 212/425 [02:48<02:14, 1.58it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████▋ | 213/425 [02:48<02:04, 1.70it/s]INFO:notebook:Processing 
chunk\n", + "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████▉ | 214/425 [02:49<01:47, 1.97it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 51%|██████████████████████████████████████████████████████████████████▎ | 215/425 [02:49<01:28, 2.37it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 51%|██████████████████████████████████████████████████████████████████▌ | 216/425 [02:49<01:37, 2.14it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 51%|██████████████████████████████████████████████████████████████████▉ | 217/425 [02:52<04:17, 1.24s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 51%|███████████████████████████████████████████████████████████████████▏ | 218/425 [02:53<03:27, 1.00s/it]INFO:notebook:Processing chunk\n", + "Processing grid elements: 52%|███████████████████████████████████████████████████████████████████▌ | 219/425 [02:53<03:04, 1.12it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 52%|████████████████████████████████████████████████████████████████████ | 221/425 [02:54<01:46, 1.91it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 52%|████████████████████████████████████████████████████████████████████▍ | 222/425 [02:54<01:34, 2.16it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 53%|█████████████████████████████████████████████████████████████████████ | 224/425 [02:56<02:04, 1.62it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 53%|█████████████████████████████████████████████████████████████████████▎ | 225/425 [02:56<02:03, 1.62it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 53%|█████████████████████████████████████████████████████████████████████▋ | 226/425 [02:57<02:15, 1.47it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
53%|█████████████████████████████████████████████████████████████████████▉ | 227/425 [02:57<01:59, 1.66it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 54%|██████████████████████████████████████████████████████████████████████▌ | 229/425 [02:58<01:30, 2.17it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 54%|██████████████████████████████████████████████████████████████████████▉ | 230/425 [02:59<01:57, 1.66it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 54%|███████████████████████████████████████████████████████████████████████▏ | 231/425 [03:00<01:56, 1.67it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 55%|███████████████████████████████████████████████████████████████████████▌ | 232/425 [03:00<01:32, 2.09it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 55%|███████████████████████████████████████████████████████████████████████▊ | 233/425 [03:00<01:17, 2.47it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 55%|████████████████████████████████████████████████████████████████████████▏ | 234/425 [03:00<01:04, 2.95it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 55%|████████████████████████████████████████████████████████████████████████▍ | 235/425 [03:00<01:00, 3.14it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 56%|████████████████████████████████████████████████████████████████████████▋ | 236/425 [03:01<00:59, 3.16it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 56%|█████████████████████████████████████████████████████████████████████████ | 237/425 [03:01<01:16, 2.46it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 56%|█████████████████████████████████████████████████████████████████████████▎ | 238/425 [03:02<01:04, 2.88it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
56%|█████████████████████████████████████████████████████████████████████████▋ | 239/425 [03:02<01:05, 2.84it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 56%|█████████████████████████████████████████████████████████████████████████▉ | 240/425 [03:03<01:31, 2.02it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 57%|██████████████████████████████████████████████████████████████████████████▎ | 241/425 [03:03<01:28, 2.09it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 57%|██████████████████████████████████████████████████████████████████████████▌ | 242/425 [03:04<02:05, 1.46it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 57%|██████████████████████████████████████████████████████████████████████████▉ | 243/425 [03:05<01:38, 1.84it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 58%|███████████████████████████████████████████████████████████████████████████▌ | 245/425 [03:05<01:17, 2.33it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 58%|███████████████████████████████████████████████████████████████████████████▊ | 246/425 [03:06<01:26, 2.06it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 58%|████████████████████████████████████████████████████████████████████████████▏ | 247/425 [03:06<01:11, 2.49it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 58%|████████████████████████████████████████████████████████████████████████████▍ | 248/425 [03:06<01:02, 2.82it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 59%|████████████████████████████████████████████████████████████████████████████▊ | 249/425 [03:06<00:50, 3.49it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 59%|█████████████████████████████████████████████████████████████████████████████ | 250/425 [03:07<01:08, 2.55it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
59%|█████████████████████████████████████████████████████████████████████████████▎ | 251/425 [03:07<01:05, 2.66it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 59%|█████████████████████████████████████████████████████████████████████████████▋ | 252/425 [03:08<00:56, 3.09it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 60%|█████████████████████████████████████████████████████████████████████████████▉ | 253/425 [03:08<01:08, 2.51it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 60%|██████████████████████████████████████████████████████████████████████████████▎ | 254/425 [03:08<00:57, 2.99it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 60%|██████████████████████████████████████████████████████████████████████████████▌ | 255/425 [03:09<01:09, 2.46it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 60%|██████████████████████████████████████████████████████████████████████████████▉ | 256/425 [03:09<01:07, 2.50it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 60%|███████████████████████████████████████████████████████████████████████████████▏ | 257/425 [03:09<00:54, 3.11it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 61%|███████████████████████████████████████████████████████████████████████████████▌ | 258/425 [03:10<00:46, 3.59it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 61%|████████████████████████████████████████████████████████████████████████████████▏ | 260/425 [03:10<00:34, 4.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 61%|████████████████████████████████████████████████████████████████████████████████▍ | 261/425 [03:10<00:38, 4.27it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 62%|█████████████████████████████████████████████████████████████████████████████████ | 263/425 [03:11<00:52, 
3.08it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 62%|█████████████████████████████████████████████████████████████████████████████████▎ | 264/425 [03:11<00:51, 3.11it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 62%|█████████████████████████████████████████████████████████████████████████████████▋ | 265/425 [03:11<00:45, 3.51it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 63%|█████████████████████████████████████████████████████████████████████████████████▉ | 266/425 [03:12<00:41, 3.86it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 63%|██████████████████████████████████████████████████████████████████████████████████▎ | 267/425 [03:13<01:18, 2.01it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 63%|██████████████████████████████████████████████████████████████████████████████████▌ | 268/425 [03:13<01:26, 1.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 63%|██████████████████████████████████████████████████████████████████████████████████▉ | 269/425 [03:14<01:31, 1.70it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 64%|███████████████████████████████████████████████████████████████████████████████████▏ | 270/425 [03:14<01:17, 1.99it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 64%|███████████████████████████████████████████████████████████████████████████████████▌ | 271/425 [03:14<01:10, 2.18it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 64%|███████████████████████████████████████████████████████████████████████████████████▊ | 272/425 [03:15<01:30, 1.70it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 64%|████████████████████████████████████████████████████████████████████████████████████▍ | 274/425 [03:16<01:09, 2.17it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
65%|████████████████████████████████████████████████████████████████████████████████████▊ | 275/425 [03:16<01:08, 2.20it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 65%|█████████████████████████████████████████████████████████████████████████████████████ | 276/425 [03:17<01:15, 1.97it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 65%|█████████████████████████████████████████████████████████████████████████████████████▋ | 278/425 [03:17<01:01, 2.39it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 66%|█████████████████████████████████████████████████████████████████████████████████████▉ | 279/425 [03:18<00:55, 2.62it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 66%|██████████████████████████████████████████████████████████████████████████████████████▎ | 280/425 [03:18<00:54, 2.67it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 66%|██████████████████████████████████████████████████████████████████████████████████████▌ | 281/425 [03:18<00:51, 2.80it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 66%|██████████████████████████████████████████████████████████████████████████████████████▉ | 282/425 [03:19<00:46, 3.04it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 67%|███████████████████████████████████████████████████████████████████████████████████████▏ | 283/425 [03:19<01:01, 2.33it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 67%|███████████████████████████████████████████████████████████████████████████████████████▌ | 284/425 [03:20<00:55, 2.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 67%|████████████████████████████████████████████████████████████████████████████████████████▏ | 286/425 [03:20<00:31, 4.35it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 
68%|████████████████████████████████████████████████████████████████████████████████████████▍ | 287/425 [03:21<01:06, 2.08it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 68%|█████████████████████████████████████████████████████████████████████████████████████████ | 289/425 [03:21<01:02, 2.19it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 68%|█████████████████████████████████████████████████████████████████████████████████████████▍ | 290/425 [03:21<00:53, 2.53it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 68%|█████████████████████████████████████████████████████████████████████████████████████████▋ | 291/425 [03:22<01:10, 1.90it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 69%|██████████████████████████████████████████████████████████████████████████████████████████ | 292/425 [03:23<01:02, 2.13it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 69%|██████████████████████████████████████████████████████████████████████████████████████████▌ | 294/425 [03:23<00:34, 3.80it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 69%|██████████████████████████████████████████████████████████████████████████████████████████▉ | 295/425 [03:23<00:31, 4.07it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 70%|███████████████████████████████████████████████████████████████████████████████████████████▏ | 296/425 [03:24<00:46, 2.78it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 70%|███████████████████████████████████████████████████████████████████████████████████████████▌ | 297/425 [03:24<00:45, 2.81it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 70%|███████████████████████████████████████████████████████████████████████████████████████████▊ | 298/425 [03:24<00:44, 2.82it/s]INFO:notebook:Processing chunk\n", + "Processing grid 
elements: 70%|████████████████████████████████████████████████████████████████████████████████████████████▏ | 299/425 [03:26<01:33, 1.34it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 71%|████████████████████████████████████████████████████████████████████████████████████████████▍ | 300/425 [03:26<01:16, 1.64it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 71%|████████████████████████████████████████████████████████████████████████████████████████████▊ | 301/425 [03:26<00:50, 2.45it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 71%|█████████████████████████████████████████████████████████████████████████████████████████████ | 302/425 [03:27<00:48, 2.53it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 71%|█████████████████████████████████████████████████████████████████████████████████████████████▍ | 303/425 [03:27<00:42, 2.87it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 72%|█████████████████████████████████████████████████████████████████████████████████████████████▋ | 304/425 [03:27<00:41, 2.91it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████ | 305/425 [03:28<00:46, 2.60it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████▎ | 306/425 [03:28<00:54, 2.20it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████▋ | 307/425 [03:29<00:44, 2.64it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████▉ | 308/425 [03:29<00:46, 2.53it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
73%|███████████████████████████████████████████████████████████████████████████████████████████████▏ | 309/425 [03:30<01:14, 1.56it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 73%|███████████████████████████████████████████████████████████████████████████████████████████████▌ | 310/425 [03:31<01:09, 1.66it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 73%|████████████████████████████████████████████████████████████████████████████████████████████████▏ | 312/425 [03:31<00:36, 3.12it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 74%|████████████████████████████████████████████████████████████████████████████████████████████████▊ | 314/425 [03:32<00:40, 2.76it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 74%|█████████████████████████████████████████████████████████████████████████████████████████████████▍ | 316/425 [03:32<00:27, 3.97it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████ | 318/425 [03:32<00:25, 4.15it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████▎ | 319/425 [03:33<00:26, 4.06it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████▋ | 320/425 [03:33<00:22, 4.77it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 76%|██████████████████████████████████████████████████████████████████████████████████████████████████▉ | 321/425 [03:33<00:25, 4.07it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
76%|███████████████████████████████████████████████████████████████████████████████████████████████████▎ | 322/425 [03:34<00:41, 2.45it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 76%|███████████████████████████████████████████████████████████████████████████████████████████████████▌ | 323/425 [03:34<00:44, 2.27it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 77%|████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 326/425 [03:35<00:38, 2.58it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 77%|████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 327/425 [03:36<00:40, 2.42it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████ | 328/425 [03:36<00:36, 2.68it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 329/425 [03:36<00:30, 3.11it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 330/425 [03:37<00:46, 2.02it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████ | 331/425 [03:38<00:44, 2.12it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 332/425 [03:38<00:40, 2.29it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
78%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 333/425 [03:38<00:34, 2.65it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 334/425 [03:39<00:31, 2.85it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 336/425 [03:39<00:35, 2.48it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 337/425 [03:40<00:31, 2.76it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 338/425 [03:40<00:41, 2.11it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 339/425 [03:41<00:34, 2.47it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 340/425 [03:41<00:33, 2.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████ | 341/425 [03:42<00:43, 1.94it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 342/425 [03:43<00:50, 1.64it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 343/425 [03:43<00:44, 1.86it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████ | 344/425 [03:44<00:51, 1.58it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 345/425 [03:44<00:51, 1.56it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 348/425 [03:45<00:14, 5.38it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 349/425 [03:45<00:14, 5.38it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 351/425 [03:45<00:17, 4.19it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 353/425 [03:46<00:18, 3.90it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 354/425 [03:46<00:20, 3.45it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 356/425 [03:47<00:21, 
3.28it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 357/425 [03:47<00:27, 2.47it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 358/425 [03:48<00:30, 2.19it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 359/425 [03:48<00:27, 2.38it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 360/425 [03:49<00:31, 2.09it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 362/425 [03:49<00:17, 3.55it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 363/425 [03:50<00:23, 2.63it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 365/425 [03:50<00:15, 3.84it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 366/425 [03:51<00:17, 3.34it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 367/425 [03:51<00:16, 3.52it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 369/425 [03:51<00:15, 3.59it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 370/425 [03:51<00:13, 4.07it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 371/425 [03:52<00:20, 2.66it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 372/425 [03:52<00:22, 2.32it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 373/425 [03:53<00:25, 2.07it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 374/425 [03:54<00:27, 1.85it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 376/425 [03:54<00:15, 3.25it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 379/425 [03:56<00:26, 1.73it/s]INFO:notebook:Processing chunk\n", 
+ "INFO:notebook:Processing chunk\n", + "Processing grid elements: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 380/425 [03:56<00:24, 1.86it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 381/425 [03:56<00:22, 1.97it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 382/425 [03:56<00:18, 2.37it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 383/425 [03:57<00:18, 2.29it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 385/425 [03:57<00:13, 2.98it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 386/425 [03:58<00:15, 2.44it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 387/425 [03:59<00:19, 1.99it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 388/425 [03:59<00:14, 2.48it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 
92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 389/425 [03:59<00:11, 3.13it/s]INFO:notebook:Processing chunk\n", + "INFO:notebook:Processing chunk\n", + "Processing grid elements: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 391/425 [03:59<00:07, 4.48it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 392/425 [03:59<00:06, 5.04it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 393/425 [03:59<00:06, 5.08it/s]INFO:notebook:Processing chunk\n", + "Processing grid elements: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 425/425 [19:35<00:00, 208.44s/it]" ] } ], @@ -589,7 +1289,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -614,69 +1314,40 @@ " \n", " \n", " year\n", + " PA_DEF\n", " iso_3\n", " area\n", - " protected_areas_count\n", - " oecms\n", - " pas\n", - " total_marine_area\n", - " protected_area\n", - " coverage\n", - " global_contribution\n", - " is_last_year\n", - " environment\n", + " protectedAreasCount\n", " \n", " \n", " \n", " \n", " 0\n", " 2010\n", - " ABNJ\n", - " 996236.125498\n", - " 29.0\n", - " 0.00000\n", - " 100.00000\n", - " 212881389.0\n", - " 996236.13\n", - " 0.467977\n", - " 0.275966\n", - " False\n", - " marine\n", + " 0\n", + " AF\n", + " 206.100207\n", + " 10.0\n", " \n", " \n", " 1\n", " 2010\n", - " AF\n", - " 129790.939474\n", - " 427.0\n", - " 2.34192\n", - " 97.65808\n", - " 14878058.0\n", - " 129790.94\n", - " 0.872365\n", - " 
0.035953\n", - " False\n", - " marine\n", + " 0\n", + " AS\n", + " 31956.310701\n", + " 24.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " year iso_3 area protected_areas_count oecms pas \\\n", - "0 2010 ABNJ 996236.125498 29.0 0.00000 100.00000 \n", - "1 2010 AF 129790.939474 427.0 2.34192 97.65808 \n", - "\n", - " total_marine_area protected_area coverage global_contribution \\\n", - "0 212881389.0 996236.13 0.467977 0.275966 \n", - "1 14878058.0 129790.94 0.872365 0.035953 \n", - "\n", - " is_last_year environment \n", - "0 False marine \n", - "1 False marine " + " year PA_DEF iso_3 area protectedAreasCount\n", + "0 2010 0 AF 206.100207 10.0\n", + "1 2010 0 AS 31956.310701 24.0" ] }, - "execution_count": 57, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -702,31 +1373,67 @@ " }\n", " )\n", " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", - " .pipe(add_pa_oecm_percentages)\n", - " .pipe(add_total_marine_area)\n", - " .pipe(coverage_stats2)\n", - " .pipe(calculate_coverage_percentage_mpa)\n", - " .pipe(calculate_global_contribution)\n", - " .pipe(add_is_last_year)\n", - " .pipe(add_environment)\n", - ")\n", + "# .pipe(add_mpa_oecm_percentages)\n", + "# .pipe(add_total_marine_area)\n", + "# .pipe(coverage_stats2)\n", + "# .pipe(calculate_coverage_percentage_pa)\n", + "# .pipe(calculate_global_contribution)\n", + "# .pipe(add_is_last_year)\n", + "# .pipe(add_environment)\n", + "# )\n", "\n", "\n", - "NewProtectedAreaExtentSchema(\n", - " coverage.pipe(\n", - " output,\n", - " \"iso_3\",\n", - " {},\n", - " {},\n", - " [\"area\", \"iso_3\", 'total_marine_area'],\n", - " )\n", - ").to_csv(\n", - " output_file,\n", - " index=True,\n", + "# NewProtectedAreaExtentSchema(\n", + "# coverage.pipe(\n", + "# output,\n", + "# \"iso_3\",\n", + "# {},\n", + "# {},\n", + "# [\"area\", \"iso_3\", 'total_marine_area'],\n", + "# )\n", + "# ).to_csv(\n", + "# output_file,\n", + "# index=True,\n", ")\n", 
"coverage.head(2)" ] }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['AF', 'AS', 'EU', 'SA', 'NA', 'WA', 'COL', 'ESP', 'GLOB', 'MAR',\n", + " 'PHL', 'ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATG', 'AUS', 'AZE',\n", + " 'BEL', 'BGD', 'BGR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN', 'CAN',\n", + " 'CHL', 'CHN', 'CMR', 'COD', 'COG', 'COM', 'CPV', 'CRI', 'CUB',\n", + " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'EST',\n", + " 'FIN', 'FJI', 'FRA', 'GAB', 'GBR', 'GEO', 'GIN', 'GMB', 'GNB',\n", + " 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'HTI', 'IDN', 'IRL',\n", + " 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN', 'KHM',\n", + " 'KIR', 'KNA', 'KOR', 'KWT', 'LBN', 'LBR', 'LCA', 'LKA', 'LTU',\n", + " 'LVA', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE',\n", + " 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR',\n", + " 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PLW', 'PNG', 'POL', 'PRT',\n", + " 'QAT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE', 'SLV',\n", + " 'STP', 'SUR', 'SVN', 'SWE', 'SYC', 'THA', 'TKM', 'TLS', 'TON',\n", + " 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'URY', 'USA', 'VCT',\n", + " 'VEN', 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'BHR', 'JOR'],\n", + " dtype=object)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coverage.iso_3.unique()" + ] + }, { "cell_type": "code", "execution_count": 72, @@ -2315,7 +3022,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -2532,7 +3239,7 @@ }, { "cell_type": "code", - "execution_count": 261, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -2551,7 +3258,7 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" ] }, - "execution_count": 261, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -2572,7 +3279,7 @@ }, { 
"cell_type": "code", - "execution_count": 262, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -2583,7 +3290,18 @@ }, { "cell_type": "code", - "execution_count": 263, + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "iucn_cat = pd.read_csv(\n", + " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -2598,7 +3316,7 @@ " columns={\n", " \"parent_iso\": \"iso\",\n", " \"status_yr\": \"year\",\n", - " \"gis_area\": \"area_km2\",\n", + " \"gis_area\": \"protected_area\",\n", " }\n", " ).drop(columns=['status'])\n", " ).assign(source=\"protected_planet\"),\n", @@ -2625,9 +3343,18 @@ }, { "cell_type": "code", - "execution_count": 264, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3510708/3364924951.py:202: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " df.replace(rep_d)\n" + ] + } + ], "source": [ "tpa_table = (\n", " init_table.pipe(add_bbox, \"bbox\")\n", @@ -2654,7 +3381,7 @@ " },\n", " rename={\n", " \"pa_def\": \"protection_status\",\n", - " \"area_km2\": \"area\",\n", + " \"protected_area\": \"area\",\n", " \"iucn_cat\": \"pa_iucn_category\",\n", " \"desig_eng\": \"designation\",\n", " \"source\": \"data_source\",\n", @@ -2676,7 +3403,7 @@ }, { "cell_type": "code", - "execution_count": 265, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -2687,7 +3414,7 @@ }, { "cell_type": "code", - "execution_count": 266, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -2697,7 +3424,176 @@ }, { "cell_type": "code", - "execution_count": 274, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# batch_export(\n", + "# mpa_table[mpa_table.area.notna()],\n", + "# 5000,\n", + "# PAsSchema,\n", + "# pipe_dir.get_processed_step_path(current_step),\n", + "# \"mpa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_mpas,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", + "# init_table[\n", + "# (\n", + "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", + "# .groupby(\"wdpaid\")\n", + "# .transform(\"size\")\n", + "# .gt(1)\n", + "# )\n", + "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", + "# ].groupby(\"wdpaid\")\n", + "# .geometry.apply(lambda x: x.union_all())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, 
+ "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for i in range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PA coverage - terrestrial" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "step = \"preprocess\"\n", + "strapi_collection_mpas = \"mpa-terrestrial\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", + "\n", + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = working_folder.get_temp_file_path(step)\n", + "output_file_sjoin = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")\n", + "output_file_dissolve = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_dissolve.csv\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "\n", + "# Download the protected atlas file 
&& unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# # Load the data\n", + "# wdpa = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", + "# clean_geometries\n", + "# )\n", + "# gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "\n", + "# gadm.sindex\n", + "# wdpa.sindex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Spatial join using overlay\n", + "# wdpa_subset = wdpa[\n", + "# ~(\n", + "# (wdpa.bounds.minx < -181)\n", + "# | (wdpa.bounds.miny < -91)\n", + "# | (wdpa.bounds.maxx > 181)\n", + "# | (wdpa.bounds.maxy > 91)\n", + "# )\n", + "# ].reset_index(drop=True)\n", + "\n", + "# sjoin_gdf = await spatial_join(wdpa_subset, gadm)\n", + "# sjoin_gdf.rename(columns={\"GID_0\": \"iso_3\"}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -2721,230 +3617,248 @@ " \n", " \n", " \n", - " wdpaid\n", - " wdpa_pid\n", - " protection_status\n", - " name\n", - " designation\n", - " pa_iucn_category\n", - " year\n", - " area\n", - " data_source\n", - " mpaa_establishment_stage\n", - " mpaa_protection_level\n", - " bbox\n", - " is_child\n", - " child_id\n", - " coverage\n", - " environment\n", - " location\n", - " children\n", - " \n", - " \n", - " id\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " WDPAID\n", + " WDPA_PID\n", + " PA_DEF\n", + " NAME\n", + " 
DESIG_ENG\n", + " IUCN_CAT\n", + " MARINE\n", + " GIS_AREA\n", + " STATUS\n", + " STATUS_YR\n", + " PARENT_ISO\n", + " COUNTRY\n", + " iso_3\n", + " area_km2\n", + " geometry\n", " \n", " \n", " \n", - " \n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " Diamond Reef and Salt Fish Tail Reef\n", - " Marine Reserve\n", - " 1\n", - " 1973\n", - " 14.636135\n", - " 3\n", - " NaN\n", - " NaN\n", - " [-61.88691617799998, 17.184972703000028, -61.8...\n", - " False\n", - " 1\n", - " 0.013119\n", - " marine\n", - " 15.0\n", - " NaN\n", - " \n", - " \n", - " 2\n", - " 2\n", - " 2\n", - " 1\n", - " Palaster Reef\n", - " Marine Reserve\n", - " 1\n", - " 1973\n", - " 3.845623\n", - " 3\n", - " NaN\n", - " NaN\n", - " [-61.771742115999984, 17.520006550999994, -61....\n", - " False\n", - " 2\n", - " 0.003447\n", - " marine\n", - " 15.0\n", - " NaN\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " wdpaid wdpa_pid protection_status name \\\n", - "id \n", - "1 1 1 1 Diamond Reef and Salt Fish Tail Reef \n", - "2 2 2 1 Palaster Reef \n", - "\n", - " designation pa_iucn_category year area data_source \\\n", - "id \n", - "1 Marine Reserve 1 1973 14.636135 3 \n", - "2 Marine Reserve 1 1973 3.845623 3 \n", - "\n", - " mpaa_establishment_stage mpaa_protection_level \\\n", - "id \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "\n", - " bbox is_child child_id \\\n", - "id \n", - "1 [-61.88691617799998, 17.184972703000028, -61.8... False 1 \n", - "2 [-61.771742115999984, 17.520006550999994, -61.... 
False 2 \n", - "\n", - " coverage environment location children \n", - "id \n", - "1 0.013119 marine 15.0 NaN \n", - "2 0.003447 marine 15.0 NaN " + "Empty GeoDataFrame\n", + "Columns: [WDPAID, WDPA_PID, PA_DEF, NAME, DESIG_ENG, IUCN_CAT, MARINE, GIS_AREA, STATUS, STATUS_YR, PARENT_ISO, COUNTRY, iso_3, area_km2, geometry]\n", + "Index: []" ] }, - "execution_count": 274, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Create final table with all the data\n", - "final_table = pd.concat([mpa_table, tpa_table])\n", - "final_table.index = range(1, len(final_table) + 1)\n", - "final_table.index.name = 'id'\n", - "final_table.head(2)" + "# # test that we have not produce duplicates\n", + "# sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", + "# \"WDPA_PID\"\n", + "# )" ] }, { "cell_type": "code", - "execution_count": 281, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "289352" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "pipe_dir_pa = FileConventionHandler(\"pa\")\n", - "output_file_pas = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")\n", - "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pas, index=True)" + "# sjoin_gdf = filter_by_exluding_propossed_mpas(sjoin_gdf)\n", + "# len(sjoin_gdf)" ] }, { "cell_type": "code", - "execution_count": 283, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + "INFO:pyogrio._io:Created 289,352 records\n" ] } ], "source": [ - "remote_path = 'vizzuality_processed_data/strapi_tables/pa.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " 
bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file_pas,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# batch_export(\n", - "# mpa_table[mpa_table.area.notna()],\n", - "# 5000,\n", - "# PAsSchema,\n", - "# pipe_dir.get_processed_step_path(current_step),\n", - "# \"mpa_detail\",\n", - "# format=\"json\",\n", - "# strapi_colection=strapi_collection_mpas,\n", - "# )" + "# # Save the spatial join\n", + "# sjoin_gdf.to_file(output_file_sjoin, driver=\"ESRI Shapefile\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", - "# init_table[\n", - "# (\n", - "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", - "# .groupby(\"wdpaid\")\n", - "# .transform(\"size\")\n", - "# .gt(1)\n", - "# )\n", - "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", - "# ].groupby(\"wdpaid\")\n", - "# .geometry.apply(lambda x: x.union_all())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### upload data to strapi" + "sjoin_gdf = gpd.read_file(output_file_sjoin)\n", + "sjoin_gdf[\"STATUS_YR\"] = sjoin_gdf[\"STATUS_YR\"].astype(\"Int64\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PA_DEFiso_3year10protected_areas_count
0AFG2010100.010.0
1AFG2011100.010.0
2AFG2012100.010.0
3AFG2013100.010.0
4AFG2014100.010.0
..................
2884ZWE20202290.0229.0
2885ZWE20212290.0229.0
2886ZWE20222290.0229.0
2887ZWE20232290.0229.0
2888ZWE20242290.0229.0
\n", + "

2889 rows × 5 columns

\n", + "
" + ], "text/plain": [ - "" + "PA_DEF iso_3 year 1 0 protected_areas_count\n", + "0 AFG 2010 10 0.0 10.0\n", + "1 AFG 2011 10 0.0 10.0\n", + "2 AFG 2012 10 0.0 10.0\n", + "3 AFG 2013 10 0.0 10.0\n", + "4 AFG 2014 10 0.0 10.0\n", + "... ... ... ... ... ...\n", + "2884 ZWE 2020 229 0.0 229.0\n", + "2885 ZWE 2021 229 0.0 229.0\n", + "2886 ZWE 2022 229 0.0 229.0\n", + "2887 ZWE 2023 229 0.0 229.0\n", + "2888 ZWE 2024 229 0.0 229.0\n", + "\n", + "[2889 rows x 5 columns]" ] }, + "execution_count": 15, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + "# # Calculate wdpa cumulative counts and pa and oecm percentages\n", + "cumulative_counts = cumulative_pa_def_counts(sjoin_gdf)\n", + "cumulative_counts" ] }, { @@ -2953,370 +3867,1074 @@ "metadata": {}, "outputs": [], "source": [ - "# for i in range(0, 4):\n", - "# strapi.importCollectionData(\n", - "# strapi_collection_mpas,\n", - "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PA coverage" + "# # Dissolve geometries to calculate the coverage\n", + "# data = await process_grid(sjoin_gdf)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" - ] - }, + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3yearprotected_area
0ATA20171395.028044
1ATA20181395.028044
2ATA20191395.028044
3ATA20201395.028044
4ATA20211395.028044
\n", + "
" + ], + "text/plain": [ + " iso_3 year protected_area\n", + "0 ATA 2017 1395.028044\n", + "1 ATA 2018 1395.028044\n", + "2 ATA 2019 1395.028044\n", + "3 ATA 2020 1395.028044\n", + "4 ATA 2021 1395.028044" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# tpa = pd.concat(data, ignore_index=True).drop(columns=['STATUS_YR', 'index']).rename(columns={'area': 'protected_area'})\n", + "# tpa.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3yearprotected_area
0AFG20101078.918622
1AFG20111078.918622
2AFG20121078.918622
3AFG20131078.918622
4AFG20141078.918622
\n", + "
" + ], "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + " iso_3 year protected_area\n", + "0 AFG 2010 1078.918622\n", + "1 AFG 2011 1078.918622\n", + "2 AFG 2012 1078.918622\n", + "3 AFG 2013 1078.918622\n", + "4 AFG 2014 1078.918622" ] }, - "execution_count": 7, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipe = \"mpa-terrestrial\"\n", - "strapi_collection_mpas = \"mpa-terrestrial\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", - "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", - "\n", - "# Download the protected atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + "# # Group by 'iso_3' and 'year' and sum the 'area'\n", + "# tpa_grouped = tpa.groupby(['iso_3', 'year'], as_index=False)['protected_area'].sum()\n", + "# tpa_grouped.reset_index(drop=True, inplace=True)\n", + "# tpa_grouped.head(5)" ] }, { "cell_type": "code", - "execution_count": 209, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "# Load the data\n", - "tpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", - " clean_geometries\n", - ")\n", - "gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)" + "# # save to csv\n", + "# tpa_grouped.to_csv(output_file_dissolve, index=False)" ] }, { "cell_type": "code", - "execution_count": 210, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3yearprotected_area
0AFG20101078.918622
1AFG20111078.918622
2AFG20121078.918622
3AFG20131078.918622
4AFG20141078.918622
\n", + "
" + ], "text/plain": [ - "" + " iso_3 year protected_area\n", + "0 AFG 2010 1078.918622\n", + "1 AFG 2011 1078.918622\n", + "2 AFG 2012 1078.918622\n", + "3 AFG 2013 1078.918622\n", + "4 AFG 2014 1078.918622" ] }, - "execution_count": 210, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "gadm.sindex\n", - "tpa_intermediate.sindex" + "tpa_grouped = pd.read_csv(output_file_dissolve)\n", + "tpa_grouped.head(5)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 63, "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "from typing import Tuple, List\n", - "import sys\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "import asyncio\n", - "from tqdm.asyncio import tqdm\n", - "from itertools import product\n", - "from shapely.geometry import box\n", - "\n", - "\n", - "scripts_dir = Path(\".\").joinpath(\"src\")\n", - "if scripts_dir not in sys.path:\n", - " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", - "\n", - "from pipelines.utils import background\n", - "from pipelines.processors import calculate_area, get_matches, repair_geometry, arrange_dimensions, clean_geometries, simplify_async\n", - "\n", - "logging.basicConfig(level=logging.DEBUG)\n", - "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", - "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", - "logging.getLogger(\"fiona\").setLevel(logging.WARNING)\n", - "logger = logging.getLogger(\"notebook\")\n", - "\n", - "\n", - "def split_by_year(\n", - " gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", - ") -> List[gpd.GeoDataFrame]:\n", - " \"\"\"Split data by year. 
relevant for MPA data.(coverage indicator)\"\"\"\n", - " prior_2010 = (\n", - " gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", - " .dissolve(\n", - " by=[\"iso_3\"],\n", - " )\n", - " .assign(year=2010)\n", - " .reset_index()\n", - " )\n", - "\n", - " after_2010 = (\n", - " gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", - " .rename(columns={\"STATUS_YR\": \"year\"})\n", - " )\n", - " return [prior_2010, after_2010]\n", - "\n", - "\n", - "def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", - " \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", - " minx, miny, maxx, maxy = bounds\n", - " x = np.arange(minx, maxx, cell_size)\n", - " y = np.arange(miny, maxy, cell_size)\n", - " polygons = [\n", - " {\n", - " \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", - " \"cell_id\": f\"{i}_{j}\",\n", - " }\n", - " for i, j in product(x, y)\n", - " ]\n", - " return gpd.GeoDataFrame(polygons)\n", - "\n", - "\n", - "def subdivide_grid(\n", - " grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", - ") -> List:\n", - " subdivided_elements = []\n", - " for grid_element in grid_gdf.geometry:\n", - " candidates = get_matches(grid_element, gdf)\n", - " density = len(candidates)\n", - " if density > max_complexity:\n", - " \n", - " subdivision_cellsize = max_cellsize / 2\n", - " # Subdivide the grid element recursively\n", - " subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", - " subdivided_elements.extend(\n", - " subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity)\n", - " )\n", - " elif density > 0:\n", - " subdivided_elements.append(grid_element)\n", - "\n", - " return subdivided_elements\n", - "\n", - "\n", - "def create_density_based_grid(\n", - " gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000\n", - ") -> gpd.GeoDataFrame:\n", - " # Get the bounds 
of the GeoDataFrame\n", - " minx, miny, maxx, maxy = gdf.total_bounds\n", - "\n", - " # Create an initial grid\n", - " grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", - "\n", - " # Subdivide grid elements based on density and complexity\n", - " subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", - "\n", - " return gpd.GeoDataFrame(geometry=subdivided_elements)\n", - "\n", - "# TODO: refactor this so old function mantains functionality for marine areas\n", - "\n", - "def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", - " result = []\n", - " gdf[\"already_processed\"] = False\n", - " for geometry in grid_gdf.geometry:\n", - " candidates = get_matches(geometry, gdf)\n", - " subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", - " gdf.loc[subset.index, \"already_processed\"] = True\n", - " if not subset.empty:\n", - " result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", - " return result\n", - "\n", - "\n", - "@background\n", - "def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", - " try:\n", - " bbox = df_large_chunk.total_bounds\n", - "\n", - " candidates = get_matches(box(*bbox), df_small.geometry)\n", - " if len(candidates) > 0:\n", - " subset = df_small.loc[candidates.index].clip(box(*bbox))\n", - "\n", - " result = (\n", - " df_large_chunk.sjoin(subset, how=\"inner\")\n", - " .clip(subset.geometry)\n", - " .reset_index(drop=True)\n", - " )\n", - " result.geometry = result.geometry.apply(repair_geometry)\n", - " else:\n", - " result = gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", - " return result\n", - " except Exception as e:\n", - " logging.error(e)\n", - " return gpd.GeoDataFrame()\n", - " finally:\n", - " pbar.update(1)\n", - "\n", - "\n", - "async def spatial_join(\n", - " geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", - ") -> gpd.GeoDataFrame:\n", - " \"\"\"Create spatial join between 
two GeoDataFrames.\"\"\"\n", - " # we build the spatial index for the larger GeoDataFrame\n", - " smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", - "\n", - " logger.info(f\"Processing {len(larger_dim)} elements\")\n", - "\n", - " grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", - "\n", - " logger.info(f\"grid created with {len(grid)} cells\")\n", - "\n", - " list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", - "\n", - " logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yeariso_3protected_areaprotected_areas_countoecmspastotal_terrestrial_areacoverageglobal_contributionis_last_yearenvironment
02010AF3.636311e+067272.00.0100.029993094.7112.1238272.694465Falseterrestrial
12010AS2.040713e+0624761.00.0100.031625555.586.4527341.512145Falseterrestrial
22010AT1.108333e+022.00.0100.012088229.650.0009170.000082Falseterrestrial
32010EU4.303722e+06116101.00.0100.030037571.3714.3277953.189009Falseterrestrial
42010NA2.006295e+0652176.00.0100.019371151.9210.3571271.486642Falseterrestrial
....................................
29892024YEM5.145397e+0315.00.0100.0453741.181.1339940.003813Trueterrestrial
29902024ZAF1.143850e+051631.00.0100.01221327.529.3656310.084758Trueterrestrial
29912024ZMB2.929805e+05557.00.0100.0753990.3338.8573300.217095Trueterrestrial
29922024ZNC2.779983e+008.00.0100.03314.080.0838840.000002Trueterrestrial
29932024ZWE1.096232e+05229.00.0100.0391234.8828.0198030.081230Trueterrestrial
\n", + "

2994 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " year iso_3 protected_area protected_areas_count oecms pas \\\n", + "0 2010 AF 3.636311e+06 7272.0 0.0 100.0 \n", + "1 2010 AS 2.040713e+06 24761.0 0.0 100.0 \n", + "2 2010 AT 1.108333e+02 2.0 0.0 100.0 \n", + "3 2010 EU 4.303722e+06 116101.0 0.0 100.0 \n", + "4 2010 NA 2.006295e+06 52176.0 0.0 100.0 \n", + "... ... ... ... ... ... ... \n", + "2989 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", + "2990 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", + "2991 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", + "2992 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", + "2993 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", + "\n", + " total_terrestrial_area coverage global_contribution is_last_year \\\n", + "0 29993094.71 12.123827 2.694465 False \n", + "1 31625555.58 6.452734 1.512145 False \n", + "2 12088229.65 0.000917 0.000082 False \n", + "3 30037571.37 14.327795 3.189009 False \n", + "4 19371151.92 10.357127 1.486642 False \n", + "... ... ... ... ... \n", + "2989 453741.18 1.133994 0.003813 True \n", + "2990 1221327.52 9.365631 0.084758 True \n", + "2991 753990.33 38.857330 0.217095 True \n", + "2992 3314.08 0.083884 0.000002 True \n", + "2993 391234.88 28.019803 0.081230 True \n", + "\n", + " environment \n", + "0 terrestrial \n", + "1 terrestrial \n", + "2 terrestrial \n", + "3 terrestrial \n", + "4 terrestrial \n", + "... ... 
\n", + "2989 terrestrial \n", + "2990 terrestrial \n", + "2991 terrestrial \n", + "2992 terrestrial \n", + "2993 terrestrial \n", + "\n", + "[2994 rows x 11 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add pa and oecm counts to the coverage table\n", + "coverage = (\n", + " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .pipe(calculate_stats_cov_pa, [\"year\"], \"iso_3\")\n", + " .pipe(calculate_pa_def_percentages)\n", + " .pipe(add_total_terrestrial_area)\n", + " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", "\n", - " with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", - " new_df = await asyncio.gather(\n", - " *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", - " )\n", + "NewProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output2,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"iso_3\", 'total_terrestrial_area'],\n", + " )\n", + ").to_csv(\n", + " output_file_tpas,\n", + " index=True,\n", + ")\n", "\n", - " return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", + "coverage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combine marine and terrestrial - Detail table" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "pipe_mar = \"mpa\"\n", + "pipe_ter = \"mpa-terrestrial\"\n", + "pipe_pa = \"pa\"\n", + "step = \"preprocess\"\n", "\n", "\n", - "@background\n", - "def spatial_dissolve_chunk(geometry, gdf, pbar):\n", + "pipe_dir_mar = FileConventionHandler(pipe_mar)\n", + "pipe_dir_ter = FileConventionHandler(pipe_ter)\n", + "pipe_dir_pa = FileConventionHandler(pipe_pa)\n", "\n", - " try:\n", - " candidates 
= get_matches(\n", - " geometry,\n", - " gdf.geometry,\n", - " )\n", - " subset = gdf.loc[candidates.index]\n", + "input_path_mar = pipe_dir_mar.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", + "input_path_ter = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "output_file_pa = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "mpa_table = pd.read_csv(input_path_mar)\n", + "tpa_table = pd.read_csv(input_path_ter)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idwdpaidwdpa_pidprotection_statusnamedesignationpa_iucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxis_childchild_idcoverageenvironmentlocationchildren
id
111.011.0Diamond Reef and Salt Fish Tail ReefMarine Reserve1.01973.014.6361353NaNNaN[-61.88691617799998, 17.184972703000028, -61.8...False10.013119marine15NaN
222.021.0Palaster ReefMarine Reserve1.01973.03.8456233NaNNaN[-61.771742115999984, 17.520006550999994, -61....False20.003447marine15NaN
\n", + "
" + ], + "text/plain": [ + " id wdpaid wdpa_pid protection_status \\\n", + "id \n", + "1 1 1.0 1 1.0 \n", + "2 2 2.0 2 1.0 \n", + "\n", + " name designation pa_iucn_category \\\n", + "id \n", + "1 Diamond Reef and Salt Fish Tail Reef Marine Reserve 1.0 \n", + "2 Palaster Reef Marine Reserve 1.0 \n", + "\n", + " year area data_source mpaa_establishment_stage \\\n", + "id \n", + "1 1973.0 14.636135 3 NaN \n", + "2 1973.0 3.845623 3 NaN \n", + "\n", + " mpaa_protection_level bbox \\\n", + "id \n", + "1 NaN [-61.88691617799998, 17.184972703000028, -61.8... \n", + "2 NaN [-61.771742115999984, 17.520006550999994, -61.... \n", + "\n", + " is_child child_id coverage environment location children \n", + "id \n", + "1 False 1 0.013119 marine 15 NaN \n", + "2 False 2 0.003447 marine 15 NaN " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create final table with all the data\n", + "final_table = pd.concat([mpa_table, tpa_table])\n", + "final_table.index = range(1, len(final_table) + 1)\n", + "final_table.index.name = 'id'\n", + "final_table.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "ename": "SchemaError", + "evalue": "Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... ...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mSchemaError\u001b[0m Traceback (most recent call last)", + " \u001b[0;31m[... 
skipping hidden 1 frame]\u001b[0m\n", + "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPAsSchema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(output_file_pa, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:138\u001b[0m, in \u001b[0;36mDataFrameModel.__new__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[0;32m--> 138\u001b[0m DataFrameBase[TDataFrameModel], \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:289\u001b[0m, in \u001b[0;36mDataFrameModel.validate\u001b[0;34m(cls, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 288\u001b[0m DataFrameBase[TDataFrameModel],\n\u001b[0;32m--> 289\u001b[0m 
\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 292\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:126\u001b[0m, in \u001b[0;36mDataFrameSchema.validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mpandera\u001b[38;5;241m.\u001b[39madd_schema(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m 
\u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:155\u001b[0m, in \u001b[0;36mDataFrameSchema._validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 147\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is an inferred schema that hasn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt been \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodified. 
It\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms recommended that you refine the schema \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:90\u001b[0m, in 
\u001b[0;36mDataFrameSchemaBackend.validate\u001b[0;34m(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaErrors \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m---> 90\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_errors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# run custom parsers\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:95\u001b[0m, in \u001b[0;36mErrorHandler.collect_errors\u001b[0;34m(self, schema_errors, original_exc)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m schema_error \u001b[38;5;129;01min\u001b[39;00m schema_errors:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43moriginal_exc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:54\u001b[0m, in \u001b[0;36mErrorHandler.collect_error\u001b[0;34m(self, error_type, reason_code, schema_error, original_exc)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lazy:\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m schema_error \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01moriginal_exc\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# delete data of validated object from SchemaError object to prevent\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# storing copies of the validated DataFrame/Series for every\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# SchemaError collected.\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:631\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend._coerce_dtype_helper.._try_coercion\u001b[0;34m(coerce_fn, obj)\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaError \u001b[38;5;28;01mas\u001b[39;00m exc:\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/components.py:131\u001b[0m, in \u001b[0;36mComponentSchema.coerce_dtype\u001b[0;34m(self, check_obj)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Coerce type of the data by type specified in dtype.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m:param check_obj: data to coerce\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m:returns: 
data of the same type as the input\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/components.py:211\u001b[0m, in \u001b[0;36mColumnBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_field(check_obj) \u001b[38;5;129;01mor\u001b[39;00m is_index(check_obj):\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mColumnBackend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msuper\u001b[39m(ColumnBackend, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mcoerce_dtype(\n\u001b[1;32m 217\u001b[0m x,\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 220\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 221\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/array.py:173\u001b[0m, in \u001b[0;36mArraySchemaBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParserError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\n\u001b[1;32m 174\u001b[0m schema\u001b[38;5;241m=\u001b[39mschema,\n\u001b[1;32m 175\u001b[0m data\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 176\u001b[0m message\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while coercing \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;241m.\u001b[39mfailure_cases\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 179\u001b[0m ),\n\u001b[1;32m 180\u001b[0m failure_cases\u001b[38;5;241m=\u001b[39mexc\u001b[38;5;241m.\u001b[39mfailure_cases,\n\u001b[1;32m 181\u001b[0m 
check\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoerce_dtype(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m reason_code\u001b[38;5;241m=\u001b[39mSchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 183\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", + "\u001b[0;31mSchemaError\u001b[0m: Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... ...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mSchemaError\u001b[0m Traceback (most recent call last)", + " \u001b[0;31m[... 
skipping hidden 1 frame]\u001b[0m\n", + "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPAsSchema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(output_file_pa, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:138\u001b[0m, in \u001b[0;36mDataFrameModel.__new__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[0;32m--> 138\u001b[0m DataFrameBase[TDataFrameModel], \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:289\u001b[0m, in \u001b[0;36mDataFrameModel.validate\u001b[0;34m(cls, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 288\u001b[0m DataFrameBase[TDataFrameModel],\n\u001b[0;32m--> 289\u001b[0m 
\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 292\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:126\u001b[0m, in \u001b[0;36mDataFrameSchema.validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mpandera\u001b[38;5;241m.\u001b[39madd_schema(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m 
\u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:155\u001b[0m, in \u001b[0;36mDataFrameSchema._validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 147\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is an inferred schema that hasn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt been \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodified. 
It\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms recommended that you refine the schema \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:90\u001b[0m, in 
\u001b[0;36mDataFrameSchemaBackend.validate\u001b[0;34m(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaErrors \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m---> 90\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_errors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# run custom parsers\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:95\u001b[0m, in \u001b[0;36mErrorHandler.collect_errors\u001b[0;34m(self, schema_errors, original_exc)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m schema_error \u001b[38;5;129;01min\u001b[39;00m schema_errors:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43moriginal_exc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:54\u001b[0m, in \u001b[0;36mErrorHandler.collect_error\u001b[0;34m(self, error_type, reason_code, schema_error, original_exc)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lazy:\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m schema_error \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01moriginal_exc\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# delete data of validated object from SchemaError object to prevent\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# storing copies of the validated DataFrame/Series for every\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# SchemaError collected.\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:631\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend._coerce_dtype_helper.._try_coercion\u001b[0;34m(coerce_fn, obj)\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaError \u001b[38;5;28;01mas\u001b[39;00m exc:\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/components.py:131\u001b[0m, in \u001b[0;36mComponentSchema.coerce_dtype\u001b[0;34m(self, check_obj)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Coerce type of the data by type specified in dtype.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m:param check_obj: data to coerce\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m:returns: 
data of the same type as the input\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/components.py:211\u001b[0m, in \u001b[0;36mColumnBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_field(check_obj) \u001b[38;5;129;01mor\u001b[39;00m is_index(check_obj):\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mColumnBackend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msuper\u001b[39m(ColumnBackend, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mcoerce_dtype(\n\u001b[1;32m 217\u001b[0m x,\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 220\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 221\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/array.py:173\u001b[0m, in \u001b[0;36mArraySchemaBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParserError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\n\u001b[1;32m 174\u001b[0m schema\u001b[38;5;241m=\u001b[39mschema,\n\u001b[1;32m 175\u001b[0m data\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 176\u001b[0m message\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while coercing \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;241m.\u001b[39mfailure_cases\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 179\u001b[0m ),\n\u001b[1;32m 180\u001b[0m failure_cases\u001b[38;5;241m=\u001b[39mexc\u001b[38;5;241m.\u001b[39mfailure_cases,\n\u001b[1;32m 181\u001b[0m 
check\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoerce_dtype(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m reason_code\u001b[38;5;241m=\u001b[39mSchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 183\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", + "\u001b[0;31mSchemaError\u001b[0m: Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... ...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mSchemaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPAsSchema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(output_file_pa, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:138\u001b[0m, in \u001b[0;36mDataFrameModel.__new__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 134\u001b[0m 
\u001b[38;5;129m@docstring_substitution\u001b[39m(validate_doc\u001b[38;5;241m=\u001b[39mBaseSchema\u001b[38;5;241m.\u001b[39mvalidate\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m)\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__new__\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrameBase[TDataFrameModel]: \u001b[38;5;66;03m# type: ignore [misc]\u001b[39;00m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[0;32m--> 138\u001b[0m DataFrameBase[TDataFrameModel], \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:289\u001b[0m, in \u001b[0;36mDataFrameModel.validate\u001b[0;34m(cls, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;129m@docstring_substitution\u001b[39m(validate_doc\u001b[38;5;241m=\u001b[39mBaseSchema\u001b[38;5;241m.\u001b[39mvalidate\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m)\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalidate\u001b[39m(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 284\u001b[0m inplace: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 285\u001b[0m ) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrameBase[TDataFrameModel]:\n\u001b[1;32m 286\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 288\u001b[0m DataFrameBase[TDataFrameModel],\n\u001b[0;32m--> 289\u001b[0m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 292\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:126\u001b[0m, in \u001b[0;36mDataFrameSchema.validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 114\u001b[0m check_obj \u001b[38;5;241m=\u001b[39m check_obj\u001b[38;5;241m.\u001b[39mmap_partitions( \u001b[38;5;66;03m# type: ignore [operator]\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate,\n\u001b[1;32m 116\u001b[0m head\u001b[38;5;241m=\u001b[39mhead,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m meta\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 123\u001b[0m )\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
check_obj\u001b[38;5;241m.\u001b[39mpandera\u001b[38;5;241m.\u001b[39madd_schema(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:155\u001b[0m, in \u001b[0;36mDataFrameSchema._validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_inferred:\n\u001b[1;32m 147\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 148\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is an inferred schema that hasn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt been \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodified. It\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms recommended that you refine the schema \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:90\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend.validate\u001b[0;34m(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 86\u001b[0m error_handler\u001b[38;5;241m.\u001b[39mcollect_error(\n\u001b[1;32m 87\u001b[0m validation_type(exc\u001b[38;5;241m.\u001b[39mreason_code), exc\u001b[38;5;241m.\u001b[39mreason_code, exc\n\u001b[1;32m 88\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaErrors \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m---> 90\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_errors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# run custom parsers\u001b[39;00m\n\u001b[1;32m 93\u001b[0m check_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_parsers(schema, check_obj)\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:95\u001b[0m, in \u001b[0;36mErrorHandler.collect_errors\u001b[0;34m(self, schema_errors, original_exc)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Collect schema errors from a SchemaErrors exception.\u001b[39;00m\n\u001b[1;32m 89\u001b[0m \n\u001b[1;32m 90\u001b[0m \u001b[38;5;124;03m:param reason_code: string representing reason for error.\u001b[39;00m\n\u001b[1;32m 91\u001b[0m 
\u001b[38;5;124;03m:param schema_error: ``SchemaError`` object.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m:param original_exc: original exception associated with the SchemaError.\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m schema_error \u001b[38;5;129;01min\u001b[39;00m schema_errors:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43moriginal_exc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:54\u001b[0m, in \u001b[0;36mErrorHandler.collect_error\u001b[0;34m(self, error_type, reason_code, schema_error, original_exc)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Collect schema error, raising exception if lazy is False.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m:param error_type: type of error\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m:param reason_code: string representing reason for error\u001b[39;00m\n\u001b[1;32m 51\u001b[0m 
\u001b[38;5;124;03m:param schema_error: ``SchemaError`` object.\u001b[39;00m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lazy:\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m schema_error \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01moriginal_exc\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# delete data of validated object from SchemaError object to prevent\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# storing copies of the validated DataFrame/Series for every\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# SchemaError collected.\u001b[39;00m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(schema_error, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:631\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend._coerce_dtype_helper.._try_coercion\u001b[0;34m(coerce_fn, obj)\u001b[0m\n\u001b[1;32m 629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_try_coercion\u001b[39m(coerce_fn, obj):\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 633\u001b[0m error_handler\u001b[38;5;241m.\u001b[39mcollect_error(\n\u001b[1;32m 634\u001b[0m validation_type(SchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION),\n\u001b[1;32m 635\u001b[0m SchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 636\u001b[0m 
exc,\n\u001b[1;32m 637\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/components.py:131\u001b[0m, in \u001b[0;36mComponentSchema.coerce_dtype\u001b[0;34m(self, check_obj)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcoerce_dtype\u001b[39m(\u001b[38;5;28mself\u001b[39m, check_obj: TDataObject) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m TDataObject:\n\u001b[1;32m 126\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Coerce type of the data by type specified in dtype.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m :param check_obj: data to coerce\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m :returns: data of the same type as the input\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/components.py:211\u001b[0m, in \u001b[0;36mColumnBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[38;5;66;03m# pylint: disable=super-with-arguments\u001b[39;00m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;66;03m# pylint: disable=fixme\u001b[39;00m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;66;03m# TODO: use singledispatchmethod here\u001b[39;00m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_field(check_obj) 
\u001b[38;5;129;01mor\u001b[39;00m is_index(check_obj):\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mColumnBackend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msuper\u001b[39m(ColumnBackend, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mcoerce_dtype(\n\u001b[1;32m 217\u001b[0m x,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 220\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 221\u001b[0m )\n", + "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/array.py:173\u001b[0m, in \u001b[0;36mArraySchemaBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m schema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtry_coerce(check_obj)\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParserError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\n\u001b[1;32m 174\u001b[0m schema\u001b[38;5;241m=\u001b[39mschema,\n\u001b[1;32m 175\u001b[0m data\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 176\u001b[0m 
message\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while coercing \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;241m.\u001b[39mfailure_cases\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 179\u001b[0m ),\n\u001b[1;32m 180\u001b[0m failure_cases\u001b[38;5;241m=\u001b[39mexc\u001b[38;5;241m.\u001b[39mfailure_cases,\n\u001b[1;32m 181\u001b[0m check\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoerce_dtype(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m reason_code\u001b[38;5;241m=\u001b[39mSchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 183\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", + "\u001b[0;31mSchemaError\u001b[0m: Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... 
...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]" + ] + } + ], + "source": [ + "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pa, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/pa.csv'\n", "\n", - " result = pd.concat(\n", - " subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", - " ).copy()\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file_pas,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "pipe_mar = \"mpa\"\n", + "pipe_ter = \"mpa-terrestrial\"\n", + "step = \"preprocess\"\n", "\n", - " data_chunk = [\n", - " (\n", - " result[result[\"year\"] <= 2010]\n", - " .reset_index()\n", - " .pipe(calculate_area, \"area\", None)\n", - " .drop(columns=[\"geometry\"])\n", - " )\n", - " ]\n", - " for year in range(2011, 2025):\n", - " data_chunk.append(\n", - " result[result[\"year\"] <= year]\n", - " .dissolve(\n", - " by=[\"iso_3\"],\n", - " )\n", - " .assign(year=year)\n", - " .reset_index()\n", - " .pipe(calculate_area, \"area\", None)\n", - " .drop(columns=[\"geometry\"])\n", - " )\n", "\n", - " return pd.concat(data_chunk, ignore_index=True)\n", - " except Exception as e:\n", - " logging.error(e)\n", - " return gpd.GeoDataFrame()\n", - " finally:\n", - " pbar.update(1)\n", + "pipe_dir_mar = FileConventionHandler(pipe_mar)\n", + "pipe_dir_ter = FileConventionHandler(pipe_ter)\n", "\n", - "async def process_grid(gdf):\n", - " 
grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", - " with tqdm(total=grid_gdf.shape[0]) as pbar:\n", - " pbar = tqdm(total=len(grid_gdf), desc=\"Processing grid elements\")\n", - " result = await asyncio.gather(*[spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values])\n", - " return result\n", - "\n" + "input_path_mar = pipe_dir_mar.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", + "input_path_ter = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "output_file_pa = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "ter = pd.read_csv(input_path_ter)\n", + "mar = pd.read_csv(input_path_mar)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'year', 'protected_area', 'protected_areas_count', 'oecms', 'pas',\n", + " 'coverage', 'global_contribution', 'is_last_year', 'environment',\n", + " 'location'],\n", + " dtype='object')" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ter.columns" ] }, { "cell_type": "code", - "execution_count": 212, + "execution_count": 68, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 206/206 [00:04<00:00, 42.41it/s]\n", - "INFO:notebook:Processing 286305 elements\n", - "INFO:notebook:grid created with 433 cells\n", - "INFO:notebook:grid split into 392 chunks\n", - 
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [10:01<00:00, 1.53s/it]\n", - "INFO:pyogrio._io:Created 290,561 records\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████ | 269/392 [01:06<00:11, 10.71it/s]" - ] - }, + "data": { + "text/plain": [ + "Index(['id', 'wdpaid', 'wdpa_pid', 'protection_status', 'name', 'designation',\n", + " 'pa_iucn_category', 'year', 'area', 'data_source',\n", + " 'mpaa_establishment_stage', 'mpaa_protection_level', 'bbox', 'is_child',\n", + " 'child_id', 'coverage', 'environment', 'location', 'children'],\n", + " dtype='object')" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, + "data": { + "text/plain": [ + "location 0\n", + "code 0\n", + "dtype: int64" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "locations_code.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [10:01<00:00, 122.96s/it]" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationcode
67NaN
\n", + "
" + ], + "text/plain": [ + " location code\n", + "6 7 NaN" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "wdpa_subset = tpa_intermediate[\n", - " ~(\n", - " (tpa_intermediate.bounds.minx < -181)\n", - " | (tpa_intermediate.bounds.miny < -91)\n", - " | (tpa_intermediate.bounds.maxx > 181)\n", - " | (tpa_intermediate.bounds.maxy > 91)\n", + "# show rows with null values in locations_code\n", + "locations_code[locations_code.isna().any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "coverage = (\n", + " final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", + " .pipe(separate_parent_iso, \"iso_3\")\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .replace(\n", + " {\n", + " \"iso_3\": {\n", + " \"ATA\": \"ABNJ\",\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " \"GIB\": \"GBR\",\n", + " }\n", + " }\n", " )\n", - "].reset_index(drop=True)\n", + " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", + " .pipe(add_pa_oecm_percentages)\n", + " .pipe(add_total_marine_area)\n", + " .pipe(coverage_stats2)\n", + " .pipe(calculate_coverage_percentage_mpa)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", "\n", - "gadm_sync = await simplify_async(gadm)\n", - "sjoin_gdf = await spatial_join(wdpa_subset, gadm_sync)\n", "\n", - "# save sjoin_gdf to file\n", - "sjoin_gdf.to_file(pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\"), driver=\"ESRI Shapefile\")" + "NewProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"area\", \"iso_3\", 'total_marine_area'],\n", + " )\n", 
+ ").to_csv(\n", + " output_file,\n", + " index=True,\n", + ")\n", + "coverage.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_oecms = (\n", + " sjoin_gdf.groupby([\"iso_3\", \"PA_DEF\"])\n", + " .agg({\"PA_DEF\": \"count\"})\n", + " .rename(columns={\"PA_DEF\": \"count\"})\n", + " .reset_index()\n", + " .pivot(index=\"GID_0\", columns=\"PA_DEF\", values=\"count\")\n", + " .fillna(0)\n", + " .reset_index()\n", + " .rename(columns={\"0\": \"oecm\", \"1\": \"pa\"})\n", + ")\n", + "# ).reset_index().pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\").reset_index(names=[\"PA_DEF\"], level=0, drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_oecms[\"oecm_perc\"] = result_oecms[\"oecm\"] / (result_oecms[\"oecm\"] + result_oecms[\"pa\"])" ] }, { "cell_type": "code", - "execution_count": 213, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3339,97 +4957,126 @@ "\n", " \n", " \n", - " \n", - " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOgeometryindex_rightCOUNTRYGID_0area_km2iso_3oecmpaoecm_perc
0555624810555624810_A1Ross Sea Region Marine Protected AreaMarine Protected Area (CCAMLR)Not Reported11.521513e+06Designated2017ABNJMULTIPOLYGON (((179.79727 -84.3402, 179.77745 ...6.0AntarcticaATA12088229.65180USA0.050674.00.000000
15555656815555656811RangatiraNature ReserveIa12.173346e+00Designated1977NZLPOLYGON ((-176.17405 -44.35403, -176.1742 -44....127.0New Zealand161SWE0.030813.00.000000
44DEU0.023703.00.000000
55EST0.020579.00.000000
57FIN0.018427.00.000000
29CAN2.012566.00.000159
61GBR0.011712.00.000000
9AUS0.011154.00.000000
30CHE0.010632.00.000000
130NZL268885.810.010205.00.000000
\n", "" ], "text/plain": [ - " WDPAID WDPA_PID PA_DEF NAME \\\n", - "0 555624810 555624810_A 1 Ross Sea Region Marine Protected Area \n", - "1 555565681 555565681 1 Rangatira \n", - "\n", - " DESIG_ENG IUCN_CAT MARINE GIS_AREA \\\n", - "0 Marine Protected Area (CCAMLR) Not Reported 1 1.521513e+06 \n", - "1 Nature Reserve Ia 1 2.173346e+00 \n", - "\n", - " STATUS STATUS_YR PARENT_ISO \\\n", - "0 Designated 2017 ABNJ \n", - "1 Designated 1977 NZL \n", - "\n", - " geometry index_right \\\n", - "0 MULTIPOLYGON (((179.79727 -84.3402, 179.77745 ... 6.0 \n", - "1 POLYGON ((-176.17405 -44.35403, -176.1742 -44.... 127.0 \n", - "\n", - " COUNTRY GID_0 area_km2 \n", - "0 Antarctica ATA 12088229.65 \n", - "1 New Zealand NZL 268885.81 " + "PA_DEF iso_3 oecm pa oecm_perc\n", + "180 USA 0.0 50674.0 0.000000\n", + "161 SWE 0.0 30813.0 0.000000\n", + "44 DEU 0.0 23703.0 0.000000\n", + "55 EST 0.0 20579.0 0.000000\n", + "57 FIN 0.0 18427.0 0.000000\n", + "29 CAN 2.0 12566.0 0.000159\n", + "61 GBR 0.0 11712.0 0.000000\n", + "9 AUS 0.0 11154.0 0.000000\n", + "30 CHE 0.0 10632.0 0.000000\n", + "130 NZL 0.0 10205.0 0.000000" ] }, - "execution_count": 213, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "sjoin_gdf.head(2)" + "result_oecms.sort_values(\"pa\", ascending=False).head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_area = pd.concat(data)[['iso_3', 'year', 'area']].groupby(['iso_3', 'year']).sum().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = result_area.merge(result_oecms, on=\"iso_3\")" ] }, { From b7a145711148cd0cee3040c93054832dfc05bd0f Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 2 Oct 2024 13:10:52 +0200 Subject: [PATCH 10/16] changes to terrestrial coverage calculation --- data/notebooks/pipes_mock/precalc_sofia.ipynb | 4224 ++++++----------- 1 
file changed, 1452 insertions(+), 2772 deletions(-) diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb index 39944b86..92a397bc 100644 --- a/data/notebooks/pipes_mock/precalc_sofia.ipynb +++ b/data/notebooks/pipes_mock/precalc_sofia.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -27,7 +27,7 @@ "import dotenv\n", "import os\n", "import logging\n", - "from typing import Tuple, List\n", + "from typing import Tuple, List, Union\n", "import sys\n", "from pathlib import Path\n", "import pandas as pd\n", @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -113,9 +113,16 @@ "current_step = \"stats\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### General functions" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -444,6 +451,13 @@ "\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Code for terrestrial processing" + ] + }, { "cell_type": "code", "execution_count": null, @@ -662,7 +676,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -698,14 +712,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:22<00:00, 1.78s/it]\n" + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:15<00:00, 1.76s/it]\n" ] } ], @@ -835,446 +849,21 @@ }, { 
"cell_type": "code", - "execution_count": 7, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:30<00:00, 15.01s/it]\n" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:23<00:00, 14.54s/it]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [10:13<00:00, 104.86s/it]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 0%|▎ | 1/425 [00:02<17:53, 2.53s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 0%|▋ | 2/425 [00:03<09:41, 1.38s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 1%|▉ | 3/425 [00:03<04:52, 1.44it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 1%|█▎ | 4/425 [00:03<03:47, 1.85it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - 
"INFO:notebook:Processing chunk\n", - "Processing grid elements: 1%|█▌ | 5/425 [00:04<05:04, 1.38it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 1%|█▉ | 6/425 [00:04<04:24, 1.59it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 2%|██▏ | 7/425 [00:05<03:04, 2.27it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 2%|██▌ | 8/425 [00:05<02:39, 2.62it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 2%|██▊ | 9/425 [00:05<02:56, 2.36it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 3%|███▍ | 11/425 [00:06<02:40, 2.57it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 3%|███▋ | 12/425 [00:07<03:23, 2.03it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 3%|████▎ | 14/425 [00:07<02:46, 2.48it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 4%|████▋ | 15/425 [00:07<02:24, 2.83it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 4%|████▉ | 16/425 [00:08<03:33, 1.92it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 4%|█████▎ | 17/425 [00:09<05:07, 1.33it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 4%|█████▉ | 19/425 [00:10<04:37, 1.46it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 5%|██████▌ | 21/425 [00:11<03:56, 1.71it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 5%|██████▊ | 
22/425 [00:12<03:42, 1.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 5%|███████▏ | 23/425 [00:12<03:46, 1.77it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 6%|███████▊ | 25/425 [00:12<02:31, 2.64it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 6%|████████ | 26/425 [00:14<03:54, 1.70it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 6%|████████▍ | 27/425 [00:14<03:23, 1.95it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 7%|█████████ | 29/425 [00:14<02:12, 3.00it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 7%|█████████▎ | 30/425 [00:15<02:33, 2.57it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 7%|█████████▋ | 31/425 [00:16<03:18, 1.98it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 8%|█████████▉ | 32/425 [00:16<03:00, 2.18it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 8%|██████████▏ | 33/425 [00:16<02:10, 3.01it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 8%|██████████▌ | 34/425 [00:17<02:42, 2.40it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 8%|███████████▏ | 36/425 [00:17<01:59, 3.25it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 9%|███████████▍ | 37/425 [00:18<02:17, 2.82it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 9%|███████████▊ | 38/425 [00:19<03:17, 1.96it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 10%|████████████▋ | 41/425 [00:19<01:57, 3.28it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 10%|█████████████ | 42/425 [00:20<03:16, 1.95it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 10%|█████████████▎ | 43/425 [00:21<03:17, 
1.93it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 10%|█████████████▋ | 44/425 [00:21<02:43, 2.33it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 11%|█████████████▉ | 45/425 [00:22<02:59, 2.11it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 11%|██████████████▎ | 46/425 [00:22<02:22, 2.65it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 11%|██████████████▌ | 47/425 [00:23<03:19, 1.89it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 11%|██████████████▉ | 48/425 [00:23<03:23, 1.85it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 12%|███████████████▊ | 51/425 [00:25<04:05, 1.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 12%|████████████████▏ | 52/425 [00:27<04:44, 1.31it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 12%|████████████████▍ | 53/425 [00:27<04:51, 1.28it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 13%|████████████████▊ | 54/425 [00:29<06:33, 1.06s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 13%|█████████████████ | 55/425 [00:30<06:12, 1.01s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 13%|█████████████████▍ | 56/425 [00:33<08:23, 1.37s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 13%|█████████████████▋ | 57/425 [00:33<07:22, 1.20s/it]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 14%|██████████████████▎ | 59/425 [00:34<04:41, 1.30it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 14%|██████████████████▉ | 61/425 [00:35<04:23, 1.38it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 15%|███████████████████▎ | 62/425 [00:35<03:41, 1.64it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
15%|███████████████████▌ | 63/425 [00:37<05:31, 1.09it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 15%|███████████████████▉ | 64/425 [00:37<04:24, 1.37it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 16%|████████████████████▍ | 66/425 [00:38<02:27, 2.43it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 16%|████████████████████▊ | 67/425 [00:38<02:02, 2.91it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 16%|█████████████████████ | 68/425 [00:40<05:02, 1.18it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 16%|█████████████████████▍ | 69/425 [00:40<04:11, 1.41it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 16%|█████████████████████▋ | 70/425 [00:41<04:23, 1.35it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 17%|██████████████████████ | 71/425 [00:42<03:53, 1.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 17%|██████████████████████▎ | 72/425 [00:43<04:17, 1.37it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 17%|██████████████████████▋ | 73/425 [00:44<04:53, 1.20it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 18%|███████████████████████▎ | 75/425 [00:45<03:44, 1.56it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 18%|███████████████████████▌ | 76/425 [00:45<04:05, 1.42it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 18%|███████████████████████▉ | 77/425 [00:46<04:00, 1.45it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 18%|████████████████████████▏ | 78/425 [00:46<03:24, 1.70it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 19%|████████████████████████▌ | 79/425 [00:47<03:03, 1.88it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 19%|████████████████████████▊ | 80/425 [00:48<04:08, 
1.39it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 19%|█████████████████████████▍ | 82/425 [00:48<02:52, 1.99it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 20%|█████████████████████████▊ | 83/425 [00:49<02:19, 2.45it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 20%|██████████████████████████ | 84/425 [00:49<02:46, 2.04it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 20%|██████████████████████████▋ | 86/425 [00:50<02:19, 2.43it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 21%|███████████████████████████▎ | 88/425 [00:51<02:45, 2.03it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 21%|███████████████████████████▋ | 89/425 [00:51<02:42, 2.07it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 21%|███████████████████████████▉ | 90/425 [00:52<02:47, 2.00it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 21%|████████████████████████████▎ | 91/425 [00:52<02:36, 2.13it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 22%|████████████████████████████▌ | 92/425 [00:52<02:09, 2.58it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 22%|████████████████████████████▉ | 93/425 [00:53<03:29, 1.58it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 22%|█████████████████████████████▏ | 94/425 [00:54<03:04, 1.79it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 22%|█████████████████████████████▌ | 95/425 [00:54<02:52, 1.91it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 23%|█████████████████████████████▊ | 96/425 [00:54<02:12, 2.48it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 23%|██████████████████████████████▏ | 97/425 [00:55<02:44, 1.99it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
23%|██████████████████████████████▍ | 98/425 [00:55<02:24, 2.26it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 23%|██████████████████████████████▋ | 99/425 [00:55<01:41, 3.20it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 24%|██████████████████████████████▊ | 100/425 [00:56<02:39, 2.03it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 24%|███████████████████████████████▏ | 101/425 [00:57<02:22, 2.27it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 24%|███████████████████████████████▋ | 103/425 [00:57<01:40, 3.19it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 25%|████████████████████████████████▎ | 105/425 [00:57<00:49, 6.46it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 25%|████████████████████████████████▎ | 105/425 [00:57<00:49, 6.46it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 25%|████████████████████████████████▉ | 107/425 [00:59<02:59, 1.77it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 25%|█████████████████████████████████▎ | 108/425 [00:59<02:48, 1.88it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 26%|█████████████████████████████████▌ | 109/425 [01:00<03:22, 1.56it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 26%|██████████████████████████████████▏ | 111/425 [01:02<03:40, 1.43it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 27%|██████████████████████████████████▊ | 113/425 [01:02<02:38, 1.97it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 27%|███████████████████████████████████▏ | 114/425 [01:02<01:45, 2.94it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 27%|███████████████████████████████████▏ | 114/425 [01:02<01:45, 2.94it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid 
elements: 27%|███████████████████████████████████▍ | 115/425 [01:02<01:38, 3.16it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 27%|███████████████████████████████████▊ | 116/425 [01:02<01:33, 3.30it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 28%|████████████████████████████████████ | 117/425 [01:03<01:38, 3.11it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 28%|████████████████████████████████████▎ | 118/425 [01:03<01:26, 3.56it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 28%|████████████████████████████████████▉ | 120/425 [01:04<02:49, 1.80it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 28%|█████████████████████████████████████▎ | 121/425 [01:05<03:31, 1.44it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 29%|█████████████████████████████████████▌ | 122/425 [01:06<03:07, 1.62it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 29%|█████████████████████████████████████▉ | 123/425 [01:06<02:46, 1.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 29%|██████████████████████████████████████▏ | 124/425 [01:06<02:12, 2.27it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 29%|██████████████████████████████████████▌ | 125/425 [01:06<01:45, 2.86it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 30%|███████████████████████████████████████▍ | 128/425 [01:07<01:17, 3.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 30%|███████████████████████████████████████▊ | 129/425 [01:07<01:36, 3.07it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 31%|████████████████████████████████████████▉ | 133/425 [01:08<00:50, 
5.78it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 32%|█████████████████████████████████████████▎ | 134/425 [01:08<01:07, 4.29it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 32%|█████████████████████████████████████████▌ | 135/425 [01:09<01:22, 3.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 32%|█████████████████████████████████████████▉ | 136/425 [01:09<01:15, 3.84it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 32%|██████████████████████████████████████████▏ | 137/425 [01:09<01:33, 3.09it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 33%|██████████████████████████████████████████▊ | 139/425 [01:11<02:09, 2.20it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 33%|███████████████████████████████████████████▏ | 140/425 [01:11<01:56, 2.44it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 33%|███████████████████████████████████████████▍ | 141/425 [01:11<01:43, 2.75it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 33%|███████████████████████████████████████████▊ | 142/425 [01:13<03:30, 1.34it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 34%|████████████████████████████████████████████ | 143/425 [01:14<03:37, 1.29it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 34%|████████████████████████████████████████████▍ | 144/425 [01:14<03:20, 1.40it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 34%|█████████████████████████████████████████████ | 146/425 [01:19<06:39, 1.43s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 35%|█████████████████████████████████████████████▎ | 147/425 [01:21<07:03, 1.52s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 35%|█████████████████████████████████████████████▌ | 148/425 [01:22<06:19, 
1.37s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 35%|█████████████████████████████████████████████▉ | 149/425 [01:27<10:58, 2.39s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 35%|██████████████████████████████████████████████▏ | 150/425 [01:28<08:46, 1.91s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 36%|██████████████████████████████████████████████▌ | 151/425 [01:30<09:01, 1.97s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 36%|██████████████████████████████████████████████▊ | 152/425 [01:32<08:56, 1.96s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 36%|███████████████████████████████████████████████▏ | 153/425 [01:34<09:18, 2.05s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 36%|███████████████████████████████████████████████▍ | 154/425 [01:36<08:56, 1.98s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 36%|███████████████████████████████████████████████▊ | 155/425 [01:37<07:18, 1.62s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 37%|████████████████████████████████████████████████ | 156/425 [01:37<05:47, 1.29s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 37%|████████████████████████████████████████████████▍ | 157/425 [01:38<05:10, 1.16s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 37%|█████████████████████████████████████████████████ | 159/425 [01:38<02:55, 1.52it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 38%|█████████████████████████████████████████████████▎ | 160/425 [01:38<02:26, 1.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 38%|█████████████████████████████████████████████████▋ | 161/425 [01:39<02:29, 1.77it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 38%|█████████████████████████████████████████████████▉ | 162/425 [01:39<01:55, 
2.28it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 38%|██████████████████████████████████████████████████▏ | 163/425 [01:40<02:51, 1.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 39%|██████████████████████████████████████████████████▌ | 164/425 [01:41<02:34, 1.69it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 39%|██████████████████████████████████████████████████▊ | 165/425 [01:41<02:13, 1.95it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 39%|███████████████████████████████████████████████████▏ | 166/425 [01:41<02:09, 2.00it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 39%|███████████████████████████████████████████████████▍ | 167/425 [01:44<05:04, 1.18s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 40%|███████████████████████████████████████████████████▊ | 168/425 [01:45<04:10, 1.02it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 40%|████████████████████████████████████████████████████ | 169/425 [01:46<05:04, 1.19s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 40%|████████████████████████████████████████████████████▍ | 170/425 [01:52<10:47, 2.54s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 40%|████████████████████████████████████████████████████▋ | 171/425 [01:53<08:12, 1.94s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 40%|█████████████████████████████████████████████████████ | 172/425 [01:54<07:48, 1.85s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 41%|█████████████████████████████████████████████████████▎ | 173/425 [01:57<09:01, 2.15s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 41%|█████████████████████████████████████████████████████▋ | 174/425 [02:01<10:54, 2.61s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 41%|█████████████████████████████████████████████████████▉ | 175/425 
[02:04<10:57, 2.63s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 41%|██████████████████████████████████████████████████████▏ | 176/425 [02:07<12:20, 2.97s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 42%|██████████████████████████████████████████████████████▌ | 177/425 [02:11<13:48, 3.34s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 42%|██████████████████████████████████████████████████████▊ | 178/425 [02:12<10:26, 2.53s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 42%|███████████████████████████████████████████████████████▏ | 179/425 [02:16<11:29, 2.80s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 42%|███████████████████████████████████████████████████████▍ | 180/425 [02:17<09:44, 2.38s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 43%|███████████████████████████████████████████████████████▊ | 181/425 [02:21<11:25, 2.81s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 43%|████████████████████████████████████████████████████████ | 182/425 [02:22<09:44, 2.40s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 43%|████████████████████████████████████████████████████████▍ | 183/425 [02:22<06:55, 1.72s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 43%|████████████████████████████████████████████████████████▋ | 184/425 [02:23<05:05, 1.27s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 44%|█████████████████████████████████████████████████████████ | 185/425 [02:24<05:29, 1.37s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 44%|█████████████████████████████████████████████████████████▎ | 186/425 [02:26<06:15, 1.57s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 44%|█████████████████████████████████████████████████████████▋ | 187/425 [02:28<06:31, 1.64s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
44%|█████████████████████████████████████████████████████████▉ | 188/425 [02:29<05:58, 1.51s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 44%|██████████████████████████████████████████████████████████▎ | 189/425 [02:31<05:48, 1.48s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 45%|██████████████████████████████████████████████████████████▌ | 190/425 [02:32<05:23, 1.37s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 45%|██████████████████████████████████████████████████████████▊ | 191/425 [02:32<04:01, 1.03s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 45%|███████████████████████████████████████████████████████████▏ | 192/425 [02:33<04:21, 1.12s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 45%|███████████████████████████████████████████████████████████▍ | 193/425 [02:34<04:16, 1.10s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 46%|███████████████████████████████████████████████████████████▊ | 194/425 [02:36<04:23, 1.14s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 46%|████████████████████████████████████████████████████████████ | 195/425 [02:36<03:32, 1.08it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 46%|████████████████████████████████████████████████████████████▍ | 196/425 [02:36<02:49, 1.35it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 46%|████████████████████████████████████████████████████████████▋ | 197/425 [02:37<02:10, 1.74it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 47%|█████████████████████████████████████████████████████████████ | 198/425 [02:37<02:03, 1.84it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 47%|█████████████████████████████████████████████████████████████▎ | 199/425 [02:38<02:19, 1.62it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 
47%|█████████████████████████████████████████████████████████████▉ | 201/425 [02:39<01:55, 1.94it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 48%|██████████████████████████████████████████████████████████████▌ | 203/425 [02:39<01:19, 2.80it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 48%|██████████████████████████████████████████████████████████████▉ | 204/425 [02:40<01:54, 1.92it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 48%|███████████████████████████████████████████████████████████████▏ | 205/425 [02:43<04:15, 1.16s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 48%|███████████████████████████████████████████████████████████████▍ | 206/425 [02:44<04:00, 1.10s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 49%|███████████████████████████████████████████████████████████████▊ | 207/425 [02:45<04:06, 1.13s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 49%|████████████████████████████████████████████████████████████████ | 208/425 [02:46<03:31, 1.03it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 49%|████████████████████████████████████████████████████████████████▍ | 209/425 [02:46<02:38, 1.36it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 49%|████████████████████████████████████████████████████████████████▋ | 210/425 [02:47<02:37, 1.36it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████ | 211/425 [02:47<02:25, 1.47it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████▎ | 212/425 [02:48<02:14, 1.58it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████▋ | 213/425 [02:48<02:04, 1.70it/s]INFO:notebook:Processing 
chunk\n", - "Processing grid elements: 50%|█████████████████████████████████████████████████████████████████▉ | 214/425 [02:49<01:47, 1.97it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 51%|██████████████████████████████████████████████████████████████████▎ | 215/425 [02:49<01:28, 2.37it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 51%|██████████████████████████████████████████████████████████████████▌ | 216/425 [02:49<01:37, 2.14it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 51%|██████████████████████████████████████████████████████████████████▉ | 217/425 [02:52<04:17, 1.24s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 51%|███████████████████████████████████████████████████████████████████▏ | 218/425 [02:53<03:27, 1.00s/it]INFO:notebook:Processing chunk\n", - "Processing grid elements: 52%|███████████████████████████████████████████████████████████████████▌ | 219/425 [02:53<03:04, 1.12it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 52%|████████████████████████████████████████████████████████████████████ | 221/425 [02:54<01:46, 1.91it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 52%|████████████████████████████████████████████████████████████████████▍ | 222/425 [02:54<01:34, 2.16it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 53%|█████████████████████████████████████████████████████████████████████ | 224/425 [02:56<02:04, 1.62it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 53%|█████████████████████████████████████████████████████████████████████▎ | 225/425 [02:56<02:03, 1.62it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 53%|█████████████████████████████████████████████████████████████████████▋ | 226/425 [02:57<02:15, 1.47it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
53%|█████████████████████████████████████████████████████████████████████▉ | 227/425 [02:57<01:59, 1.66it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 54%|██████████████████████████████████████████████████████████████████████▌ | 229/425 [02:58<01:30, 2.17it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 54%|██████████████████████████████████████████████████████████████████████▉ | 230/425 [02:59<01:57, 1.66it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 54%|███████████████████████████████████████████████████████████████████████▏ | 231/425 [03:00<01:56, 1.67it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 55%|███████████████████████████████████████████████████████████████████████▌ | 232/425 [03:00<01:32, 2.09it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 55%|███████████████████████████████████████████████████████████████████████▊ | 233/425 [03:00<01:17, 2.47it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 55%|████████████████████████████████████████████████████████████████████████▏ | 234/425 [03:00<01:04, 2.95it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 55%|████████████████████████████████████████████████████████████████████████▍ | 235/425 [03:00<01:00, 3.14it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 56%|████████████████████████████████████████████████████████████████████████▋ | 236/425 [03:01<00:59, 3.16it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 56%|█████████████████████████████████████████████████████████████████████████ | 237/425 [03:01<01:16, 2.46it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 56%|█████████████████████████████████████████████████████████████████████████▎ | 238/425 [03:02<01:04, 2.88it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
56%|█████████████████████████████████████████████████████████████████████████▋ | 239/425 [03:02<01:05, 2.84it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 56%|█████████████████████████████████████████████████████████████████████████▉ | 240/425 [03:03<01:31, 2.02it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 57%|██████████████████████████████████████████████████████████████████████████▎ | 241/425 [03:03<01:28, 2.09it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 57%|██████████████████████████████████████████████████████████████████████████▌ | 242/425 [03:04<02:05, 1.46it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 57%|██████████████████████████████████████████████████████████████████████████▉ | 243/425 [03:05<01:38, 1.84it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 58%|███████████████████████████████████████████████████████████████████████████▌ | 245/425 [03:05<01:17, 2.33it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 58%|███████████████████████████████████████████████████████████████████████████▊ | 246/425 [03:06<01:26, 2.06it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 58%|████████████████████████████████████████████████████████████████████████████▏ | 247/425 [03:06<01:11, 2.49it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 58%|████████████████████████████████████████████████████████████████████████████▍ | 248/425 [03:06<01:02, 2.82it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 59%|████████████████████████████████████████████████████████████████████████████▊ | 249/425 [03:06<00:50, 3.49it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 59%|█████████████████████████████████████████████████████████████████████████████ | 250/425 [03:07<01:08, 2.55it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
59%|█████████████████████████████████████████████████████████████████████████████▎ | 251/425 [03:07<01:05, 2.66it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 59%|█████████████████████████████████████████████████████████████████████████████▋ | 252/425 [03:08<00:56, 3.09it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 60%|█████████████████████████████████████████████████████████████████████████████▉ | 253/425 [03:08<01:08, 2.51it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 60%|██████████████████████████████████████████████████████████████████████████████▎ | 254/425 [03:08<00:57, 2.99it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 60%|██████████████████████████████████████████████████████████████████████████████▌ | 255/425 [03:09<01:09, 2.46it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 60%|██████████████████████████████████████████████████████████████████████████████▉ | 256/425 [03:09<01:07, 2.50it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 60%|███████████████████████████████████████████████████████████████████████████████▏ | 257/425 [03:09<00:54, 3.11it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 61%|███████████████████████████████████████████████████████████████████████████████▌ | 258/425 [03:10<00:46, 3.59it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 61%|████████████████████████████████████████████████████████████████████████████████▏ | 260/425 [03:10<00:34, 4.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 61%|████████████████████████████████████████████████████████████████████████████████▍ | 261/425 [03:10<00:38, 4.27it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 62%|█████████████████████████████████████████████████████████████████████████████████ | 263/425 [03:11<00:52, 
3.08it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 62%|█████████████████████████████████████████████████████████████████████████████████▎ | 264/425 [03:11<00:51, 3.11it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 62%|█████████████████████████████████████████████████████████████████████████████████▋ | 265/425 [03:11<00:45, 3.51it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 63%|█████████████████████████████████████████████████████████████████████████████████▉ | 266/425 [03:12<00:41, 3.86it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 63%|██████████████████████████████████████████████████████████████████████████████████▎ | 267/425 [03:13<01:18, 2.01it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 63%|██████████████████████████████████████████████████████████████████████████████████▌ | 268/425 [03:13<01:26, 1.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 63%|██████████████████████████████████████████████████████████████████████████████████▉ | 269/425 [03:14<01:31, 1.70it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 64%|███████████████████████████████████████████████████████████████████████████████████▏ | 270/425 [03:14<01:17, 1.99it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 64%|███████████████████████████████████████████████████████████████████████████████████▌ | 271/425 [03:14<01:10, 2.18it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 64%|███████████████████████████████████████████████████████████████████████████████████▊ | 272/425 [03:15<01:30, 1.70it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 64%|████████████████████████████████████████████████████████████████████████████████████▍ | 274/425 [03:16<01:09, 2.17it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
65%|████████████████████████████████████████████████████████████████████████████████████▊ | 275/425 [03:16<01:08, 2.20it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 65%|█████████████████████████████████████████████████████████████████████████████████████ | 276/425 [03:17<01:15, 1.97it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 65%|█████████████████████████████████████████████████████████████████████████████████████▋ | 278/425 [03:17<01:01, 2.39it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 66%|█████████████████████████████████████████████████████████████████████████████████████▉ | 279/425 [03:18<00:55, 2.62it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 66%|██████████████████████████████████████████████████████████████████████████████████████▎ | 280/425 [03:18<00:54, 2.67it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 66%|██████████████████████████████████████████████████████████████████████████████████████▌ | 281/425 [03:18<00:51, 2.80it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 66%|██████████████████████████████████████████████████████████████████████████████████████▉ | 282/425 [03:19<00:46, 3.04it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 67%|███████████████████████████████████████████████████████████████████████████████████████▏ | 283/425 [03:19<01:01, 2.33it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 67%|███████████████████████████████████████████████████████████████████████████████████████▌ | 284/425 [03:20<00:55, 2.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 67%|████████████████████████████████████████████████████████████████████████████████████████▏ | 286/425 [03:20<00:31, 4.35it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 
68%|████████████████████████████████████████████████████████████████████████████████████████▍ | 287/425 [03:21<01:06, 2.08it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 68%|█████████████████████████████████████████████████████████████████████████████████████████ | 289/425 [03:21<01:02, 2.19it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 68%|█████████████████████████████████████████████████████████████████████████████████████████▍ | 290/425 [03:21<00:53, 2.53it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 68%|█████████████████████████████████████████████████████████████████████████████████████████▋ | 291/425 [03:22<01:10, 1.90it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 69%|██████████████████████████████████████████████████████████████████████████████████████████ | 292/425 [03:23<01:02, 2.13it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 69%|██████████████████████████████████████████████████████████████████████████████████████████▌ | 294/425 [03:23<00:34, 3.80it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 69%|██████████████████████████████████████████████████████████████████████████████████████████▉ | 295/425 [03:23<00:31, 4.07it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 70%|███████████████████████████████████████████████████████████████████████████████████████████▏ | 296/425 [03:24<00:46, 2.78it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 70%|███████████████████████████████████████████████████████████████████████████████████████████▌ | 297/425 [03:24<00:45, 2.81it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 70%|███████████████████████████████████████████████████████████████████████████████████████████▊ | 298/425 [03:24<00:44, 2.82it/s]INFO:notebook:Processing chunk\n", - "Processing grid 
elements: 70%|████████████████████████████████████████████████████████████████████████████████████████████▏ | 299/425 [03:26<01:33, 1.34it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 71%|████████████████████████████████████████████████████████████████████████████████████████████▍ | 300/425 [03:26<01:16, 1.64it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 71%|████████████████████████████████████████████████████████████████████████████████████████████▊ | 301/425 [03:26<00:50, 2.45it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 71%|█████████████████████████████████████████████████████████████████████████████████████████████ | 302/425 [03:27<00:48, 2.53it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 71%|█████████████████████████████████████████████████████████████████████████████████████████████▍ | 303/425 [03:27<00:42, 2.87it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 72%|█████████████████████████████████████████████████████████████████████████████████████████████▋ | 304/425 [03:27<00:41, 2.91it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████ | 305/425 [03:28<00:46, 2.60it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████▎ | 306/425 [03:28<00:54, 2.20it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████▋ | 307/425 [03:29<00:44, 2.64it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 72%|██████████████████████████████████████████████████████████████████████████████████████████████▉ | 308/425 [03:29<00:46, 2.53it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
73%|███████████████████████████████████████████████████████████████████████████████████████████████▏ | 309/425 [03:30<01:14, 1.56it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 73%|███████████████████████████████████████████████████████████████████████████████████████████████▌ | 310/425 [03:31<01:09, 1.66it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 73%|████████████████████████████████████████████████████████████████████████████████████████████████▏ | 312/425 [03:31<00:36, 3.12it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 74%|████████████████████████████████████████████████████████████████████████████████████████████████▊ | 314/425 [03:32<00:40, 2.76it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 74%|█████████████████████████████████████████████████████████████████████████████████████████████████▍ | 316/425 [03:32<00:27, 3.97it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████ | 318/425 [03:32<00:25, 4.15it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████▎ | 319/425 [03:33<00:26, 4.06it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████▋ | 320/425 [03:33<00:22, 4.77it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 76%|██████████████████████████████████████████████████████████████████████████████████████████████████▉ | 321/425 [03:33<00:25, 4.07it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
76%|███████████████████████████████████████████████████████████████████████████████████████████████████▎ | 322/425 [03:34<00:41, 2.45it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 76%|███████████████████████████████████████████████████████████████████████████████████████████████████▌ | 323/425 [03:34<00:44, 2.27it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 77%|████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 326/425 [03:35<00:38, 2.58it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 77%|████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 327/425 [03:36<00:40, 2.42it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████ | 328/425 [03:36<00:36, 2.68it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 329/425 [03:36<00:30, 3.11it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 330/425 [03:37<00:46, 2.02it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████ | 331/425 [03:38<00:44, 2.12it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 332/425 [03:38<00:40, 2.29it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
78%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 333/425 [03:38<00:34, 2.65it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 334/425 [03:39<00:31, 2.85it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 336/425 [03:39<00:35, 2.48it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 337/425 [03:40<00:31, 2.76it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 338/425 [03:40<00:41, 2.11it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 339/425 [03:41<00:34, 2.47it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 340/425 [03:41<00:33, 2.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████ | 341/425 [03:42<00:43, 1.94it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 342/425 [03:43<00:50, 1.64it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 343/425 [03:43<00:44, 1.86it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████ | 344/425 [03:44<00:51, 1.58it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 345/425 [03:44<00:51, 1.56it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 348/425 [03:45<00:14, 5.38it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 349/425 [03:45<00:14, 5.38it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 351/425 [03:45<00:17, 4.19it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 353/425 [03:46<00:18, 3.90it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 354/425 [03:46<00:20, 3.45it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 356/425 [03:47<00:21, 
3.28it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 357/425 [03:47<00:27, 2.47it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 358/425 [03:48<00:30, 2.19it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 359/425 [03:48<00:27, 2.38it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 360/425 [03:49<00:31, 2.09it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 362/425 [03:49<00:17, 3.55it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 363/425 [03:50<00:23, 2.63it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 365/425 [03:50<00:15, 3.84it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 366/425 [03:51<00:17, 3.34it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 367/425 [03:51<00:16, 3.52it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 369/425 [03:51<00:15, 3.59it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 370/425 [03:51<00:13, 4.07it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 371/425 [03:52<00:20, 2.66it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 372/425 [03:52<00:22, 2.32it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 373/425 [03:53<00:25, 2.07it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 374/425 [03:54<00:27, 1.85it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 376/425 [03:54<00:15, 3.25it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 379/425 [03:56<00:26, 1.73it/s]INFO:notebook:Processing chunk\n", 
- "INFO:notebook:Processing chunk\n", - "Processing grid elements: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 380/425 [03:56<00:24, 1.86it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 381/425 [03:56<00:22, 1.97it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 382/425 [03:56<00:18, 2.37it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 383/425 [03:57<00:18, 2.29it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 385/425 [03:57<00:13, 2.98it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 386/425 [03:58<00:15, 2.44it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 387/425 [03:59<00:19, 1.99it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 388/425 [03:59<00:14, 2.48it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 
92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 389/425 [03:59<00:11, 3.13it/s]INFO:notebook:Processing chunk\n", - "INFO:notebook:Processing chunk\n", - "Processing grid elements: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 391/425 [03:59<00:07, 4.48it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 392/425 [03:59<00:06, 5.04it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 393/425 [03:59<00:06, 5.08it/s]INFO:notebook:Processing chunk\n", - "Processing grid elements: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 425/425 [19:35<00:00, 208.44s/it]" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:23<00:00, 4.58s/it]" ] } ], @@ -1289,7 +878,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1314,40 +903,69 @@ " \n", " \n", " year\n", - " PA_DEF\n", " iso_3\n", " area\n", - " protectedAreasCount\n", + " protected_areas_count\n", + " oecms\n", + " pas\n", + " total_marine_area\n", + " protected_area\n", + " coverage\n", + " global_contribution\n", + " is_last_year\n", + " environment\n", " \n", " \n", " \n", " \n", " 0\n", " 2010\n", - " 0\n", - " AF\n", - " 206.100207\n", - " 10.0\n", + " ABNJ\n", + " 996236.125498\n", + " 29.0\n", + " 0.00000\n", + " 100.00000\n", + " 212881389.0\n", + " 996236.13\n", + " 0.467977\n", + " 0.275966\n", + 
" False\n", + " marine\n", " \n", " \n", " 1\n", " 2010\n", - " 0\n", - " AS\n", - " 31956.310701\n", - " 24.0\n", + " AF\n", + " 129790.939474\n", + " 427.0\n", + " 2.34192\n", + " 97.65808\n", + " 14878058.0\n", + " 129790.94\n", + " 0.872365\n", + " 0.035953\n", + " False\n", + " marine\n", " \n", " \n", "\n", "" ], "text/plain": [ - " year PA_DEF iso_3 area protectedAreasCount\n", - "0 2010 0 AF 206.100207 10.0\n", - "1 2010 0 AS 31956.310701 24.0" + " year iso_3 area protected_areas_count oecms pas \\\n", + "0 2010 ABNJ 996236.125498 29.0 0.00000 100.00000 \n", + "1 2010 AF 129790.939474 427.0 2.34192 97.65808 \n", + "\n", + " total_marine_area protected_area coverage global_contribution \\\n", + "0 212881389.0 996236.13 0.467977 0.275966 \n", + "1 14878058.0 129790.94 0.872365 0.035953 \n", + "\n", + " is_last_year environment \n", + "0 False marine \n", + "1 False marine " ] }, - "execution_count": 16, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1373,67 +991,31 @@ " }\n", " )\n", " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", - "# .pipe(add_mpa_oecm_percentages)\n", - "# .pipe(add_total_marine_area)\n", - "# .pipe(coverage_stats2)\n", - "# .pipe(calculate_coverage_percentage_pa)\n", - "# .pipe(calculate_global_contribution)\n", - "# .pipe(add_is_last_year)\n", - "# .pipe(add_environment)\n", - "# )\n", + " .pipe(add_mpa_oecm_percentages)\n", + " .pipe(add_total_marine_area)\n", + " .pipe(coverage_stats2)\n", + " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", "\n", "\n", - "# NewProtectedAreaExtentSchema(\n", - "# coverage.pipe(\n", - "# output,\n", - "# \"iso_3\",\n", - "# {},\n", - "# {},\n", - "# [\"area\", \"iso_3\", 'total_marine_area'],\n", - "# )\n", - "# ).to_csv(\n", - "# output_file,\n", - "# index=True,\n", + "NewProtectedAreaExtentSchema(\n", + " 
coverage.pipe(\n", + " output,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"area\", \"iso_3\", 'total_marine_area'],\n", + " )\n", + ").to_csv(\n", + " output_file,\n", + " index=True,\n", ")\n", "coverage.head(2)" ] }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['AF', 'AS', 'EU', 'SA', 'NA', 'WA', 'COL', 'ESP', 'GLOB', 'MAR',\n", - " 'PHL', 'ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATG', 'AUS', 'AZE',\n", - " 'BEL', 'BGD', 'BGR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN', 'CAN',\n", - " 'CHL', 'CHN', 'CMR', 'COD', 'COG', 'COM', 'CPV', 'CRI', 'CUB',\n", - " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'EST',\n", - " 'FIN', 'FJI', 'FRA', 'GAB', 'GBR', 'GEO', 'GIN', 'GMB', 'GNB',\n", - " 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'HTI', 'IDN', 'IRL',\n", - " 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN', 'KHM',\n", - " 'KIR', 'KNA', 'KOR', 'KWT', 'LBN', 'LBR', 'LCA', 'LKA', 'LTU',\n", - " 'LVA', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE',\n", - " 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR',\n", - " 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PLW', 'PNG', 'POL', 'PRT',\n", - " 'QAT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB', 'SLE', 'SLV',\n", - " 'STP', 'SUR', 'SVN', 'SWE', 'SYC', 'THA', 'TKM', 'TLS', 'TON',\n", - " 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'URY', 'USA', 'VCT',\n", - " 'VEN', 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'BHR', 'JOR'],\n", - " dtype=object)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "coverage.iso_3.unique()" - ] - }, { "cell_type": "code", "execution_count": 72, @@ -1503,100 +1085,79 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Mpa atlas - country stats Fully or highly protected" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We are going to use the intermediate data from eez, in order to create a dataset that 
can be used as a land mask.\n", - "The steps are:\n", - "1. Load eez\n", - "2. Spatial inner Join the eez dataset with the Mpaatlas one\n", - "3. iso assign using the sovereign one provided by mpaatlas\n", - "4. dissolve by location\n", - "5. calculate the area for global regions and eez countries ussing mollwide projection\n", - "6. prepare the data to be ingested in strapi\n", - "7. upload the data to strapi" + "### Coverage stats - terrestrial" ] }, { "cell_type": "code", - "execution_count": 73, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "pipe = \"mpaatlas\"\n", - "strapi_collection = \"mpaa-protection-level-stat\"\n", + "pipe = \"mpa-terrestrial\"\n", + "step = \"preprocess\"\n", + "strapi_collection_mpas = \"mpa-terrestrial\"\n", "\n", - "pipe_dir_eez = FileConventionHandler(\"eez\")\n", - "pipe_dir_mpaatlas = FileConventionHandler(pipe)\n", - "output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\n", - " \"mpaatlas_protection_level.csv\"\n", - ")\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_gadm 
= FileConventionHandler(\"gadm\")\n", "\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", - "# Download the mpas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = working_folder.get_temp_file_path(step)\n", + "output_file_sjoin = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")\n", + "output_file_dissolve = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_dissolve.csv\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", "\n", - "# Load the data\n", - "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "mpaatlas_intermediate = gpd.read_file(\n", - " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", - ").pipe(clean_geometries)" + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 9.59it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 2.95s/it]" - ] 
- }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "eez_mpaatlas_data_join = await spatial_join(\n", - " eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", - ")" + "# # Load the data\n", + "# wdpa = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", + "# clean_geometries\n", + "# )\n", + "# gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "\n", + "# gadm.sindex\n", + "# wdpa.sindex" ] }, { @@ -1605,113 +1166,83 @@ "metadata": {}, "outputs": [], "source": [ - "# To get an idea of the spatial join results\n", - "# eez_mpaatlas_data_join.to_file(\n", - "# pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin.shp\"),\n", - "# driver=\"ESRI Shapefile\",\n", - "# )" + "# # Spatial join using overlay\n", + "# wdpa_subset = wdpa[\n", + "# ~(\n", + "# (wdpa.bounds.minx < -181)\n", + "# | (wdpa.bounds.miny < -91)\n", + "# | (wdpa.bounds.maxx > 181)\n", + "# | (wdpa.bounds.maxy > 91)\n", + "# )\n", + "# ].reset_index(drop=True)\n", + "\n", + "# sjoin_gdf = await spatial_join(wdpa_subset, gadm)\n", + "# sjoin_gdf.rename(columns={\"GID_0\": \"iso_3\"}, inplace=True)" ] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pyogrio._io:Created 54 records\n" - ] - } - ], - "source": [ - "eez_mpaatlas_data_join.dissolve(by=[\"protecti_1\", \"location_i\"], aggfunc={\"name\": \"count\"}).reset_index().to_file(\n", - "pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin_dissolved.shp\"),\n", - "driver=\"ESRI Shapefile\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [], - "source": [ - "result = (\n", - " 
eez_mpaatlas_data_join.rename(columns={\"location_i\": \"iso_3\"})\n", - " .pipe(process_mpaatlas_data) \n", - " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", - " .pipe(separate_parent_iso)\n", - " .replace(\n", - " {\n", - " \"location_i\": {\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " }\n", - " }\n", - " )\n", - " .pipe(add_region_iso, iso_column=\"iso_3\")\n", - " .pipe(calculate_stats, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", - " .query('protecti_1 != \"less protected or unknown\"')\n", - " .pipe(fix_monaco, iso_column=\"iso_3\", area_column=\"area_km2\")\n", - " .pipe(add_total_marine_area)\n", - " .pipe(calculate_coverage_percentage_mpatlas)\n", - " .pipe(\n", - " output,\n", - " iso_column=\"iso_3\",\n", - " rep_d={\n", - " \"protecti_1\": {\n", - " \"fully or highly protected\": 1,\n", - " }\n", - " },\n", - " rename={\"protecti_1\": \"mpaa_protection_level\", \"area_km2\": \"area\"},\n", - " drop_cols=[\"total_marine_area\", \"iso_3\"],\n", - " )\n", - ")\n", - "\n", - "NewProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", - " output_file, index=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOCOUNTRYiso_3area_km2geometry
\n", + "
" + ], + "text/plain": [ + "Empty GeoDataFrame\n", + "Columns: [WDPAID, WDPA_PID, PA_DEF, NAME, DESIG_ENG, IUCN_CAT, MARINE, GIS_AREA, STATUS, STATUS_YR, PARENT_ISO, COUNTRY, iso_3, area_km2, geometry]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "remote_path = 'vizzuality_processed_data/strapi_tables/mpaatlas_protection_level.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi_collection = \"mpaa-protection-level-stat\"" + "# # test that we have not produce duplicates\n", + "# sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", + "# \"WDPA_PID\"\n", + "# )" ] }, { @@ -1722,7 +1253,7 @@ { "data": { "text/plain": [ - "" + "289352" ] }, "metadata": {}, @@ -1730,65 +1261,41 @@ } ], "source": [ - "# strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" + "# sjoin_gdf = filter_by_exluding_propossed_mpas(sjoin_gdf)\n", + "# len(sjoin_gdf)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [ - "# strapi.importCollectionData(\n", - "# strapi_collection,\n", - "# output_file,\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Protected seas - fishing protection level" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + "INFO:pyogrio._io:Created 289,352 records\n" ] } ], "source": [ - "pipe = \"protectedseas\"\n", - "strapi_collection = \"fishing-protection-level-stat\"\n", - "\n", - "pipe_dir = 
FileConventionHandler(pipe)\n", - "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"protectedseas_stats.xlsx\")\n", - "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"lfp.csv\")\n", - "\n", - "# Download the protected seas file && unzip it\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=\"vizzuality_processed_data/protectedseas/preprocess/protectedseas_stats.xlsx\",\n", - " file=input_file,\n", - " operation=\"r\",\n", - ")\n", - "\n", - "# Load the data\n", - "protectedseas_intermediate = pd.read_excel(input_file)" + "# # Save the spatial join\n", + "# sjoin_gdf.to_file(output_file_sjoin, driver=\"ESRI Shapefile\")" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sjoin_gdf = gpd.read_file(output_file_sjoin)\n", + "sjoin_gdf[\"STATUS_YR\"] = sjoin_gdf[\"STATUS_YR\"].astype(\"Int64\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1811,196 +1318,153 @@ "\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", "
iso_teriso_sovincludes_multi_jurisdictional_areaslfparea_sqkmtotal_areapct_totalPA_DEFiso_3year10protected_areas_count
320NaNESPTrue5142.9730101011023.7760.0141410AFG2010100.010.0
321NaNESPTrue41639.6820761011023.7760.1621801AFG2011100.010.0
322NaNESPTrue3214532.8498001011023.77621.2193672AFG2012100.010.0
323NaNESPTrue215064.1327701011023.7761.4899883AFG2013100.010.0
324NaNESPTrue1779644.1388001011023.77677.1143234AFG2014100.010.0
..................
2884ZWE20202290.0229.0
2885ZWE20212290.0229.0
2886ZWE20222290.0229.0
2887ZWE20232290.0229.0
2888ZWE20242290.0229.0
\n", + "

2889 rows × 5 columns

\n", "" ], "text/plain": [ - " iso_ter iso_sov includes_multi_jurisdictional_areas lfp area_sqkm \\\n", - "320 NaN ESP True 5 142.973010 \n", - "321 NaN ESP True 4 1639.682076 \n", - "322 NaN ESP True 3 214532.849800 \n", - "323 NaN ESP True 2 15064.132770 \n", - "324 NaN ESP True 1 779644.138800 \n", + "PA_DEF iso_3 year 1 0 protected_areas_count\n", + "0 AFG 2010 10 0.0 10.0\n", + "1 AFG 2011 10 0.0 10.0\n", + "2 AFG 2012 10 0.0 10.0\n", + "3 AFG 2013 10 0.0 10.0\n", + "4 AFG 2014 10 0.0 10.0\n", + "... ... ... ... ... ...\n", + "2884 ZWE 2020 229 0.0 229.0\n", + "2885 ZWE 2021 229 0.0 229.0\n", + "2886 ZWE 2022 229 0.0 229.0\n", + "2887 ZWE 2023 229 0.0 229.0\n", + "2888 ZWE 2024 229 0.0 229.0\n", "\n", - " total_area pct_total \n", - "320 1011023.776 0.014141 \n", - "321 1011023.776 0.162180 \n", - "322 1011023.776 21.219367 \n", - "323 1011023.776 1.489988 \n", - "324 1011023.776 77.114323 " + "[2889 rows x 5 columns]" ] }, - "execution_count": 84, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "protectedseas_intermediate[\n", - " (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " )\n", - " | (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", - " & ~protectedseas_intermediate.iso_sov.isin(\n", - " protectedseas_intermediate[\n", - " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " ].iso_sov.unique()\n", - " )\n", - " )\n", - "][protectedseas_intermediate.iso_sov.eq(\"ESP\")]" + "# # Calculate wdpa cumulative counts and pa and oecm percentages\n", + "cumulative_counts = cumulative_pa_def_counts(sjoin_gdf)\n", + "cumulative_counts" ] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "final = (\n", - " 
protectedseas_intermediate[\n", - " (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " )\n", - " | (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", - " & ~protectedseas_intermediate.iso_sov.isin(\n", - " protectedseas_intermediate[\n", - " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " ].iso_sov.unique()\n", - " )\n", - " )\n", - " ].replace(\n", - " {\n", - " \"lfp\": {\n", - " 5: \"highly\",\n", - " 4: \"highly\",\n", - " 3: \"moderately\",\n", - " 2: \"less\",\n", - " 1: \"less\",\n", - " },\n", - " }\n", - " ).groupby([\"iso_sov\", \"lfp\"]).agg({\"area_sqkm\": \"sum\", \"total_area\": \"max\"}).reset_index()\n", - " .pipe(\n", - " calculate_global_area,\n", - " gby_col=[\"lfp\"],\n", - " iso_column=\"iso_sov\",\n", - " agg_ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", - " )\n", - " .pipe(add_region_iso, iso_column=\"iso_sov\")\n", - " .pipe(\n", - " calculate_stats,\n", - " gby_col=[\"lfp\"],\n", - " ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", - " iso_column=\"iso_sov\",\n", - " )\n", - " .pipe(lambda x: x.assign(pct=round((x.area_sqkm / x.total_area)*100, 2)))\n", - " .pipe(\n", - " output,\n", - " iso_column=\"iso_sov\",\n", - " rep_d={\n", - " \"lfp\": {\n", - " \"highly\": 1,\n", - " \"moderately\": 2,\n", - " \"less\": 3,\n", - " }\n", - " },\n", - " rename={\"lfp\": \"fishing_protection_level\", \"area_sqkm\": \"area\"},\n", - " drop_cols=[\"iso_sov\", \"total_area\"],\n", - " )\n", - ")\n", - "FPLSchema(final[final.location.notna()]).to_csv(output_file, index=True)" + "# # Dissolve geometries to calculate the coverage\n", + "# data = await process_grid(sjoin_gdf)" ] }, { "cell_type": "code", - "execution_count": 88, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], + "outputs": [], "source": [ - "remote_path = 'vizzuality_processed_data/strapi_tables/lfp.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" + "# tpa = pd.concat(data, ignore_index=True).drop(columns=['STATUS_YR', 'index']).rename(columns={'area': 'protected_area'})\n", + "# tpa.head(5)" ] }, { @@ -2009,7 +1473,10 @@ "metadata": {}, "outputs": [], "source": [ - "# strapi.deleteCollectionData(strapi_collection, list(range(1, 500)))" + "# # Group by 'iso_3' and 'year' and sum the 'area'\n", + "# tpa_grouped = tpa.groupby(['iso_3', 'year'], as_index=False)['protected_area'].sum()\n", + "# tpa_grouped.reset_index(drop=True, inplace=True)\n", + "# tpa_grouped.head(5)" ] }, { @@ -2018,335 +1485,13 @@ "metadata": {}, "outputs": [], "source": [ - "# strapi.importCollectionData(\n", - "# strapi_collection,\n", - "# output_file,\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Country detail table data - all together WIP" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " 1- lower case the columns \n", - "2- separate location that its regime is in dispute or on join regime \n", - "3- calcualte area for mpaatlas data \n", - "4- rename columns for merge \n", - "5- merge maaatlas and mpa data identifying the source \n", - "6- identify child resources and set them as childs \n", - "7- calculate bbox \n", - "8- set child resources \n", - "9- prepare output for batch export \n", - "10- upload data to strapi " + "# # save to csv\n", + "# tpa_grouped.to_csv(output_file_dissolve, index=False)" ] }, { "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/pa/processed/pa_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/pa/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipe = \"pa\"\n", - "strapi_collection_pas = \"pa\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", - "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")\n", - "\n", - "# Download the protected atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the data\n", - "pa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", - " clean_geometries\n", - ")\n", - "mpaatlas_intermediate = gpd.read_file(\n", - " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", - ").pipe(clean_geometries)" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in 
\u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'environment'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/series.py:1298\u001b[0m, in \u001b[0;36mSeries.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 1297\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1298\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_with_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mwarn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwarn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m 1300\u001b[0m \u001b[38;5;66;03m# We have a scalar (or for MultiIndex or object-dtype, scalar-like)\u001b[39;00m\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;66;03m# key that is not present in self.index.\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/series.py:1370\u001b[0m, in \u001b[0;36mSeries._set_with_engine\u001b[0;34m(self, key, value, warn)\u001b[0m\n\u001b[1;32m 1369\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_set_with_engine\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, value, warn: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1370\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1372\u001b[0m \u001b[38;5;66;03m# this is equivalent to self._values[key] = value\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we 
have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n", - "\u001b[0;31mKeyError\u001b[0m: 'environment'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[92], line 48\u001b[0m\n\u001b[1;32m 1\u001b[0m init_table \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mpa_intermediate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns_to_lower\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mseparate_parent_iso\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miso_column\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparent_iso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchange_ata_to_abnj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrename\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mparent_iso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43miso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstatus_yr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43myear\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgis_m_area\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marea_km2_marine\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgis_area\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marea_km2_terrestrial\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mstatus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43massign\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprotected_planet\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mmpaatlas_intermediate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcalculate_area\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextract_wdpaid_mpaatlas\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mseparate_parent_iso\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miso_column\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocation_i\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrename\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocation_i\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43miso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_pid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdesignatio\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdesig_eng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43massign\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmpaatlas\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmpa_zone_i\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mInt64\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43miso\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCOK\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNZL\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mIOT\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGBR\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNIU\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNZL\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSHN\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGBR\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSJM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNOR\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUMI\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUSA\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNCL\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mFRA\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 46\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_pid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpa_pid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msource\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m---> 48\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocess_area_and_environment\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 49\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/generic.py:6231\u001b[0m, in \u001b[0;36mNDFrame.pipe\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 6230\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m common\u001b[38;5;241m.\u001b[39mpipe(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m), func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m-> 6231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcommon\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/common.py:502\u001b[0m, in \u001b[0;36mpipe\u001b[0;34m(obj, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 502\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[91], line 18\u001b[0m, in \u001b[0;36mprocess_area_and_environment\u001b[0;34m(gdf)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmarine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m0\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 17\u001b[0m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124marea_km2\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124marea_km2_terrestrial\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m---> 18\u001b[0m \u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43menvironment\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mterrestrial\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 19\u001b[0m new_rows\u001b[38;5;241m.\u001b[39mappend(row)\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m row[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmarine\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/series.py:1322\u001b[0m, in \u001b[0;36mSeries.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 1319\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_values(key, value)\n\u001b[1;32m 1320\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1321\u001b[0m \u001b[38;5;66;03m# GH#12862 adding a new key to the Series\u001b[39;00m\n\u001b[0;32m-> 1322\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m value\n\u001b[1;32m 1324\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, LossySetitemError):\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;66;03m# The key was OK, but we cannot set the value losslessly\u001b[39;00m\n\u001b[1;32m 1326\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39mget_loc(key)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexing.py:911\u001b[0m, in \u001b[0;36m_LocationIndexer.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 908\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_valid_setitem_indexer(key)\n\u001b[1;32m 910\u001b[0m iloc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miloc\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39miloc\n\u001b[0;32m--> 911\u001b[0m \u001b[43miloc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_with_indexer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexing.py:1932\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_with_indexer\u001b[0;34m(self, indexer, value, name)\u001b[0m\n\u001b[1;32m 1929\u001b[0m indexer, missing \u001b[38;5;241m=\u001b[39m convert_missing_indexer(indexer)\n\u001b[1;32m 1931\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m missing:\n\u001b[0;32m-> 1932\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_with_indexer_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1933\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 1935\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloc\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1936\u001b[0m \u001b[38;5;66;03m# must come after setting of missing\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexing.py:2238\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_with_indexer_missing\u001b[0;34m(self, indexer, value)\u001b[0m\n\u001b[1;32m 2231\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n\u001b[1;32m 2232\u001b[0m \u001b[38;5;66;03m# TODO: re-issue this with setitem-specific message?\u001b[39;00m\n\u001b[1;32m 2233\u001b[0m warnings\u001b[38;5;241m.\u001b[39mfilterwarnings(\n\u001b[1;32m 2234\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2235\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe behavior of Index.insert with object-dtype is deprecated\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2236\u001b[0m category\u001b[38;5;241m=\u001b[39m\u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 2237\u001b[0m )\n\u001b[0;32m-> 2238\u001b[0m 
new_index \u001b[38;5;241m=\u001b[39m \u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minsert\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2240\u001b[0m \u001b[38;5;66;03m# we have a coerced indexer, e.g. a float\u001b[39;00m\n\u001b[1;32m 2241\u001b[0m \u001b[38;5;66;03m# that matches in an int64 Index, so\u001b[39;00m\n\u001b[1;32m 2242\u001b[0m \u001b[38;5;66;03m# we will not create a duplicate index, rather\u001b[39;00m\n\u001b[1;32m 2243\u001b[0m \u001b[38;5;66;03m# index to that element\u001b[39;00m\n\u001b[1;32m 2244\u001b[0m \u001b[38;5;66;03m# e.g. 0.0 -> 0\u001b[39;00m\n\u001b[1;32m 2245\u001b[0m \u001b[38;5;66;03m# GH#12246\u001b[39;00m\n\u001b[1;32m 2246\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m index\u001b[38;5;241m.\u001b[39mis_unique:\n\u001b[1;32m 2247\u001b[0m \u001b[38;5;66;03m# pass new_index[-1:] instead if [new_index[-1]]\u001b[39;00m\n\u001b[1;32m 2248\u001b[0m \u001b[38;5;66;03m# so that we retain dtype\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:7012\u001b[0m, in \u001b[0;36mIndex.insert\u001b[0;34m(self, loc, item)\u001b[0m\n\u001b[1;32m 7009\u001b[0m loc \u001b[38;5;241m=\u001b[39m loc \u001b[38;5;28;01mif\u001b[39;00m loc \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m loc \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 7010\u001b[0m new_values[loc] \u001b[38;5;241m=\u001b[39m item\n\u001b[0;32m-> 7012\u001b[0m out \u001b[38;5;241m=\u001b[39m Index\u001b[38;5;241m.\u001b[39m_with_infer(new_values, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m)\n\u001b[1;32m 7013\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 7014\u001b[0m using_pyarrow_string_dtype()\n\u001b[1;32m 7015\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m is_string_dtype(out\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m 7016\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m new_values\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 7017\u001b[0m ):\n\u001b[1;32m 7018\u001b[0m out \u001b[38;5;241m=\u001b[39m out\u001b[38;5;241m.\u001b[39mastype(new_values\u001b[38;5;241m.\u001b[39mdtype)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/indexes/base.py:1671\u001b[0m, in \u001b[0;36mIndex.name\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1666\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[1;32m 1668\u001b[0m \u001b[38;5;66;03m# --------------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 1669\u001b[0m \u001b[38;5;66;03m# Name-Centric Methods\u001b[39;00m\n\u001b[0;32m-> 1671\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 1672\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mname\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Hashable:\n\u001b[1;32m 1673\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1674\u001b[0m \u001b[38;5;124;03m Return Index or MultiIndex name.\u001b[39;00m\n\u001b[1;32m 1675\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1682\u001b[0m \u001b[38;5;124;03m 'x'\u001b[39;00m\n\u001b[1;32m 1683\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 1684\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "init_table = (\n", - " pd.concat(\n", - " [\n", - " (\n", - " pa_intermediate.pipe(columns_to_lower)\n", - " .pipe(separate_parent_iso, 
iso_column=\"parent_iso\")\n", - " .pipe(change_ata_to_abnj)\n", - " .rename(\n", - " columns={\n", - " \"parent_iso\": \"iso\",\n", - " \"status_yr\": \"year\",\n", - " \"gis_m_area\": \"area_km2_marine\",\n", - " \"gis_area\": \"area_km2_terrestrial\",\n", - " }\n", - " )\n", - " .drop(columns=['status'])\n", - " .assign(source=\"protected_planet\")\n", - " ),\n", - " (\n", - " mpaatlas_intermediate.pipe(calculate_area)\n", - " .pipe(extract_wdpaid_mpaatlas)\n", - " .pipe(separate_parent_iso, iso_column=\"location_i\")\n", - " .rename(\n", - " columns={\n", - " \"location_i\": \"iso\",\n", - " \"wdpa_id\": \"wdpa_pid\",\n", - " \"designatio\": \"desig_eng\",\n", - " }\n", - " )\n", - " .assign(source=\"mpaatlas\")\n", - " .astype({\"mpa_zone_i\": \"Int64\"})\n", - " ),\n", - " ],\n", - " ignore_index=True,\n", - " )\n", - " .reset_index(drop=True)\n", - " .replace(\n", - " {\n", - " \"iso\": {\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " }\n", - " }\n", - " )\n", - " .sort_values(by=[\"wdpa_pid\", \"source\"], ascending=[True, False])\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [], - "source": [ - "# to be run if things change a lot in the future\n", - "iucn_cat = pd.DataFrame(\n", - " {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", - " index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", - ")\n", - "iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", - "\n", - "iucn_cat = pd.read_csv(\n", - " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [], - "source": [ - "def 
define_is_child(\n", - " gdf: pd.DataFrame | gpd.GeoDataFrame,\n", - " gby: str = \"wdpaid\",\n", - " env_col: str = \"environment\",\n", - " sort_by: dict[str, bool] = {\"wdpa_pid\": True, \"source\": False},\n", - " col_name: str = \"is_child\",\n", - ") -> pd.DataFrame | gpd.GeoDataFrame:\n", - " return gdf.assign(\n", - " **{\n", - " col_name: np.where(\n", - " gdf.index.isin(\n", - " gdf.sort_values(by=list(sort_by.keys()), ascending=list(sort_by.values()))\n", - " .groupby([gby, env_col]) # Group by wdpaid and environment\n", - " .nth(slice(1, None))\n", - " .index\n", - " ),\n", - " True,\n", - " False,\n", - " )\n", - " }\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "\"Columns not found: 'data_source'\"", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[110], line 65\u001b[0m\n\u001b[1;32m 1\u001b[0m pa_table \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2\u001b[0m \u001b[43minit_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43madd_bbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbbox\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdefine_is_child\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mset_child_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwdpaid\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mis_child\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(add_total_areas)\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(calculate_coverage_percentage_pa)\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(add_environment)\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .pipe(\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# output,\u001b[39;49;00m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# iso_column=\"iso\",\u001b[39;49;00m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# rep_d={\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"status\": {\u001b[39;49;00m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Adopted\": 4,\u001b[39;49;00m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# 
\"implemented\": 6,\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Established\": 6,\u001b[39;49;00m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Designated\": 5,\u001b[39;49;00m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Proposed\": 3,\u001b[39;49;00m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Inscribed\": 3,\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown\": 1,\u001b[39;49;00m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"pa_def\": {\"0\": 2, \"1\": 1},\u001b[39;49;00m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"year\": {0: pd.NA},\u001b[39;49;00m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"iucn_cat\": dict(\u001b[39;49;00m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# iucn_cat[[\"slug\"]]\u001b[39;49;00m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .reset_index(drop=False)\u001b[39;49;00m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .iloc[:, [1, 0]]\u001b[39;49;00m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# .to_dict(orient=\"tight\")[\"data\"]\u001b[39;49;00m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# ),\u001b[39;49;00m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\u001b[39;49;00m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"protection\": {\u001b[39;49;00m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"full\": 3,\u001b[39;49;00m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"light\": 4,\u001b[39;49;00m\n\u001b[1;32m 35\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;66;43;03m# \"incompatible\": 5,\u001b[39;49;00m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"high\": 6,\u001b[39;49;00m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"minimal\": 7,\u001b[39;49;00m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown\": 8,\u001b[39;49;00m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown/to be determined\": 8,\u001b[39;49;00m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"establishm\": {\u001b[39;49;00m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"actively managed\": 4,\u001b[39;49;00m\n\u001b[1;32m 43\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"implemented\": 6,\u001b[39;49;00m\n\u001b[1;32m 44\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"designated\": 5,\u001b[39;49;00m\n\u001b[1;32m 45\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Designated\": 5,\u001b[39;49;00m\n\u001b[1;32m 46\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"proposed or committed\": 3,\u001b[39;49;00m\n\u001b[1;32m 47\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Proposed\": 3,\u001b[39;49;00m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Inscribed\": 3,\u001b[39;49;00m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Established\": 5,\u001b[39;49;00m\n\u001b[1;32m 50\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"Adopted\": 5,\u001b[39;49;00m\n\u001b[1;32m 51\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"unknown\": 1,\u001b[39;49;00m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# 
rename={\u001b[39;49;00m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"pa_def\": \"protection_status\",\u001b[39;49;00m\n\u001b[1;32m 56\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"area_km2\": \"area\",\u001b[39;49;00m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"iucn_cat\": \"pa_iucn_category\",\u001b[39;49;00m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"desig_eng\": \"designation\",\u001b[39;49;00m\n\u001b[1;32m 59\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"protection\": \"mpaa_protection_level\",\u001b[39;49;00m\n\u001b[1;32m 60\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"establishm\": \"mpaa_establishment_stage\",\u001b[39;49;00m\n\u001b[1;32m 61\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"source\": \"data_source\",\u001b[39;49;00m\n\u001b[1;32m 62\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# },\u001b[39;49;00m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\u001b[39;49;00m\n\u001b[1;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# )\u001b[39;49;00m\n\u001b[0;32m---> 65\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[43madd_child_parent_relationship\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;66;03m# .astype(\u001b[39;00m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;66;03m# {\u001b[39;00m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# \"year\": \"Int32\",\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# \"pa_iucn_category\": \"Int64\",\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;66;03m# \"protection_status\": \"Int64\",\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;66;03m# }\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;66;03m# 
.query(\"coverage <= 100\") \u001b[39;00m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;66;03m# .sort_index()\u001b[39;00m\n\u001b[1;32m 75\u001b[0m )\n\u001b[1;32m 76\u001b[0m pa_table\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m5\u001b[39m)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/generic.py:6231\u001b[0m, in \u001b[0;36mNDFrame.pipe\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 6229\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 6230\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m common\u001b[38;5;241m.\u001b[39mpipe(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m), func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m-> 6231\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcommon\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipe\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/common.py:502\u001b[0m, in \u001b[0;36mpipe\u001b[0;34m(obj, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 502\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/dev/skytruth-30x30/data/src/pipelines/processors.py:696\u001b[0m, in \u001b[0;36madd_child_parent_relationship\u001b[0;34m(df, gby, cols)\u001b[0m\n\u001b[1;32m 691\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd_child_parent_relationship\u001b[39m(\n\u001b[1;32m 692\u001b[0m df: pd\u001b[38;5;241m.\u001b[39mDataFrame \u001b[38;5;241m|\u001b[39m gpd\u001b[38;5;241m.\u001b[39mGeoDataFrame,\n\u001b[1;32m 693\u001b[0m gby: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwdpaid\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 694\u001b[0m cols: \u001b[38;5;28mlist\u001b[39m \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwdpaid\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwdpa_pid\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mis_child\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_source\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 695\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame \u001b[38;5;241m|\u001b[39m gpd\u001b[38;5;241m.\u001b[39mGeoDataFrame:\n\u001b[0;32m--> 696\u001b[0m groups: pd\u001b[38;5;241m.\u001b[39mSeries \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgby\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcols\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mapply(define_childs_ids)\n\u001b[1;32m 697\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchildren\u001b[39m\u001b[38;5;124m\"\u001b[39m] 
\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 698\u001b[0m pd\u001b[38;5;241m.\u001b[39mDataFrame([[a, b] \u001b[38;5;28;01mfor\u001b[39;00m a, b \u001b[38;5;129;01min\u001b[39;00m groups\u001b[38;5;241m.\u001b[39mvalues], columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparent\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchildren\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 699\u001b[0m \u001b[38;5;241m.\u001b[39mdropna(subset\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparent\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 700\u001b[0m \u001b[38;5;241m.\u001b[39mset_index(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparent\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 701\u001b[0m )\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/groupby/generic.py:1951\u001b[0m, in \u001b[0;36mDataFrameGroupBy.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1944\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(key) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 1945\u001b[0m \u001b[38;5;66;03m# if len == 1, then it becomes a SeriesGroupBy and this is actually\u001b[39;00m\n\u001b[1;32m 1946\u001b[0m \u001b[38;5;66;03m# valid syntax, so don't raise\u001b[39;00m\n\u001b[1;32m 1947\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1948\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot subset columns with a tuple with more than one element. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1949\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUse a list instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1950\u001b[0m )\n\u001b[0;32m-> 1951\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandas/core/base.py:239\u001b[0m, in \u001b[0;36mSelectionMixin.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mintersection(key)) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(key)):\n\u001b[1;32m 238\u001b[0m bad_keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mset\u001b[39m(key)\u001b[38;5;241m.\u001b[39mdifference(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[0;32m--> 239\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumns not found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(bad_keys)[\u001b[38;5;241m1\u001b[39m:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gotitem(\u001b[38;5;28mlist\u001b[39m(key), ndim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - 
"\u001b[0;31mKeyError\u001b[0m: \"Columns not found: 'data_source'\"" - ] - } - ], - "source": [ - "pa_table = (\n", - " init_table.pipe(add_bbox, \"bbox\")\n", - " .pipe(define_is_child)\n", - " .pipe(set_child_id)\n", - " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", - " .reset_index(drop=True)\n", - " # .pipe(add_total_areas)\n", - " # .pipe(calculate_coverage_percentage_pa)\n", - " # .pipe(add_environment)\n", - " # .pipe(\n", - " # output,\n", - " # iso_column=\"iso\",\n", - " # rep_d={\n", - " # \"status\": {\n", - " # \"Adopted\": 4,\n", - " # \"implemented\": 6,\n", - " # \"Established\": 6,\n", - " # \"Designated\": 5,\n", - " # \"Proposed\": 3,\n", - " # \"Inscribed\": 3,\n", - " # \"unknown\": 1,\n", - " # },\n", - " # \"pa_def\": {\"0\": 2, \"1\": 1},\n", - " # \"year\": {0: pd.NA},\n", - " # \"iucn_cat\": dict(\n", - " # iucn_cat[[\"slug\"]]\n", - " # .reset_index(drop=False)\n", - " # .iloc[:, [1, 0]]\n", - " # .to_dict(orient=\"tight\")[\"data\"]\n", - " # ),\n", - " # \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\n", - " # \"protection\": {\n", - " # \"full\": 3,\n", - " # \"light\": 4,\n", - " # \"incompatible\": 5,\n", - " # \"high\": 6,\n", - " # \"minimal\": 7,\n", - " # \"unknown\": 8,\n", - " # \"unknown/to be determined\": 8,\n", - " # },\n", - " # \"establishm\": {\n", - " # \"actively managed\": 4,\n", - " # \"implemented\": 6,\n", - " # \"designated\": 5,\n", - " # \"Designated\": 5,\n", - " # \"proposed or committed\": 3,\n", - " # \"Proposed\": 3,\n", - " # \"Inscribed\": 3,\n", - " # \"Established\": 5,\n", - " # \"Adopted\": 5,\n", - " # \"unknown\": 1,\n", - " # },\n", - " # },\n", - " # rename={\n", - " # \"pa_def\": \"protection_status\",\n", - " # \"area_km2\": \"area\",\n", - " # \"iucn_cat\": \"pa_iucn_category\",\n", - " # \"desig_eng\": \"designation\",\n", - " # \"protection\": \"mpaa_protection_level\",\n", - " # \"establishm\": \"mpaa_establishment_stage\",\n", - " # \"source\": 
\"data_source\",\n", - " # },\n", - " # drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\n", - " # )\n", - " .pipe(add_child_parent_relationship)\n", - " # .astype(\n", - " # {\n", - " # \"year\": \"Int32\",\n", - " # \"pa_iucn_category\": \"Int64\",\n", - " # \"protection_status\": \"Int64\",\n", - " # }\n", - " # )\n", - " # .query(\"coverage <= 100\") \n", - " # .sort_index()\n", - ")\n", - "pa_table.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 106, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2370,146 +1515,87 @@ " \n", " \n", " \n", - " wdpaid\n", - " wdpa_pid\n", - " pa_def\n", - " name\n", - " desig_eng\n", - " iucn_cat\n", - " marine\n", + " iso_3\n", " year\n", - " iso\n", - " geometry\n", - " source\n", - " mpa_zone_i\n", - " establishm\n", - " protection\n", - " protecti_1\n", - " area_km2\n", - " environment\n", - " bbox\n", - " is_child\n", - " child_id\n", + " protected_area\n", " \n", " \n", " \n", " \n", - " 19056\n", - " 100672\n", - " 100672_A\n", - " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " II\n", - " 0\n", - " 1984.0\n", - " CAN\n", - " MULTIPOLYGON (((-140.83302 69.63132, -140.8350...\n", - " protected_planet\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 9695.837607\n", - " terrestrial\n", - " [-141.000000001, 68.556807999, -138.1338199979...\n", - " False\n", - " 100672_A\n", + " 0\n", + " AFG\n", + " 2010\n", + " 1078.918622\n", " \n", " \n", - " 19057\n", - " 100672\n", - " 100672_B\n", - " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " II\n", - " 1\n", - " 1984.0\n", - " CAN\n", - " MULTIPOLYGON (((-139.78657 69.59821, -139.7872...\n", - " protected_planet\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 79.375056\n", - " terrestrial\n", - " [-140.894068268, 69.19278843000001, -138.37542...\n", - " True\n", - " 100672_B\n", + " 1\n", + " AFG\n", + " 2011\n", + " 1078.918622\n", " \n", " \n", 
- " 19058\n", - " 100672\n", - " 100672_B\n", - " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " II\n", - " 1\n", - " 1984.0\n", - " CAN\n", - " MULTIPOLYGON (((-139.78657 69.59821, -139.7872...\n", - " protected_planet\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 52.170080\n", - " marine\n", - " [-140.894068268, 69.19278843000001, -138.37542...\n", - " True\n", - " 100672_B\n", + " 2\n", + " AFG\n", + " 2012\n", + " 1078.918622\n", + " \n", + " \n", + " 3\n", + " AFG\n", + " 2013\n", + " 1078.918622\n", + " \n", + " \n", + " 4\n", + " AFG\n", + " 2014\n", + " 1078.918622\n", " \n", " \n", "\n", "" ], "text/plain": [ - " wdpaid wdpa_pid pa_def name \\\n", - "19056 100672 100672_A 1 Ivvavik National Park Of Canada \n", - "19057 100672 100672_B 1 Ivvavik National Park Of Canada \n", - "19058 100672 100672_B 1 Ivvavik National Park Of Canada \n", - "\n", - " desig_eng iucn_cat marine year iso \\\n", - "19056 National Park II 0 1984.0 CAN \n", - "19057 National Park II 1 1984.0 CAN \n", - "19058 National Park II 1 1984.0 CAN \n", - "\n", - " geometry source \\\n", - "19056 MULTIPOLYGON (((-140.83302 69.63132, -140.8350... protected_planet \n", - "19057 MULTIPOLYGON (((-139.78657 69.59821, -139.7872... protected_planet \n", - "19058 MULTIPOLYGON (((-139.78657 69.59821, -139.7872... protected_planet \n", - "\n", - " mpa_zone_i establishm protection protecti_1 area_km2 \\\n", - "19056 NaN NaN NaN 9695.837607 \n", - "19057 NaN NaN NaN 79.375056 \n", - "19058 NaN NaN NaN 52.170080 \n", - "\n", - " environment bbox \\\n", - "19056 terrestrial [-141.000000001, 68.556807999, -138.1338199979... \n", - "19057 terrestrial [-140.894068268, 69.19278843000001, -138.37542... \n", - "19058 marine [-140.894068268, 69.19278843000001, -138.37542... 
\n", - "\n", - " is_child child_id \n", - "19056 False 100672_A \n", - "19057 True 100672_B \n", - "19058 True 100672_B " + " iso_3 year protected_area\n", + "0 AFG 2010 1078.918622\n", + "1 AFG 2011 1078.918622\n", + "2 AFG 2012 1078.918622\n", + "3 AFG 2013 1078.918622\n", + "4 AFG 2014 1078.918622" ] }, - "execution_count": 106, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "pa_table[pa_table['name'] == 'Ivvavik National Park Of Canada']" + "tpa_grouped = pd.read_csv(output_file_dissolve)\n", + "tpa_grouped.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def add_region_iso2(\n", + " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", + ") -> pd.DataFrame | gpd.GeoDataFrame:\n", + " \n", + " with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + " regions = json.load(f)\n", + "\n", + " def find_region_iso(iso: str) -> Union[str, None]:\n", + " filtered_regions = list(filter(lambda x: iso in x[\"country_iso_3s\"], regions.get(\"data\")))\n", + " return filtered_regions[0][\"region_iso\"] if len(filtered_regions) > 0 else None\n", + "\n", + " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))" ] }, { "cell_type": "code", - "execution_count": 107, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2533,133 +1619,297 @@ " \n", " \n", " \n", - " wdpaid\n", - " wdpa_pid\n", - " protection_status\n", - " name\n", - " designation\n", - " pa_iucn_category\n", " year\n", - " area\n", - " data_source\n", - " mpaa_establishment_stage\n", - " mpaa_protection_level\n", - " bbox\n", - " is_child\n", - " child_id\n", + " iso_3\n", + " protected_area\n", + " protected_areas_count\n", + " oecms\n", + " pas\n", + " total_terrestrial_area\n", " coverage\n", + " global_contribution\n", + " is_last_year\n", " environment\n", - " location\n", - " children\n", - " \n", - " \n", - " id\n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " 2137\n", - " 100672\n", - " 100672_A\n", - " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " 2\n", - " 1984\n", - " 39.201811\n", - " 3\n", - " NaN\n", - " NaN\n", - " [-141.000000001, 68.556807999, -138.1338199979...\n", + " 0\n", + " 2010\n", + " AF\n", + " 3.636311e+06\n", + " 7272.0\n", + " 0.0\n", + " 100.0\n", + " 29993094.71\n", + " 12.123827\n", + " 2.694465\n", " False\n", - " 100672_A\n", - " 0.000680\n", - " marine\n", - " 29.0\n", - " [2138]\n", + " terrestrial\n", " \n", " \n", - " 2138\n", - " 100672\n", - " 100672_B\n", - " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " 2\n", - " 1984\n", - " 52.170080\n", - " 3\n", - " NaN\n", - " NaN\n", - " [-140.894068268, 69.19278843000001, -138.37542...\n", - " True\n", - " 100672_B\n", - " 0.000905\n", - " marine\n", - " 29.0\n", - " NaN\n", + " 1\n", + " 2010\n", + " AS\n", + " 2.051386e+06\n", + " 24782.0\n", + " 0.0\n", + " 100.0\n", + " 31625555.58\n", + " 6.486481\n", + " 1.520053\n", + " False\n", + " terrestrial\n", " \n", - " \n", + " \n", + " 2\n", + " 2010\n", + " AT\n", + " 1.108333e+02\n", + " 2.0\n", + " 0.0\n", + " 100.0\n", + " 12088229.65\n", + " 0.000917\n", + " 0.000082\n", + " False\n", + " terrestrial\n", + " \n", + " \n", + " 3\n", + " 2010\n", + " EU\n", + " 4.306080e+06\n", + " 116128.0\n", + " 0.0\n", + " 100.0\n", + " 30037571.37\n", + " 14.335645\n", + " 3.190756\n", + " False\n", + " terrestrial\n", + " \n", + " \n", + " 4\n", + " 2010\n", + " NA\n", + " 2.006295e+06\n", + " 52176.0\n", + " 0.0\n", + " 100.0\n", + " 19371151.92\n", + " 10.357127\n", + " 1.486642\n", + " False\n", + " terrestrial\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " 
...\n", + " ...\n", + " \n", + " \n", + " 2989\n", + " 2024\n", + " YEM\n", + " 5.145397e+03\n", + " 15.0\n", + " 0.0\n", + " 100.0\n", + " 453741.18\n", + " 1.133994\n", + " 0.003813\n", + " True\n", + " terrestrial\n", + " \n", + " \n", + " 2990\n", + " 2024\n", + " ZAF\n", + " 1.143850e+05\n", + " 1631.0\n", + " 0.0\n", + " 100.0\n", + " 1221327.52\n", + " 9.365631\n", + " 0.084758\n", + " True\n", + " terrestrial\n", + " \n", + " \n", + " 2991\n", + " 2024\n", + " ZMB\n", + " 2.929805e+05\n", + " 557.0\n", + " 0.0\n", + " 100.0\n", + " 753990.33\n", + " 38.857330\n", + " 0.217095\n", + " True\n", + " terrestrial\n", + " \n", + " \n", + " 2992\n", + " 2024\n", + " ZNC\n", + " 2.779983e+00\n", + " 8.0\n", + " 0.0\n", + " 100.0\n", + " 3314.08\n", + " 0.083884\n", + " 0.000002\n", + " True\n", + " terrestrial\n", + " \n", + " \n", + " 2993\n", + " 2024\n", + " ZWE\n", + " 1.096232e+05\n", + " 229.0\n", + " 0.0\n", + " 100.0\n", + " 391234.88\n", + " 28.019803\n", + " 0.081230\n", + " True\n", + " terrestrial\n", + " \n", + " \n", "\n", + "

2994 rows × 11 columns

\n", "" ], "text/plain": [ - " wdpaid wdpa_pid protection_status name \\\n", - "id \n", - "2137 100672 100672_A 1 Ivvavik National Park Of Canada \n", - "2138 100672 100672_B 1 Ivvavik National Park Of Canada \n", - "\n", - " designation pa_iucn_category year area data_source \\\n", - "id \n", - "2137 National Park 2 1984 39.201811 3 \n", - "2138 National Park 2 1984 52.170080 3 \n", + " year iso_3 protected_area protected_areas_count oecms pas \\\n", + "0 2010 AF 3.636311e+06 7272.0 0.0 100.0 \n", + "1 2010 AS 2.051386e+06 24782.0 0.0 100.0 \n", + "2 2010 AT 1.108333e+02 2.0 0.0 100.0 \n", + "3 2010 EU 4.306080e+06 116128.0 0.0 100.0 \n", + "4 2010 NA 2.006295e+06 52176.0 0.0 100.0 \n", + "... ... ... ... ... ... ... \n", + "2989 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", + "2990 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", + "2991 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", + "2992 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", + "2993 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", "\n", - " mpaa_establishment_stage mpaa_protection_level \\\n", - "id \n", - "2137 NaN NaN \n", - "2138 NaN NaN \n", + " total_terrestrial_area coverage global_contribution is_last_year \\\n", + "0 29993094.71 12.123827 2.694465 False \n", + "1 31625555.58 6.486481 1.520053 False \n", + "2 12088229.65 0.000917 0.000082 False \n", + "3 30037571.37 14.335645 3.190756 False \n", + "4 19371151.92 10.357127 1.486642 False \n", + "... ... ... ... ... \n", + "2989 453741.18 1.133994 0.003813 True \n", + "2990 1221327.52 9.365631 0.084758 True \n", + "2991 753990.33 38.857330 0.217095 True \n", + "2992 3314.08 0.083884 0.000002 True \n", + "2993 391234.88 28.019803 0.081230 True \n", "\n", - " bbox is_child child_id \\\n", - "id \n", - "2137 [-141.000000001, 68.556807999, -138.1338199979... False 100672_A \n", - "2138 [-140.894068268, 69.19278843000001, -138.37542... 
True 100672_B \n", + " environment \n", + "0 terrestrial \n", + "1 terrestrial \n", + "2 terrestrial \n", + "3 terrestrial \n", + "4 terrestrial \n", + "... ... \n", + "2989 terrestrial \n", + "2990 terrestrial \n", + "2991 terrestrial \n", + "2992 terrestrial \n", + "2993 terrestrial \n", "\n", - " coverage environment location children \n", - "id \n", - "2137 0.000680 marine 29.0 [2138] \n", - "2138 0.000905 marine 29.0 NaN " + "[2994 rows x 11 columns]" ] }, - "execution_count": 107, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "mpa_table[mpa_table['name'] == 'Ivvavik National Park Of Canada']" + "# Add pa and oecm counts to the coverage table\n", + "coverage = (\n", + " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", + " .pipe(add_region_iso2, \"iso_3\")\n", + " .pipe(calculate_stats_cov_pa, [\"year\"], \"iso_3\")\n", + " .pipe(calculate_pa_def_percentages)\n", + " .pipe(add_total_terrestrial_area)\n", + " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", + "\n", + "NewProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output2,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"iso_3\", 'total_terrestrial_area'],\n", + " )\n", + ").to_csv(\n", + " output_file_tpas,\n", + " index=True,\n", + ")\n", + "\n", + "coverage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coverage stats - all" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"pa\"\n", + "pipe_tpa = \"mpa-terrestrial\"\n", + "pipe_mpa = \"mpa\"\n", + "step = \"preprocess\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_tpa = FileConventionHandler(pipe_tpa)\n", + "pipe_dir_mpa = FileConventionHandler(pipe_mpa)\n", + "\n", + "input_path_tpas = 
pipe_dir_tpa.get_processed_step_path(current_step).joinpath(\"tpa_coverage.csv\")\n", + "input_path_mpas = pipe_dir_mpa.get_processed_step_path(current_step).joinpath(\"mpa_coverage.csv\")\n", + "\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"protection_coverage_stats.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "tpa = pd.read_csv(input_path_tpas)\n", + "mpa = pd.read_csv(input_path_mpas)" ] }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -2683,133 +1933,166 @@ " \n", " \n", " \n", - " wdpaid\n", - " wdpa_pid\n", - " protection_status\n", - " name\n", - " designation\n", - " pa_iucn_category\n", - " area\n", + " id\n", " year\n", - " data_source\n", - " bbox\n", - " is_child\n", - " child_id\n", + " protected_area\n", + " protected_areas_count\n", + " oecms\n", + " pas\n", " coverage\n", + " global_contribution\n", + " is_last_year\n", " environment\n", " location\n", - " children\n", - " \n", - " \n", - " id\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " 17036\n", - " 100672\n", - " 100672_A\n", + " 1\n", " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " 2\n", - " 9695.837607\n", - " 1984\n", - " 3\n", - " [-141.000000001, 68.556807999, -138.1338199979...\n", - " False\n", - " 100672_A\n", - " 0.097898\n", - " terrestrial\n", - " 29.0\n", - " [17037]\n", - " \n", - " \n", - " 17037\n", - " 100672\n", - " 100672_B\n", - " 1\n", - " Ivvavik National Park Of Canada\n", - " National Park\n", - " 2\n", - " 79.375056\n", - " 1984\n", - " 3\n", - " [-140.894068268, 69.19278843000001, -138.37542...\n", - " True\n", - " 100672_B\n", - " 0.000801\n", + " 2010\n", + " 3.636311e+06\n", + " 7272\n", + " 0.0\n", + " 100.0\n", + " 12.123827\n", + " 
2.694465\n", + " False\n", " terrestrial\n", - " 29.0\n", - " NaN\n", + " 3\n", " \n", " \n", "\n", "" ], "text/plain": [ - " wdpaid wdpa_pid protection_status name \\\n", - "id \n", - "17036 100672 100672_A 1 Ivvavik National Park Of Canada \n", - "17037 100672 100672_B 1 Ivvavik National Park Of Canada \n", - "\n", - " designation pa_iucn_category area year data_source \\\n", - "id \n", - "17036 National Park 2 9695.837607 1984 3 \n", - "17037 National Park 2 79.375056 1984 3 \n", - "\n", - " bbox is_child child_id \\\n", - "id \n", - "17036 [-141.000000001, 68.556807999, -138.1338199979... False 100672_A \n", - "17037 [-140.894068268, 69.19278843000001, -138.37542... True 100672_B \n", + " id year protected_area protected_areas_count oecms pas coverage \\\n", + "1 1 2010 3.636311e+06 7272 0.0 100.0 12.123827 \n", "\n", - " coverage environment location children \n", - "id \n", - "17036 0.097898 terrestrial 29.0 [17037] \n", - "17037 0.000801 terrestrial 29.0 NaN " + " global_contribution is_last_year environment location \n", + "1 2.694465 False terrestrial 3 " ] }, - "execution_count": 109, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tpa_table[tpa_table['name'] == 'Ivvavik National Park Of Canada']" + "# concatenate the two dataframes\n", + "final_data = pd.concat([tpa, mpa], ignore_index=True)\n", + "final_data.index = range(1, len(final_data) + 1)\n", + "final_data['id'] = final_data.index\n", + "final_data[final_data['id'] == 1]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ - "PAsSchema(mpa_table[mpa_table.location.notna()]).to_csv(output_file_mpas, index=True)" + "NewProtectedAreaExtentSchema(final_data).to_csv(output_file, index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mpa atlas - country stats Fully or highly protected" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", + "The steps are:\n", + "1. Load eez\n", + "2. Spatial inner Join the eez dataset with the Mpaatlas one\n", + "3. iso assign using the sovereign one provided by mpaatlas\n", + "4. dissolve by location\n", + "5. calculate the area for global regions and eez countries ussing mollwide projection\n", + "6. prepare the data to be ingested in strapi\n", + "7. upload the data to strapi" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + } + ], "source": [ - "# todo investigate the issue with area as null" + "pipe = \"mpaatlas\"\n", + "strapi_collection = \"mpaa-protection-level-stat\"\n", + "\n", + "pipe_dir_eez = FileConventionHandler(\"eez\")\n", + "pipe_dir_mpaatlas = FileConventionHandler(pipe)\n", + "output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\n", + " \"mpaatlas_protection_level.csv\"\n", + ")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", + "# Download the mpas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", + "\n", + "# Load the data\n", + "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 9.59it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 2.95s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "eez_mpaatlas_data_join = await spatial_join(\n", + " eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", + ")" ] }, { @@ -2818,41 +2101,113 @@ "metadata": {}, "outputs": [], "source": [ - "# batch_export(\n", - "# mpa_table[mpa_table.area.notna()],\n", - "# 5000,\n", - "# PAsSchema,\n", - "# pipe_dir.get_processed_step_path(current_step),\n", - "# \"mpa_detail\",\n", - "# format=\"json\",\n", - "# strapi_colection=strapi_collection_mpas,\n", + "# To get an idea of the spatial join results\n", + "# eez_mpaatlas_data_join.to_file(\n", + "# pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin.shp\"),\n", + "# driver=\"ESRI Shapefile\",\n", "# )" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyogrio._io:Created 54 records\n" + ] + } + ], + "source": [ + "eez_mpaatlas_data_join.dissolve(by=[\"protecti_1\", \"location_i\"], aggfunc={\"name\": \"count\"}).reset_index().to_file(\n", + "pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin_dissolved.shp\"),\n", + "driver=\"ESRI Shapefile\",\n", 
+ ")" + ] + }, + { + "cell_type": "code", + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ - "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", - "# init_table[\n", - "# (\n", - "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", - "# .groupby(\"wdpaid\")\n", - "# .transform(\"size\")\n", - "# .gt(1)\n", - "# )\n", - "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", - "# ].groupby(\"wdpaid\")\n", - "# .geometry.apply(lambda x: x.union_all())" + "result = (\n", + " eez_mpaatlas_data_join.rename(columns={\"location_i\": \"iso_3\"})\n", + " .pipe(process_mpaatlas_data) \n", + " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", + " .pipe(separate_parent_iso)\n", + " .replace(\n", + " {\n", + " \"location_i\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .pipe(add_region_iso, iso_column=\"iso_3\")\n", + " .pipe(calculate_stats, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", + " .query('protecti_1 != \"less protected or unknown\"')\n", + " .pipe(fix_monaco, iso_column=\"iso_3\", area_column=\"area_km2\")\n", + " .pipe(add_total_marine_area)\n", + " .pipe(calculate_coverage_percentage_mpatlas)\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso_3\",\n", + " rep_d={\n", + " \"protecti_1\": {\n", + " \"fully or highly protected\": 1,\n", + " }\n", + " },\n", + " rename={\"protecti_1\": \"mpaa_protection_level\", \"area_km2\": \"area\"},\n", + " drop_cols=[\"total_marine_area\", \"iso_3\"],\n", + " )\n", + ")\n", + "\n", + "NewProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", + " output_file, index=True\n", + ")" ] }, { - 
"cell_type": "markdown", + "cell_type": "code", + "execution_count": 82, "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], "source": [ - "#### upload data to strapi" + "remote_path = 'vizzuality_processed_data/strapi_tables/mpaatlas_protection_level.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi_collection = \"mpaa-protection-level-stat\"" ] }, { @@ -2871,7 +2226,7 @@ } ], "source": [ - "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" ] }, { @@ -2880,99 +2235,393 @@ "metadata": {}, "outputs": [], "source": [ - "# for i in range(0, 4):\n", - "# strapi.importCollectionData(\n", - "# strapi_collection_mpas,\n", - "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", - "# )" + "# strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Country mpas detail table data" + "### Protected seas - fishing protection level" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 83, "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], "source": [ - " 1- lower case the columns \n", - "2- separate location that its regime is in dispute or on join regime \n", - "3- calcualte area for mpaatlas data \n", - "4- rename columns for merge \n", - "5- merge maaatlas and mpa data 
identifying the source \n", - "6- identify child resources and set them as childs \n", - "7- calculate bbox \n", - "8- set child resources \n", - "9- prepare output for batch export \n", - "10- upload data to strapi " + "pipe = \"protectedseas\"\n", + "strapi_collection = \"fishing-protection-level-stat\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"protectedseas_stats.xlsx\")\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"lfp.csv\")\n", + "\n", + "# Download the protected seas file && unzip it\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"vizzuality_processed_data/protectedseas/preprocess/protectedseas_stats.xlsx\",\n", + " file=input_file,\n", + " operation=\"r\",\n", + ")\n", + "\n", + "# Load the data\n", + "protectedseas_intermediate = pd.read_excel(input_file)" ] }, { "cell_type": "code", - "execution_count": 256, + "execution_count": 84, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" - ] - }, { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_teriso_sovincludes_multi_jurisdictional_areaslfparea_sqkmtotal_areapct_total
320NaNESPTrue5142.9730101011023.7760.014141
321NaNESPTrue41639.6820761011023.7760.162180
322NaNESPTrue3214532.8498001011023.77621.219367
323NaNESPTrue215064.1327701011023.7761.489988
324NaNESPTrue1779644.1388001011023.77677.114323
\n", + "
" + ], "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" + " iso_ter iso_sov includes_multi_jurisdictional_areas lfp area_sqkm \\\n", + "320 NaN ESP True 5 142.973010 \n", + "321 NaN ESP True 4 1639.682076 \n", + "322 NaN ESP True 3 214532.849800 \n", + "323 NaN ESP True 2 15064.132770 \n", + "324 NaN ESP True 1 779644.138800 \n", + "\n", + " total_area pct_total \n", + "320 1011023.776 0.014141 \n", + "321 1011023.776 0.162180 \n", + "322 1011023.776 21.219367 \n", + "323 1011023.776 1.489988 \n", + "324 1011023.776 77.114323 " ] }, - "execution_count": 256, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipe = \"mpa\"\n", - "strapi_collection_mpas = \"mpa\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", - "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", - "\n", - "# Download the protected atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)" + "protectedseas_intermediate[\n", + " (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " )\n", + " | (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", + " & ~protectedseas_intermediate.iso_sov.isin(\n", + " protectedseas_intermediate[\n", + " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " ].iso_sov.unique()\n", + " )\n", + " )\n", + "][protectedseas_intermediate.iso_sov.eq(\"ESP\")]" ] }, { "cell_type": "code", - "execution_count": 257, + "execution_count": 85, "metadata": {}, "outputs": [], "source": [ - "# Load the data\n", - 
"mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\")).pipe(\n", - " clean_geometries\n", + "final = (\n", + " protectedseas_intermediate[\n", + " (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " )\n", + " | (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", + " & ~protectedseas_intermediate.iso_sov.isin(\n", + " protectedseas_intermediate[\n", + " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " ].iso_sov.unique()\n", + " )\n", + " )\n", + " ].replace(\n", + " {\n", + " \"lfp\": {\n", + " 5: \"highly\",\n", + " 4: \"highly\",\n", + " 3: \"moderately\",\n", + " 2: \"less\",\n", + " 1: \"less\",\n", + " },\n", + " }\n", + " ).groupby([\"iso_sov\", \"lfp\"]).agg({\"area_sqkm\": \"sum\", \"total_area\": \"max\"}).reset_index()\n", + " .pipe(\n", + " calculate_global_area,\n", + " gby_col=[\"lfp\"],\n", + " iso_column=\"iso_sov\",\n", + " agg_ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", + " )\n", + " .pipe(add_region_iso, iso_column=\"iso_sov\")\n", + " .pipe(\n", + " calculate_stats,\n", + " gby_col=[\"lfp\"],\n", + " ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", + " iso_column=\"iso_sov\",\n", + " )\n", + " .pipe(lambda x: x.assign(pct=round((x.area_sqkm / x.total_area)*100, 2)))\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso_sov\",\n", + " rep_d={\n", + " \"lfp\": {\n", + " \"highly\": 1,\n", + " \"moderately\": 2,\n", + " \"less\": 3,\n", + " }\n", + " },\n", + " rename={\"lfp\": \"fishing_protection_level\", \"area_sqkm\": \"area\"},\n", + " drop_cols=[\"iso_sov\", \"total_area\"],\n", + " )\n", ")\n", - "mpaatlas_intermediate = gpd.read_file(\n", - " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", - ").pipe(clean_geometries)" + 
"FPLSchema(final[final.location.notna()]).to_csv(output_file, index=True)" ] }, { "cell_type": "code", - "execution_count": 258, + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/lfp.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "init_table = (\n", - " pd.concat(\n", + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 500)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " 1- lower case the columns \n", + "2- separate location that its regime is in dispute or on join regime \n", + "3- calcualte area for mpaatlas data \n", + "4- rename columns for merge \n", + "5- merge maaatlas and mpa data identifying the source \n", + "6- identify child resources and set them as childs \n", + "7- calculate bbox \n", + "8- set child resources \n", + "9- prepare output for batch export \n", + "10- upload data to strapi " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country mpas detail table data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " 1- lower case the columns \n", + "2- separate location that its regime is in dispute or on join regime \n", + "3- calcualte area for mpaatlas data \n", + "4- rename columns for merge \n", + "5- merge maaatlas 
and mpa data identifying the source \n", + "6- identify child resources and set them as childs \n", + "7- calculate bbox \n", + "8- set child resources \n", + "9- prepare output for batch export \n", + "10- upload data to strapi " + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" + ] + }, + "execution_count": 256, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa\"\n", + "strapi_collection_mpas = \"mpa\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", + "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the data\n", + "mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\")).pipe(\n", + " clean_geometries\n", + ")\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": {}, + "outputs": [], + "source": [ 
+ "init_table = (\n", + " pd.concat(\n", " [\n", " (\n", " mpa_intermediate.pipe(columns_to_lower)\n", @@ -3290,1129 +2939,204 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "iucn_cat = pd.read_csv(\n", - " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "init_table = (\n", - " pd.concat(\n", - " [\n", - " (\n", - " tpa_intermediate.pipe(columns_to_lower)\n", - " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", - " .query(\"parent_iso != 'ATA' and parent_iso != 'ABNJ'\")\n", - " .rename(\n", - " columns={\n", - " \"parent_iso\": \"iso\",\n", - " \"status_yr\": \"year\",\n", - " \"gis_area\": \"protected_area\",\n", - " }\n", - " ).drop(columns=['status'])\n", - " ).assign(source=\"protected_planet\"),\n", - " ],\n", - " ignore_index=True,\n", - " )\n", - " .reset_index(drop=True)\n", - " .replace(\n", - " {\n", - " \"iso\": {\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " }\n", - " }\n", - " )\n", - " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_3510708/3364924951.py:202: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " df.replace(rep_d)\n" - ] - } - ], - "source": [ - "tpa_table = (\n", - " init_table.pipe(add_bbox, \"bbox\")\n", - " .pipe(define_is_child)\n", - " .pipe(set_child_id_pa)\n", - " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", - " .reset_index(drop=True)\n", - " .pipe(add_total_terrestrial_area)\n", - " .pipe(calculate_coverage_percentage_pa)\n", - " .pipe(add_environment)\n", - " .pipe(\n", - " output2,\n", - " iso_column=\"iso\",\n", - " rep_d={\n", - " \"pa_def\": {\"0\": 2, \"1\": 1},\n", - " \"year\": {0: pd.NA},\n", - " \"iucn_cat\": dict(\n", - " iucn_cat[[\"slug\"]]\n", - " .reset_index(drop=False)\n", - " .iloc[:, [1, 0]]\n", - " .to_dict(orient=\"tight\")[\"data\"]\n", - " ),\n", - " \"source\": {\"protected_planet\": 3},\n", - " },\n", - " rename={\n", - " \"pa_def\": \"protection_status\",\n", - " \"protected_area\": \"area\",\n", - " \"iucn_cat\": \"pa_iucn_category\",\n", - " \"desig_eng\": \"designation\",\n", - " \"source\": \"data_source\",\n", - " },\n", - " drop_cols=[\"geometry\", \"iso\", \"marine\", \"total_terrestrial_area\"]\n", - " )\n", - " .pipe(add_child_parent_relationship)\n", - " .astype(\n", - " {\n", - " \"year\": \"Int32\",\n", - " \"pa_iucn_category\": \"Int64\",\n", - " \"protection_status\": \"Int64\",\n", - " }\n", - " )\n", - " .query(\"coverage <= 100\") \n", - " .sort_index()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "# Add col mpaa_protection_level and mpa_establishment_stage to the table to validate it\n", - "tpa_table['mpaa_protection_level'] = np.nan\n", - "tpa_table['mpaa_establishment_stage'] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# Validate and save\n", - "PAsSchema(tpa_table[tpa_table.location.notna()]).to_csv(output_file_tpas, 
index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# batch_export(\n", - "# mpa_table[mpa_table.area.notna()],\n", - "# 5000,\n", - "# PAsSchema,\n", - "# pipe_dir.get_processed_step_path(current_step),\n", - "# \"mpa_detail\",\n", - "# format=\"json\",\n", - "# strapi_colection=strapi_collection_mpas,\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", - "# init_table[\n", - "# (\n", - "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", - "# .groupby(\"wdpaid\")\n", - "# .transform(\"size\")\n", - "# .gt(1)\n", - "# )\n", - "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", - "# ].groupby(\"wdpaid\")\n", - "# .geometry.apply(lambda x: x.union_all())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# for i in range(0, 4):\n", - "# strapi.importCollectionData(\n", - "# strapi_collection_mpas,\n", - "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PA coverage - terrestrial" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", - 
"/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipe = \"mpa-terrestrial\"\n", - "step = \"preprocess\"\n", - "strapi_collection_mpas = \"mpa-terrestrial\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", - "\n", - "working_folder = FileConventionHandler(pipe)\n", - "input_path = working_folder.pipe_raw_path\n", - "temp_working_path = working_folder.get_temp_file_path(step)\n", - "output_file_sjoin = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")\n", - "output_file_dissolve = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_dissolve.csv\")\n", - "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", - "\n", - "# Download the protected atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# # Load the data\n", - "# wdpa = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", - "# clean_geometries\n", - "# )\n", - "# gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "\n", - "# gadm.sindex\n", - "# wdpa.sindex" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Spatial join using overlay\n", - "# wdpa_subset = wdpa[\n", - "# ~(\n", - "# (wdpa.bounds.minx < -181)\n", - "# | (wdpa.bounds.miny < -91)\n", - "# | (wdpa.bounds.maxx > 181)\n", - "# | (wdpa.bounds.maxy > 91)\n", - "# )\n", - "# ].reset_index(drop=True)\n", - "\n", - "# sjoin_gdf = await spatial_join(wdpa_subset, gadm)\n", - "# sjoin_gdf.rename(columns={\"GID_0\": \"iso_3\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOCOUNTRYiso_3area_km2geometry
\n", - "
" - ], - "text/plain": [ - "Empty GeoDataFrame\n", - "Columns: [WDPAID, WDPA_PID, PA_DEF, NAME, DESIG_ENG, IUCN_CAT, MARINE, GIS_AREA, STATUS, STATUS_YR, PARENT_ISO, COUNTRY, iso_3, area_km2, geometry]\n", - "Index: []" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# # test that we have not produce duplicates\n", - "# sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", - "# \"WDPA_PID\"\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "289352" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# sjoin_gdf = filter_by_exluding_propossed_mpas(sjoin_gdf)\n", - "# len(sjoin_gdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pyogrio._io:Created 289,352 records\n" - ] - } - ], - "source": [ - "# # Save the spatial join\n", - "# sjoin_gdf.to_file(output_file_sjoin, driver=\"ESRI Shapefile\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "sjoin_gdf = gpd.read_file(output_file_sjoin)\n", - "sjoin_gdf[\"STATUS_YR\"] = sjoin_gdf[\"STATUS_YR\"].astype(\"Int64\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PA_DEFiso_3year10protected_areas_count
0AFG2010100.010.0
1AFG2011100.010.0
2AFG2012100.010.0
3AFG2013100.010.0
4AFG2014100.010.0
..................
2884ZWE20202290.0229.0
2885ZWE20212290.0229.0
2886ZWE20222290.0229.0
2887ZWE20232290.0229.0
2888ZWE20242290.0229.0
\n", - "

2889 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - "PA_DEF iso_3 year 1 0 protected_areas_count\n", - "0 AFG 2010 10 0.0 10.0\n", - "1 AFG 2011 10 0.0 10.0\n", - "2 AFG 2012 10 0.0 10.0\n", - "3 AFG 2013 10 0.0 10.0\n", - "4 AFG 2014 10 0.0 10.0\n", - "... ... ... ... ... ...\n", - "2884 ZWE 2020 229 0.0 229.0\n", - "2885 ZWE 2021 229 0.0 229.0\n", - "2886 ZWE 2022 229 0.0 229.0\n", - "2887 ZWE 2023 229 0.0 229.0\n", - "2888 ZWE 2024 229 0.0 229.0\n", - "\n", - "[2889 rows x 5 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# # Calculate wdpa cumulative counts and pa and oecm percentages\n", - "cumulative_counts = cumulative_pa_def_counts(sjoin_gdf)\n", - "cumulative_counts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Dissolve geometries to calculate the coverage\n", - "# data = await process_grid(sjoin_gdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3yearprotected_area
0ATA20171395.028044
1ATA20181395.028044
2ATA20191395.028044
3ATA20201395.028044
4ATA20211395.028044
\n", - "
" - ], - "text/plain": [ - " iso_3 year protected_area\n", - "0 ATA 2017 1395.028044\n", - "1 ATA 2018 1395.028044\n", - "2 ATA 2019 1395.028044\n", - "3 ATA 2020 1395.028044\n", - "4 ATA 2021 1395.028044" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# tpa = pd.concat(data, ignore_index=True).drop(columns=['STATUS_YR', 'index']).rename(columns={'area': 'protected_area'})\n", - "# tpa.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3yearprotected_area
0AFG20101078.918622
1AFG20111078.918622
2AFG20121078.918622
3AFG20131078.918622
4AFG20141078.918622
\n", - "
" - ], - "text/plain": [ - " iso_3 year protected_area\n", - "0 AFG 2010 1078.918622\n", - "1 AFG 2011 1078.918622\n", - "2 AFG 2012 1078.918622\n", - "3 AFG 2013 1078.918622\n", - "4 AFG 2014 1078.918622" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# # Group by 'iso_3' and 'year' and sum the 'area'\n", - "# tpa_grouped = tpa.groupby(['iso_3', 'year'], as_index=False)['protected_area'].sum()\n", - "# tpa_grouped.reset_index(drop=True, inplace=True)\n", - "# tpa_grouped.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# # save to csv\n", - "# tpa_grouped.to_csv(output_file_dissolve, index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3yearprotected_area
0AFG20101078.918622
1AFG20111078.918622
2AFG20121078.918622
3AFG20131078.918622
4AFG20141078.918622
\n", - "
" - ], - "text/plain": [ - " iso_3 year protected_area\n", - "0 AFG 2010 1078.918622\n", - "1 AFG 2011 1078.918622\n", - "2 AFG 2012 1078.918622\n", - "3 AFG 2013 1078.918622\n", - "4 AFG 2014 1078.918622" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tpa_grouped = pd.read_csv(output_file_dissolve)\n", - "tpa_grouped.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yeariso_3protected_areaprotected_areas_countoecmspastotal_terrestrial_areacoverageglobal_contributionis_last_yearenvironment
02010AF3.636311e+067272.00.0100.029993094.7112.1238272.694465Falseterrestrial
12010AS2.040713e+0624761.00.0100.031625555.586.4527341.512145Falseterrestrial
22010AT1.108333e+022.00.0100.012088229.650.0009170.000082Falseterrestrial
32010EU4.303722e+06116101.00.0100.030037571.3714.3277953.189009Falseterrestrial
42010NA2.006295e+0652176.00.0100.019371151.9210.3571271.486642Falseterrestrial
....................................
29892024YEM5.145397e+0315.00.0100.0453741.181.1339940.003813Trueterrestrial
29902024ZAF1.143850e+051631.00.0100.01221327.529.3656310.084758Trueterrestrial
29912024ZMB2.929805e+05557.00.0100.0753990.3338.8573300.217095Trueterrestrial
29922024ZNC2.779983e+008.00.0100.03314.080.0838840.000002Trueterrestrial
29932024ZWE1.096232e+05229.00.0100.0391234.8828.0198030.081230Trueterrestrial
\n", - "

2994 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " year iso_3 protected_area protected_areas_count oecms pas \\\n", - "0 2010 AF 3.636311e+06 7272.0 0.0 100.0 \n", - "1 2010 AS 2.040713e+06 24761.0 0.0 100.0 \n", - "2 2010 AT 1.108333e+02 2.0 0.0 100.0 \n", - "3 2010 EU 4.303722e+06 116101.0 0.0 100.0 \n", - "4 2010 NA 2.006295e+06 52176.0 0.0 100.0 \n", - "... ... ... ... ... ... ... \n", - "2989 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", - "2990 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", - "2991 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", - "2992 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", - "2993 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", - "\n", - " total_terrestrial_area coverage global_contribution is_last_year \\\n", - "0 29993094.71 12.123827 2.694465 False \n", - "1 31625555.58 6.452734 1.512145 False \n", - "2 12088229.65 0.000917 0.000082 False \n", - "3 30037571.37 14.327795 3.189009 False \n", - "4 19371151.92 10.357127 1.486642 False \n", - "... ... ... ... ... \n", - "2989 453741.18 1.133994 0.003813 True \n", - "2990 1221327.52 9.365631 0.084758 True \n", - "2991 753990.33 38.857330 0.217095 True \n", - "2992 3314.08 0.083884 0.000002 True \n", - "2993 391234.88 28.019803 0.081230 True \n", - "\n", - " environment \n", - "0 terrestrial \n", - "1 terrestrial \n", - "2 terrestrial \n", - "3 terrestrial \n", - "4 terrestrial \n", - "... ... 
\n", - "2989 terrestrial \n", - "2990 terrestrial \n", - "2991 terrestrial \n", - "2992 terrestrial \n", - "2993 terrestrial \n", - "\n", - "[2994 rows x 11 columns]" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "iucn_cat = pd.read_csv(\n", + " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "init_table = (\n", + " pd.concat(\n", + " [\n", + " (\n", + " tpa_intermediate.pipe(columns_to_lower)\n", + " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", + " .query(\"parent_iso != 'ATA' and parent_iso != 'ABNJ'\")\n", + " .rename(\n", + " columns={\n", + " \"parent_iso\": \"iso\",\n", + " \"status_yr\": \"year\",\n", + " \"gis_area\": \"protected_area\",\n", + " }\n", + " ).drop(columns=['status'])\n", + " ).assign(source=\"protected_planet\"),\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .reset_index(drop=True)\n", + " .replace(\n", + " {\n", + " \"iso\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3510708/3364924951.py:202: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " df.replace(rep_d)\n" + ] } ], "source": [ - "# Add pa and oecm counts to the coverage table\n", - "coverage = (\n", - " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", - " .pipe(add_region_iso, \"iso_3\")\n", - " .pipe(calculate_stats_cov_pa, [\"year\"], \"iso_3\")\n", - " .pipe(calculate_pa_def_percentages)\n", + "tpa_table = (\n", + " init_table.pipe(add_bbox, \"bbox\")\n", + " .pipe(define_is_child)\n", + " .pipe(set_child_id_pa)\n", + " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", + " .reset_index(drop=True)\n", " .pipe(add_total_terrestrial_area)\n", " .pipe(calculate_coverage_percentage_pa)\n", - " .pipe(calculate_global_contribution)\n", - " .pipe(add_is_last_year)\n", " .pipe(add_environment)\n", - ")\n", - "\n", - "NewProtectedAreaExtentSchema(\n", - " coverage.pipe(\n", + " .pipe(\n", " output2,\n", - " \"iso_3\",\n", - " {},\n", - " {},\n", - " [\"iso_3\", 'total_terrestrial_area'],\n", + " iso_column=\"iso\",\n", + " rep_d={\n", + " \"pa_def\": {\"0\": 2, \"1\": 1},\n", + " \"year\": {0: pd.NA},\n", + " \"iucn_cat\": dict(\n", + " iucn_cat[[\"slug\"]]\n", + " .reset_index(drop=False)\n", + " .iloc[:, [1, 0]]\n", + " .to_dict(orient=\"tight\")[\"data\"]\n", + " ),\n", + " \"source\": {\"protected_planet\": 3},\n", + " },\n", + " rename={\n", + " \"pa_def\": \"protection_status\",\n", + " \"protected_area\": \"area\",\n", + " \"iucn_cat\": \"pa_iucn_category\",\n", + " \"desig_eng\": \"designation\",\n", + " \"source\": \"data_source\",\n", + " },\n", + " drop_cols=[\"geometry\", \"iso\", \"marine\", \"total_terrestrial_area\"]\n", " )\n", - ").to_csv(\n", - " output_file_tpas,\n", - " index=True,\n", - ")\n", - "\n", - "coverage" + " .pipe(add_child_parent_relationship)\n", + " .astype(\n", + " {\n", + " \"year\": \"Int32\",\n", + " \"pa_iucn_category\": \"Int64\",\n", + " \"protection_status\": 
\"Int64\",\n", + " }\n", + " )\n", + " .query(\"coverage <= 100\") \n", + " .sort_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# Add col mpaa_protection_level and mpa_establishment_stage to the table to validate it\n", + "tpa_table['mpaa_protection_level'] = np.nan\n", + "tpa_table['mpaa_establishment_stage'] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Validate and save\n", + "PAsSchema(tpa_table[tpa_table.location.notna()]).to_csv(output_file_tpas, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# batch_export(\n", + "# mpa_table[mpa_table.area.notna()],\n", + "# 5000,\n", + "# PAsSchema,\n", + "# pipe_dir.get_processed_step_path(current_step),\n", + "# \"mpa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_mpas,\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", + "# init_table[\n", + "# (\n", + "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", + "# .groupby(\"wdpaid\")\n", + "# .transform(\"size\")\n", + "# .gt(1)\n", + "# )\n", + "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", + "# ].groupby(\"wdpaid\")\n", + "# .geometry.apply(lambda x: x.union_all())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for i in range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" ] }, { @@ -4424,7 +3148,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -4445,7 +3169,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -4455,7 +3179,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -4489,15 +3213,17 @@ " year\n", " area\n", " data_source\n", - " mpaa_establishment_stage\n", - " mpaa_protection_level\n", - " bbox\n", - " is_child\n", - " child_id\n", + " ...\n", " coverage\n", " environment\n", " location\n", " children\n", + " protected_area\n", + " protected_areas_count\n", + " oecms\n", + " pas\n", + " global_contribution\n", + " is_last_year\n", " \n", " \n", " id\n", @@ -4520,6 +3246,8 @@ " \n", " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -4534,16 +3262,18 @@ " 1.0\n", " 1973.0\n", " 14.636135\n", - " 3\n", - " NaN\n", - " NaN\n", - " [-61.88691617799998, 17.184972703000028, -61.8...\n", - " False\n", - " 1\n", + " 3.0\n", + " ...\n", " 0.013119\n", " marine\n", " 15\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 2\n", @@ -4556,19 +3286,22 @@ " 1.0\n", " 1973.0\n", " 3.845623\n", - " 3\n", - " NaN\n", - " NaN\n", - " [-61.771742115999984, 17.520006550999994, -61....\n", - " False\n", - " 2\n", + " 3.0\n", + " ...\n", " 0.003447\n", " marine\n", " 15\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", + "

2 rows × 26 columns

\n", "" ], "text/plain": [ @@ -4582,23 +3315,25 @@ "1 Diamond Reef and Salt Fish Tail Reef Marine Reserve 1.0 \n", "2 Palaster Reef Marine Reserve 1.0 \n", "\n", - " year area data_source mpaa_establishment_stage \\\n", - "id \n", - "1 1973.0 14.636135 3 NaN \n", - "2 1973.0 3.845623 3 NaN \n", + " year area data_source ... coverage environment location \\\n", + "id ... \n", + "1 1973.0 14.636135 3.0 ... 0.013119 marine 15 \n", + "2 1973.0 3.845623 3.0 ... 0.003447 marine 15 \n", "\n", - " mpaa_protection_level bbox \\\n", - "id \n", - "1 NaN [-61.88691617799998, 17.184972703000028, -61.8... \n", - "2 NaN [-61.771742115999984, 17.520006550999994, -61.... \n", + " children protected_area protected_areas_count oecms pas \\\n", + "id \n", + "1 NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN \n", "\n", - " is_child child_id coverage environment location children \n", - "id \n", - "1 False 1 0.013119 marine 15 NaN \n", - "2 False 2 0.003447 marine 15 NaN " + " global_contribution is_last_year \n", + "id \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "\n", + "[2 rows x 26 columns]" ] }, - "execution_count": 30, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -4613,64 +3348,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "SchemaError", - "evalue": "Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... ...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mSchemaError\u001b[0m Traceback (most recent call last)", - " \u001b[0;31m[... 
skipping hidden 1 frame]\u001b[0m\n", - "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPAsSchema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(output_file_pa, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:138\u001b[0m, in \u001b[0;36mDataFrameModel.__new__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[0;32m--> 138\u001b[0m DataFrameBase[TDataFrameModel], \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:289\u001b[0m, in \u001b[0;36mDataFrameModel.validate\u001b[0;34m(cls, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 288\u001b[0m DataFrameBase[TDataFrameModel],\n\u001b[0;32m--> 289\u001b[0m 
\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 292\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:126\u001b[0m, in \u001b[0;36mDataFrameSchema.validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mpandera\u001b[38;5;241m.\u001b[39madd_schema(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m 
\u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:155\u001b[0m, in \u001b[0;36mDataFrameSchema._validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 147\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is an inferred schema that hasn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt been \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodified. 
It\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms recommended that you refine the schema \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:90\u001b[0m, in 
\u001b[0;36mDataFrameSchemaBackend.validate\u001b[0;34m(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaErrors \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m---> 90\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_errors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# run custom parsers\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:95\u001b[0m, in \u001b[0;36mErrorHandler.collect_errors\u001b[0;34m(self, schema_errors, original_exc)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m schema_error \u001b[38;5;129;01min\u001b[39;00m schema_errors:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43moriginal_exc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:54\u001b[0m, in \u001b[0;36mErrorHandler.collect_error\u001b[0;34m(self, error_type, reason_code, schema_error, original_exc)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lazy:\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m schema_error \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01moriginal_exc\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# delete data of validated object from SchemaError object to prevent\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# storing copies of the validated DataFrame/Series for every\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# SchemaError collected.\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:631\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend._coerce_dtype_helper.._try_coercion\u001b[0;34m(coerce_fn, obj)\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaError \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/components.py:131\u001b[0m, in \u001b[0;36mComponentSchema.coerce_dtype\u001b[0;34m(self, check_obj)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Coerce type of the data by type specified in dtype.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m:param check_obj: data to coerce\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m:returns: 
data of the same type as the input\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/components.py:211\u001b[0m, in \u001b[0;36mColumnBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_field(check_obj) \u001b[38;5;129;01mor\u001b[39;00m is_index(check_obj):\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mColumnBackend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msuper\u001b[39m(ColumnBackend, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mcoerce_dtype(\n\u001b[1;32m 217\u001b[0m x,\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 220\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 221\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/array.py:173\u001b[0m, in \u001b[0;36mArraySchemaBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParserError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\n\u001b[1;32m 174\u001b[0m schema\u001b[38;5;241m=\u001b[39mschema,\n\u001b[1;32m 175\u001b[0m data\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 176\u001b[0m message\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while coercing \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;241m.\u001b[39mfailure_cases\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 179\u001b[0m ),\n\u001b[1;32m 180\u001b[0m failure_cases\u001b[38;5;241m=\u001b[39mexc\u001b[38;5;241m.\u001b[39mfailure_cases,\n\u001b[1;32m 181\u001b[0m 
check\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoerce_dtype(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m reason_code\u001b[38;5;241m=\u001b[39mSchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 183\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", - "\u001b[0;31mSchemaError\u001b[0m: Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... ...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mSchemaError\u001b[0m Traceback (most recent call last)", - " \u001b[0;31m[... 
skipping hidden 1 frame]\u001b[0m\n", - "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPAsSchema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(output_file_pa, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:138\u001b[0m, in \u001b[0;36mDataFrameModel.__new__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[0;32m--> 138\u001b[0m DataFrameBase[TDataFrameModel], \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:289\u001b[0m, in \u001b[0;36mDataFrameModel.validate\u001b[0;34m(cls, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 288\u001b[0m DataFrameBase[TDataFrameModel],\n\u001b[0;32m--> 289\u001b[0m 
\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 292\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:126\u001b[0m, in \u001b[0;36mDataFrameSchema.validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mpandera\u001b[38;5;241m.\u001b[39madd_schema(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m 
\u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:155\u001b[0m, in \u001b[0;36mDataFrameSchema._validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 147\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is an inferred schema that hasn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt been \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodified. 
It\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms recommended that you refine the schema \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:90\u001b[0m, in 
\u001b[0;36mDataFrameSchemaBackend.validate\u001b[0;34m(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaErrors \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m---> 90\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_errors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# run custom parsers\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:95\u001b[0m, in \u001b[0;36mErrorHandler.collect_errors\u001b[0;34m(self, schema_errors, original_exc)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m schema_error \u001b[38;5;129;01min\u001b[39;00m schema_errors:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43moriginal_exc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:54\u001b[0m, in \u001b[0;36mErrorHandler.collect_error\u001b[0;34m(self, error_type, reason_code, schema_error, original_exc)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lazy:\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m schema_error \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01moriginal_exc\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# delete data of validated object from SchemaError object to prevent\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# storing copies of the validated DataFrame/Series for every\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# SchemaError collected.\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:631\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend._coerce_dtype_helper.._try_coercion\u001b[0;34m(coerce_fn, obj)\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaError \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/components.py:131\u001b[0m, in \u001b[0;36mComponentSchema.coerce_dtype\u001b[0;34m(self, check_obj)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Coerce type of the data by type specified in dtype.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m:param check_obj: data to coerce\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m:returns: 
data of the same type as the input\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/components.py:211\u001b[0m, in \u001b[0;36mColumnBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_field(check_obj) \u001b[38;5;129;01mor\u001b[39;00m is_index(check_obj):\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mColumnBackend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msuper\u001b[39m(ColumnBackend, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mcoerce_dtype(\n\u001b[1;32m 217\u001b[0m x,\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 220\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 221\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/array.py:173\u001b[0m, in \u001b[0;36mArraySchemaBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParserError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\n\u001b[1;32m 174\u001b[0m schema\u001b[38;5;241m=\u001b[39mschema,\n\u001b[1;32m 175\u001b[0m data\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 176\u001b[0m message\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while coercing \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;241m.\u001b[39mfailure_cases\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 179\u001b[0m ),\n\u001b[1;32m 180\u001b[0m failure_cases\u001b[38;5;241m=\u001b[39mexc\u001b[38;5;241m.\u001b[39mfailure_cases,\n\u001b[1;32m 181\u001b[0m 
check\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoerce_dtype(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m reason_code\u001b[38;5;241m=\u001b[39mSchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 183\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", - "\u001b[0;31mSchemaError\u001b[0m: Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... ...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mSchemaError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPAsSchema\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfinal_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(output_file_pa, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:138\u001b[0m, in \u001b[0;36mDataFrameModel.__new__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 134\u001b[0m 
\u001b[38;5;129m@docstring_substitution\u001b[39m(validate_doc\u001b[38;5;241m=\u001b[39mBaseSchema\u001b[38;5;241m.\u001b[39mvalidate\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m)\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__new__\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrameBase[TDataFrameModel]: \u001b[38;5;66;03m# type: ignore [misc]\u001b[39;00m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[0;32m--> 138\u001b[0m DataFrameBase[TDataFrameModel], \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/model.py:289\u001b[0m, in \u001b[0;36mDataFrameModel.validate\u001b[0;34m(cls, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;129m@docstring_substitution\u001b[39m(validate_doc\u001b[38;5;241m=\u001b[39mBaseSchema\u001b[38;5;241m.\u001b[39mvalidate\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m)\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalidate\u001b[39m(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 284\u001b[0m inplace: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 285\u001b[0m ) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrameBase[TDataFrameModel]:\n\u001b[1;32m 286\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"%(validate_doc)s\"\"\"\u001b[39;00m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 288\u001b[0m DataFrameBase[TDataFrameModel],\n\u001b[0;32m--> 289\u001b[0m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_schema\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 292\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:126\u001b[0m, in \u001b[0;36mDataFrameSchema.validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 114\u001b[0m check_obj \u001b[38;5;241m=\u001b[39m check_obj\u001b[38;5;241m.\u001b[39mmap_partitions( \u001b[38;5;66;03m# type: ignore [operator]\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate,\n\u001b[1;32m 116\u001b[0m head\u001b[38;5;241m=\u001b[39mhead,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m meta\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 123\u001b[0m )\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
check_obj\u001b[38;5;241m.\u001b[39mpandera\u001b[38;5;241m.\u001b[39madd_schema(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/pandas/container.py:155\u001b[0m, in \u001b[0;36mDataFrameSchema._validate\u001b[0;34m(self, check_obj, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_inferred:\n\u001b[1;32m 147\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 148\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is an inferred schema that hasn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt been \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodified. It\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms recommended that you refine the schema \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 153\u001b[0m )\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mhead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mtail\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtail\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43msample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlazy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlazy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:90\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend.validate\u001b[0;34m(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)\u001b[0m\n\u001b[1;32m 86\u001b[0m error_handler\u001b[38;5;241m.\u001b[39mcollect_error(\n\u001b[1;32m 87\u001b[0m validation_type(exc\u001b[38;5;241m.\u001b[39mreason_code), exc\u001b[38;5;241m.\u001b[39mreason_code, exc\n\u001b[1;32m 88\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaErrors \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m---> 90\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_errors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mschema_errors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;66;03m# run custom parsers\u001b[39;00m\n\u001b[1;32m 93\u001b[0m check_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_parsers(schema, check_obj)\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:95\u001b[0m, in \u001b[0;36mErrorHandler.collect_errors\u001b[0;34m(self, schema_errors, original_exc)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Collect schema errors from a SchemaErrors exception.\u001b[39;00m\n\u001b[1;32m 89\u001b[0m \n\u001b[1;32m 90\u001b[0m \u001b[38;5;124;03m:param reason_code: string representing reason for error.\u001b[39;00m\n\u001b[1;32m 91\u001b[0m 
\u001b[38;5;124;03m:param schema_error: ``SchemaError`` object.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m:param original_exc: original exception associated with the SchemaError.\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m schema_error \u001b[38;5;129;01min\u001b[39;00m schema_errors:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreason_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43moriginal_exc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mschema_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/base/error_handler.py:54\u001b[0m, in \u001b[0;36mErrorHandler.collect_error\u001b[0;34m(self, error_type, reason_code, schema_error, original_exc)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Collect schema error, raising exception if lazy is False.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m:param error_type: type of error\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m:param reason_code: string representing reason for error\u001b[39;00m\n\u001b[1;32m 51\u001b[0m 
\u001b[38;5;124;03m:param schema_error: ``SchemaError`` object.\u001b[39;00m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lazy:\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m schema_error \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01moriginal_exc\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# delete data of validated object from SchemaError object to prevent\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# storing copies of the validated DataFrame/Series for every\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# SchemaError collected.\u001b[39;00m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(schema_error, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/container.py:631\u001b[0m, in \u001b[0;36mDataFrameSchemaBackend._coerce_dtype_helper.._try_coercion\u001b[0;34m(coerce_fn, obj)\u001b[0m\n\u001b[1;32m 629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_try_coercion\u001b[39m(coerce_fn, obj):\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 633\u001b[0m error_handler\u001b[38;5;241m.\u001b[39mcollect_error(\n\u001b[1;32m 634\u001b[0m validation_type(SchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION),\n\u001b[1;32m 635\u001b[0m SchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 636\u001b[0m 
exc,\n\u001b[1;32m 637\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/api/dataframe/components.py:131\u001b[0m, in \u001b[0;36mComponentSchema.coerce_dtype\u001b[0;34m(self, check_obj)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcoerce_dtype\u001b[39m(\u001b[38;5;28mself\u001b[39m, check_obj: TDataObject) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m TDataObject:\n\u001b[1;32m 126\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Coerce type of the data by type specified in dtype.\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m :param check_obj: data to coerce\u001b[39;00m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;124;03m :returns: data of the same type as the input\u001b[39;00m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_backend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/components.py:211\u001b[0m, in \u001b[0;36mColumnBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[38;5;66;03m# pylint: disable=super-with-arguments\u001b[39;00m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;66;03m# pylint: disable=fixme\u001b[39;00m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;66;03m# TODO: use singledispatchmethod here\u001b[39;00m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_field(check_obj) 
\u001b[38;5;129;01mor\u001b[39;00m is_index(check_obj):\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mColumnBackend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce_dtype\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m check_obj\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msuper\u001b[39m(ColumnBackend, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mcoerce_dtype(\n\u001b[1;32m 217\u001b[0m x,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 220\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 221\u001b[0m )\n", - "File \u001b[0;32m~/miniforge3/envs/skytruth/lib/python3.12/site-packages/pandera/backends/pandas/array.py:173\u001b[0m, in \u001b[0;36mArraySchemaBackend.coerce_dtype\u001b[0;34m(self, check_obj, schema)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m schema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtry_coerce(check_obj)\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParserError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\n\u001b[1;32m 174\u001b[0m schema\u001b[38;5;241m=\u001b[39mschema,\n\u001b[1;32m 175\u001b[0m data\u001b[38;5;241m=\u001b[39mcheck_obj,\n\u001b[1;32m 176\u001b[0m 
message\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 177\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while coercing \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;241m.\u001b[39mfailure_cases\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 179\u001b[0m ),\n\u001b[1;32m 180\u001b[0m failure_cases\u001b[38;5;241m=\u001b[39mexc\u001b[38;5;241m.\u001b[39mfailure_cases,\n\u001b[1;32m 181\u001b[0m check\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoerce_dtype(\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m reason_code\u001b[38;5;241m=\u001b[39mSchemaErrorReason\u001b[38;5;241m.\u001b[39mDATATYPE_COERCION,\n\u001b[1;32m 183\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", - "\u001b[0;31mSchemaError\u001b[0m: Error while coercing 'bbox' to type typing.List[float]: Could not coerce data_container into type typing.List[float]:\n index failure_case\n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... 
...\n306118 306119 \n306119 306120 \n306120 306121 \n306121 306122 \n306122 306123 \n\n[306123 rows x 2 columns]" - ] - } - ], + "outputs": [], "source": [ "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pa, index=True)" ] From 47d4363c8e33d8ff364dd857263edce9e9ff0f8c Mon Sep 17 00:00:00 2001 From: sofia Date: Thu, 3 Oct 2024 10:38:44 +0200 Subject: [PATCH 11/16] data and layer preparation for terrestrial --- data/notebooks/pipes_mock/intermediate.ipynb | 329 +--- data/notebooks/pipes_mock/locations.ipynb | 42 +- data/notebooks/pipes_mock/precalc_sofia.ipynb | 37 +- data/notebooks/pipes_mock/tiles.ipynb | 1511 +++++++---------- 4 files changed, 697 insertions(+), 1222 deletions(-) diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 101d0277..6b6000ea 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -31,6 +31,7 @@ "import geopandas as gpd\n", "import pandas as pd\n", "import requests\n", + "import json\n", "import dotenv \n", "\n", "dotenv.load_dotenv()\n", @@ -63,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -311,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -323,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -364,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -395,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -425,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": 25, + 
"execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -463,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -479,7 +480,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -503,7 +504,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -545,132 +546,19 @@ " Afganistán\n", " Afghanistan\n", " \n", - " \n", - " 1\n", - " Albania\n", - " MULTIPOLYGON (((19.27804 40.50524, 19.28189 40...\n", - " ALB\n", - " 28689.62\n", - " Albania\n", - " Albanie\n", - " \n", - " \n", - " 2\n", - " Algeria\n", - " MULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7...\n", - " DZA\n", - " 2311455.23\n", - " Argelia\n", - " Algérie\n", - " \n", - " \n", - " 3\n", - " Andorra\n", - " POLYGON ((1.61725 42.62406, 1.63334 42.62553, ...\n", - " AND\n", - " 450.35\n", - " Andorra\n", - " Andorre\n", - " \n", - " \n", - " 4\n", - " Angola\n", - " MULTIPOLYGON (((11.78636 -16.78001, 11.78478 -...\n", - " AGO\n", - " 1251701.39\n", - " Angola\n", - " Angola\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 199\n", - " Vietnam\n", - " MULTIPOLYGON (((103.46895 9.25602, 103.46736 9...\n", - " VNM\n", - " 330364.87\n", - " Vietnam\n", - " Vietnam\n", - " \n", - " \n", - " 200\n", - " Western Sahara\n", - " MULTIPOLYGON (((-16.83569 22.15403, -16.83597 ...\n", - " ESH\n", - " 267892.77\n", - " Sahara Occidental\n", - " Sahara occidental\n", - " \n", - " \n", - " 201\n", - " Yemen\n", - " MULTIPOLYGON (((42.1618 15.03042, 42.16236 15....\n", - " YEM\n", - " 453741.18\n", - " Yemen\n", - " Yémen\n", - " \n", - " \n", - " 202\n", - " Zambia\n", - " POLYGON ((25.87834 -17.97218, 25.87034 -17.970...\n", - " ZMB\n", - " 753990.33\n", - " Zambia\n", - " Zambie\n", - " \n", - " \n", - " 203\n", - " 
Zimbabwe\n", - " POLYGON ((32.70425 -18.96022, 32.70537 -18.965...\n", - " ZWE\n", - " 391234.88\n", - " Zimbabue\n", - " Zimbabwe\n", - " \n", " \n", "\n", - "

204 rows × 6 columns

\n", "" ], "text/plain": [ - " COUNTRY geometry GID_0 \\\n", - "0 Afghanistan MULTIPOLYGON (((63.61425 29.46993, 63.60868 29... AFG \n", - "1 Albania MULTIPOLYGON (((19.27804 40.50524, 19.28189 40... ALB \n", - "2 Algeria MULTIPOLYGON (((2.84535 36.74691, 2.84597 36.7... DZA \n", - "3 Andorra POLYGON ((1.61725 42.62406, 1.63334 42.62553, ... AND \n", - "4 Angola MULTIPOLYGON (((11.78636 -16.78001, 11.78478 -... AGO \n", - ".. ... ... ... \n", - "199 Vietnam MULTIPOLYGON (((103.46895 9.25602, 103.46736 9... VNM \n", - "200 Western Sahara MULTIPOLYGON (((-16.83569 22.15403, -16.83597 ... ESH \n", - "201 Yemen MULTIPOLYGON (((42.1618 15.03042, 42.16236 15.... YEM \n", - "202 Zambia POLYGON ((25.87834 -17.97218, 25.87034 -17.970... ZMB \n", - "203 Zimbabwe POLYGON ((32.70425 -18.96022, 32.70537 -18.965... ZWE \n", - "\n", - " area_km2 name_es name_fr \n", - "0 644050.28 Afganistán Afghanistan \n", - "1 28689.62 Albania Albanie \n", - "2 2311455.23 Argelia Algérie \n", - "3 450.35 Andorra Andorre \n", - "4 1251701.39 Angola Angola \n", - ".. ... ... ... \n", - "199 330364.87 Vietnam Vietnam \n", - "200 267892.77 Sahara Occidental Sahara occidental \n", - "201 453741.18 Yemen Yémen \n", - "202 753990.33 Zambia Zambie \n", - "203 391234.88 Zimbabue Zimbabwe \n", + " COUNTRY geometry GID_0 \\\n", + "0 Afghanistan MULTIPOLYGON (((63.61425 29.46993, 63.60868 29... 
AFG \n", "\n", - "[204 rows x 6 columns]" + " area_km2 name_es name_fr \n", + "0 644050.28 Afganistán Afghanistan " ] }, - "execution_count": 27, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -678,29 +566,29 @@ "source": [ "# Add translations for country names\n", "gdf_translated = add_translations(gdf_updated, translations_path).drop(columns=['code'])\n", - "gdf_translated" + "gdf_translated.head(1)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 204/204 [05:58<00:00, 1.75s/it]\n" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 204/204 [05:40<00:00, 1.67s/it]\n" ] } ], "source": [ - "final_gadm = await simplify_async(gdf_updated)" + "final_gadm = await simplify_async(gdf_translated)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -710,7 +598,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -720,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -899,177 +787,6 @@ "rm_tree(output_path) if output_path.exists() else None" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Protected seas intermediate" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# DEPRECATED\n", - "force_clean = True\n", - "step = \"preprocess\"\n", - "pipe = \"protectedseas\"" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": 
[], - "source": [ - "working_folder = FileConventionHandler(pipe)\n", - "input_path = working_folder.pipe_raw_path\n", - "temp_working_path = working_folder.get_temp_file_path(step)\n", - "\n", - "output_path = working_folder.get_processed_step_path(step)\n", - "output_file = working_folder.get_step_fmt_file_path(step, \"shp\")\n", - "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", - "remote_path = working_folder.get_remote_path(step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ps_csv_url = \"ProtectedSeas/ProtectedSeas_06142023.csv\"\n", - "ps_csv_output = input_path.joinpath(ps_csv_url.split(\"/\")[-1])\n", - "\n", - "ps_geometries_url = (\n", - " \"ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.zip\"\n", - ")\n", - "ps_geometries_output = input_path.joinpath(ps_geometries_url.split(\"/\")[-1])" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "if not force_clean and zipped_output_file.exists():\n", - " print(f\"File {zipped_output_file} already exists\")" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "## get the data\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=ps_csv_url,\n", - " file=ps_csv_output,\n", - " operation=\"r\",\n", - ")\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=ps_geometries_url,\n", - " file=ps_geometries_output,\n", - " operation=\"r\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "# unzip shapefile\n", - "shutil.unpack_archive(ps_geometries_output, temp_working_path)" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# transform data\n", - "# TODO: Modify the preprocessing steps so we do not eliminate the geometries that does not intersect with MPAs - do to a change in the processing methodology\n", - "data_table = pd.read_csv(ps_csv_output).pipe(get_mpas).pipe(set_location_iso).pipe(set_fps_classes)\n", - "\n", - "data_table.drop(\n", - " columns=data_table.columns.difference(\n", - " [\n", - " \"site_id\",\n", - " \"iso\",\n", - " \"FPS_cat\",\n", - " \"site_name\",\n", - " \"country\",\n", - " \"wdpa_id\",\n", - " \"removal_of_marine_life_is_prohibited\",\n", - " \"total_area\",\n", - " ]\n", - " ),\n", - " inplace=True,\n", - ")\n", - "\n", - "data_table.rename(columns={\"removal_of_marine_life_is_prohibited\": \"FPS\"}, inplace=True)\n", - "\n", - "# load geoemtries & merge\n", - "\n", - "gdf = gpd.read_file(ps_geometries_output)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "# save data\n", - "gdf.merge(data_table, how=\"inner\", left_on=\"SITE_ID\", right_on=\"site_id\").drop(\n", - " columns=[\"SITE_ID\", \"SITE_NAME\"]\n", - ").to_file(filename=output_file.as_posix(), driver=\"ESRI Shapefile\", encoding=\"utf-8\")\n", - "\n", - "# zip data\n", - "make_archive(output_path, zipped_output_file)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "# clean unzipped files\n", - "rm_tree(temp_working_path) if temp_working_path.exists() else None\n", - "rm_tree(output_path) if output_path.exists() else None" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "# LOAD\n", - "## load zipped file to GCS\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=zipped_output_file,\n", - " operation=\"w\",\n", - ")" - ] - }, 
{ "cell_type": "markdown", "metadata": {}, diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb index ba957244..fe6b0b94 100644 --- a/data/notebooks/pipes_mock/locations.ipynb +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -124,7 +124,7 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -146,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -323,14 +323,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3513234/673975959.py:35: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", + "/tmp/ipykernel_3524857/673975959.py:35: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n" ] } @@ -463,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -475,7 +475,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -483,6 +483,32 @@ "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", " .to_csv(scripts_dir.joinpath('data_commons/data/locations_code_all.csv'), index=False))" ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Upload csv to bucket\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/location_code.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=scripts_dir.joinpath('data_commons/data/locations_code_all.csv'),\n", + " operation=\"w\",\n", + ")" + ] } ], "metadata": { diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb index 92a397bc..a846dfef 100644 --- a/data/notebooks/pipes_mock/precalc_sofia.ipynb +++ b/data/notebooks/pipes_mock/precalc_sofia.ipynb @@ -406,6 +406,19 @@ " return calculate_stats_pa(df, gby_col, iso_column, {\"protected_area\": \"sum\", \"protected_areas_count\": \"sum\", \"1\": \"sum\", \"0\": \"sum\"})\n", "\n", "\n", + "def add_region_iso2(\n", + " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", + ") -> pd.DataFrame | gpd.GeoDataFrame:\n", + " \n", + " with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + " regions = json.load(f)\n", + "\n", + " def find_region_iso(iso: str) -> Union[str, None]:\n", + " filtered_regions = list(filter(lambda x: iso in x[\"country_iso_3s\"], regions.get(\"data\")))\n", + " return filtered_regions[0][\"region_iso\"] if len(filtered_regions) > 0 else None\n", + "\n", + " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))\n", + "\n", "\n", "\n", "class NewProtectedAreaExtentSchema(pa.DataFrameModel):\n", @@ -1573,26 +1586,6 @@ "tpa_grouped.head(5)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def add_region_iso2(\n", - " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", - ") -> pd.DataFrame | gpd.GeoDataFrame:\n", - " \n", - " 
with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", - " regions = json.load(f)\n", - "\n", - " def find_region_iso(iso: str) -> Union[str, None]:\n", - " filtered_regions = list(filter(lambda x: iso in x[\"country_iso_3s\"], regions.get(\"data\")))\n", - " return filtered_regions[0][\"region_iso\"] if len(filtered_regions) > 0 else None\n", - "\n", - " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))" - ] - }, { "cell_type": "code", "execution_count": null, @@ -2868,7 +2861,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Terrestrial pas - detail table data" + "### Country pas - detail table data" ] }, { @@ -3143,7 +3136,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Combine marine and terrestrial - Detail table" + "### Country marine and terrestrial - Detail table" ] }, { diff --git a/data/notebooks/pipes_mock/tiles.ipynb b/data/notebooks/pipes_mock/tiles.ipynb index 7b81f0ee..6ff2bd33 100644 --- a/data/notebooks/pipes_mock/tiles.ipynb +++ b/data/notebooks/pipes_mock/tiles.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -20,9 +20,13 @@ "import sys\n", "import geopandas as gpd\n", "import pandas as pd\n", + "import json\n", + "import dotenv\n", "\n", + "dotenv.load_dotenv()\n", "\n", - "scripts_dir = Path(\"../..\").joinpath(\"src\")\n", + "\n", + "scripts_dir = Path(\".\").joinpath(\"src\")\n", "if scripts_dir not in sys.path:\n", " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", "from helpers.mapshaper import Mapshaper\n", @@ -32,12 +36,14 @@ "from helpers.file_handler import FileConventionHandler\n", "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", "\n", - "from data_commons.loader import load_regions" + "from data_commons.loader import load_regions\n", + "\n", + "from pipelines.processors import clean_geometries" ] }, { "cell_type": 
"code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -245,15 +251,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/mambauser/data/gadm/processed/gadm_preprocess.zip\n", - "/home/mambauser/data/gadm/processed/preprocess\n" + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" ] }, { @@ -261,24 +267,23 @@ "output_type": "stream", "text": [ "Allocating 64 GB of heap memory\n", - "[o] Wrote /home/mambauser/data/gadm/processed/tiles/gadm_tiles.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_tiles.json\n", "For layer 0, using name \"gadm_tiles\"\n", - "/home/mambauser/data/gadm/processed/tiles/gadm_tiles.json:12: Found ] at top level\n", - "/home/mambauser/data/gadm/processed/tiles/gadm_tiles.json:10: Reached EOF without all containers being closed\n", - "In JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", - "206 features, 181286774 bytes of geometry, 3347 bytes of separate metadata, 9901 bytes of string pool\n", - "Choosing a maxzoom of -z0 for features about 2772178 feet (844960 meters) apart\n", - "Choosing a maxzoom of -z10 for resolution of about 317 feet (96 meters) within features\n", - " 99.9% 10/128/306 \n" + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_tiles.json:12: Found ] at top level: \n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_tiles.json:9: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", + "204 features, 104203392 bytes of geometry, 9732 bytes of string pool\n", + "Choosing a maxzoom of -z0 for features typically 2730281 feet (832190 meters) apart, and at least 605951 feet (184694 meters) apart\n", + "Choosing 
a maxzoom of -z9 for resolution of about 590 feet (180 meters) within features\n", + " 99.9% 9/146/198 \n" ] }, { "data": { "text/plain": [ - "PosixPath('/home/mambauser/data/gadm/processed/tiles/gadm_simplified.mbtiles')" + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_simplified.mbtiles')" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -294,7 +299,7 @@ "\n", "# simplify the geometries\n", "Mapshaper(64).input([gadm_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", - " fields=\",\".join([\"GID_0\", \"COUNTRY\", \"COUNTRY_ES\", \"COUNTRY_FR\", 'area_km2'])\n", + " fields=\",\".join([\"GID_0\", \"COUNTRY\", \"name_es\", \"name_fr\", 'area_km2'])\n", ").output(\n", " gadm_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", ").execute()\n", @@ -309,21 +314,21 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "upload: ../../data/gadm/processed/tiles/gadm_simplified.mbtiles to s3://tilestream-tilesets-production/69/_pending/6o7ulrkerlm1xi20i3scyy0mc/skytruth\n" + "upload: data/gadm/processed/tiles/gadm_simplified.mbtiles to s3://tilestream-tilesets-production/96/_pending/y008s4k96pt1elm0ek7for1mc/skytruth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 100%|██████████| 100/100 [03:43<00:00, 2.23s/it]\n" + "Linking tileset to Mapbox: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:41<00:00, 1.61s/it]\n" ] }, { @@ -332,7 +337,7 @@ "True" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -348,87 +353,119 @@ }, { "cell_type": "markdown", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, 
"source": [ - "### MPAs: Marine Protected Areas from WDPA" + "#### Gadm regions" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/mambauser/data/mpa/processed/mpa_preprocess.zip\n", - "/home/mambauser/data/mpa/processed/preprocess\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ + "/tmp/ipykernel_3525114/3234969863.py:30: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " ).to_file(\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", + " ogr_write(\n", "Allocating 16 GB of heap memory\n", - "[clean] Retained 19,184 of 19,184 features\n", - "[simplify] Repaired 277,407 intersections; 443,678 intersections could not be repaired\n", - "[clean] Retained 19,184 of 19,184 features\n", - "[o] Wrote /home/mambauser/data/mpa/processed/tiles/mpa_tiles.json\n" + "[dissolve2] Dissolved 204 features into 8 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json\n", + "For layer 0, using name \"gadm_regions\"\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json:3: Found ] at top level: \n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json:2: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", + "8 features, 99192107 bytes of geometry, 107 bytes of string pool\n", + "Choosing a maxzoom of -z0 for features typically 34570296 feet (10537026 meters) apart, and at least 22231463 feet (6776150 meters) apart\n", + "Choosing a maxzoom of -z9 for resolution of about 562 feet (171 meters) within features\n", + " 99.9% 9/403/254 \n" ] }, { "data": { "text/plain": [ - 
"PosixPath('/home/mambauser/data/mpa/processed/tiles/mpa_tiles.mbtiles')" + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.mbtiles')" ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipe = \"mpa\"\n", - "collection_name = \"mpas_wdpa\"\n", + "collection_name = \"gadm_regions\"\n", "\n", - "source_dir = FileConventionHandler(pipe)\n", + "# load the EEZ file & the regions file\n", + "gadm_data = gpd.read_file(gadm_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", "\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(source_dir, prev_step, mysettings)\n", + "with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + " regions = json.load(f)\n", "\n", - "# simplify the geometries\n", - "Mapshaper(16).input([source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", - " fields=\"WDPAID,NAME,PA_DEF,GIS_M_AREA,PARENT_ISO\"\n", - ").clean(allow_overlaps=True, rewind=True).simplify(\"dp 10% keep-shapes planar\").clean(\n", - " allow_overlaps=True\n", - ").output(\n", - " source_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", + "\n", + "regions_df = pd.DataFrame(\n", + " [\n", + " {\"region_id\": data[\"region_iso\"], \"location_id\": iso}\n", + " for data in load_regions().get(\"data\", [])\n", + " for iso in data[\"country_iso_3s\"]\n", + " ]\n", + ")\n", + "\n", + "# merge the two files\n", + "gpd.GeoDataFrame(\n", + " pd.merge(\n", + " gadm_data,\n", + " regions_df,\n", + " how=\"left\",\n", + " left_on=\"GID_0\",\n", + " right_on=\"location_id\",\n", + " sort=True,\n", + " copy=True,\n", + " ),\n", + " crs=gadm_data.crs,\n", + ").to_file(\n", + " filename=gadm_dir.get_processed_step_path(prev_step)\n", + " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", + " .as_posix(),\n", + " driver=\"ESRI 
Shapefile\",\n", + ")\n", + "\n", + "# dissolve by region_id\n", + "\n", + "Mapshaper(16).input(\n", + " [\n", + " gadm_dir.get_processed_step_path(prev_step)\n", + " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", + " .as_posix()\n", + " ]\n", + ").dissolve2(fields=\"region_id\").output(\n", + " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(),\n", + " force=True,\n", + " format=\"geojson\",\n", ").execute()\n", "\n", "# generate the mbtiles\n", - "mbtileGeneration(source_dir.get_step_fmt_file_path(current_step, \"json\"))" + "mbtileGeneration(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"))" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "upload: ../../data/mpa/processed/tiles/mpa_tiles.mbtiles to s3://tilestream-tilesets-production/de/_pending/yvng0dxxxru12eq9ye80350mc/skytruth\n" + "upload: data/gadm/processed/tiles/gadm_regions.mbtiles to s3://tilestream-tilesets-production/41/_pending/345ipuxzuqu1eqn09bs6tr1mc/skytruth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 100%|██████████| 100/100 [02:34<00:00, 1.54s/it]\n" + "Linking tileset to Mapbox: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:19<00:00, 1.40s/it]\n" ] }, { @@ -437,14 +474,14 @@ "True" ] }, - "execution_count": 10, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "uploadToMapbox(\n", - " source_dir.get_step_fmt_file_path(current_step, \"mbtiles\"),\n", + " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", " collection_name,\n", " mysettings.MAPBOX_USER,\n", " mysettings.MAPBOX_TOKEN,\n", @@ -457,555 +494,151 @@ "jp-MarkdownHeadingCollapsed": true }, "source": [ - "### PAs: 
Terrestrial Protected Areas from WDPA" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def split_n_parts(gdf: gpd.GeoDataFrame, folder: Path, n:int) -> None:\n", - " \n", - " for i in range(n):\n", - " path = folder.joinpath(f\"part{i}.shp\")\n", - " gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver=\"ESRI Shapefile\")" + "### MPAs: Marine Protected Areas from WDPA" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "pipe = \"mpa-terrestrial\"\n", - "collection_name = \"pas_wdpa\"\n", - "\n", - "source_dir = FileConventionHandler(pipe)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/mambauser/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.gpkg\n", - "/home/mambauser/data/mpa-terrestrial/processed/preprocess\n" - ] - } - ], - "source": [ - "# Download the file && unzip it\n", - "download_and_unzip_if_needed(source_dir, prev_step, mysettings, \"gpkg\")\n", - "\n", - "# split the file in two parts\n", - "file = gpd.read_file(source_dir.get_step_fmt_file_path(prev_step, \"gpkg\").as_posix())" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 2%|▏ | 6511/292261 [00:12<00:35, 8148.33it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 4%|▍ | 12019/292261 [00:13<01:04, 4363.45it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▉ | 26702/292261 [00:17<01:37, 2722.22it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - 
{ - "name": "stderr", - "output_type": "stream", - "text": [ - " 9%|▉ | 27206/292261 [00:17<04:30, 980.59it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 18%|█▊ | 53263/292261 [00:25<06:31, 609.87it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 27%|██▋ | 77713/292261 [00:32<00:56, 3799.17it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 28%|██▊ | 81780/292261 [00:34<03:11, 1097.17it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 28%|██▊ | 82483/292261 [00:34<01:27, 2385.63it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 29%|██▉ | 84671/292261 [00:35<02:11, 1580.33it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 33%|███▎ | 96205/292261 [00:53<07:36, 429.82it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 34%|███▍ | 99156/292261 [00:55<04:11, 766.30it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 34%|███▍ | 100110/292261 [00:58<05:36, 571.85it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 35%|███▌ | 102473/292261 
[01:02<12:39, 249.94it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 35%|███▌ | 102982/292261 [01:04<06:56, 454.71it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 37%|███▋ | 107131/292261 [01:11<04:11, 736.67it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 54%|█████▍ | 158622/292261 [01:38<02:26, 911.79it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 56%|█████▌ | 164270/292261 [01:40<01:53, 1125.52it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 66%|██████▋ | 193634/292261 [01:48<02:32, 647.47it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 68%|██████▊ | 198829/292261 [01:50<02:00, 777.87it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 70%|██████▉ | 203548/292261 [01:51<02:10, 682.27it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 75%|███████▍ | 217768/292261 [01:56<00:57, 1295.56it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 80%|███████▉ | 232966/292261 [02:05<02:51, 346.33it/s] " - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 81%|████████ | 236337/292261 [02:09<00:42, 1311.65it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 84%|████████▍ | 246533/292261 [02:13<00:31, 1436.44it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 87%|████████▋ | 255326/292261 [02:16<00:39, 943.14it/s] " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 88%|████████▊ | 257145/292261 [02:16<00:19, 1804.46it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 89%|████████▉ | 261395/292261 [02:18<01:41, 304.37it/s] " - ] - }, + "pipe = \"mpa\"\n", + "collection_name = \"mpas_wdpa\"\n", + "\n", + "source_dir = FileConventionHandler(pipe)\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(source_dir, prev_step, mysettings)\n", + "\n", + "# simplify the geometries\n", + "Mapshaper(64).input([source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()]).filter_fields(\n", + " fields=\"WDPAID,NAME,PA_DEF,GIS_M_AREA,PARENT_ISO\"\n", + ").clean(allow_overlaps=True, rewind=True).simplify(\"dp 10% keep-shapes planar\").clean(\n", + " allow_overlaps=True\n", + ").output(\n", + " source_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", + ").execute()\n", + "\n", + "# generate the mbtiles\n", + "mbtileGeneration(source_dir.get_step_fmt_file_path(current_step, \"json\"))" + 
] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n" + "upload: ../../data/mpa/processed/tiles/mpa_tiles.mbtiles to s3://tilestream-tilesets-production/de/_pending/yvng0dxxxru12eq9ye80350mc/skytruth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - " 91%|█████████▏| 267248/292261 [02:22<00:20, 1225.90it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Linking tileset to Mapbox: 100%|██████████| 100/100 [02:34<00:00, 1.54s/it]\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 292261/292261 [02:57<00:00, 1642.99it/s]\n" - ] + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "test = await simplify_async(file)" + "uploadToMapbox(\n", + " source_dir.get_step_fmt_file_path(current_step, \"mbtiles\"),\n", + " collection_name,\n", + " mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" ] }, { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ - "test.to_file(source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix(), driver=\"ESRI Shapefile\")" + "### PAs: Terrestrial Protected Areas from WDPA" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/home/mambauser/data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.shp'" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()" + "def split_n_parts(gdf: gpd.GeoDataFrame, folder: Path, n:int) -> None:\n", + " \n", + " for i in range(n):\n", + " path = 
folder.joinpath(f\"part{i}.shp\")\n", + " gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver=\"ESRI Shapefile\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "i_folder = source_dir.get_processed_step_path(prev_step).joinpath(\"parts\")\n", - "i_folder.mkdir(exist_ok=True, parents=True)\n", - "folders = split_n_parts(test, i_folder, 100)\n", - "del test\n", - "del file" + "pipe = \"mpa-terrestrial\"\n", + "collection_name = \"pas_wdpa\"\n", + "\n", + "source_dir = FileConventionHandler(pipe)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "mapshaper-xl 32gb -i /home/mambauser/data/mpa-terrestrial/processed/preprocess/parts/*.shp -filter-fields fields=WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO -clean allow-overlaps rewind -simplify dp 10% keep-shapes planar -clean allow-overlaps -o /home/mambauser/data/mpa-terrestrial/processed/tiles force format=geojson\n" + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.gpkg\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n" ] } ], "source": [ - "Mapshaper(32).input(\n", - " [f\"{i_folder}/*.shp\"],\n", - ").filter_fields(\n", - " fields=\"WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO\"\n", - ").clean(allow_overlaps=True, rewind=True).simplify(\"dp 10% keep-shapes planar\").clean(\n", - " allow_overlaps=True\n", - ").output(\n", - " source_dir.get_processed_step_path(current_step), force=True, format=\"geojson\"\n", - ").debug()" + "# Download the file && unzip it\n", + "download_and_unzip_if_needed(source_dir, prev_step, mysettings, \"gpkg\")\n", + "\n", + "# split the file in two parts\n", + "wdpa = gpd.read_file(source_dir.get_step_fmt_file_path(prev_step, \"gpkg\").as_posix())" ] }, { "cell_type": "code", - "execution_count": 29, + 
"execution_count": 19, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Allocating 32 GB of heap memory\n", - "[o] RFC 7946 warning: non-WGS84 GeoJSON output.\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json\n" - ] - } - ], + "outputs": [], "source": [ - "!mapshaper-xl 32gb -i /home/mambauser/data/mpa-terrestrial/processed/tiles/part*.json combine-files -merge-layers -o /home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json force format=geojson" + "# Divide the dataset into n parts\n", + "n_parts = 100\n", + "\n", + "in_folder = source_dir.get_processed_step_path(prev_step).joinpath(\"parts\")\n", + "in_folder.mkdir(exist_ok=True, parents=True)\n", + "folders = split_n_parts(wdpa, in_folder, n_parts)\n", + "\n", + "out_folder = source_dir.get_processed_step_path(current_step).joinpath(\"parts\")\n", + "out_folder.mkdir(exist_ok=True, parents=True)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1014,649 +647,635 @@ "text": [ "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 14,231 intersections; 128 intersections could not be repaired\n", + "[simplify] Repaired 5,748 intersections; 20 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part0.json\n", - "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,300 intersections; 253 intersections could not be repaired\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part1.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part0.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 
features\n", - "[simplify] Repaired 464 intersections\n", + "[simplify] Repaired 187 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part10.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part10.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 434 intersections\n", + "[simplify] Repaired 220 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part11.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part11.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,655 intersections; 48 intersections could not be repaired\n", + "[simplify] Repaired 527 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part12.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part12.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 615 intersections; 3 intersections could not be repaired\n", + "[simplify] Repaired 3,550 intersections; 24 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part13.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part13.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 878 intersections; 154 intersections could not be repaired\n", - "[clean] Retained 2,922 of 2,923 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part14.json\n", + "[simplify] Repaired 923 intersections; 32 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part14.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,265 intersections; 330 intersections could not be repaired\n", + "[simplify] Repaired 2,285 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part15.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part15.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 773 intersections\n", + "[simplify] Repaired 729 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part16.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part16.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,791 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 478 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part17.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part17.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,202 intersections; 28 intersections could not be repaired\n", + "[simplify] Repaired 1,075 intersections; 32 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 
features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part18.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part18.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 817 intersections\n", + "[simplify] Repaired 3,405 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part19.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part19.json\n", "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 659 intersections; 48 intersections could not be repaired\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part2.json\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 9,028 intersections; 56 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part1.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 397 intersections\n", + "[simplify] Repaired 4,116 intersections; 17 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part20.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part20.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,638 intersections; 4 intersections could not be repaired\n", + "[simplify] Repaired 1,174 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - 
"[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part21.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part21.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 70,976 intersections; 1,029 intersections could not be repaired\n", + "[simplify] Repaired 1,508 intersections; 75 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part22.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part22.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,149 intersections; 20 intersections could not be repaired\n", + "[simplify] Repaired 1,747 intersections; 288 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part23.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part23.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 4,472 intersections; 11 intersections could not be repaired\n", + "[simplify] Repaired 996 intersections; 100 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part24.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part24.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,483 intersections; 52 intersections could not be repaired\n", + "[simplify] Repaired 977 intersections; 11 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part25.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part25.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 769 intersections; 18 intersections could not be repaired\n", + "[simplify] Repaired 2,273 intersections; 200 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part26.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part26.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,186 intersections; 358 intersections could not be repaired\n", + "[simplify] Repaired 1,467 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part27.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part27.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,071 intersections; 31 intersections could not be repaired\n", + "[simplify] Repaired 2,924 intersections; 110 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part28.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part28.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,563 intersections; 24 intersections could not be repaired\n", + "[simplify] Repaired 1,246 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part29.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part29.json\n", "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,343 intersections; 16 intersections could not be repaired\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part3.json\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,041 intersections; 72 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part2.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 37,179 intersections; 7,576 intersections could not be repaired\n", - "[clean] Retained 2,921 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part30.json\n", + "[simplify] Repaired 77 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part30.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 12,639 intersections; 2,386 intersections could not be repaired\n", + "[simplify] Repaired 813 intersections; 40 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part31.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part31.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 13,114 intersections; 119 intersections could not be repaired\n", + "[simplify] Repaired 1,540 intersections; 128 
intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part32.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part32.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 291 intersections; 2 intersections could not be repaired\n", + "[simplify] Repaired 1,761 intersections; 27 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part33.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part33.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 5,448 intersections; 36 intersections could not be repaired\n", + "[simplify] Repaired 471 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part34.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part34.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 8,936 intersections; 124 intersections could not be repaired\n", + "[simplify] Repaired 3,212 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part35.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part35.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,785 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 2,303 intersections; 68 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 
features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part36.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part36.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 548 intersections\n", + "[simplify] Repaired 702 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part37.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part37.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,049 intersections; 48 intersections could not be repaired\n", + "[simplify] Repaired 1,473 intersections; 12 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part38.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part38.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,724 intersections; 75 intersections could not be repaired\n", + "[simplify] Repaired 2,704 intersections; 88 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part39.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part39.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 6,848 intersections; 1 intersection could not be repaired\n", + "[simplify] Repaired 524 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part4.json\n", + "[o] Wrote 
/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part3.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,576 intersections\n", + "[simplify] Repaired 5,124 intersections; 27 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part40.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part40.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,263 intersections\n", + "[simplify] Repaired 9,631 intersections; 136 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part41.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part41.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,539 intersections; 44 intersections could not be repaired\n", + "[simplify] Repaired 9,110 intersections; 64 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part42.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part42.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,252 intersections; 20 intersections could not be repaired\n", + "[simplify] Repaired 4,879 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part43.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part43.json\n", "Allocating 
32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 172 intersections\n", + "[simplify] Repaired 2,283 intersections; 64 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part44.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part44.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 267 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 70,799 intersections; 380 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part45.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part45.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 540 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 3,258 intersections; 88 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part46.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part46.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,570 intersections; 24 intersections could not be repaired\n", + "[simplify] Repaired 2,333 intersections; 205 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part47.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part47.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - 
"[simplify] Repaired 942 intersections; 32 intersections could not be repaired\n", + "[simplify] Repaired 1,801 intersections; 92 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part48.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part48.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,262 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 3,474 intersections; 83 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part49.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part49.json\n", "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 952 intersections; 15 intersections could not be repaired\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part5.json\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 2,153 intersections; 48 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part4.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 697 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 2,880 intersections; 108 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part50.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part50.json\n", "Allocating 32 GB of 
heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 537 intersections\n", + "[simplify] Repaired 2,048 intersections; 7 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part51.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part51.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,066 intersections; 32 intersections could not be repaired\n", + "[simplify] Repaired 705 intersections; 40 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part52.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part52.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 3,586 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 395 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part53.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part53.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,821 intersections; 17 intersections could not be repaired\n", + "[simplify] Repaired 1,006 intersections; 451 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part54.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part54.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,213 intersections; 16 
intersections could not be repaired\n", + "[simplify] Repaired 6,022 intersections; 528 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part55.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part55.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,465 intersections; 75 intersections could not be repaired\n", + "[simplify] Repaired 3,333 intersections; 178 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part56.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part56.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,644 intersections; 120 intersections could not be repaired\n", + "[simplify] Repaired 2,672 intersections; 32 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part57.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part57.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 981 intersections; 100 intersections could not be repaired\n", + "[simplify] Repaired 15,941 intersections; 148 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part58.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part58.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 975 intersections; 4 intersections could not be 
repaired\n", + "[simplify] Repaired 2,914 intersections; 48 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part59.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part59.json\n", "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,009 intersections; 51 intersections could not be repaired\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part6.json\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 2,661 intersections; 75 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part5.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,397 intersections; 200 intersections could not be repaired\n", + "[simplify] Repaired 1,663 intersections; 68 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part60.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part60.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,494 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 1,759 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part61.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part61.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - 
"[simplify] Repaired 2,872 intersections; 110 intersections could not be repaired\n", + "[simplify] Repaired 6,005 intersections; 396 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part62.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part62.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,130 intersections\n", + "[simplify] Repaired 2,626 intersections; 24 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part63.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part63.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 82 intersections\n", + "[simplify] Repaired 767 intersections; 108 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part64.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part64.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 857 intersections; 40 intersections could not be repaired\n", + "[simplify] Repaired 38 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part65.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part65.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,611 intersections; 131 intersections could not be repaired\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part66.json\n", + "[simplify] Repaired 13,933 intersections; 154 intersections could not be repaired\n", + "[clean] Retained 2,921 of 2,922 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part66.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,892 intersections; 27 intersections could not be repaired\n", + "[simplify] Repaired 2,359 intersections; 261 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part67.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part67.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 473 intersections\n", + "[simplify] Repaired 548 intersections; 48 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part68.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part68.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 3,528 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 1,373 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part69.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part69.json\n", "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,456 intersections; 80 intersections could not be repaired\n", - "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part7.json\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 1,617 intersections\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part6.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,041 intersections; 68 intersections could not be repaired\n", + "[simplify] Repaired 6,889 intersections; 33 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part70.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part70.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 681 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 945 intersections; 15 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part71.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part71.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,515 intersections; 12 intersections could not be repaired\n", + "[simplify] Repaired 1,042 intersections; 51 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part72.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part72.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,047 intersections; 88 intersections could not be repaired\n", + "[simplify] Repaired 1,443 
intersections; 80 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part73.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part73.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 5,277 intersections; 83 intersections could not be repaired\n", + "[simplify] Repaired 662 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part74.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part74.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 9,639 intersections; 88 intersections could not be repaired\n", + "[simplify] Repaired 1,008 intersections; 3 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part75.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part75.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 9,942 intersections; 56 intersections could not be repaired\n", + "[simplify] Repaired 479 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part76.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part76.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,505 intersections; 8 intersections could not be repaired\n", + "[simplify] Repaired 407 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part77.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part77.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,200 intersections; 64 intersections could not be repaired\n", + "[simplify] Repaired 2,587 intersections; 48 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part78.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part78.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 70,952 intersections; 380 intersections could not be repaired\n", + "[simplify] Repaired 516 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part79.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part79.json\n", "Allocating 32 GB of heap memory\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 662 intersections\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part8.json\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[simplify] Repaired 1,223 intersections\n", + "[clean] Retained 2,922 of 2,922 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part7.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,913 intersections; 88 intersections could not be repaired\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part80.json\n", + "[simplify] Repaired 978 intersections; 157 intersections could 
not be repaired\n", + "[clean] Retained 2,922 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part80.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,438 intersections; 245 intersections could not be repaired\n", + "[simplify] Repaired 2,302 intersections; 330 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part81.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part81.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,637 intersections; 104 intersections could not be repaired\n", + "[simplify] Repaired 856 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part82.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part82.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,825 intersections; 106 intersections could not be repaired\n", + "[simplify] Repaired 1,722 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part83.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part83.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 3,118 intersections; 68 intersections could not be repaired\n", + "[simplify] Repaired 3,215 intersections; 28 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote 
/home/mambauser/data/mpa-terrestrial/processed/tiles/part84.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part84.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,886 intersections; 7 intersections could not be repaired\n", + "[simplify] Repaired 902 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part85.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part85.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 585 intersections; 40 intersections could not be repaired\n", + "[simplify] Repaired 395 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part86.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part86.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 387 intersections\n", + "[simplify] Repaired 1,585 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part87.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part87.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 964 intersections; 379 intersections could not be repaired\n", + "[simplify] Repaired 70,987 intersections; 1,056 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part88.json\n", + "[o] Wrote 
/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part88.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 6,027 intersections; 528 intersections could not be repaired\n", + "[simplify] Repaired 1,271 intersections; 92 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part89.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part89.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,028 intersections; 3 intersections could not be repaired\n", + "[simplify] Repaired 2,455 intersections; 12 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part9.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part8.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,929 intersections; 209 intersections could not be repaired\n", + "[simplify] Repaired 4,606 intersections; 27 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part90.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part90.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,370 intersections; 32 intersections could not be repaired\n", + "[simplify] Repaired 1,454 intersections; 52 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part91.json\n", + "[o] Wrote 
/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part91.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 15,988 intersections; 148 intersections could not be repaired\n", + "[simplify] Repaired 747 intersections; 14 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part92.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part92.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,955 intersections; 48 intersections could not be repaired\n", + "[simplify] Repaired 3,089 intersections; 351 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part93.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part93.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,643 intersections; 68 intersections could not be repaired\n", + "[simplify] Repaired 2,157 intersections; 35 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part94.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part94.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,855 intersections\n", + "[simplify] Repaired 1,459 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part95.json\n", + "[o] Wrote 
/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part95.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 6,680 intersections; 388 intersections could not be repaired\n", - "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part96.json\n", + "[simplify] Repaired 37,172 intersections; 7,581 intersections could not be repaired\n", + "[clean] Retained 2,922 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part96.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,730 intersections; 24 intersections could not be repaired\n", + "[simplify] Repaired 9,139 intersections; 1,597 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part97.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part97.json\n", + "Allocating 32 GB of heap memory\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[simplify] Repaired 16,085 intersections; 112 intersections could not be repaired\n", + "[clean] Retained 2,923 of 2,923 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part98.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 765 intersections; 100 intersections could not be repaired\n", + "[simplify] Repaired 5 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part98.json\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part99.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 
features\n", - "[simplify] Repaired 33 intersections\n", + "[simplify] Repaired 3,395 intersections; 20 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", - "[o] Wrote /home/mambauser/data/mpa-terrestrial/processed/tiles/part99.json\n" + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part9.json\n" ] } ], "source": [ - "!mapshaper-xl 32gb -i /home/mambauser/data/mpa-terrestrial/processed/preprocess/parts/*.shp -filter-fields fields=WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO -clean allow-overlaps rewind -simplify dp 30% keep-shapes planar -clean allow-overlaps -o /home/mambauser/data/mpa-terrestrial/processed/tiles force combine-layers format=geojson\n" + "# Simplify the geometries in each part\n", + "!mapshaper-xl 32gb -i /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess/parts/*.shp -filter-fields fields=WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO -clean allow-overlaps rewind -simplify dp 30% keep-shapes planar -clean allow-overlaps -o /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts format=geojson" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 29, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "For layer 0, using name \"mpaterrestrialtiles\"\n", - "/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json:24380: Found ] at top level\n", - "/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json:12366: Reached EOF without all containers being closed\n", - "In JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", - "292259 features, 138020430 bytes of geometry, 6110864 bytes of separate metadata, 14481131 bytes of string pool\n", - "Choosing a maxzoom of -z5 for features about 13778 feet (4200 meters) apart\n", - "Choosing a maxzoom of -z8 for resolution of about 1485 feet (452 meters) within 
features\n", - "tile 0/0/0 size is 845483 with detail 12, >500000 \n", - "tile 1/0/0 size is 528833 with detail 12, >500000 \n", - "tile 1/1/0 size is 972990 with detail 12, >500000 \n", - "tile 2/1/1 size is 614654 with detail 12, >500000 \n", - "tile 2/2/1 size is 1540068 with detail 12, >500000 \n", - "tile 2/2/1 size is 806964 with detail 11, >500000 \n", - "tile 3/4/2 size is 2272518 with detail 12, >500000 \n", - "tile 3/4/2 size is 1238297 with detail 11, >500000 \n", - "tile 3/4/2 size is 619274 with detail 10, >500000 \n", - "tile 4/4/5 size is 515510 with detail 12, >500000 \n", - "tile 4/9/4 size is 558716 with detail 12, >500000 \n", - "tile 4/9/5 size is 537453 with detail 12, >500000 \n", - "tile 4/8/4 size is 764105 with detail 12, >500000 \n", - "tile 4/8/5 size is 2167008 with detail 12, >500000 \n", - "tile 4/8/5 size is 1253048 with detail 11, >500000 \n", - "tile 4/8/5 size is 686870 with detail 10, >500000 \n", - "tile 5/18/9 size is 506492 with detail 12, >500000 \n", - "tile 5/17/9 size is 676608 with detail 12, >500000 \n", - "tile 5/16/11 size is 820961 with detail 12, >500000 \n", - "tile 5/17/10 size is 815791 with detail 12, >500000 \n", - "tile 5/17/11 size is 644684 with detail 12, >500000 \n", - "tile 5/16/10 size is 1459552 with detail 12, >500000 \n", - "tile 5/16/10 size is 859224 with detail 11, >500000 \n", - "tile 6/34/21 size is 540332 with detail 12, >500000 \n", - "tile 6/33/22 size is 790092 with detail 12, >500000 \n", - "tile 6/33/21 size is 1462170 with detail 12, >500000 \n", - "tile 6/33/21 size is 932291 with detail 11, >500000 \n", - "tile 6/33/21 size is 549289 with detail 10, >500000 \n", - "tile 7/66/42 size is 790837 with detail 12, >500000 \n", - "tile 7/66/42 size is 548367 with detail 11, >500000 \n", - " 99.9% 8/80/87 \n" + "Allocating 32 GB of heap memory\n", + "[info] \n", + "=======================================\n", + "Layer: part\n", + "---------------------------------------\n", + "Type: polygon\n", + 
"Records: 292,258\n", + "Bounds: -17365351.30054095,-8681816.563254528,17455418.620513726,8381220.576463436\n", + "CRS: [unknown]\n", + "Source: /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part0.json\n", + "\n", + "Attribute data\n", + "------------+--------------------------\n", + " Field | First value\n", + "------------+--------------------------\n", + " GIS_AREA | 218.56922190237\n", + " NAME | 'Laguna de los Pozuelos'\n", + " PA_DEF | '1'\n", + " PARENT_ISO | 'ARG'\n", + " WDPAID | 3\n", + "------------+--------------------------\n", + "\n", + "[o] RFC 7946 warning: non-WGS84 GeoJSON output.\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/mpa-terrestrial_tiles.json\n" ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.mbtiles')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "mbtileGeneration(Path(\"/home/mambauser/data/mpa-terrestrial/processed/tiles/mpa-terrestrial-tiles.json\"))" + "# Combine simplified files into one\n", + "!mapshaper-xl 32gb -i /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/*.json combine-files -merge-layers -info -o /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/mpa-terrestrial_tiles.json format=geojson" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/mambauser/data/mpa/processed/mpa_preprocess.zip\n", - "/home/mambauser/data/mpa/processed/preprocess\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "Allocating 16 GB of heap memory\n", - "[clean] Retained 19,184 of 19,184 features\n", - "[simplify] Repaired 277,407 intersections; 443,678 intersections could not be repaired\n", - "[clean] Retained 19,184 of 19,184 
features\n", - "[o] Wrote /home/mambauser/data/mpa/processed/tiles/mpa_tiles.json\n" + "For layer 0, using name \"mpaterrestrial_tiles\"\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/mpa-terrestrial_tiles.json:2292: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/mpa-terrestrial_tiles.json:8054: Found ] at top level: \n", + "292258 features, 145131001 bytes of geometry, 14491838 bytes of string pool\n", + "Choosing a maxzoom of -z6 for features typically 16077 feet (4901 meters) apart, and at least 1742 feet (531 meters) apart\n", + "Choosing a maxzoom of -z8 for resolution of about 1487 feet (453 meters) within features\n", + "tile 0/0/0 size is 858242 with detail 12, >500000 \n", + "tile 1/0/0 size is 531410 with detail 12, >500000 \n", + "tile 1/1/0 size is 997206 with detail 12, >500000 \n", + "tile 2/1/1 size is 617557 with detail 12, >500000 \n", + "tile 2/2/1 size is 1580755 with detail 12, >500000 \n", + "tile 2/2/1 size is 814261 with detail 11, >500000 \n", + "tile 3/4/2 size is 2364270 with detail 12, >500000 \n", + "tile 3/4/2 size is 1269866 with detail 11, >500000 \n", + "tile 3/4/2 size is 626476 with detail 10, >500000 \n", + "tile 4/4/5 size is 518158 with detail 12, >500000 \n", + "tile 4/9/4 size is 574378 with detail 12, >500000 \n", + "tile 4/9/5 size is 542672 with detail 12, >500000 \n", + "tile 4/8/4 size is 769197 with detail 12, >500000 \n", + "tile 4/8/5 size is 2263374 with detail 12, >500000 \n", + "tile 4/8/5 size is 1306184 with detail 11, >500000 \n", + "tile 4/8/5 size is 702654 with detail 10, >500000 \n", + "tile 5/18/9 size is 517344 with detail 12, >500000 \n", + "tile 5/17/9 size is 680231 with detail 12, >500000 \n", + "tile 5/17/11 size is 661616 with detail 12, >500000 \n", + "tile 5/16/11 size is 826739 with detail 12, >500000 \n", + "tile 5/17/10 
size is 860937 with detail 12, >500000 \n", + "tile 5/17/10 size is 536487 with detail 11, >500000 \n", + "tile 5/16/10 size is 1497332 with detail 12, >500000 \n", + "tile 5/16/10 size is 888181 with detail 11, >500000 \n", + "tile 6/34/21 size is 554828 with detail 12, >500000 \n", + "tile 6/33/22 size is 793806 with detail 12, >500000 \n", + "tile 6/33/21 size is 1492123 with detail 12, >500000 \n", + "tile 6/33/21 size is 962449 with detail 11, >500000 \n", + "tile 6/33/21 size is 573074 with detail 10, >500000 \n", + "tile 7/66/42 size is 805262 with detail 12, >500000 \n", + "tile 7/66/42 size is 571487 with detail 11, >500000 \n", + " 99.9% 8/139/86 \n" ] }, { "data": { "text/plain": [ - "PosixPath('/home/mambauser/data/mpa/processed/tiles/mpa_tiles.mbtiles')" + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/mpa-terrestrial_tiles.mbtiles')" ] }, + "execution_count": 30, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "pipe = \"mpa-terrestrial\"\n", - "collection_name = \"pas_wdpa\"\n", - "\n", - "source_dir = FileConventionHandler(pipe)\n", - "\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(source_dir, prev_step, mysettings, \"gpkg\")\n", - "# split the file in two parts\n", - "file = gpd.read_file(source_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", - "i_folder = source_dir.get_step_path(prev_step)\n", - "folders = split_2_parts(file, i_folder)\n", - "# simplify the geometries\n", - "Mapshaper(16).input(folders).filter_fields(fields=\"WDPAID,NAME,PA_DEF,GIS_AREA,PARENT_ISO\").clean(\n", - " allow_overlaps=True, rewind=True\n", - ").simplify(\"dp 10% keep-shapes planar\").clean(allow_overlaps=True).merge_layers().output(\n", - " source_dir.get_step_fmt_file_path(current_step, \"json\").as_posix(), force=True, format=\"geojson\"\n", - ").execute()\n", - "\n", - "# generate the mbtiles\n", + "# Generate tiles\n", 
"mbtileGeneration(source_dir.get_step_fmt_file_path(current_step, \"json\"))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "upload: ../../data/mpa/processed/tiles/mpa_tiles.mbtiles to s3://tilestream-tilesets-production/de/_pending/yvng0dxxxru12eq9ye80350mc/skytruth\n" + "upload: data/mpa-terrestrial/processed/tiles/mpa-terrestrial_tiles.mbtiles to s3://tilestream-tilesets-production/da/_pending/ktgzxoiw6pt17hp0ixdp0s1mc/skytruth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 100%|██████████| 100/100 [02:34<00:00, 1.54s/it]\n" + "Linking tileset to Mapbox: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:12<00:00, 1.38it/s]\n" ] }, { @@ -1665,11 +1284,13 @@ "True" ] }, + "execution_count": 31, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ + "# Upload to mapbox\n", "uploadToMapbox(\n", " source_dir.get_step_fmt_file_path(current_step, \"mbtiles\"),\n", " collection_name,\n", @@ -1793,7 +1414,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -1801,53 +1422,155 @@ "collection_name = \"protected_seas\"\n", "\n", "pipe_dir = FileConventionHandler(pipe)\n", - "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"ProtectedSeas_20240716.geojson\")\n", + "input_file_moderate = pipe_dir.get_processed_step_path(prev_step).joinpath(\"ProtectedSeas_Navigator_LFP3_20240531.zip\")\n", + "input_file_high = pipe_dir.get_processed_step_path(prev_step).joinpath(\"ProtectedSeas_Navigator_LFP4_5_20240531.zip\")\n", + "\n", + "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"protectedseas.json\")\n", "output_file = 
pipe_dir.get_processed_step_path(current_step).joinpath(\"protectedseas_tiles.mbtiles\")\n", "\n", + "# Download the protected seas layers from the bucket && unzip it\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"ProtectedSeas/ProtectedSeas_Navigator_LFP3_20240531.zip\",\n", + " file=input_file_moderate,\n", + " operation=\"r\",\n", + ")\n", "\n", - "# Download the protected seas file && unzip it\n", "writeReadGCP(\n", " credentials=mysettings.GCS_KEYFILE_JSON,\n", " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=\"ProtectedSeas/ProtectedSeas_20240716.geojson\",\n", - " file=input_file,\n", + " blob_name=\"ProtectedSeas/ProtectedSeas_Navigator_LFP4_5_20240531.zip\",\n", + " file=input_file_high,\n", " operation=\"r\",\n", ")\n", "\n", "# Load the data\n", - "protectedseas_layer = gpd.read_file(input_file)" + "protectedseas_layer_mod = gpd.read_file(input_file_moderate).pipe(clean_geometries)\n", + "protectedseas_layer_high = gpd.read_file(input_file_high).pipe(clean_geometries)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "protectedseas_layer_high = protectedseas_layer_high[['SITE_ID', 'SITE_NAME','removal_of','geometry']]\n", + "protectedseas_layer_mod = protectedseas_layer_mod[['SITE_ID', 'SITE_NAME','removal_of','geometry']]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "protectedseas_layer_high['FPS_cat'] = 'highly'\n", + "protectedseas_layer_mod['FPS_cat'] = 'moderately'" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SITE_IDSITE_NAMEremoval_ofgeometryFPS_cat
0AIAG19Southern No Net Zone3POLYGON ((-61.82426 17.59172, -61.82393 17.591...moderately
\n", + "
" + ], + "text/plain": [ + " SITE_ID SITE_NAME removal_of \\\n", + "0 AIAG19 Southern No Net Zone 3 \n", + "\n", + " geometry FPS_cat \n", + "0 POLYGON ((-61.82426 17.59172, -61.82393 17.591... moderately " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge the two layers\n", + "protectedseas_layer = gpd.GeoDataFrame(\n", + " pd.concat([protectedseas_layer_mod, protectedseas_layer_high], ignore_index=True)\n", + ")\n", + "\n", + "# Save merged layer \n", + "protectedseas_layer.to_file(input_file, driver=\"GeoJSON\")\n", + "\n", + "protectedseas_layer.head(1)" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Allocating 16 GB of heap memory\n", + "Allocating 32 GB of heap memory\n", "[clean] Retained 6,741 of 6,741 features\n", - "[simplify] Repaired 5,240 intersections; 629 intersections could not be repaired\n", + "[simplify] Repaired 5,237 intersections; 629 intersections could not be repaired\n", "[clean] Retained 6,741 of 6,741 features\n", - "[o] Wrote /home/mambauser/data/protectedseas/processed/preprocess/ProtectedSeas_20240716.geojson\n" + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/protectedseas/processed/preprocess/protectedseas.json\n" ] }, { "data": { "text/plain": [ - "CompletedProcess(args='mapshaper-xl 16gb -i /home/mambauser/data/protectedseas/processed/preprocess/ProtectedSeas_20240716.geojson -clean allow-overlaps rewind -simplify dp 10% keep-shapes planar -clean allow-overlaps -o /home/mambauser/data/protectedseas/processed/preprocess/ProtectedSeas_20240716.geojson force format=geojson', returncode=0)" + "CompletedProcess(args='mapshaper-xl 32gb -i /home/sofia/dev/skytruth-30x30/data/data/protectedseas/processed/preprocess/protectedseas.json -clean allow-overlaps rewind -simplify dp 10% keep-shapes planar -clean allow-overlaps -o 
/home/sofia/dev/skytruth-30x30/data/data/protectedseas/processed/preprocess/protectedseas.json force format=geojson', returncode=0)" ] }, - "execution_count": 38, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# simplify the geometries\n", - "Mapshaper(16).input([input_file.as_posix()]).clean(\n", + "Mapshaper(32).input([input_file.as_posix()]).clean(\n", " allow_overlaps=True, rewind=True\n", ").simplify(\"dp 10% keep-shapes planar\").clean(allow_overlaps=True).output(\n", " input_file.as_posix(),\n", @@ -1858,50 +1581,66 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 48, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "For layer 0, using name \"protectedseas\"\n", + "/home/sofia/dev/skytruth-30x30/data/data/protectedseas/processed/preprocess/protectedseas.json:462: Found ] at top level: \n", + "/home/sofia/dev/skytruth-30x30/data/data/protectedseas/processed/preprocess/protectedseas.json:270: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", + "6741 features, 9193728 bytes of geometry, 372650 bytes of string pool\n", + "Choosing a maxzoom of -z4 for features typically 73700 feet (22464 meters) apart, and at least 4638 feet (1414 meters) apart\n", + "Choosing a maxzoom of -z9 for resolution of about 755 feet (230 meters) within features\n", + " 99.9% 9/360/253 \n" + ] + }, { "data": { "text/plain": [ - "PosixPath('/home/mambauser/data/protectedseas/processed/tiles/protectedseas_tiles.mbtiles')" + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/protectedseas/processed/tiles/protectedseas_tiles.mbtiles')" ] }, - "execution_count": 39, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Generate mbtiles\n", "mbtileGeneration(input_file, output_file)" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 49, "metadata": 
{}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "rosetta error: failed to open elf at /lib64/ld-linux-x86-64.so.2\n", - " Trace/breakpoint trap\n" + "upload: data/protectedseas/processed/tiles/protectedseas_tiles.mbtiles to s3://tilestream-tilesets-production/49/_pending/wp5zt2tnhpm1zdx09zzczs1mc/skytruth\n" ] }, { - "ename": "CalledProcessError", - "evalue": "Command 'aws s3 cp /home/mambauser/data/protectedseas/processed/tiles/protectedseas_tiles.mbtiles s3://tilestream-tilesets-production/71/_pending/lxene85xrno1vgk2b6c6350mc/skytruth --region us-east-1' returned non-zero exit status 133.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[41], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43muploadToMapbox\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprotected_seas\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mmysettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mMAPBOX_USER\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mmysettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mMAPBOX_TOKEN\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/helpers/mapbox_uploader.py:21\u001b[0m, in \u001b[0;36muploadToMapbox\u001b[0;34m(source, display_name, username, token)\u001b[0m\n\u001b[1;32m 18\u001b[0m tileset_name \u001b[38;5;241m=\u001b[39m source\u001b[38;5;241m.\u001b[39mstem\n\u001b[1;32m 19\u001b[0m mapboxCredentials \u001b[38;5;241m=\u001b[39m 
getS3Credentials(username, token)\n\u001b[0;32m---> 21\u001b[0m upload_status \u001b[38;5;241m=\u001b[39m \u001b[43muploadToS3\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapboxCredentials\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 22\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(upload_status)\n\u001b[1;32m 23\u001b[0m result \u001b[38;5;241m=\u001b[39m linkToMapbox(\n\u001b[1;32m 24\u001b[0m username, token, mapboxCredentials, tileset_name, display_name\n\u001b[1;32m 25\u001b[0m )\n", - "File \u001b[0;32m~/src/helpers/mapbox_uploader.py:51\u001b[0m, in \u001b[0;36muploadToS3\u001b[0;34m(source, credentials)\u001b[0m\n\u001b[1;32m 49\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUploading to S3...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 50\u001b[0m setS3Credentials(credentials)\n\u001b[0;32m---> 51\u001b[0m status \u001b[38;5;241m=\u001b[39m \u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43maws s3 cp \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43msource\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m s3://\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcredentials\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbucket\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcredentials\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mkey\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m --region 
us-east-1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[43mshell\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status\u001b[38;5;241m.\u001b[39mreturncode \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUpload to S3 failed with status \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstatus\u001b[38;5;241m.\u001b[39mreturncode\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m/opt/conda/lib/python3.12/subprocess.py:571\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 569\u001b[0m retcode \u001b[38;5;241m=\u001b[39m process\u001b[38;5;241m.\u001b[39mpoll()\n\u001b[1;32m 570\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check \u001b[38;5;129;01mand\u001b[39;00m retcode:\n\u001b[0;32m--> 571\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CalledProcessError(retcode, process\u001b[38;5;241m.\u001b[39margs,\n\u001b[1;32m 572\u001b[0m output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr)\n\u001b[1;32m 573\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CompletedProcess(process\u001b[38;5;241m.\u001b[39margs, retcode, stdout, stderr)\n", - "\u001b[0;31mCalledProcessError\u001b[0m: Command 'aws s3 cp /home/mambauser/data/protectedseas/processed/tiles/protectedseas_tiles.mbtiles 
s3://tilestream-tilesets-production/71/_pending/lxene85xrno1vgk2b6c6350mc/skytruth --region us-east-1' returned non-zero exit status 133." + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:35<00:00, 1.55s/it]\n" ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ From ab04444dd9843ff3095b4180a9ce01127041b547 Mon Sep 17 00:00:00 2001 From: sofia Date: Mon, 21 Oct 2024 09:42:20 +0200 Subject: [PATCH 12/16] terrestrial data processing --- data/notebooks/habitat2.ipynb | 181 +- data/notebooks/pipes_mock/intermediate.ipynb | 506 +-- data/notebooks/pipes_mock/locations.ipynb | 744 +++- data/notebooks/pipes_mock/precalc_sofia.ipynb | 3594 +++++++++++------ data/notebooks/pipes_mock/tiles.ipynb | 1363 ++++++- 5 files changed, 4443 insertions(+), 1945 deletions(-) diff --git a/data/notebooks/habitat2.ipynb b/data/notebooks/habitat2.ipynb index 8db4eaaa..d65f3f71 100644 --- a/data/notebooks/habitat2.ipynb +++ b/data/notebooks/habitat2.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 7, + 
"execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -130,7 +130,7 @@ " dtype='object')" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -166,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -211,7 +211,7 @@ "120 UMI 9.38785685362166 9.38785685362166" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -235,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -275,7 +275,7 @@ "Index: []" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -286,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -300,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -326,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -384,7 +384,7 @@ "1 AGO 0.000000 3.395671 cold-water corals 2024" ] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": 
{}, "outputs": [ { @@ -481,7 +481,7 @@ "4 coldwater-corals 4400.140842 15336.975280 GLOB 2024" ] }, - "execution_count": 17, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -497,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -573,7 +573,7 @@ "4 cold-water corals 4400.140842 15336.975280 GLOB 2024" ] }, - "execution_count": 18, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -592,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -602,7 +602,7 @@ " 'warm-water corals'], dtype=object)" ] }, - "execution_count": 19, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -622,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -690,13 +690,6 @@ " 'country_iso_3s': [\n", " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", - " 'region_name': 'Antartica',\n", - " 'country_iso_3s': [\n", - " \"ATA\"\n", - " ]\n", " }\n", "]\n", "\n", @@ -709,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -965,7 +958,7 @@ "23 WA warm-water corals 547.928957 4903.230395 2024" ] }, - "execution_count": 21, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -984,7 +977,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -994,7 +987,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1059,7 +1052,7 @@ "1 marine " ] }, - "execution_count": 23, + "execution_count": 26, "metadata": {}, 
"output_type": "execute_result" } @@ -1072,7 +1065,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -1088,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -1101,7 +1094,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -1115,7 +1108,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -1126,7 +1119,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -1144,7 +1137,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -1159,7 +1152,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1244,7 +1237,7 @@ "4 26229 ABNJ 309.588492 no POINT (8.88750 84.91307)" ] }, - "execution_count": 30, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1257,7 +1250,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1299,7 +1292,7 @@ "Index: []" ] }, - "execution_count": 31, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1319,7 +1312,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1368,7 +1361,7 @@ "0 seamounts 2.690810e+07 3.426630e+06 GLOB 2011" ] }, - "execution_count": 32, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1401,7 +1394,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1420,7 +1413,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 
37, "metadata": {}, "outputs": [], "source": [ @@ -1433,7 +1426,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1575,7 +1568,7 @@ "[92 rows x 5 columns]" ] }, - "execution_count": 35, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1597,7 +1590,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -1691,7 +1684,7 @@ "5 WA seamounts 2.487428e+03 9.384765e+04 2011" ] }, - "execution_count": 36, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1709,7 +1702,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -1719,7 +1712,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1729,7 +1722,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -1745,7 +1738,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -1754,7 +1747,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -1764,7 +1757,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -1778,7 +1771,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -1827,7 +1820,7 @@ "0 mangroves 61287.20375 147358.990971 GLOB 2020" ] }, - "execution_count": 43, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -1842,7 +1835,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -1852,7 +1845,7 @@ }, { "cell_type": "code", - "execution_count": 45, + 
"execution_count": 48, "metadata": {}, "outputs": [ { @@ -1946,7 +1939,7 @@ "5 WA mangroves 27.83000 173.620938 2020" ] }, - "execution_count": 45, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -1964,7 +1957,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -1974,7 +1967,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -1984,7 +1977,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -2001,7 +1994,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -2168,7 +2161,7 @@ "[614 rows x 6 columns]" ] }, - "execution_count": 49, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -2181,7 +2174,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -2257,7 +2250,7 @@ "0 marine " ] }, - "execution_count": 50, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -2268,32 +2261,26 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ - "habitats_all.to_csv(path_out + \"tables/habitats6.csv\", index=False, encoding='utf-8', sep=',', decimal='.')" + "habitats_all.to_csv(path_out + \"tables/habitats6.csv\", index=False, na_rep='NaN', encoding='utf-8', sep=',', decimal='.')" ] }, { - "cell_type": "code", - "execution_count": 52, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'../data/processed/tables/habitats6.csv'" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "path_out + \"tables/habitats6.csv\"" + "### Terrestrial habitats" ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -2301,18 +2288,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" } }, "nbformat": 4, diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 6b6000ea..1d699873 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -796,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -822,17 +822,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id': 'marine-shp', 'title': 'WDPA_WDOECM_Sep2024_Public_marine_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_WDOECM_Sep2024_Public_marine_shp.zip', 'hasFailed': False, 'token': 'marine'}\n" - ] - } - ], + "outputs": [], "source": [ "working_folder = FileConventionHandler(pipe)\n", "input_path = working_folder.pipe_raw_path\n", @@ -843,25 +835,25 @@ "zipped_output_file = working_folder.get_step_fmt_file_path(step, \"zip\", True)\n", "remote_path = working_folder.get_remote_path(step)\n", "\n", - "# download data\n", - "r = 
requests.post(url=mpa_url, data=mpa_body)\n", - "r.raise_for_status()\n", + "# # download data\n", + "# r = requests.post(url=mpa_url, data=mpa_body)\n", + "# r.raise_for_status()\n", "\n", - "download_url = r.json().get(\"url\")\n", - "input_file_name = f'{r.json().get(\"title\")}.zip'\n", - "print(r.json())\n", + "# download_url = r.json().get(\"url\")\n", + "# input_file_name = f'{r.json().get(\"title\")}.zip'\n", + "# print(r.json())\n", "\n", - "input_file = downloadFile(\n", - " url=download_url,\n", - " output_path=input_path,\n", - " overwrite=force_clean,\n", - " file=input_file_name,\n", - ")" + "# input_file = downloadFile(\n", + "# url=download_url,\n", + "# output_path=input_path,\n", + "# overwrite=force_clean,\n", + "# file=input_file_name,\n", + "# )" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -878,7 +870,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -926,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -938,7 +930,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -955,27 +947,7 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'vizzuality_processed_data/mpa/preprocess/mpa_preprocess.zip'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_path" - ] - }, - { - "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -993,7 +965,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -1004,7 +976,7 @@ }, { "cell_type": "code", - "execution_count": 38, + 
"execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1019,12 +991,13 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "working_folder = FileConventionHandler(pipe)\n", - "input_path = working_folder.pipe_raw_path\n", + "# input_path = working_folder.pipe_raw_path\n", + "input_file = working_folder.pipe_raw_path.joinpath(\"WDPA_Sep2024_Public_shp.zip\")\n", "temp_working_path = working_folder.get_temp_file_path(step)\n", "\n", "output_path = working_folder.get_processed_step_path(step)\n", @@ -1035,9 +1008,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'id': 'wdpa-shp', 'title': 'WDPA_Oct2024_Public_shp', 'url': 'https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_Oct2024_Public_shp.zip', 'hasFailed': False, 'token': 'wdpa'}\n" + ] + } + ], "source": [ "# download data\n", "r = requests.post(url=mpa_url, data=mpa_body)\n", @@ -1057,7 +1038,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -1074,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1123,355 +1104,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 85/292261 [00:00<14:27, 336.92it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%|▎ | 661/292261 [00:07<145:23:41, 1.79s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 1%|██ | 4100/292261 [00:10<00:50, 5746.09it/s]" - ] - 
}, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 5%|██████▋ | 13673/292261 [00:12<02:06, 2195.25it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 21%|█████████████████████████████▋ | 60200/292261 [00:20<01:26, 2696.80it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 22%|████████████████████████████████▏ | 65454/292261 [00:22<02:10, 1744.30it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 32%|██████████████████████████████████████████████▋ | 94837/292261 [00:27<00:41, 4733.72it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 34%|█████████████████████████████████████████████████ | 100205/292261 [00:28<00:47, 4031.78it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 36%|███████████████████████████████████████████████████▎ | 104962/292261 [00:29<00:43, 4304.70it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 41%|██████████████████████████████████████████████████████████▎ | 119249/292261 [00:32<00:35, 4855.21it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 46%|█████████████████████████████████████████████████████████████████▋ | 134186/292261 [00:35<00:52, 
3035.13it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 51%|████████████████████████████████████████████████████████████████████████▎ | 147855/292261 [00:38<00:42, 3390.00it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 53%|███████████████████████████████████████████████████████████████████████████▋ | 154770/292261 [00:40<00:37, 3677.63it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 54%|████████████████████████████████████████████████████████████████████████████▌ | 156511/292261 [00:40<01:10, 1915.83it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 56%|███████████████████████████████████████████████████████████████████████████████▍ | 162422/292261 [00:42<00:31, 4181.06it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 58%|██████████████████████████████████████████████████████████████████████████████████▌ | 168614/292261 [00:43<00:55, 2234.02it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 68%|█████████████████████████████████████████████████████████████████████████████████████████████████▋ | 199564/292261 [00:49<00:19, 4753.37it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - " 70%|████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 205065/292261 [00:50<00:27, 3198.44it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 219986/292261 [00:53<00:17, 4065.50it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 246638/292261 [00:58<00:10, 4368.04it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'Polygon' object has no attribute 'geoms'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 271598/292261 [01:03<00:06, 3242.58it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 275485/292261 [01:03<00:04, 3707.50it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 278285/292261 [01:04<00:02, 6507.33it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - " 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 290011/292261 [01:11<00:00, 5892.57it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292261/292261 [03:13<00:00, 1507.66it/s]\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292261/292261 [03:34<00:00, 1362.25it/s]\n" ] } ], @@ -1481,7 +1121,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -1494,46 +1134,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess/mpa-terrestrial_preprocess.gpkg')" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "output_file" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['0', '1'], dtype=object)" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_wdpa_terrestrial['MARINE'].unique()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1553,13 +1153,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# clean unzipped files\n", - "rm_tree(temp_working_path) if temp_working_path.exists() else None\n", - "rm_tree(output_path) if output_path.exists() else None" + "# 
# clean unzipped files\n", + "# rm_tree(temp_working_path) if temp_working_path.exists() else None\n", + "# rm_tree(output_path) if output_path.exists() else None" ] }, { @@ -2333,18 +1933,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb index fe6b0b94..d9181a95 100644 --- a/data/notebooks/pipes_mock/locations.ipynb +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -164,6 +164,17 @@ "# blob_name=translations_csv_url,\n", "# file=translations_csv_output,\n", "# operation=\"r\",\n", + "# )\n", + "\n", + "country_commitments_url = \"30x30 National Commitments - MPAtlas Country Targets.csv\"\n", + "country_commitments_output = input_path.joinpath(country_commitments_url.split(\"/\")[-1])\n", + "\n", + "# writeReadGCP(\n", + "# credentials=mysettings.GCS_KEYFILE_JSON,\n", + "# bucket_name=mysettings.GCS_BUCKET,\n", + "# blob_name=country_commitments_url,\n", + "# file=country_commitments_output,\n", + "# operation=\"r\",\n", "# )" ] }, @@ -176,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -191,14 +202,22 @@ " name: Series[str] = pa.Field(coerce=True)\n", " name_es: Series[str] = pa.Field(coerce=True)\n", " name_fr: Series[str] = pa.Field(coerce=True)\n", - " total_marine_area: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", - " total_terrestrial_area: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815\n", + " total_marine_area: Series[int] = pa.Field(ge=0, coerce=True) # noqa: N815\n", + " total_terrestrial_area: Series[int] = pa.Field(ge=0, coerce=True) # noqa: 
N815\n", " type: Series[str] = pa.Field(\n", " unique_values_eq=[\"country\", \"worldwide\", \"region\", \"highseas\"], coerce=True\n", " )\n", " groups: Series[List[int]] = pa.Field(coerce=True)\n", " marine_bounds: Series[List[float]] = pa.Field(coerce=True, nullable=True)\n", " terrestrial_bounds: Series[List[float]] = pa.Field(coerce=True, nullable=True)\n", + " marine_target: Series[pd.Int64Dtype] = pa.Field(nullable=True, coerce=True)\n", + " marine_target_year: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", + "\n", + "def round_to_list(bounds):\n", + " return list(np.round(bounds, decimals=5))\n", + "\n", + "def add_bbox(df: gpd.GeoDataFrame, col_name: str = \"bounds\") -> gpd.GeoDataFrame:\n", + " return df.assign(**{col_name: df.geometry.bounds.apply(round_to_list, axis=1)})\n", "\n", "def add_translations(df, translations_csv_path):\n", " translations_df = pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[])\n", @@ -247,6 +266,9 @@ " result.index = result.index + 1\n", " result.index.name = \"id\"\n", "\n", + " # Round AREA_KM2 to integers\n", + " result[\"AREA_KM2\"] = result[\"AREA_KM2\"].round().astype(int)\n", + "\n", " return result.assign(id=result.index)\n", "\n", "def add_groups_and_members_land(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame:\n", @@ -259,17 +281,6 @@ " )\n", " )\n", "\n", - "# def combine_bounds(marine_bounds, land_bounds):\n", - "# # Check if marine bounds are valid\n", - "# if isinstance(marine_bounds, list) and len(marine_bounds) == 4:\n", - "# return marine_bounds\n", - "# # If marine bounds are not valid, check land bounds\n", - "# elif isinstance(land_bounds, list) and len(land_bounds) == 4:\n", - "# return land_bounds\n", - "# # If neither bounds are valid, return an empty list\n", - "# else:\n", - "# return []\n", - "\n", "def combine_columns(df, col1, col2, new_col):\n", " \"\"\"\n", " Combine two columns in a DataFrame using combine_first and assign to a new 
column.\n", @@ -286,6 +297,7 @@ " df[new_col] = df[col1].combine_first(df[col2])\n", " return df\n", "\n", + "\n", "def add_region_iso_2(\n", " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", ") -> pd.DataFrame | gpd.GeoDataFrame:\n", @@ -323,16 +335,283 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3524857/673975959.py:35: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", + "/tmp/ipykernel_3639646/4293776084.py:43: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_marine_areaidmarine_boundscodetotal_terrestrial_areaterrestrial_boundstypegroupsnamename_esname_fr
02128813892[-180.0, -76.80012, 180.0, 90.0]ABNJ0NaNhighseas[]Areas Beyond National JurisdictionÁreas fuera de la jurisdicción nacionalZones au-delà de la juridiction nationale
1148780583[-28.84709, -50.31506, 75.85287, 38.80087]AF29993095[-25.3618, -34.83514, 63.50347, 37.55986]region[]AfricaÁfricaAfrique
20168NaNAFG644050[60.50487, 29.36157, 74.89413, 38.49041]country[4]AfghanistanAfganistánAfghanistan
349586610[8.19586, -17.27214, 13.86517, -5.02988]AGO1251701[11.6687, -18.04208, 24.08007, -4.37259]country[3]AngolaAngolaAngola
41216511[18.32149, 39.64039, 20.02083, 42.0112]ALB28690[19.26416, 39.6507, 21.04909, 42.66043]country[6]AlbaniaAlbaniaAlbanie
....................................
206527384166[41.08194, 8.95275, 57.946, 16.64959]YEM453741[41.81458, 12.10819, 54.53542, 19.0]country[9]YemenYemenYémen
2071547576167[13.34802, -50.31506, 42.8475, -26.86206]ZAF1221328[16.45189, -34.83514, 32.89125, -22.12503]country[3]South AfricaSudáfricaAfrique du Sud
2080209NaNZMB753990[21.98004, -18.07918, 33.71244, -8.27198]country[3]ZambiaZambiaZambie
2090210NaNZNC3314[32.602, 35.00272, 34.60792, 35.71208]country[6]Northern CyprusChipre del NorteChypre du Nord
2100211NaNZWE391235[25.23773, -22.41957, 33.05502, -15.60728]country[3]ZimbabweZimbabueZimbabwe
\n", + "

211 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " total_marine_area id marine_bounds code \\\n", + "0 212881389 2 [-180.0, -76.80012, 180.0, 90.0] ABNJ \n", + "1 14878058 3 [-28.84709, -50.31506, 75.85287, 38.80087] AF \n", + "2 0 168 NaN AFG \n", + "3 495866 10 [8.19586, -17.27214, 13.86517, -5.02988] AGO \n", + "4 12165 11 [18.32149, 39.64039, 20.02083, 42.0112] ALB \n", + ".. ... ... ... ... \n", + "206 527384 166 [41.08194, 8.95275, 57.946, 16.64959] YEM \n", + "207 1547576 167 [13.34802, -50.31506, 42.8475, -26.86206] ZAF \n", + "208 0 209 NaN ZMB \n", + "209 0 210 NaN ZNC \n", + "210 0 211 NaN ZWE \n", + "\n", + " total_terrestrial_area terrestrial_bounds \\\n", + "0 0 NaN \n", + "1 29993095 [-25.3618, -34.83514, 63.50347, 37.55986] \n", + "2 644050 [60.50487, 29.36157, 74.89413, 38.49041] \n", + "3 1251701 [11.6687, -18.04208, 24.08007, -4.37259] \n", + "4 28690 [19.26416, 39.6507, 21.04909, 42.66043] \n", + ".. ... ... \n", + "206 453741 [41.81458, 12.10819, 54.53542, 19.0] \n", + "207 1221328 [16.45189, -34.83514, 32.89125, -22.12503] \n", + "208 753990 [21.98004, -18.07918, 33.71244, -8.27198] \n", + "209 3314 [32.602, 35.00272, 34.60792, 35.71208] \n", + "210 391235 [25.23773, -22.41957, 33.05502, -15.60728] \n", + "\n", + " type groups name \\\n", + "0 highseas [] Areas Beyond National Jurisdiction \n", + "1 region [] Africa \n", + "2 country [4] Afghanistan \n", + "3 country [3] Angola \n", + "4 country [6] Albania \n", + ".. ... ... ... \n", + "206 country [9] Yemen \n", + "207 country [3] South Africa \n", + "208 country [3] Zambia \n", + "209 country [6] Northern Cyprus \n", + "210 country [3] Zimbabwe \n", + "\n", + " name_es \\\n", + "0 Áreas fuera de la jurisdicción nacional \n", + "1 África \n", + "2 Afganistán \n", + "3 Angola \n", + "4 Albania \n", + ".. ... 
\n", + "206 Yemen \n", + "207 Sudáfrica \n", + "208 Zambia \n", + "209 Chipre del Norte \n", + "210 Zimbabue \n", + "\n", + " name_fr \n", + "0 Zones au-delà de la juridiction nationale \n", + "1 Afrique \n", + "2 Afghanistan \n", + "3 Angola \n", + "4 Albanie \n", + ".. ... \n", + "206 Yémen \n", + "207 Afrique du Sud \n", + "208 Zambie \n", + "209 Chypre du Nord \n", + "210 Zimbabwe \n", + "\n", + "[211 rows x 11 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -423,10 +702,11 @@ " land_col = f\"{base_col}_land\"\n", " combined_locations = combine_columns(combined_locations, marine_col, land_col, base_col)\n", "\n", + "\n", "# Fill NaN values with 0 for each column\n", "columns_to_fill = ['total_marine_area', 'total_terrestrial_area']\n", "for col in columns_to_fill:\n", - " combined_locations[col] = combined_locations[col].fillna(0)\n", + " combined_locations[col] = combined_locations[col].fillna(0).astype(int)\n", "\n", "# Force the id column to be an integer\n", "combined_locations['id'] = combined_locations['id'].astype(int)\n", @@ -439,12 +719,405 @@ ")\n", "combined_locations = combined_locations.reset_index(drop=True)\n", "\n", - "# Force the index to have the values in id column (so they follow the same order in the previous table)\n", + "combined_locations\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Add the national commitments (only marine for now)\n", + "commit = pd.read_csv(country_commitments_output, header=1)\n", + "commit = commit.iloc[:, :6][commit['30% National Target'] == 'Y']\n", + "commit.drop(columns=[\"% Fully/Highly*\"], inplace=True)\n", + "commit['% National Target'] = commit['% National Target'].str.replace('%', '').astype(int)\n", + "\n", + "# When % National Target is 30, fill By Year with 2030\n", + "commit['By Year'] = commit['By Year'].fillna(commit['% National Target'].apply(lambda x: '2030' if 
x == 30 else None))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_marine_areaidmarine_boundscodetotal_terrestrial_areaterrestrial_boundstypegroupsnamename_esname_frmarine_targetmarine_target_year
index
13610000001[-180.0, -85.5625, 180.0, 90.0]GLOB134954835[-180.0, -90.0, 180.0, 83.65833]worldwide[]GlobalGlobalGlobal302030
22128813892[-180.0, -76.80012, 180.0, 90.0]ABNJ0NaNhighseas[]Areas Beyond National JurisdictionÁreas fuera de la jurisdicción nacionalZones au-delà de la juridiction nationale<NA><NA>
3148780583[-28.84709, -50.31506, 75.85287, 38.80087]AF29993095[-25.3618, -34.83514, 63.50347, 37.55986]region[]AfricaÁfricaAfrique<NA><NA>
4540886874[-180.0, -58.44947, 180.0, 47.73081]AS31625556[-180.0, -55.11694, 180.0, 53.56086]region[]Asia & PacificAsia y PacíficoAsie et Pacifique<NA><NA>
596189785[-180.0, -85.5625, 180.0, -57.18865]AT12088230[-180.0, -90.0, 180.0, -59.59375]region[]AntarcticaAntártidaAntarctique<NA><NA>
..........................................
2070207NaNXCA371055[46.75388, 36.5723, 54.04378, 47.01562]country[4]Caspian SeaMar CaspioMer Caspienne<NA><NA>
2080208NaNXKO10819[19.97939, 41.84826, 21.79305, 43.24613]country[6]KosovoKosovoKosovo<NA><NA>
2090209NaNZMB753990[21.98004, -18.07918, 33.71244, -8.27198]country[3]ZambiaZambiaZambie<NA><NA>
2100210NaNZNC3314[32.602, 35.00272, 34.60792, 35.71208]country[6]Northern CyprusChipre del NorteChypre du Nord<NA><NA>
2110211NaNZWE391235[25.23773, -22.41957, 33.05502, -15.60728]country[3]ZimbabweZimbabueZimbabwe<NA><NA>
\n", + "

211 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " total_marine_area id marine_bounds \\\n", + "index \n", + "1 361000000 1 [-180.0, -85.5625, 180.0, 90.0] \n", + "2 212881389 2 [-180.0, -76.80012, 180.0, 90.0] \n", + "3 14878058 3 [-28.84709, -50.31506, 75.85287, 38.80087] \n", + "4 54088687 4 [-180.0, -58.44947, 180.0, 47.73081] \n", + "5 9618978 5 [-180.0, -85.5625, 180.0, -57.18865] \n", + "... ... ... ... \n", + "207 0 207 NaN \n", + "208 0 208 NaN \n", + "209 0 209 NaN \n", + "210 0 210 NaN \n", + "211 0 211 NaN \n", + "\n", + " code total_terrestrial_area \\\n", + "index \n", + "1 GLOB 134954835 \n", + "2 ABNJ 0 \n", + "3 AF 29993095 \n", + "4 AS 31625556 \n", + "5 AT 12088230 \n", + "... ... ... \n", + "207 XCA 371055 \n", + "208 XKO 10819 \n", + "209 ZMB 753990 \n", + "210 ZNC 3314 \n", + "211 ZWE 391235 \n", + "\n", + " terrestrial_bounds type groups \\\n", + "index \n", + "1 [-180.0, -90.0, 180.0, 83.65833] worldwide [] \n", + "2 NaN highseas [] \n", + "3 [-25.3618, -34.83514, 63.50347, 37.55986] region [] \n", + "4 [-180.0, -55.11694, 180.0, 53.56086] region [] \n", + "5 [-180.0, -90.0, 180.0, -59.59375] region [] \n", + "... ... ... ... \n", + "207 [46.75388, 36.5723, 54.04378, 47.01562] country [4] \n", + "208 [19.97939, 41.84826, 21.79305, 43.24613] country [6] \n", + "209 [21.98004, -18.07918, 33.71244, -8.27198] country [3] \n", + "210 [32.602, 35.00272, 34.60792, 35.71208] country [6] \n", + "211 [25.23773, -22.41957, 33.05502, -15.60728] country [3] \n", + "\n", + " name \\\n", + "index \n", + "1 Global \n", + "2 Areas Beyond National Jurisdiction \n", + "3 Africa \n", + "4 Asia & Pacific \n", + "5 Antarctica \n", + "... ... \n", + "207 Caspian Sea \n", + "208 Kosovo \n", + "209 Zambia \n", + "210 Northern Cyprus \n", + "211 Zimbabwe \n", + "\n", + " name_es \\\n", + "index \n", + "1 Global \n", + "2 Áreas fuera de la jurisdicción nacional \n", + "3 África \n", + "4 Asia y Pacífico \n", + "5 Antártida \n", + "... ... 
\n", + "207 Mar Caspio \n", + "208 Kosovo \n", + "209 Zambia \n", + "210 Chipre del Norte \n", + "211 Zimbabue \n", + "\n", + " name_fr marine_target \\\n", + "index \n", + "1 Global 30 \n", + "2 Zones au-delà de la juridiction nationale \n", + "3 Afrique \n", + "4 Asie et Pacifique \n", + "5 Antarctique \n", + "... ... ... \n", + "207 Mer Caspienne \n", + "208 Kosovo \n", + "209 Zambie \n", + "210 Chypre du Nord \n", + "211 Zimbabwe \n", + "\n", + " marine_target_year \n", + "index \n", + "1 2030 \n", + "2 \n", + "3 \n", + "4 \n", + "5 \n", + "... ... \n", + "207 \n", + "208 \n", + "209 \n", + "210 \n", + "211 \n", + "\n", + "[211 rows x 13 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Include the national commitments in the combined_locations table\n", + "combined_locations = combined_locations.merge(commit[['Iso Code', '% National Target', 'By Year']], \n", + " left_on='code', right_on='Iso Code', how='left')\n", + "\n", + "combined_locations.rename(columns={'% National Target': 'marine_target', 'By Year': 'marine_target_year'}, inplace=True)\n", + "\n", + "combined_locations.drop(columns=['Iso Code'], inplace=True)\n", + "\n", + "combined_locations['marine_target'] = combined_locations['marine_target'].astype(pd.Int64Dtype())\n", + "combined_locations['marine_target_year'] = combined_locations['marine_target_year'].astype(pd.Int64Dtype())\n", + "\n", + "combined_locations = combined_locations.reset_index(drop=True)\n", + "\n", + "# Add marine_target and marine_target_year to the combined_locations table for code 'GLOB'\n", + "combined_locations.loc[combined_locations['code'] == 'GLOB', 'marine_target'] = 30\n", + "combined_locations.loc[combined_locations['code'] == 'GLOB', 'marine_target_year'] = 2030 \n", + "\n", + "\n", + "# Force the index to have the values in id column (so they follow the order they had in the previous table)\n", "combined_locations['index'] = 
combined_locations['id']\n", "combined_locations.set_index('index', inplace=True)\n", "combined_locations.sort_index(inplace=True)\n", "\n", - "# Prepare final JSON output (stored in gadm folder)\n", + "combined_locations" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare final JSON output\n", "output_locations_combined = {\n", " \"version\": 2,\n", " \"data\": {\n", @@ -458,27 +1131,20 @@ "with open(output_file, \"w\") as f:\n", " json.dump(output_locations_combined, f)\n", "\n", - "del output_locations_combined\n" + "del output_locations_combined" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "## Create locations_code (stored in gadm folder)\n", "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", " .to_csv(pipe_gadm_dir.get_processed_step_path(current_step)\n", - " .joinpath('locations_code_all.csv'), index=False))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ + " .joinpath('locations_code_all.csv'), index=False))\n", + "\n", "## Save locations_code in data_commons/data folder\n", "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", " .to_csv(scripts_dir.joinpath('data_commons/data/locations_code_all.csv'), index=False))" @@ -486,27 +1152,37 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" ] } ], "source": [ - "# Upload csv to bucket\n", - "remote_path = 'vizzuality_processed_data/strapi_tables/location_code.csv'\n", + "# Upload files to bucket\n", + "remote_path_code = 
'vizzuality_processed_data/strapi_tables/location_code.csv'\n", + "remote_path_table = 'vizzuality_processed_data/strapi_tables/locations.json'\n", "\n", "writeReadGCP(\n", " credentials=mysettings.GCS_KEYFILE_JSON,\n", " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", + " blob_name=remote_path_code,\n", " file=scripts_dir.joinpath('data_commons/data/locations_code_all.csv'),\n", " operation=\"w\",\n", + ")\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path_table,\n", + " file=output_file,\n", + " operation=\"w\",\n", ")" ] } diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb index a846dfef..01cba7b1 100644 --- a/data/notebooks/pipes_mock/precalc_sofia.ipynb +++ b/data/notebooks/pipes_mock/precalc_sofia.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ "from helpers.strapi import Strapi\n", "from helpers.settings import get_settings, Settings\n", "from helpers.file_handler import FileConventionHandler\n", - "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", + "from helpers.utils import download_and_unzip_if_needed, writeReadGCP, make_archive\n", "\n", "from pipelines.output_schemas import (\n", " FPLSchema,\n", @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -113,6 +113,17 @@ "current_step = \"stats\"" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# # Strapi setup\n", + "# strapi = Strapi(url=mysettings.STRAPI_URL)\n", + "# strapi.login(jwt=mysettings.STRAPI_JWT)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -122,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, 
"metadata": {}, "outputs": [], "source": [ @@ -251,7 +262,7 @@ " latest_years = df.groupby('iso_3')['year'].transform('max')\n", " \n", " # Create the is_last_year column\n", - " df['is_last_year'] = df['year'] == latest_years\n", + " df['is_last_year'] = (df['year'] == latest_years).astype(int)\n", " \n", " return df\n", "\n", @@ -266,11 +277,11 @@ " pd.DataFrame: The DataFrame with the 'environment' column added.\n", " \"\"\"\n", " if 'total_marine_area' in df.columns:\n", - " df['environment'] = 'marine'\n", + " df['environment'] = 1\n", " elif 'total_terrestrial_area' in df.columns:\n", - " df['environment'] = 'terrestrial'\n", + " df['environment'] = 2\n", " else:\n", - " df['environment'] = 'unknown' \n", + " df['environment'] = 0\n", " \n", " return df\n", "\n", @@ -341,16 +352,16 @@ ") -> pd.DataFrame | gpd.GeoDataFrame:\n", " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", "\n", - "def calculate_global_area_pa(\n", + "\n", + "def calculate_global_area_tpa(\n", " df: pd.DataFrame,\n", " gby_col: list,\n", - " agg_ops: Dict[str, str] = {\"area\": \"sum\"},\n", + " agg_ops: Dict[str, str] = {\"protected_area\": \"sum\", \"1\": \"sum\", \"0\": \"sum\", \"protected_areas_count\": \"sum\"},\n", " iso_column=\"iso_3\",\n", ") -> pd.DataFrame:\n", - " global_area = df.groupby([*gby_col]).agg(agg_ops).reset_index().assign(**{iso_column: \"GLOB\"})\n", + " global_area = df.groupby(gby_col).agg(agg_ops).reset_index().assign(**{iso_column: \"GLOB\"})\n", " return pd.concat([global_area, df], ignore_index=True)\n", "\n", - "\n", "def cumulative_pa_def_counts(df: pd.DataFrame, year_col: str = \"STATUS_YR\", pa_def_col: str = \"PA_DEF\", iso_col: str = \"iso_3\", start_year: int = 2010) -> pd.DataFrame:\n", " \"\"\"\n", " Calculate the cumulative number of PA_DEF values for each iso_3 and each year starting from a given year.\n", @@ -385,6 +396,23 @@ "\n", " return final_results\n", "\n", + "def calculate_global_area_tpa(\n", + " df: 
pd.DataFrame,\n", + " gby_col: list,\n", + " agg_ops: Dict[str, str] = {\"protected_area\": \"sum\", \"1\": \"sum\", \"0\": \"sum\", \"protected_areas_count\": \"sum\"},\n", + " iso_column=\"iso_3\",\n", + ") -> pd.DataFrame:\n", + " # Ensure the columns to be aggregated exist in the DataFrame\n", + " missing_cols = [col for col in agg_ops.keys() if col not in df.columns]\n", + " if missing_cols:\n", + " raise ValueError(f\"Missing columns in DataFrame: {missing_cols}\")\n", + " \n", + " # Group by the specified columns and aggregate using the provided operations\n", + " global_area = df.groupby(gby_col).agg(agg_ops).reset_index().assign(**{iso_column: \"GLOB\"})\n", + " \n", + " # Concatenate the global area DataFrame with the original DataFrame\n", + " return pd.concat([global_area, df], ignore_index=True)\n", + "\n", "def calculate_stats_pa(\n", " df: pd.DataFrame, gby_col: list, iso_column: str, ops: dict[str, str] = {\"protected_area\": \"sum\"}\n", ") -> pd.DataFrame:\n", @@ -419,6 +447,38 @@ "\n", " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))\n", "\n", + "def define_childs_ids(group) -> tuple:\n", + " if len(group) > 1:\n", + " parent_id = group[group.is_child.eq(False)].index.values[0]\n", + " children_ids = group[group.is_child.eq(True)].index.tolist()\n", + " return parent_id, children_ids\n", + " else:\n", + " return pd.NA, pd.NA\n", + "\n", + "def add_child_parent_relationship(\n", + " df: pd.DataFrame | gpd.GeoDataFrame,\n", + " gby: str = \"wdpaid\",\n", + " cols: list = [\"wdpaid\", \"wdpa_pid\", \"is_child\", \"data_source\"],\n", + ") -> pd.DataFrame | gpd.GeoDataFrame:\n", + " \n", + " # Get parent and children IDs for each group\n", + " groups = df.groupby(gby)[cols].apply(define_childs_ids)\n", + " \n", + " # Extract parent and children information\n", + " relationship_df = pd.DataFrame(\n", + " [[a, b] for a, b in groups.values], \n", + " columns=[\"parent\", \"children\"]\n", + " 
).dropna(subset=[\"parent\"]).set_index(\"parent\")\n", + " \n", + " # Assign children IDs to the 'children' column\n", + " df[\"children\"] = pd.Series(relationship_df[\"children\"], index=relationship_df.index).reindex(df.index)\n", + " \n", + " # Assign parent IDs to the 'parent' column for the children\n", + " df[\"parent\"] = pd.NA \n", + " for parent, children in relationship_df.itertuples(index=True):\n", + " df.loc[children, \"parent\"] = parent\n", + " \n", + " return df\n", "\n", "\n", "class NewProtectedAreaExtentSchema(pa.DataFrameModel):\n", @@ -431,8 +491,8 @@ " coverage: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", " global_contribution: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", " year: Series[int] = pa.Field(ge=2000, coerce=True)\n", - " is_last_year: Series[bool] = pa.Field(coerce=True)\n", - " environment: Series[str] = pa.Field(isin=[\"marine\", \"terrestrial\"], coerce=True)\n", + " is_last_year: Series[int] = pa.Field(isin=[0, 1], coerce=True)\n", + " environment: Series[int] = pa.Field(isin=[1, 2], coerce=True)\n", "\n", "class NewProtectionLevelSchema(pa.DataFrameModel):\n", " id: Index[int] = pa.Field(gt=0, coerce=True)\n", @@ -445,7 +505,27 @@ "class PAsSchema(pa.DataFrameModel):\n", " id: Index[int] = pa.Field(gt=0, coerce=True)\n", " wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", - " child_id: Series[str] = pa.Field(coerce=True)\n", + " # child_id: Series[str] = pa.Field(coerce=True)\n", + " name: Series[str] = pa.Field(coerce=True)\n", + " year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True)\n", + " area: Series[float] = pa.Field(ge=0, coerce=True)\n", + " bbox: Series[List[float]] = pa.Field(coerce=True)\n", + " location: Series[int] = pa.Field(ge=0, coerce=True)\n", + " protection_status: Series[int] = pa.Field(ge=0, nullable=True)\n", + " mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", + " mpaa_protection_level: 
Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", + " iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True)\n", + " designation: Series[str] = pa.Field(coerce=True, nullable=True)\n", + " parent: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", + " children: Series[List[int]] = pa.Field(coerce=True, nullable=True)\n", + " data_source: Series[int] = pa.Field(coerce=True)\n", + " coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True)\n", + " environment: Series[int] = pa.Field(isin=[1, 2], coerce=True)\n", + "\n", + "class PAsSchemaChunk1(pa.DataFrameModel):\n", + " id: Index[int] = pa.Field(gt=0, coerce=True)\n", + " wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", + " # child_id: Series[str] = pa.Field(coerce=True)\n", " name: Series[str] = pa.Field(coerce=True)\n", " year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True)\n", " area: Series[float] = pa.Field(ge=0, coerce=True)\n", @@ -454,14 +534,16 @@ " protection_status: Series[int] = pa.Field(ge=0, nullable=True)\n", " mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", " mpaa_protection_level: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", - " pa_iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True)\n", + " iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True)\n", " designation: Series[str] = pa.Field(coerce=True, nullable=True)\n", - " is_child: Series[bool] = pa.Field(coerce=True)\n", " children: Series[List[int]] = pa.Field(coerce=True, nullable=True)\n", " data_source: Series[int] = pa.Field(coerce=True)\n", " coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True)\n", - " environment: Series[str] = pa.Field(isin=[\"marine\", \"terrestrial\"], coerce=True)\n", - "\n" + " environment: Series[int] = pa.Field(isin=[1, 2], coerce=True)\n", + "\n", + "class 
PAsSchemaChunk2(pa.DataFrameModel):\n", + " id: Index[int] = pa.Field(gt=0, coerce=True)\n", + " parent: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)" ] }, { @@ -477,192 +559,192 @@ "metadata": {}, "outputs": [], "source": [ - "# Code for pa terrestrial processing\n", - "\n", - "def split_by_year(\n", - " gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", - ") -> List[gpd.GeoDataFrame]:\n", - " \"\"\"Split data by year. relevant for MPA data.(coverage indicator)\"\"\"\n", - " prior_2010 = (\n", - " gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", - " .dissolve(\n", - " by=[\"iso_3\"],\n", - " )\n", - " .assign(year=2010)\n", - " .reset_index()\n", - " )\n", + "# # Code for pa terrestrial processing\n", + "\n", + "# def split_by_year(\n", + "# gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", + "# ) -> List[gpd.GeoDataFrame]:\n", + "# \"\"\"Split data by year. relevant for MPA data.(coverage indicator)\"\"\"\n", + "# prior_2010 = (\n", + "# gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + "# .dissolve(\n", + "# by=[\"iso_3\"],\n", + "# )\n", + "# .assign(year=2010)\n", + "# .reset_index()\n", + "# )\n", "\n", - " after_2010 = (\n", - " gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", - " .rename(columns={\"STATUS_YR\": \"year\"})\n", - " )\n", - " return [prior_2010, after_2010]\n", + "# after_2010 = (\n", + "# gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", + "# .rename(columns={\"STATUS_YR\": \"year\"})\n", + "# )\n", + "# return [prior_2010, after_2010]\n", "\n", "\n", - "def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", - " \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", - " minx, miny, maxx, maxy = bounds\n", - " x = np.arange(minx, maxx, cell_size)\n", - " y = np.arange(miny, maxy, cell_size)\n", - " 
polygons = [\n", - " {\n", - " \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", - " \"cell_id\": f\"{i}_{j}\",\n", - " }\n", - " for i, j in product(x, y)\n", - " ]\n", - " return gpd.GeoDataFrame(polygons)\n", - "\n", - "\n", - "def subdivide_grid(\n", - " grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", - ") -> List:\n", - " subdivided_elements = []\n", - " for grid_element in grid_gdf.geometry:\n", - " candidates = get_matches(grid_element, gdf)\n", - " density = len(candidates)\n", - " if density > max_complexity:\n", + "# def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", + "# \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", + "# minx, miny, maxx, maxy = bounds\n", + "# x = np.arange(minx, maxx, cell_size)\n", + "# y = np.arange(miny, maxy, cell_size)\n", + "# polygons = [\n", + "# {\n", + "# \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", + "# \"cell_id\": f\"{i}_{j}\",\n", + "# }\n", + "# for i, j in product(x, y)\n", + "# ]\n", + "# return gpd.GeoDataFrame(polygons)\n", + "\n", + "\n", + "# def subdivide_grid(\n", + "# grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", + "# ) -> List:\n", + "# subdivided_elements = []\n", + "# for grid_element in grid_gdf.geometry:\n", + "# candidates = get_matches(grid_element, gdf)\n", + "# density = len(candidates)\n", + "# if density > max_complexity:\n", " \n", - " subdivision_cellsize = max_cellsize / 2\n", - " # Subdivide the grid element recursively\n", - " subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", - " subdivided_elements.extend(\n", - " subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity)\n", - " )\n", - " elif density > 0:\n", - " subdivided_elements.append(grid_element)\n", - "\n", - " return subdivided_elements\n", - "\n", + "# subdivision_cellsize = max_cellsize / 2\n", + "# # Subdivide the 
grid element recursively\n", + "# subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", + "# subdivided_elements.extend(\n", + "# subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity)\n", + "# )\n", + "# elif density > 0:\n", + "# subdivided_elements.append(grid_element)\n", "\n", - "def create_density_based_grid(\n", - " gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000\n", - ") -> gpd.GeoDataFrame:\n", - " # Get the bounds of the GeoDataFrame\n", - " minx, miny, maxx, maxy = gdf.total_bounds\n", + "# return subdivided_elements\n", "\n", - " # Create an initial grid\n", - " grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", "\n", - " # Subdivide grid elements based on density and complexity\n", - " subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", + "# def create_density_based_grid(\n", + "# gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000\n", + "# ) -> gpd.GeoDataFrame:\n", + "# # Get the bounds of the GeoDataFrame\n", + "# minx, miny, maxx, maxy = gdf.total_bounds\n", "\n", - " return gpd.GeoDataFrame(geometry=subdivided_elements)\n", + "# # Create an initial grid\n", + "# grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", "\n", + "# # Subdivide grid elements based on density and complexity\n", + "# subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", "\n", - "# TODO: refactor this so old function mantains functionality for marine areas\n", + "# return gpd.GeoDataFrame(geometry=subdivided_elements)\n", "\n", - "def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", - " result = []\n", - " gdf[\"already_processed\"] = False\n", - " for geometry in grid_gdf.geometry:\n", - " candidates = get_matches(geometry, gdf)\n", - " subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", - " gdf.loc[subset.index, \"already_processed\"] = True\n", - " if not 
subset.empty:\n", - " result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", - " return result\n", "\n", + "# # TODO: refactor this so old function mantains functionality for marine areas\n", "\n", - "@background\n", - "def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", - " try:\n", - " bbox = df_large_chunk.total_bounds\n", - "\n", - " candidates = get_matches(box(*bbox), df_small.geometry)\n", - " if len(candidates) > 0:\n", - " subset = df_small.loc[candidates.index].clip(box(*bbox))\n", - "\n", - " result = (\n", - " gpd.overlay(df_large_chunk, subset).reset_index(drop=True)\n", - " .clip(subset.geometry)\n", - " .reset_index(drop=True)\n", - " )\n", - " result.geometry = result.geometry.apply(repair_geometry)\n", - " else:\n", - " result = gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", - " return result\n", - " except Exception as e:\n", - " logging.error(e)\n", - " return gpd.GeoDataFrame()\n", - " finally:\n", - " pbar.update(1)\n", - "\n", + "# def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", + "# result = []\n", + "# gdf[\"already_processed\"] = False\n", + "# for geometry in grid_gdf.geometry:\n", + "# candidates = get_matches(geometry, gdf)\n", + "# subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", + "# gdf.loc[subset.index, \"already_processed\"] = True\n", + "# if not subset.empty:\n", + "# result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", + "# return result\n", "\n", - "async def spatial_join(\n", - " geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", - ") -> gpd.GeoDataFrame:\n", - " \"\"\"Create spatial join between two GeoDataFrames.\"\"\"\n", - " # we build the spatial index for the larger GeoDataFrame\n", - " smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", "\n", - " logger.info(f\"Processing {len(larger_dim)} elements\")\n", + "# @background\n", + "# 
def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", + "# try:\n", + "# bbox = df_large_chunk.total_bounds\n", "\n", - " grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", + "# candidates = get_matches(box(*bbox), df_small.geometry)\n", + "# if len(candidates) > 0:\n", + "# subset = df_small.loc[candidates.index].clip(box(*bbox))\n", "\n", - " logger.info(f\"grid created with {len(grid)} cells\")\n", + "# result = (\n", + "# gpd.overlay(df_large_chunk, subset).reset_index(drop=True)\n", + "# .clip(subset.geometry)\n", + "# .reset_index(drop=True)\n", + "# )\n", + "# result.geometry = result.geometry.apply(repair_geometry)\n", + "# else:\n", + "# result = gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", + "# return result\n", + "# except Exception as e:\n", + "# logging.error(e)\n", + "# return gpd.GeoDataFrame()\n", + "# finally:\n", + "# pbar.update(1)\n", "\n", - " list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", "\n", - " logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", + "# async def spatial_join(\n", + "# geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", + "# ) -> gpd.GeoDataFrame:\n", + "# \"\"\"Create spatial join between two GeoDataFrames.\"\"\"\n", + "# # we build the spatial index for the larger GeoDataFrame\n", + "# smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", "\n", - " with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", - " new_df = await asyncio.gather(\n", - " *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", - " )\n", + "# logger.info(f\"Processing {len(larger_dim)} elements\")\n", "\n", - " return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", + "# grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", "\n", + "# logger.info(f\"grid created with {len(grid)} cells\")\n", "\n", - "@background\n", - "def 
spatial_dissolve_chunk(geometry, gdf, pbar):\n", - " try:\n", - " logger.info(\"Processing chunk\")\n", - " candidates = get_matches(\n", - " geometry,\n", - " gdf.geometry,\n", - " )\n", - " subset = gdf.loc[candidates.index]\n", + "# list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", "\n", - " result = pd.concat(\n", - " subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", - " ).copy()\n", + "# logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", "\n", - " data_chunk = [\n", - " (\n", - " result[result[\"year\"] <= 2010]\n", - " .reset_index()\n", - " .pipe(calculate_area, \"area\", None)\n", - " .drop(columns=[\"geometry\"])\n", - " )\n", - " ]\n", - " for year in range(2011, 2025):\n", - " data_chunk.append(\n", - " result[result[\"year\"] <= year]\n", - " .dissolve(\n", - " by=[\"iso_3\"],\n", - " )\n", - " .assign(year=year)\n", - " .reset_index()\n", - " .pipe(calculate_area, \"area\", None)\n", - " .drop(columns=[\"geometry\"])\n", - " )\n", + "# with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", + "# new_df = await asyncio.gather(\n", + "# *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", + "# )\n", "\n", - " return pd.concat(data_chunk, ignore_index=True)\n", - " except Exception as e:\n", - " logging.error(e)\n", - " return gpd.GeoDataFrame()\n", - " finally:\n", - " pbar.update(1)\n", + "# return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", "\n", - "async def process_grid(gdf):\n", - " grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", - " logger.info(f\"grid created with {grid_gdf.shape[0]} cells\")\n", "\n", - " with tqdm(total=grid_gdf.shape[0], desc=\"Processing grid elements\") as pbar:\n", - " jobs = [spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values]\n", - " result = await asyncio.gather(*jobs)\n", - " return result" + "# 
@background\n", + "# def spatial_dissolve_chunk(geometry, gdf, pbar):\n", + "# try:\n", + "# logger.info(\"Processing chunk\")\n", + "# candidates = get_matches(\n", + "# geometry,\n", + "# gdf.geometry,\n", + "# )\n", + "# subset = gdf.loc[candidates.index]\n", + "\n", + "# result = pd.concat(\n", + "# subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", + "# ).copy()\n", + "\n", + "# data_chunk = [\n", + "# (\n", + "# result[result[\"year\"] <= 2010]\n", + "# .reset_index()\n", + "# .pipe(calculate_area, \"area\", None)\n", + "# .drop(columns=[\"geometry\"])\n", + "# )\n", + "# ]\n", + "# for year in range(2011, 2025):\n", + "# data_chunk.append(\n", + "# result[result[\"year\"] <= year]\n", + "# .dissolve(\n", + "# by=[\"iso_3\"],\n", + "# )\n", + "# .assign(year=year)\n", + "# .reset_index()\n", + "# .pipe(calculate_area, \"area\", None)\n", + "# .drop(columns=[\"geometry\"])\n", + "# )\n", + "\n", + "# return pd.concat(data_chunk, ignore_index=True)\n", + "# except Exception as e:\n", + "# logging.error(e)\n", + "# return gpd.GeoDataFrame()\n", + "# finally:\n", + "# pbar.update(1)\n", + "\n", + "# async def process_grid(gdf):\n", + "# grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", + "# logger.info(f\"grid created with {grid_gdf.shape[0]} cells\")\n", + "\n", + "# with tqdm(total=grid_gdf.shape[0], desc=\"Processing grid elements\") as pbar:\n", + "# jobs = [spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values]\n", + "# result = await asyncio.gather(*jobs)\n", + "# return result" ] }, { @@ -689,7 +771,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 130, "metadata": {}, "outputs": [ { @@ -710,7 +792,7 @@ "pipe_dir_eez = FileConventionHandler(\"eez\")\n", "pipe_dir_mpas = FileConventionHandler(pipe)\n", "output_file = pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\n", - " \"mpa_landmask_strapi.csv\"\n", + " 
\"mpa_coverage.csv\"\n", ")\n", "\n", "# Download the EEZ file && unzip it\n", @@ -725,14 +807,14 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 131, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:15<00:00, 1.76s/it]\n" + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:11<00:00, 1.74s/it]\n" ] } ], @@ -742,141 +824,14 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATSTATUSSTATUS_YRPARENT_ISOGIS_M_AREAgeometryindex_rightGEONAMEMRGIDAREA_KM2POL_TYPEISO_SOV1ISO_SOV2ISO_SOV3
0555624810.0555624810_D1Ross Sea Region Marine Protected AreaMarine Protected Area (CCAMLR)Not ReportedDesignated2017.0ABNJ326507.190744POLYGON ((150 -62.5, 150.90909 -62.5, 151.8181...0.0High Seas63203.0212881389.0High SeasABNJNoneNone
\n", - "
" - ], - "text/plain": [ - " WDPAID WDPA_PID PA_DEF NAME \\\n", - "0 555624810.0 555624810_D 1 Ross Sea Region Marine Protected Area \n", - "\n", - " DESIG_ENG IUCN_CAT STATUS STATUS_YR \\\n", - "0 Marine Protected Area (CCAMLR) Not Reported Designated 2017.0 \n", - "\n", - " PARENT_ISO GIS_M_AREA \\\n", - "0 ABNJ 326507.190744 \n", - "\n", - " geometry index_right GEONAME \\\n", - "0 POLYGON ((150 -62.5, 150.90909 -62.5, 151.8181... 0.0 High Seas \n", - "\n", - " MRGID AREA_KM2 POL_TYPE ISO_SOV1 ISO_SOV2 ISO_SOV3 \n", - "0 63203.0 212881389.0 High Seas ABNJ None None " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "eez_mpas_data_join.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pyogrio._io:Created 17,697 records\n" - ] - } - ], - "source": [ - "# # To get an idea of the spatial join results\n", - "# eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3).to_file(\n", - "# pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\"mpas_sjoin.shp\"), driver=\"ESRI Shapefile\"\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 23, + "execution_count": 132, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:23<00:00, 14.54s/it]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:23<00:00, 4.58s/it]" + 
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:22<00:00, 14.49s/it]\n" ] } ], @@ -891,7 +846,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 133, + "metadata": {}, + "outputs": [], + "source": [ + "# save final data\n", + "final_data.to_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"), index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 134, "metadata": {}, "outputs": [ { @@ -942,8 +907,8 @@ " 996236.13\n", " 0.467977\n", " 0.275966\n", - " False\n", - " marine\n", + " 0\n", + " 1\n", " \n", " \n", " 1\n", @@ -957,8 +922,8 @@ " 129790.94\n", " 0.872365\n", " 0.035953\n", - " False\n", - " marine\n", + " 0\n", + " 1\n", " \n", " \n", "\n", @@ -973,19 +938,21 @@ "0 212881389.0 996236.13 0.467977 0.275966 \n", "1 14878058.0 129790.94 0.872365 0.035953 \n", "\n", - " is_last_year environment \n", - "0 False marine \n", - "1 False marine " + " is_last_year environment \n", + "0 0 1 \n", + "1 0 1 " ] }, - "execution_count": 24, + "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "final_data2 = final_data.copy()\n", + "\n", "coverage = (\n", - " final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", + " final_data2.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", " .pipe(separate_parent_iso, \"iso_3\")\n", " .pipe(add_region_iso, \"iso_3\")\n", " .replace(\n", @@ -1012,8 +979,15 @@ " .pipe(add_is_last_year)\n", " .pipe(add_environment)\n", ")\n", - "\n", - "\n", + "coverage.head(2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ "NewProtectedAreaExtentSchema(\n", " coverage.pipe(\n", " output,\n", @@ -1025,13 +999,12 @@ ").to_csv(\n", " output_file,\n", " index=True,\n", 
- ")\n", - "coverage.head(2)" + ")" ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 136, "metadata": {}, "outputs": [ { @@ -1103,7 +1076,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 118, "metadata": {}, "outputs": [ { @@ -1122,8 +1095,9 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" ] }, + "execution_count": 118, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ @@ -1139,7 +1113,7 @@ "temp_working_path = working_folder.get_temp_file_path(step)\n", "output_file_sjoin = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")\n", "output_file_dissolve = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_dissolve.csv\")\n", - "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_coverage.csv\")\n", "\n", "# Download the protected atlas file && unzip it\n", "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", @@ -1298,7 +1272,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -1308,7 +1282,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 120, "metadata": {}, "outputs": [ { @@ -1450,8 +1424,9 @@ "[2889 rows x 5 columns]" ] }, + "execution_count": 120, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ @@ -1504,7 +1479,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "metadata": {}, "outputs": [ { @@ -1577,8 +1552,9 @@ "4 AFG 2014 1078.918622" ] }, + "execution_count": 122, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ @@ -1588,7 +1564,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 
123, "metadata": {}, "outputs": [ { @@ -1637,8 +1613,8 @@ " 29993094.71\n", " 12.123827\n", " 2.694465\n", - " False\n", - " terrestrial\n", + " 0\n", + " 2\n", " \n", " \n", " 1\n", @@ -1651,8 +1627,8 @@ " 31625555.58\n", " 6.486481\n", " 1.520053\n", - " False\n", - " terrestrial\n", + " 0\n", + " 2\n", " \n", " \n", " 2\n", @@ -1665,8 +1641,8 @@ " 12088229.65\n", " 0.000917\n", " 0.000082\n", - " False\n", - " terrestrial\n", + " 0\n", + " 2\n", " \n", " \n", " 3\n", @@ -1679,8 +1655,8 @@ " 30037571.37\n", " 14.335645\n", " 3.190756\n", - " False\n", - " terrestrial\n", + " 0\n", + " 2\n", " \n", " \n", " 4\n", @@ -1693,8 +1669,8 @@ " 19371151.92\n", " 10.357127\n", " 1.486642\n", - " False\n", - " terrestrial\n", + " 0\n", + " 2\n", " \n", " \n", " ...\n", @@ -1711,7 +1687,7 @@ " ...\n", " \n", " \n", - " 2989\n", + " 3004\n", " 2024\n", " YEM\n", " 5.145397e+03\n", @@ -1721,11 +1697,11 @@ " 453741.18\n", " 1.133994\n", " 0.003813\n", - " True\n", - " terrestrial\n", + " 1\n", + " 2\n", " \n", " \n", - " 2990\n", + " 3005\n", " 2024\n", " ZAF\n", " 1.143850e+05\n", @@ -1735,11 +1711,11 @@ " 1221327.52\n", " 9.365631\n", " 0.084758\n", - " True\n", - " terrestrial\n", + " 1\n", + " 2\n", " \n", " \n", - " 2991\n", + " 3006\n", " 2024\n", " ZMB\n", " 2.929805e+05\n", @@ -1749,11 +1725,11 @@ " 753990.33\n", " 38.857330\n", " 0.217095\n", - " True\n", - " terrestrial\n", + " 1\n", + " 2\n", " \n", " \n", - " 2992\n", + " 3007\n", " 2024\n", " ZNC\n", " 2.779983e+00\n", @@ -1763,11 +1739,11 @@ " 3314.08\n", " 0.083884\n", " 0.000002\n", - " True\n", - " terrestrial\n", + " 1\n", + " 2\n", " \n", " \n", - " 2993\n", + " 3008\n", " 2024\n", " ZWE\n", " 1.096232e+05\n", @@ -1777,12 +1753,12 @@ " 391234.88\n", " 28.019803\n", " 0.081230\n", - " True\n", - " terrestrial\n", + " 1\n", + " 2\n", " \n", " \n", "\n", - "

2994 rows × 11 columns

\n", + "

3009 rows × 11 columns

\n", "" ], "text/plain": [ @@ -1793,49 +1769,51 @@ "3 2010 EU 4.306080e+06 116128.0 0.0 100.0 \n", "4 2010 NA 2.006295e+06 52176.0 0.0 100.0 \n", "... ... ... ... ... ... ... \n", - "2989 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", - "2990 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", - "2991 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", - "2992 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", - "2993 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", + "3004 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", + "3005 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", + "3006 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", + "3007 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", + "3008 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", "\n", " total_terrestrial_area coverage global_contribution is_last_year \\\n", - "0 29993094.71 12.123827 2.694465 False \n", - "1 31625555.58 6.486481 1.520053 False \n", - "2 12088229.65 0.000917 0.000082 False \n", - "3 30037571.37 14.335645 3.190756 False \n", - "4 19371151.92 10.357127 1.486642 False \n", + "0 29993094.71 12.123827 2.694465 0 \n", + "1 31625555.58 6.486481 1.520053 0 \n", + "2 12088229.65 0.000917 0.000082 0 \n", + "3 30037571.37 14.335645 3.190756 0 \n", + "4 19371151.92 10.357127 1.486642 0 \n", "... ... ... ... ... \n", - "2989 453741.18 1.133994 0.003813 True \n", - "2990 1221327.52 9.365631 0.084758 True \n", - "2991 753990.33 38.857330 0.217095 True \n", - "2992 3314.08 0.083884 0.000002 True \n", - "2993 391234.88 28.019803 0.081230 True \n", + "3004 453741.18 1.133994 0.003813 1 \n", + "3005 1221327.52 9.365631 0.084758 1 \n", + "3006 753990.33 38.857330 0.217095 1 \n", + "3007 3314.08 0.083884 0.000002 1 \n", + "3008 391234.88 28.019803 0.081230 1 \n", "\n", " environment \n", - "0 terrestrial \n", - "1 terrestrial \n", - "2 terrestrial \n", - "3 terrestrial \n", - "4 terrestrial \n", + "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 2 \n", + "4 2 \n", "... ... 
\n", - "2989 terrestrial \n", - "2990 terrestrial \n", - "2991 terrestrial \n", - "2992 terrestrial \n", - "2993 terrestrial \n", + "3004 2 \n", + "3005 2 \n", + "3006 2 \n", + "3007 2 \n", + "3008 2 \n", "\n", - "[2994 rows x 11 columns]" + "[3009 rows x 11 columns]" ] }, + "execution_count": 123, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ "# Add pa and oecm counts to the coverage table\n", "coverage = (\n", " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", + " .pipe(calculate_global_area_tpa, [\"year\"])\n", " .pipe(add_region_iso2, \"iso_3\")\n", " .pipe(calculate_stats_cov_pa, [\"year\"], \"iso_3\")\n", " .pipe(calculate_pa_def_percentages)\n", @@ -1862,6 +1840,31 @@ "coverage" ] }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/tpa_coverage.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file_tpas,\n", + " operation=\"w\",\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1871,7 +1874,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 124, "metadata": {}, "outputs": [], "source": [ @@ -1892,7 +1895,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ @@ -1902,7 +1905,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 126, "metadata": {}, "outputs": [ { @@ -1950,8 +1953,8 @@ " 100.0\n", " 12.123827\n", " 2.694465\n", - " False\n", - " terrestrial\n", + " 0\n", + " 2\n", " 3\n", " \n", " \n", @@ -1963,10 +1966,10 @@ "1 1 2010 
3.636311e+06 7272 0.0 100.0 12.123827 \n", "\n", " global_contribution is_last_year environment location \n", - "1 2.694465 False terrestrial 3 " + "1 2.694465 0 2 3 " ] }, - "execution_count": 41, + "execution_count": 126, "metadata": {}, "output_type": "execute_result" } @@ -1981,7 +1984,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -1989,13 +1992,38 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 128, "metadata": {}, - "source": [ - "### Mpa atlas - country stats Fully or highly protected" - ] - }, - { + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/protection_coverage_stats.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mpa atlas - country stats Fully or highly protected" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ @@ -2012,7 +2040,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -2050,38 +2078,9 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 9.59it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:29<00:00, 2.95s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "eez_mpaatlas_data_join = await spatial_join(\n", " eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", @@ -2243,7 +2242,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 158, "metadata": {}, "outputs": [ { @@ -2277,7 +2276,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 159, "metadata": {}, "outputs": [ { @@ -2381,7 +2380,7 @@ "324 1011023.776 77.114323 " ] }, - "execution_count": 84, + "execution_count": 159, "metadata": {}, "output_type": "execute_result" } @@ -2406,7 +2405,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 160, "metadata": {}, "outputs": [], "source": [ @@ -2469,7 +2468,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 161, "metadata": {}, "outputs": [ { @@ -2554,7 +2553,7 @@ }, { "cell_type": "code", - "execution_count": 256, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -2573,7 +2572,7 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" ] }, - "execution_count": 256, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -2594,7 +2593,7 @@ }, { "cell_type": "code", - "execution_count": 257, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -2609,7 +2608,7 @@ }, { "cell_type": "code", - "execution_count": 258, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -2639,8 +2638,9 @@ " \"designatio\": \"desig_eng\",\n", " }\n", " )\n", - " ).assign(source=\"mpaatlas\"\n", - " ).astype({\"mpa_zone_i\": \"Int64\"}),\n", + " ).assign(source=\"mpaatlas\")\n", + " .assign(pa_def=1)\n", + " 
.astype({\"mpa_zone_i\": \"Int64\"}),\n", " ],\n", " ignore_index=True,\n", " )\n", @@ -2664,7 +2664,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -2682,9 +2682,20 @@ }, { "cell_type": "code", - "execution_count": 259, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:706: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:731: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " df.replace(rep_d)\n" + ] + } + ], "source": [ "mpa_table = (\n", " init_table.pipe(add_bbox, \"bbox\")\n", @@ -2693,6 +2704,7 @@ " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", " .reset_index(drop=True)\n", " .pipe(add_total_marine_area)\n", + " .rename(columns={\"area_km2\": \"protected_area\"})\n", " .pipe(calculate_coverage_percentage_pa)\n", " .pipe(add_environment)\n", " .pipe(\n", @@ -2741,8 +2753,8 @@ " },\n", " rename={\n", " \"pa_def\": \"protection_status\",\n", - " \"area_km2\": \"area\",\n", - " \"iucn_cat\": \"pa_iucn_category\",\n", + " \"protected_area\": \"area\",\n", + " \"iucn_cat\": \"iucn_category\",\n", " \"desig_eng\": \"designation\",\n", " \"protection\": \"mpaa_protection_level\",\n", " \"establishm\": \"mpaa_establishment_stage\",\n", @@ -2750,11 +2762,10 @@ " },\n", " drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\n", " )\n", - " .pipe(add_child_parent_relationship)\n", " .astype(\n", " {\n", " \"year\": \"Int32\",\n", - " \"pa_iucn_category\": \"Int64\",\n", + " \"iucn_category\": \"Int64\",\n", " \"protection_status\": \"Int64\",\n", " }\n", " )\n", @@ -2765,83 +2776,223 @@ }, { "cell_type": "code", - "execution_count": 260, - "metadata": {}, - "outputs": [], - "source": [ - "# Validate and save\n", - "PAsSchema(mpa_table[mpa_table.location.notna()]).to_csv(output_file_mpas, index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# todo investigate the issue with area as null" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# batch_export(\n", - "# mpa_table[mpa_table.area.notna()],\n", - "# 5000,\n", - "# PAsSchema,\n", - "# pipe_dir.get_processed_step_path(current_step),\n", - "# \"mpa_detail\",\n", - "# format=\"json\",\n", - "# 
strapi_colection=strapi_collection_mpas,\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", - "# init_table[\n", - "# (\n", - "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", - "# .groupby(\"wdpaid\")\n", - "# .transform(\"size\")\n", - "# .gt(1)\n", - "# )\n", - "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", - "# ].groupby(\"wdpaid\")\n", - "# .geometry.apply(lambda x: x.union_all())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### upload data to strapi" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wdpaidwdpa_pidprotection_statusnamedesignationiucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxis_childchild_idcoverageenvironmentlocation
id
181701701Isla del CocoNational Park2202254819.0426323NaNNaN[-88.987016503, 4.529014728999982, -86.3670124...False1709.150798139.0
191701701Isla del Coco - Zona Minima IntervencionNational Park<NA>19781950.50000014.03.0[-87.29513967897267, 5.298053442111269, -86.82...True1700.325592139.0
201701701Isla del Coco - Zona Media IntervencionNational Park<NA>19780.93000014.03.0[-87.1038528170242, 5.492165352309547, -87.030...True1700.000155139.0
211701701Isla del Coco - Zona Baja IntervencionNational Park<NA>197870.72000014.03.0[-87.11119966572133, 5.482019746658279, -86.95...True1700.011805139.0
22170170.01Isla del Coco - 2022 ExpansionNational Park<NA>202155081.21000015.08.0[-88.987, 4.529, -86.367, 6.237]True170.09.194561139.0
\n", + "
" + ], "text/plain": [ - "" + " wdpaid wdpa_pid protection_status \\\n", + "id \n", + "18 170 170 1 \n", + "19 170 170 1 \n", + "20 170 170 1 \n", + "21 170 170 1 \n", + "22 170 170.0 1 \n", + "\n", + " name designation iucn_category \\\n", + "id \n", + "18 Isla del Coco National Park 2 \n", + "19 Isla del Coco - Zona Minima Intervencion National Park \n", + "20 Isla del Coco - Zona Media Intervencion National Park \n", + "21 Isla del Coco - Zona Baja Intervencion National Park \n", + "22 Isla del Coco - 2022 Expansion National Park \n", + "\n", + " year area data_source mpaa_establishment_stage \\\n", + "id \n", + "18 2022 54819.042632 3 NaN \n", + "19 1978 1950.500000 1 4.0 \n", + "20 1978 0.930000 1 4.0 \n", + "21 1978 70.720000 1 4.0 \n", + "22 2021 55081.210000 1 5.0 \n", + "\n", + " mpaa_protection_level bbox \\\n", + "id \n", + "18 NaN [-88.987016503, 4.529014728999982, -86.3670124... \n", + "19 3.0 [-87.29513967897267, 5.298053442111269, -86.82... \n", + "20 3.0 [-87.1038528170242, 5.492165352309547, -87.030... \n", + "21 3.0 [-87.11119966572133, 5.482019746658279, -86.95... 
\n", + "22 8.0 [-88.987, 4.529, -86.367, 6.237] \n", + "\n", + " is_child child_id coverage environment location \n", + "id \n", + "18 False 170 9.150798 1 39.0 \n", + "19 True 170 0.325592 1 39.0 \n", + "20 True 170 0.000155 1 39.0 \n", + "21 True 170 0.011805 1 39.0 \n", + "22 True 170.0 9.194561 1 39.0 " ] }, + "execution_count": 14, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + "mpa_table[mpa_table[\"wdpaid\"] == 170]" ] }, { @@ -2850,11 +3001,8 @@ "metadata": {}, "outputs": [], "source": [ - "# for i in range(0, 4):\n", - "# strapi.importCollectionData(\n", - "# strapi_collection_mpas,\n", - "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", - "# )" + "# # Validate and save\n", + "# PAsSchema(mpa_table[mpa_table.location.notna()]).to_csv(output_file_mpas, index=True)" ] }, { @@ -2881,30 +3029,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pipe = \"mpa-terrestrial\"\n", "strapi_collection_mpas = \"mpa-terrestrial\"\n", @@ -2913,26 +3040,24 @@ "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", "\n", - "# Download the protected 
atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + "# # Download the protected atlas file && unzip it\n", + "# download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# # Download the gadm file \n", + "# download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "tpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", - " clean_geometries\n", - ")" + "tpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\"))" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -2943,7 +3068,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -2979,20 +3104,19 @@ " }\n", " }\n", " )\n", - " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", ")" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3510708/3364924951.py:202: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "/tmp/ipykernel_3690547/1736513570.py:202: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", " df.replace(rep_d)\n" ] } @@ -3024,17 +3148,16 @@ " rename={\n", " \"pa_def\": \"protection_status\",\n", " \"protected_area\": \"area\",\n", - " \"iucn_cat\": \"pa_iucn_category\",\n", + " \"iucn_cat\": \"iucn_category\",\n", " \"desig_eng\": \"designation\",\n", " \"source\": \"data_source\",\n", " },\n", " drop_cols=[\"geometry\", \"iso\", \"marine\", \"total_terrestrial_area\"]\n", " )\n", - " .pipe(add_child_parent_relationship)\n", " .astype(\n", " {\n", " \"year\": \"Int32\",\n", - " \"pa_iucn_category\": \"Int64\",\n", + " \"iucn_category\": \"Int64\",\n", " \"protection_status\": \"Int64\",\n", " }\n", " )\n", @@ -3045,7 +3168,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -3056,558 +3179,374 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "# Validate and save\n", - "PAsSchema(tpa_table[tpa_table.location.notna()]).to_csv(output_file_tpas, index=True)" + "# # Validate and save\n", + "# PAsSchema(tpa_table[tpa_table.location.notna()]).to_csv(output_file_tpas, index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country marine and terrestrial - Detail table" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "# batch_export(\n", - "# mpa_table[mpa_table.area.notna()],\n", - "# 5000,\n", - "# PAsSchema,\n", - "# pipe_dir.get_processed_step_path(current_step),\n", - "# \"mpa_detail\",\n", - "# format=\"json\",\n", - "# strapi_colection=strapi_collection_mpas,\n", - "# )" + "pipe_mar = \"mpa\"\n", + "pipe_ter = \"mpa-terrestrial\"\n", + "pipe_pa = \"pa\"\n", + "step = \"preprocess\"\n", + "strapi_collection_pas = \"pa\"\n", + "\n", + "\n", + "pipe_dir_mar = FileConventionHandler(pipe_mar)\n", + 
"pipe_dir_ter = FileConventionHandler(pipe_ter)\n", + "pipe_dir_pa = FileConventionHandler(pipe_pa)\n", + "\n", + "input_path_mar = pipe_dir_mar.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", + "input_path_ter = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "output_file_pa = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry\n", - "# init_table[\n", - "# (\n", - "# init_table.sort_values(by=[\"wdpaid\", \"source\"], ascending=[True, False])\n", - "# .groupby(\"wdpaid\")\n", - "# .transform(\"size\")\n", - "# .gt(1)\n", - "# )\n", - "# & (init_table.wdpa_pid.str.extract(r\"([A-Za-z]+)\", expand=False).notna())\n", - "# ].groupby(\"wdpaid\")\n", - "# .geometry.apply(lambda x: x.union_all())" + "final_table = pd.concat([mpa_table, tpa_table], ignore_index=True)\n", + "final_table.index = final_table.index + 1\n", + "final_table.index.name = 'id'\n", + "final_table = final_table.pipe(add_child_parent_relationship).drop(columns=['wdpa_pid', 'is_child', 'child_id']).sort_values(by=['parent'])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "306123" ] }, + "execution_count": 20, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "# strapi.deleteCollectionData(\"mpa\", list(range(1, 20914)))" + "len(final_table)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# for i in range(0, 4):\n", - "# strapi.importCollectionData(\n", - "# strapi_collection_mpas,\n", - "# 
mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", - "# )" + "Note! When uploading the tables the schema doesn't work. I need to run the code to generate them and then it works." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 83, "metadata": {}, + "outputs": [], "source": [ - "### Country marine and terrestrial - Detail table" + "# # Create final table with all the data\n", + "# mpa_table2 = pd.read_csv(input_path_mar)\n", + "# tpa_table2 = pd.read_csv(input_path_ter)\n", + "# final_table = pd.concat([mpa_table2, tpa_table2])\n", + "# final_table.index = range(1, len(final_table) + 1)\n", + "# final_table.index.name = 'id'\n", + "# final_table.drop(columns=['id'], inplace=True)\n", + "# final_table.head(2)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ - "pipe_mar = \"mpa\"\n", - "pipe_ter = \"mpa-terrestrial\"\n", - "pipe_pa = \"pa\"\n", - "step = \"preprocess\"\n", - "\n", - "\n", - "pipe_dir_mar = FileConventionHandler(pipe_mar)\n", - "pipe_dir_ter = FileConventionHandler(pipe_ter)\n", - "pipe_dir_pa = FileConventionHandler(pipe_pa)\n", - "\n", - "input_path_mar = pipe_dir_mar.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", - "input_path_ter = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", - "output_file_pa = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" + "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pa, index=True)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "mpa_table = pd.read_csv(input_path_mar)\n", - "tpa_table = pd.read_csv(input_path_ter)" + "# Divide table into two tables\n", + "final_table1 = final_table.drop(columns=['parent'])\n", + "final_table2 = final_table[['parent']]" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 45, 
"metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idwdpaidwdpa_pidprotection_statusnamedesignationpa_iucn_categoryyearareadata_source...coverageenvironmentlocationchildrenprotected_areaprotected_areas_countoecmspasglobal_contributionis_last_year
id
111.011.0Diamond Reef and Salt Fish Tail ReefMarine Reserve1.01973.014.6361353.0...0.013119marine15NaNNaNNaNNaNNaNNaNNaN
222.021.0Palaster ReefMarine Reserve1.01973.03.8456233.0...0.003447marine15NaNNaNNaNNaNNaNNaNNaN
\n", - "

2 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " id wdpaid wdpa_pid protection_status \\\n", - "id \n", - "1 1 1.0 1 1.0 \n", - "2 2 2.0 2 1.0 \n", - "\n", - " name designation pa_iucn_category \\\n", - "id \n", - "1 Diamond Reef and Salt Fish Tail Reef Marine Reserve 1.0 \n", - "2 Palaster Reef Marine Reserve 1.0 \n", - "\n", - " year area data_source ... coverage environment location \\\n", - "id ... \n", - "1 1973.0 14.636135 3.0 ... 0.013119 marine 15 \n", - "2 1973.0 3.845623 3.0 ... 0.003447 marine 15 \n", - "\n", - " children protected_area protected_areas_count oecms pas \\\n", - "id \n", - "1 NaN NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN NaN \n", - "\n", - " global_contribution is_last_year \n", - "id \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "\n", - "[2 rows x 26 columns]" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Create final table with all the data\n", - "final_table = pd.concat([mpa_table, tpa_table])\n", - "final_table.index = range(1, len(final_table) + 1)\n", - "final_table.index.name = 'id'\n", - "final_table.head(2)" + "# batch_export(\n", + "# final_table1[final_table1.area.notna()],\n", + "# 4000,\n", + "# PAsSchemaChunk1,\n", + "# pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"),\n", + "# \"pa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_pas,\n", + "# )\n", + "\n", + "batch_export(\n", + " final_table2,\n", + " 10000,\n", + " PAsSchemaChunk2,\n", + " pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"),\n", + " \"pa_detail\",\n", + " format=\"json\",\n", + " strapi_colection=strapi_collection_pas,\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ - "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pa, index=True)" + "# zip data\n", + 
"make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1.zip\"))\n", + "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2.zip\"))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" ] } ], "source": [ - "remote_path = 'vizzuality_processed_data/strapi_tables/pa.csv'\n", + "# LOAD\n", + "## load zipped file to GCS\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name='vizzuality_processed_data/strapi_tables/pa_chunks1.zip',\n", + " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1.zip\"),\n", + " operation=\"w\",\n", + ")\n", "\n", "writeReadGCP(\n", " credentials=mysettings.GCS_KEYFILE_JSON,\n", " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file_pas,\n", + " blob_name='vizzuality_processed_data/strapi_tables/pa_chunks2.zip',\n", + " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2.zip\"),\n", " operation=\"w\",\n", ")" ] }, { "cell_type": "code", - "execution_count": 65, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "pipe_mar = \"mpa\"\n", - "pipe_ter = \"mpa-terrestrial\"\n", - "step = \"preprocess\"\n", - "\n", - "\n", - "pipe_dir_mar = FileConventionHandler(pipe_mar)\n", - "pipe_dir_ter = FileConventionHandler(pipe_ter)\n", - "\n", - "input_path_mar = pipe_dir_mar.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", - "input_path_ter = 
pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", - "output_file_pa = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" + "# strapi.deleteCollectionData(\"pa\", list(range(1, 20914)))" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "ter = pd.read_csv(input_path_ter)\n", - "mar = pd.read_csv(input_path_mar)" + "# for i in range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['id', 'year', 'protected_area', 'protected_areas_count', 'oecms', 'pas',\n", - " 'coverage', 'global_contribution', 'is_last_year', 'environment',\n", - " 'location'],\n", - " dtype='object')" + "76" ] }, - "execution_count": 67, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ter.columns" + "# Retrive the ids left out in the batch process\n", + "left_out_ids = range(4000, 306124, 4000)\n", + "left_out_rows = final_table.loc[left_out_ids]\n", + "len(left_out_rows)" ] }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 64, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['id', 'wdpaid', 'wdpa_pid', 'protection_status', 'name', 'designation',\n", - " 'pa_iucn_category', 'year', 'area', 'data_source',\n", - " 'mpaa_establishment_stage', 'mpaa_protection_level', 'bbox', 'is_child',\n", - " 'child_id', 'coverage', 'environment', 'location', 'children'],\n", - " dtype='object')" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "mar.columns" + "# Import all cols but parent\n", + "left_out_rows1 = left_out_rows.drop(columns=['parent'])\n", + "\n", + 
"left_out_rows1 = left_out_rows1.reset_index()\n", + "left_out_rows1.index = left_out_rows1['id']\n", + "\n", + "output_file = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.json\")\n", + "left_out_rows1.to_json(output_file, orient=\"index\")\n" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 65, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "location 0\n", - "code 0\n", - "dtype: int64" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "locations_code.isna().sum()" + "# Import all cols but parent\n", + "left_out_rows2 = left_out_rows[['parent']]\n", + "\n", + "left_out_rows2 = left_out_rows2.reset_index()\n", + "left_out_rows2.index = left_out_rows2['id']\n", + "\n", + "output_file = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.json\")\n", + "left_out_rows2.to_json(output_file, orient=\"index\")" ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 67, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
locationcode
67NaN
\n", - "
" - ], - "text/plain": [ - " location code\n", - "6 7 NaN" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# show rows with null values in locations_code\n", - "locations_code[locations_code.isna().any(axis=1)]" + "# zip data\n", + "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.json\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.zip\"))\n", + "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.json\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.zip\"))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], "source": [ - "coverage = (\n", - " final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", - " .pipe(separate_parent_iso, \"iso_3\")\n", - " .pipe(add_region_iso, \"iso_3\")\n", - " .replace(\n", - " {\n", - " \"iso_3\": {\n", - " \"ATA\": \"ABNJ\",\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " \"GIB\": \"GBR\",\n", - " }\n", - " }\n", - " )\n", - " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", - " .pipe(add_pa_oecm_percentages)\n", - " .pipe(add_total_marine_area)\n", - " .pipe(coverage_stats2)\n", - " .pipe(calculate_coverage_percentage_mpa)\n", - " .pipe(calculate_global_contribution)\n", - " .pipe(add_is_last_year)\n", - " .pipe(add_environment)\n", + "## load 
zipped file to GCS\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name='vizzuality_processed_data/strapi_tables/pa_left_out_rows1.zip',\n", + " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.zip\"),\n", + " operation=\"w\",\n", ")\n", "\n", - "\n", - "NewProtectedAreaExtentSchema(\n", - " coverage.pipe(\n", - " output,\n", - " \"iso_3\",\n", - " {},\n", - " {},\n", - " [\"area\", \"iso_3\", 'total_marine_area'],\n", - " )\n", - ").to_csv(\n", - " output_file,\n", - " index=True,\n", - ")\n", - "coverage.head(2)" + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name='vizzuality_processed_data/strapi_tables/pa_left_out_rows2.zip',\n", + " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.zip\"),\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Habitats" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], "source": [ - "result_oecms = (\n", - " sjoin_gdf.groupby([\"iso_3\", \"PA_DEF\"])\n", - " .agg({\"PA_DEF\": \"count\"})\n", - " .rename(columns={\"PA_DEF\": \"count\"})\n", - " .reset_index()\n", - " .pivot(index=\"GID_0\", columns=\"PA_DEF\", values=\"count\")\n", - " .fillna(0)\n", - " .reset_index()\n", - " .rename(columns={\"0\": \"oecm\", \"1\": \"pa\"})\n", + "pipe = \"terrestrial-habitats\"\n", + "collection_name = \"terrestrial_habitats\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file_ter = 
pipe_dir.get_processed_step_path(prev_step).joinpath(\"master_data_protection.csv\")\n", + "input_file_mar = pipe_dir.get_processed_step_path(prev_step).joinpath(\"habitats6.csv\")\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"habitats_all.csv\")\n", + "\n", + "# Download the terrestrial habitats table from the bucket\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"vizzuality_processed_data/habitats/preprocess/master_data_protection.csv\",\n", + " file=input_file_ter,\n", + " operation=\"r\",\n", ")\n", - "# ).reset_index().pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\").reset_index(names=[\"PA_DEF\"], level=0, drop=True)" + "\n", + "# Download the marine habitats table from the bucket\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"vizzuality_processed_data/processed_statistic_tables/habitats6.csv\",\n", + " file=input_file_mar,\n", + " operation=\"r\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "result_oecms[\"oecm_perc\"] = result_oecms[\"oecm\"] / (result_oecms[\"oecm\"] + result_oecms[\"pa\"])" + "habitat_mar = pd.read_csv(input_file_mar, na_values=['', 'NaN', 'NULL'])\n", + "habitat_mar['environment'] = 1\n", + "habitat_mar['location_id'] = habitat_mar['location_id'].fillna('NA')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -3630,173 +3569,126 @@ "\n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
PA_DEFiso_3oecmpaoecm_perchabitat_namepixel_habitatpixel_protected
180USA0.050674.00.0000000GLOBArtificial28259249.02776193.0
161SWE0.030813.00.0000001GLOBDesert111106481.07778007.0
44DEU0.023703.00.0000002GLOBForest49125087.010353320.0
55EST0.020579.00.0000003GLOBGrassland35459546.05491398.0
57FIN0.018427.00.0000004GLOBOther864004.0291628.0
29CAN2.012566.00.000159...............
61GBR1728ZWEOther0.00.011712.00.000000
9AUS1729ZWERocky/mountains0.00.011154.00.000000
30CHE0.010632.00.0000001730ZWESavanna231134.097790.0
130NZL0.010205.00.0000001731ZWEShrubland52656.04262.0
1732ZWEWetlands/open water3866.03238.0
\n", + "

1733 rows × 4 columns

\n", "" ], "text/plain": [ - "PA_DEF iso_3 oecm pa oecm_perc\n", - "180 USA 0.0 50674.0 0.000000\n", - "161 SWE 0.0 30813.0 0.000000\n", - "44 DEU 0.0 23703.0 0.000000\n", - "55 EST 0.0 20579.0 0.000000\n", - "57 FIN 0.0 18427.0 0.000000\n", - "29 CAN 2.0 12566.0 0.000159\n", - "61 GBR 0.0 11712.0 0.000000\n", - "9 AUS 0.0 11154.0 0.000000\n", - "30 CHE 0.0 10632.0 0.000000\n", - "130 NZL 0.0 10205.0 0.000000" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "result_oecms.sort_values(\"pa\", ascending=False).head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "result_area = pd.concat(data)[['iso_3', 'year', 'area']].groupby(['iso_3', 'year']).sum().reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "result = result_area.merge(result_oecms, on=\"iso_3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save sjoin_gdf to file\n", - "sjoin_gdf.to_file(pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\"), driver=\"ESRI Shapefile\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "sjoin_gdf = gpd.read_file(pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")).pipe(clean_geometries)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['WDPAID', 'WDPA_PID', 'PA_DEF', 'NAME', 'DESIG_ENG', 'IUCN_CAT',\n", - " 'MARINE', 'GIS_AREA', 'STATUS', 'STATUS_YR', 'PARENT_ISO', 'index_righ',\n", - " 'COUNTRY', 'GID_0', 'area_km2', 'geometry'],\n", - " dtype='object')" + " iso_3 habitat_name pixel_habitat pixel_protected\n", + "0 GLOB Artificial 28259249.0 2776193.0\n", + "1 GLOB Desert 111106481.0 7778007.0\n", + "2 GLOB Forest 49125087.0 
10353320.0\n", + "3 GLOB Grassland 35459546.0 5491398.0\n", + "4 GLOB Other 864004.0 291628.0\n", + "... ... ... ... ...\n", + "1728 ZWE Other 0.0 0.0\n", + "1729 ZWE Rocky/mountains 0.0 0.0\n", + "1730 ZWE Savanna 231134.0 97790.0\n", + "1731 ZWE Shrubland 52656.0 4262.0\n", + "1732 ZWE Wetlands/open water 3866.0 3238.0\n", + "\n", + "[1733 rows x 4 columns]" ] }, - "execution_count": 10, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sjoin_gdf.columns" + "habitat_ter = pd.read_csv(input_file_ter).drop(columns=['frac', 'perc_extent', 'total_area']).rename(columns ={'habitats':'habitat_name','total': 'pixel_habitat', 'protected': 'pixel_protected'})\n", + "habitat_ter" ] }, { "cell_type": "code", - "execution_count": 218, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -3819,199 +3711,1481 @@ "\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
PA_DEFGID_0oecmpaiso_3habitat_namepixel_habitatpixel_protectedtotal_pixels
0AFG0.025.0GLOBArtificial28259249.02776193.0267352678.0
1AGO0.037.0GLOBDesert111106481.07778007.0267352678.0
2ALB0.0117.0GLOBForest49125087.010353320.0267352678.0
3AND0.023.0GLOBGrassland35459546.05491398.0267352678.0
4ARE0.054.05GLOBRocky/mountains3571486.0478102.0267352678.0
5ARG0.0403.0..................
6ARM0.068.01727ZWEGrassland3417.0263.0418294.0
7ATA1729ZWERocky/mountains0.09.00.0418294.0
8ATG0.010.01730ZWESavanna231134.097790.0418294.0
9AUS0.011234.01731ZWEShrubland52656.04262.0418294.0
1732ZWEWetlands/open water3866.03238.0418294.0
\n", + "

1526 rows × 5 columns

\n", "" ], "text/plain": [ - "PA_DEF GID_0 oecm pa\n", - "0 AFG 0.0 25.0\n", - "1 AGO 0.0 37.0\n", - "2 ALB 0.0 117.0\n", - "3 AND 0.0 23.0\n", - "4 ARE 0.0 54.0\n", - "5 ARG 0.0 403.0\n", - "6 ARM 0.0 68.0\n", - "7 ATA 0.0 9.0\n", - "8 ATG 0.0 10.0\n", - "9 AUS 0.0 11234.0" + " iso_3 habitat_name pixel_habitat pixel_protected total_pixels\n", + "0 GLOB Artificial 28259249.0 2776193.0 267352678.0\n", + "1 GLOB Desert 111106481.0 7778007.0 267352678.0\n", + "2 GLOB Forest 49125087.0 10353320.0 267352678.0\n", + "3 GLOB Grassland 35459546.0 5491398.0 267352678.0\n", + "5 GLOB Rocky/mountains 3571486.0 478102.0 267352678.0\n", + "... ... ... ... ... ...\n", + "1727 ZWE Grassland 3417.0 263.0 418294.0\n", + "1729 ZWE Rocky/mountains 0.0 0.0 418294.0\n", + "1730 ZWE Savanna 231134.0 97790.0 418294.0\n", + "1731 ZWE Shrubland 52656.0 4262.0 418294.0\n", + "1732 ZWE Wetlands/open water 3866.0 3238.0 418294.0\n", + "\n", + "[1526 rows x 5 columns]" ] }, - "execution_count": 218, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove all rows where \"habitat_name\" is \"Other\"\n", + "habitat_ter = habitat_ter[habitat_ter['habitat_name'] != 'Other'].copy()\n", + "\n", + "# calculate total_area by adding up \"total\" per iso_3\n", + "habitat_ter['total_pixels'] = habitat_ter.groupby('iso_3')['pixel_habitat'].transform('sum')\n", + "habitat_ter" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Assign territories to their soveraign countries\n", + "with open(scripts_dir.joinpath('data_commons/data/dependency_to_parent.json'), 'r') as json_file:\n", + " dependency_to_parent = json.load(json_file)\n", + "\n", + "mapping = {key: value[0] for key, value in dependency_to_parent.items()}\n", + "\n", + "habitat_ter['iso_3'] = habitat_ter['iso_3'].map(mapping).fillna(habitat_ter['iso_3'])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixels
0AFGArtificial56625.0938.0782480.0
1AFGDesert274553.03639.0782480.0
2AFGForest3928.0219.0782480.0
3AFGGrassland292319.09275.0782480.0
4AFGRocky/mountains137716.016183.0782480.0
..................
1505ZWEGrassland3417.0263.0418294.0
1506ZWERocky/mountains0.00.0418294.0
1507ZWESavanna231134.097790.0418294.0
1508ZWEShrubland52656.04262.0418294.0
1509ZWEWetlands/open water3866.03238.0418294.0
\n", + "

1510 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat_name pixel_habitat pixel_protected total_pixels\n", + "0 AFG Artificial 56625.0 938.0 782480.0\n", + "1 AFG Desert 274553.0 3639.0 782480.0\n", + "2 AFG Forest 3928.0 219.0 782480.0\n", + "3 AFG Grassland 292319.0 9275.0 782480.0\n", + "4 AFG Rocky/mountains 137716.0 16183.0 782480.0\n", + "... ... ... ... ... ...\n", + "1505 ZWE Grassland 3417.0 263.0 418294.0\n", + "1506 ZWE Rocky/mountains 0.0 0.0 418294.0\n", + "1507 ZWE Savanna 231134.0 97790.0 418294.0\n", + "1508 ZWE Shrubland 52656.0 4262.0 418294.0\n", + "1509 ZWE Wetlands/open water 3866.0 3238.0 418294.0\n", + "\n", + "[1510 rows x 5 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# groupby country and habitats and sum the pixeles\n", + "habitat_ter_grouped = habitat_ter.groupby(['iso_3', 'habitat_name']).sum().reset_index()\n", + "habitat_ter_grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixelsprotected_perchabitat_perc
0AFGArtificial56625.0938.0782480.01.6565127.236607
1AFGDesert274553.03639.0782480.01.32542735.087542
2AFGForest3928.0219.0782480.05.5753560.501994
3AFGGrassland292319.09275.0782480.03.17290437.358016
4AFGRocky/mountains137716.016183.0782480.011.75099517.599939
........................
1505ZWEGrassland3417.0263.0418294.07.6968100.816890
1506ZWERocky/mountains0.00.0418294.0NaN0.000000
1507ZWESavanna231134.097790.0418294.042.30879155.256351
1508ZWEShrubland52656.04262.0418294.08.09404412.588275
1509ZWEWetlands/open water3866.03238.0418294.083.7558200.924230
\n", + "

1510 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat_name pixel_habitat pixel_protected total_pixels \\\n", + "0 AFG Artificial 56625.0 938.0 782480.0 \n", + "1 AFG Desert 274553.0 3639.0 782480.0 \n", + "2 AFG Forest 3928.0 219.0 782480.0 \n", + "3 AFG Grassland 292319.0 9275.0 782480.0 \n", + "4 AFG Rocky/mountains 137716.0 16183.0 782480.0 \n", + "... ... ... ... ... ... \n", + "1505 ZWE Grassland 3417.0 263.0 418294.0 \n", + "1506 ZWE Rocky/mountains 0.0 0.0 418294.0 \n", + "1507 ZWE Savanna 231134.0 97790.0 418294.0 \n", + "1508 ZWE Shrubland 52656.0 4262.0 418294.0 \n", + "1509 ZWE Wetlands/open water 3866.0 3238.0 418294.0 \n", + "\n", + " protected_perc habitat_perc \n", + "0 1.656512 7.236607 \n", + "1 1.325427 35.087542 \n", + "2 5.575356 0.501994 \n", + "3 3.172904 37.358016 \n", + "4 11.750995 17.599939 \n", + "... ... ... \n", + "1505 7.696810 0.816890 \n", + "1506 NaN 0.000000 \n", + "1507 42.308791 55.256351 \n", + "1508 8.094044 12.588275 \n", + "1509 83.755820 0.924230 \n", + "\n", + "[1510 rows x 7 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate the percentage of protected pixels and the percentage of extent of the habitat\n", + "habitat_ter_grouped['protected_perc'] = habitat_ter_grouped['pixel_protected']/habitat_ter_grouped['pixel_habitat']*100\n", + "habitat_ter_grouped['habitat_perc'] = habitat_ter_grouped['pixel_habitat']/habitat_ter_grouped['total_pixels']*100\n", + "habitat_ter_grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixelsprotected_perchabitat_perctotal_terrestrial_area
0AFGArtificial56625.0938.0782480.01.6565127.236607644050.28
1AFGDesert274553.03639.0782480.01.32542735.087542644050.28
2AFGForest3928.0219.0782480.05.5753560.501994644050.28
3AFGGrassland292319.09275.0782480.03.17290437.358016644050.28
4AFGRocky/mountains137716.016183.0782480.011.75099517.599939644050.28
...........................
1505ZWEGrassland3417.0263.0418294.07.6968100.816890391234.88
1506ZWERocky/mountains0.00.0418294.0NaN0.000000391234.88
1507ZWESavanna231134.097790.0418294.042.30879155.256351391234.88
1508ZWEShrubland52656.04262.0418294.08.09404412.588275391234.88
1509ZWEWetlands/open water3866.03238.0418294.083.7558200.924230391234.88
\n", + "

1510 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat_name pixel_habitat pixel_protected total_pixels \\\n", + "0 AFG Artificial 56625.0 938.0 782480.0 \n", + "1 AFG Desert 274553.0 3639.0 782480.0 \n", + "2 AFG Forest 3928.0 219.0 782480.0 \n", + "3 AFG Grassland 292319.0 9275.0 782480.0 \n", + "4 AFG Rocky/mountains 137716.0 16183.0 782480.0 \n", + "... ... ... ... ... ... \n", + "1505 ZWE Grassland 3417.0 263.0 418294.0 \n", + "1506 ZWE Rocky/mountains 0.0 0.0 418294.0 \n", + "1507 ZWE Savanna 231134.0 97790.0 418294.0 \n", + "1508 ZWE Shrubland 52656.0 4262.0 418294.0 \n", + "1509 ZWE Wetlands/open water 3866.0 3238.0 418294.0 \n", + "\n", + " protected_perc habitat_perc total_terrestrial_area \n", + "0 1.656512 7.236607 644050.28 \n", + "1 1.325427 35.087542 644050.28 \n", + "2 5.575356 0.501994 644050.28 \n", + "3 3.172904 37.358016 644050.28 \n", + "4 11.750995 17.599939 644050.28 \n", + "... ... ... ... \n", + "1505 7.696810 0.816890 391234.88 \n", + "1506 NaN 0.000000 391234.88 \n", + "1507 42.308791 55.256351 391234.88 \n", + "1508 8.094044 12.588275 391234.88 \n", + "1509 83.755820 0.924230 391234.88 \n", + "\n", + "[1510 rows x 8 columns]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add country's terrestrial area\n", + "add_total_terrestrial_area(habitat_ter_grouped)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# Estimate the total area and the protected area based on pixels proportions and the total terrestrial area\n", + "habitat_ter_grouped['total_habitat_area'] = habitat_ter_grouped['total_terrestrial_area']*habitat_ter_grouped['habitat_perc']/100\n", + "habitat_ter_grouped['protected_habitat_area'] = habitat_ter_grouped['total_terrestrial_area']*habitat_ter_grouped['protected_perc']/100" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixelsprotected_perchabitat_perctotal_terrestrial_areatotal_habitat_areaprotected_habitat_area
71AUTArtificial56023.017428.0126396.031.10865244.32339683709.4837102.88456926040.890660
72AUTDesert799.0703.0126396.087.9849810.63214083709.48529.16132373651.770263
73AUTForest39594.017631.0126396.044.52947431.32535883709.4826222.29462337275.391268
74AUTGrassland16498.09748.0126396.059.08595013.05262883709.4810926.28723349460.541341
75AUTRocky/mountains1534.01090.0126396.071.0560631.21364683709.481015.93675759480.660495
76AUTShrubland10911.04511.0126396.041.3435988.63239383709.487226.13165234608.511070
77AUTWetlands/open water1037.0870.0126396.083.8958530.82043783709.48686.78384470228.782642
\n", + "
" + ], + "text/plain": [ + " iso_3 habitat_name pixel_habitat pixel_protected total_pixels \\\n", + "71 AUT Artificial 56023.0 17428.0 126396.0 \n", + "72 AUT Desert 799.0 703.0 126396.0 \n", + "73 AUT Forest 39594.0 17631.0 126396.0 \n", + "74 AUT Grassland 16498.0 9748.0 126396.0 \n", + "75 AUT Rocky/mountains 1534.0 1090.0 126396.0 \n", + "76 AUT Shrubland 10911.0 4511.0 126396.0 \n", + "77 AUT Wetlands/open water 1037.0 870.0 126396.0 \n", + "\n", + " protected_perc habitat_perc total_terrestrial_area total_habitat_area \\\n", + "71 31.108652 44.323396 83709.48 37102.884569 \n", + "72 87.984981 0.632140 83709.48 529.161323 \n", + "73 44.529474 31.325358 83709.48 26222.294623 \n", + "74 59.085950 13.052628 83709.48 10926.287233 \n", + "75 71.056063 1.213646 83709.48 1015.936757 \n", + "76 41.343598 8.632393 83709.48 7226.131652 \n", + "77 83.895853 0.820437 83709.48 686.783844 \n", + "\n", + " protected_habitat_area \n", + "71 26040.890660 \n", + "72 73651.770263 \n", + "73 37275.391268 \n", + "74 49460.541341 \n", + "75 59480.660495 \n", + "76 34608.511070 \n", + "77 70228.782642 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitat_ter_grouped[habitat_ter_grouped['iso_3'] == 'AUT']" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "# Add regions\n", + "habitat_ter_grouped = add_region_iso2(habitat_ter_grouped, 'iso_3')\n", + "\n", + "regions = habitat_ter_grouped.groupby(['region', 'habitat_name']).agg({\n", + " 'total_area': 'sum',\n", + " 'protected_area': 'sum'\n", + "}).reset_index()\n", + "\n", + "regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "habitat_ter_grouped.drop(columns=['pixel_habitat', 'pixel_protected', 'total_pixel_area', 'protect_perc', 'extent_perc', 'total_terrestrial_area', 'region'], inplace=True)\n", + "habitat_ter_grouped = habitat_ter_grouped.rename(columns = {'iso_3':'location_id'})" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate regions and habitat_ter_grouped dataframes\n", + "habitats_terrestrial = pd.concat([regions, habitat_ter_grouped], ignore_index=True)\n", + "\n", + "# fill protected_area and total_area with 0 if they are NaN\n", + "habitats_terrestrial['protected_area'] = habitats_terrestrial['protected_area'].fillna(0)\n", + "habitats_terrestrial['total_area'] = habitats_terrestrial['total_area'].fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "# Add year and environment columns\n", + "habitats_terrestrial['year'] = 2024\n", + "habitats_terrestrial['environment'] = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitattotal_areaprotected_areayearenvironment
0AFartificial2.924001e+062.331527e+0520242
1AFdesert9.872960e+066.726984e+0520242
2AFforest4.458009e+069.713040e+0520242
3AFgrassland2.035644e+062.283491e+0520242
4AFrocky/mountains2.384691e+054.774587e+0420242
5AFsavanna8.387535e+061.910888e+0620242
6AFshrubland1.766346e+062.037187e+0520242
7AFwetlands/open water3.101304e+055.931974e+0420242
8ASartificial8.041755e+062.699350e+0520242
9ASdesert3.538487e+063.150823e+0520242
\n", + "
" + ], + "text/plain": [ + " location_id habitat total_area protected_area year \\\n", + "0 AF artificial 2.924001e+06 2.331527e+05 2024 \n", + "1 AF desert 9.872960e+06 6.726984e+05 2024 \n", + "2 AF forest 4.458009e+06 9.713040e+05 2024 \n", + "3 AF grassland 2.035644e+06 2.283491e+05 2024 \n", + "4 AF rocky/mountains 2.384691e+05 4.774587e+04 2024 \n", + "5 AF savanna 8.387535e+06 1.910888e+06 2024 \n", + "6 AF shrubland 1.766346e+06 2.037187e+05 2024 \n", + "7 AF wetlands/open water 3.101304e+05 5.931974e+04 2024 \n", + "8 AS artificial 8.041755e+06 2.699350e+05 2024 \n", + "9 AS desert 3.538487e+06 3.150823e+05 2024 \n", + "\n", + " environment \n", + "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 2 \n", + "4 2 \n", + "5 2 \n", + "6 2 \n", + "7 2 \n", + "8 2 \n", + "9 2 " + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate terrestrial and marine habitats\n", + "habitats_all = pd.concat([habitats_terrestrial, habitat_mar], ignore_index=True).rename(columns={'habitat_name': 'habitat'})\n", + "habitats_all['habitat'] = habitats_all['habitat'].str.lower()\n", + "habitats_all.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitattotal_areaprotected_areayearenvironment
0AF432.924001e+062.331527e+0520242
1AF449.872960e+066.726984e+0520242
2AF454.458009e+069.713040e+0520242
3AF462.035644e+062.283491e+0520242
4AF472.384691e+054.774587e+0420242
5AF488.387535e+061.910888e+0620242
6AF491.766346e+062.037187e+0520242
7AF503.101304e+055.931974e+0420242
8AS438.041755e+062.699350e+0520242
9AS443.538487e+063.150823e+0520242
\n", + "
" + ], + "text/plain": [ + " location_id habitat total_area protected_area year environment\n", + "0 AF 43 2.924001e+06 2.331527e+05 2024 2\n", + "1 AF 44 9.872960e+06 6.726984e+05 2024 2\n", + "2 AF 45 4.458009e+06 9.713040e+05 2024 2\n", + "3 AF 46 2.035644e+06 2.283491e+05 2024 2\n", + "4 AF 47 2.384691e+05 4.774587e+04 2024 2\n", + "5 AF 48 8.387535e+06 1.910888e+06 2024 2\n", + "6 AF 49 1.766346e+06 2.037187e+05 2024 2\n", + "7 AF 50 3.101304e+05 5.931974e+04 2024 2\n", + "8 AS 43 8.041755e+06 2.699350e+05 2024 2\n", + "9 AS 44 3.538487e+06 3.150823e+05 2024 2" + ] + }, + "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "result_oecms = (\n", - " sjoin_gdf.groupby([\"GID_0\", \"PA_DEF\"])\n", - " .agg({\"PA_DEF\": \"count\"})\n", - " .rename(columns={\"PA_DEF\": \"count\"})\n", - " .reset_index()\n", - " .pivot(index=\"GID_0\", columns=\"PA_DEF\", values=\"count\")\n", - " .fillna(0)\n", - " .reset_index()\n", - " .rename(columns={\"0\": \"oecm\", \"1\": \"pa\"})\n", - ")\n", - "# ).reset_index().pivot(index=\"iso_3\", columns=\"PA_DEF\", values=\"count\").reset_index(names=[\"PA_DEF\"], level=0, drop=True)\n", - "\n", - "result_oecms.head(10)" + "# change habitat to have the id of the habitat\n", + "habitat_dict = {\n", + " 'mangroves': 5,\n", + " 'seamounts': 6,\n", + " 'artificial': 43,\n", + " 'forest': 45,\n", + " 'grassland': 46,\n", + " 'wetlands/open water': 50,\n", + " 'seagrasses': 2,\n", + " 'cold-water corals': 4,\n", + " 'desert': 44,\n", + " 'rocky/mountains': 47,\n", + " 'savanna': 48,\n", + " 'shrubland': 49,\n", + " 'saltmarshes': 1,\n", + " 'warm-water corals': 3\n", + "}\n", + "\n", + "habitats_all['habitat'] = habitats_all['habitat'].replace(habitat_dict)\n", + "habitats_all.head(10)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 121, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "output2(habitats_all, 'location_id', {}, {}, 
['location_id']).to_csv(output_file, index=True)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhabitattotal_areaprotected_areayearenvironmentlocation
01432.924001e+06233152.675055202423.0
12449.872960e+06672698.366583202423.0
23454.458009e+06971303.987441202423.0
34462.035644e+06228349.125359202423.0
45472.384691e+0547745.870360202423.0
........................
2168216957.429267e+0421277.220000202014.0
2169217051.246190e+03732.143750202016.0
2170217152.415419e+032097.740000202017.0
2171217253.989344e+0427151.740000202018.0
2172217351.736209e+0227.830000202019.0
\n", + "

2173 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " id habitat total_area protected_area year environment location\n", + "0 1 43 2.924001e+06 233152.675055 2024 2 3.0\n", + "1 2 44 9.872960e+06 672698.366583 2024 2 3.0\n", + "2 3 45 4.458009e+06 971303.987441 2024 2 3.0\n", + "3 4 46 2.035644e+06 228349.125359 2024 2 3.0\n", + "4 5 47 2.384691e+05 47745.870360 2024 2 3.0\n", + "... ... ... ... ... ... ... ...\n", + "2168 2169 5 7.429267e+04 21277.220000 2020 1 4.0\n", + "2169 2170 5 1.246190e+03 732.143750 2020 1 6.0\n", + "2170 2171 5 2.415419e+03 2097.740000 2020 1 7.0\n", + "2171 2172 5 3.989344e+04 27151.740000 2020 1 8.0\n", + "2172 2173 5 1.736209e+02 27.830000 2020 1 9.0\n", + "\n", + "[2173 rows x 7 columns]" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# async def process_mpa_data(\n", - "# gdf: gpd.GeoDataFrame, loop: list[int], by: list[str], aggfunc: dict\n", - "# ) -> pd.DataFrame:\n", - "# \"\"\"process protected planet data. relevant for acc coverage extent by year indicator.\"\"\"\n", - "# # we split the data by =< year so we can acumulate the coverage\n", - "# base = split_by_year(gdf)\n", - "\n", - "# result_to_iter = pd.concat(base, ignore_index=True).copy()\n", - "\n", - "# with tqdm(total=len(loop)) as pbar: # we create a progress bar\n", - "# new_df = await asyncio.gather(\n", - "# *(spatial_dissolve_chunk(year, result_to_iter, pbar, by, aggfunc) for year in loop)\n", - "# )\n", - "# return pd.concat(\n", - "# [base[0].pipe(calculate_area, \"area\", None).drop(columns=[\"geometry\"]), *new_df],\n", - "# ignore_index=True,\n", - "# )" + "a = pd.read_csv(output_file)\n", + "a" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 123, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], "source": [ - "# final_data 
= await process_mpa_data(\n", - "# eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", - "# range(2011, time.localtime().tm_year + 1),\n", - "# [\"PA_DEF\", \"iso_3\"],\n", - "# {\"protectedAreasCount\": \"sum\"},\n", - "# )\n", - "# coverage = (\n", - "# final_data.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", - "# .pipe(separate_parent_iso, \"iso_3\")\n", - "# .pipe(add_region_iso, \"iso_3\")\n", - "# .replace(\n", - "# {\n", - "# \"iso_3\": {\n", - "# \"ATA\": \"ABNJ\",\n", - "# \"COK\": \"NZL\",\n", - "# \"IOT\": \"GBR\",\n", - "# \"NIU\": \"NZL\",\n", - "# \"SHN\": \"GBR\",\n", - "# \"SJM\": \"NOR\",\n", - "# \"UMI\": \"USA\",\n", - "# \"NCL\": \"FRA\",\n", - "# \"GIB\": \"GBR\",\n", - "# }\n", - "# }\n", - "# )\n", - "# .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", - "# .pipe(add_pa_oecm_percentages)\n", - "# .pipe(add_total_marine_area)\n", - "# .pipe(coverage_stats2)\n", - "# .pipe(calculate_coverage_percentage_mpa)\n", - "# .pipe(calculate_global_contribution)\n", - "# .pipe(add_is_last_year)\n", - "# .pipe(add_environment)\n", - "# )\n", - "\n", + "# Upload csv to bucket\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/habitats.csv'\n", "\n", - "# NewProtectedAreaExtentSchema(\n", - "# coverage.pipe(\n", - "# output,\n", - "# \"iso_3\",\n", - "# {},\n", - "# {},\n", - "# [\"area\", \"iso_3\", 'total_marine_area'],\n", - "# )\n", - "# ).to_csv(\n", - "# output_file,\n", - "# index=True,\n", - "# )\n", - "# coverage.head(2)" + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" ] } ], diff --git a/data/notebooks/pipes_mock/tiles.ipynb b/data/notebooks/pipes_mock/tiles.ipynb index 6ff2bd33..f687dfe2 100644 --- a/data/notebooks/pipes_mock/tiles.ipynb +++ b/data/notebooks/pipes_mock/tiles.ipynb @@ -12,13 
+12,21 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ + "from typing import Union\n", + "from dataclasses import dataclass\n", + "import subprocess\n", + "import logging\n", "from pathlib import Path\n", "import sys\n", "import geopandas as gpd\n", + "import rasterio as rio\n", + "from rasterio.enums import ColorInterp\n", + "from rasterio.plot import show\n", + "import numpy as np\n", "import pandas as pd\n", "import json\n", "import dotenv\n", @@ -36,14 +44,12 @@ "from helpers.file_handler import FileConventionHandler\n", "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", "\n", - "from data_commons.loader import load_regions\n", - "\n", "from pipelines.processors import clean_geometries" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -52,6 +58,60 @@ "current_step = \"tiles\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regions_translation = {\n", + " 'Asia & Pacific': {\n", + " 'Spanish': 'Asia y Pacífico',\n", + " 'French': 'Asie et Pacifique',\n", + " 'LocationCode': 4\n", + " },\n", + " 'Africa': {\n", + " 'Spanish': 'África',\n", + " 'French': 'Afrique',\n", + " 'LocationCode': 3\n", + " },\n", + " 'Europe': {\n", + " 'Spanish': 'Europa',\n", + " 'French': 'Europe',\n", + " 'LocationCode': 6\n", + " },\n", + " 'Latin America & Caribbean': {\n", + " 'Spanish': 'América Latina y el Caribe',\n", + " 'French': 'Amérique latine et Caraïbes',\n", + " 'LocationCode': 8\n", + " },\n", + " 'Polar': {\n", + " 'Spanish': 'Polar',\n", + " 'French': 'Polaire',\n", + " 'LocationCode': 9\n", + " },\n", + " 'North America': {\n", + " 'Spanish': 'América del Norte',\n", + " 'French': 'Amérique du Nord',\n", + " 'LocationCode': 7\n", + " },\n", + " 'West Asia': {\n", + " 'Spanish': 'Asia Occidental',\n", + " 'French': 'Asie occidentale',\n", + 
" 'LocationCode': 9\n", + " },\n", + " 'Antartica': {\n", + " 'Spanish': 'Antártida',\n", + " 'French': 'Antarctique',\n", + " 'LocationCode': 5\n", + " }\n", + "}\n", + "\n", + "# Create a DataFrame from the translations dictionary\n", + "translations_df = pd.DataFrame.from_dict(regions_translation, orient='index').reset_index()\n", + "translations_df.columns = ['name', 'name_es', 'name_fr', 'location']" + ] + }, { "cell_type": "markdown", "metadata": { @@ -63,15 +123,15 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/home/mambauser/data/eez/processed/eez_preprocess.zip\n", - "/home/mambauser/data/eez/processed/preprocess\n" + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n" ] }, { @@ -79,16 +139,16 @@ "output_type": "stream", "text": [ "Allocating 8 GB of heap memory\n", - "[o] Wrote /home/mambauser/data/eez/processed/tiles/eez_tiles.json\n" + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/eez_tiles.json\n" ] }, { "data": { "text/plain": [ - "PosixPath('/home/mambauser/data/eez/processed/tiles/eez_v11.mbtiles')" + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/eez_v11.mbtiles')" ] }, - "execution_count": 5, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -136,19 +196,33 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 62, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'eez_dir' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[21], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m collection_name \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregions\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# load the EEZ file & the regions file\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m eez_data \u001b[38;5;241m=\u001b[39m gpd\u001b[38;5;241m.\u001b[39mread_file(\u001b[43meez_dir\u001b[49m\u001b[38;5;241m.\u001b[39mget_step_fmt_file_path(prev_step, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshp\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mas_posix())\n\u001b[1;32m 5\u001b[0m regions_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 6\u001b[0m [\n\u001b[1;32m 7\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregion_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregion_iso\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlocation_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: iso}\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m ]\n\u001b[1;32m 11\u001b[0m )\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# merge the two files\u001b[39;00m\n", - "\u001b[0;31mNameError\u001b[0m: name 'eez_dir' is not defined" + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3609107/3960908248.py:34: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " ).to_file(\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", + " ogr_write(\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Value 212881389 of field AREA_KM2 of feature 0 not successfully written. 
Possibly due to too larger number with respect to field width\n", + " ogr_write(\n", + "Allocating 16 GB of heap memory\n", + "[dissolve2] Dissolved 282 features into 8 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/regions.json\n" ] + }, + { + "data": { + "text/plain": [ + "CompletedProcess(args='mapshaper-xl 16gb -i /home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess/eez_preprocess_regions.shp -dissolve2 fields=region_id -o /home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/regions.json force format=geojson', returncode=0)" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -156,10 +230,19 @@ "\n", "# load the EEZ file & the regions file\n", "eez_data = gpd.read_file(eez_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", + "\n", + "\n", + "with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + " regions = json.load(f)\n", + "\n", "regions_df = pd.DataFrame(\n", " [\n", - " {\"region_id\": data[\"region_iso\"], \"location_id\": iso}\n", - " for data in load_regions().get(\"data\", [])\n", + " {\n", + " \"region_id\": data[\"region_iso\"],\n", + " \"name\": data[\"region_name\"],\n", + " \"location_id\": iso\n", + " }\n", + " for data in regions.get(\"data\", [])\n", " for iso in data[\"country_iso_3s\"]\n", " ]\n", ")\n", @@ -196,28 +279,344 @@ " force=True,\n", " format=\"geojson\",\n", ").execute()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometryregion_id
0MULTIPOLYGON (((-155.43933 -11.35762, -155.440...
1MULTIPOLYGON (((8.26354 -17.25, 8.25715 -17.25...AF
2MULTIPOLYGON (((19.38155 41.99554, 19.38144 41...EU
3MULTIPOLYGON (((56.37383 24.98043, 56.38053 24...WA
4MULTIPOLYGON (((-56.77653 -36.29604, -56.7764 ...SA
5POLYGON ((-180 -70.32232, -180 -84.36012, -179...AT
6MULTIPOLYGON (((102.56807 -8.87455, 102.56638 ...AS
7MULTIPOLYGON (((-141 73.39761, -141.01268 73.3...NA
\n", + "
" + ], + "text/plain": [ + " geometry region_id\n", + "0 MULTIPOLYGON (((-155.43933 -11.35762, -155.440... \n", + "1 MULTIPOLYGON (((8.26354 -17.25, 8.25715 -17.25... AF\n", + "2 MULTIPOLYGON (((19.38155 41.99554, 19.38144 41... EU\n", + "3 MULTIPOLYGON (((56.37383 24.98043, 56.38053 24... WA\n", + "4 MULTIPOLYGON (((-56.77653 -36.29604, -56.7764 ... SA\n", + "5 POLYGON ((-180 -70.32232, -180 -84.36012, -179... AT\n", + "6 MULTIPOLYGON (((102.56807 -8.87455, 102.56638 ... AS\n", + "7 MULTIPOLYGON (((-141 73.39761, -141.01268 73.3... NA" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"), 'r') as f:\n", + " data = json.load(f)\n", + "\n", + "gdf = gpd.GeoDataFrame.from_features(data['features'])\n", + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometryregion_idlocationnamename_esname_fr
1MULTIPOLYGON (((8.26354 -17.25, 8.25715 -17.25...AF3.0AfricaÁfricaAfrique
2MULTIPOLYGON (((19.38155 41.99554, 19.38144 41...EU6.0EuropeEuropaEurope
3MULTIPOLYGON (((56.37383 24.98043, 56.38053 24...WA9.0PolarPolarPolaire
4MULTIPOLYGON (((56.37383 24.98043, 56.38053 24...WA9.0West AsiaAsia OccidentalAsie occidentale
5MULTIPOLYGON (((-56.77653 -36.29604, -56.7764 ...SA8.0Latin America & CaribbeanAmérica Latina y el CaribeAmérique latine et Caraïbes
6POLYGON ((-180 -70.32232, -180 -84.36012, -179...AT5.0AntarticaAntártidaAntarctique
7MULTIPOLYGON (((102.56807 -8.87455, 102.56638 ...AS4.0Asia & PacificAsia y PacíficoAsie et Pacifique
8MULTIPOLYGON (((-141 73.39761, -141.01268 73.3...NA7.0North AmericaAmérica del NorteAmérique du Nord
\n", + "
" + ], + "text/plain": [ + " geometry region_id location \\\n", + "1 MULTIPOLYGON (((8.26354 -17.25, 8.25715 -17.25... AF 3.0 \n", + "2 MULTIPOLYGON (((19.38155 41.99554, 19.38144 41... EU 6.0 \n", + "3 MULTIPOLYGON (((56.37383 24.98043, 56.38053 24... WA 9.0 \n", + "4 MULTIPOLYGON (((56.37383 24.98043, 56.38053 24... WA 9.0 \n", + "5 MULTIPOLYGON (((-56.77653 -36.29604, -56.7764 ... SA 8.0 \n", + "6 POLYGON ((-180 -70.32232, -180 -84.36012, -179... AT 5.0 \n", + "7 MULTIPOLYGON (((102.56807 -8.87455, 102.56638 ... AS 4.0 \n", + "8 MULTIPOLYGON (((-141 73.39761, -141.01268 73.3... NA 7.0 \n", + "\n", + " name name_es \\\n", + "1 Africa África \n", + "2 Europe Europa \n", + "3 Polar Polar \n", + "4 West Asia Asia Occidental \n", + "5 Latin America & Caribbean América Latina y el Caribe \n", + "6 Antartica Antártida \n", + "7 Asia & Pacific Asia y Pacífico \n", + "8 North America América del Norte \n", + "\n", + " name_fr \n", + "1 Afrique \n", + "2 Europe \n", + "3 Polaire \n", + "4 Asie occidentale \n", + "5 Amérique latine et Caraïbes \n", + "6 Antarctique \n", + "7 Asie et Pacifique \n", + "8 Amérique du Nord " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the locations code CSV\n", + "locations_code = pd.read_csv(\n", + " scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"),\n", + " na_values=[\"\", \"NULL\", \"N/A\", \"NaN\"], # Exclude \"NA\" from being treated as NaN\n", + " keep_default_na=False # Prevent pandas from treating \"NA\" as NaN\n", + ")\n", + "\n", + "# Merge the regions data with the locations code\n", + "regions_df = gdf.merge(locations_code, how=\"left\", left_on=\"region_id\", right_on=\"code\").drop(columns=[\"code\"])\n", "\n", + "# Merge the regions data with the translations\n", + "regions_df = regions_df.merge(translations_df, how=\"left\", on='location')\n", + "regions_df = regions_df.dropna(subset=['location'])\n", + "regions_df" + ] + }, + { + 
"cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:698: UserWarning: 'crs' was not provided. The output dataset will not have projection information defined and may not be usable in other systems.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# Save a geojson with extension json\n", + "regions_df.to_file(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(), driver=\"GeoJSON\")" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "For layer 0, using name \"regions\"\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/regions.json:2: Found ] at top level: \n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/regions.json:5: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"name\":\"regions\",\"features\":[]}\n", + "8 features, 57364566 bytes of geometry, 608 bytes of string pool\n", + "Choosing a maxzoom of -z0 for features typically 39965448 feet (12181468 meters) apart, and at least 22706313 feet (6920884 meters) apart\n", + "Choosing a maxzoom of -z9 for resolution of about 854 feet (260 meters) within features\n", + " 99.9% 9/257/169 \n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/regions.mbtiles')" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "# generate the mbtiles\n", "mbtileGeneration(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"))" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 69, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ - "upload: ../../data/eez/processed/tiles/regions.mbtiles to s3://tilestream-tilesets-production/97/_pending/ojc7oxn5cpu10yo0o9tsl1xlc/skytruth\n" + "upload: data/eez/processed/tiles/regions.mbtiles to s3://tilestream-tilesets-production/67/_pending/nqa72cxynns1wzq1v6pp332mc/skytruth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 100%|██████████| 100/100 [03:00<00:00, 1.81s/it]\n" + "Linking tileset to Mapbox: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:14<00:00, 1.35s/it]\n" ] }, { @@ -226,7 +625,7 @@ "True" ] }, - "execution_count": 34, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } @@ -314,73 +713,576 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "upload: data/gadm/processed/tiles/gadm_simplified.mbtiles to s3://tilestream-tilesets-production/96/_pending/y008s4k96pt1elm0ek7for1mc/skytruth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:41<00:00, 1.61s/it]\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uploadToMapbox(\n", + " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", + " collection_name,\n", + " mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Gadm regions" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"gadm\"\n", + "gadm_dir = 
FileConventionHandler(pipe)\n", + "collection_name = \"gadm_regions\"\n", + "\n", + "# load the EEZ file & the regions file\n", + "gadm_data = gpd.read_file(gadm_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()).drop(columns=['name_es', 'name_fr'])\n", + "\n", + "with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + " regions = json.load(f)\n", + "\n", + "\n", + "regions_df = pd.DataFrame(\n", + " [\n", + " {\n", + " \"region_id\": data[\"region_iso\"],\n", + " \"name\": data[\"region_name\"],\n", + " \"location_id\": iso\n", + " }\n", + " for data in regions.get(\"data\", [])\n", + " for iso in data[\"country_iso_3s\"]\n", + " ]\n", + ")\n", + "\n", + "# Load the locations code CSV\n", + "locations_code = pd.read_csv(\n", + " scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"),\n", + " na_values=[\"\", \"NULL\", \"N/A\", \"NaN\"], # Exclude \"NA\" from being treated as NaN\n", + " keep_default_na=False # Prevent pandas from treating \"NA\" as NaN\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3609107/976430064.py:13: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " ).to_file(\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", + " ogr_write(\n" + ] + } + ], + "source": [ + "# merge the two files\n", + "gpd.GeoDataFrame(\n", + " pd.merge(\n", + " gadm_data,\n", + " regions_df,\n", + " how=\"left\",\n", + " left_on=\"GID_0\",\n", + " right_on=\"location_id\",\n", + " sort=True,\n", + " copy=True,\n", + " ),\n", + " crs=gadm_data.crs,\n", + ").to_file(\n", + " filename=gadm_dir.get_processed_step_path(prev_step)\n", + " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", + " 
.as_posix(),\n", + " driver=\"ESRI Shapefile\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Allocating 16 GB of heap memory\n", + "[dissolve2] Dissolved 204 features into 8 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json\n" + ] + }, + { + "data": { + "text/plain": [ + "CompletedProcess(args='mapshaper-xl 16gb -i /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess/gadm_preprocess_gadm_regions.shp -dissolve2 fields=region_id -o /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json force format=geojson', returncode=0)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# dissolve by region_id keeping the location, region_id, name, name_es, name_fr\n", + "Mapshaper(16).input(\n", + " [\n", + " gadm_dir.get_processed_step_path(prev_step)\n", + " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", + " .as_posix()\n", + " ]\n", + ").dissolve2(fields=\"region_id\").output(\n", + " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(),\n", + " force=True,\n", + " format=\"geojson\",\n", + ").execute()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometryregion_id
0MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ...AS
1MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,...AF
2MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1...EU
3MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ...WA
4MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61...SA
5MULTIPOLYGON (((-169.027 -83.619, -169.029 -83...AT
6MULTIPOLYGON (((-135.117 68.473, -135.119 68.4...NA
7MULTIPOLYGON (((-109.225 10.32, -109.227 10.32...
\n", + "
" + ], + "text/plain": [ + " geometry region_id\n", + "0 MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ... AS\n", + "1 MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,... AF\n", + "2 MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1... EU\n", + "3 MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ... WA\n", + "4 MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61... SA\n", + "5 MULTIPOLYGON (((-169.027 -83.619, -169.029 -83... AT\n", + "6 MULTIPOLYGON (((-135.117 68.473, -135.119 68.4... NA\n", + "7 MULTIPOLYGON (((-109.225 10.32, -109.227 10.32... " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "with open(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"), 'r') as f:\n", + " data = json.load(f)\n", + "\n", + "gdf = gpd.GeoDataFrame.from_features(data['features'])\n", + "gdf\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometryregion_idlocation
0MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ...AS4.0
1MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,...AF3.0
2MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1...EU6.0
3MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ...WA9.0
4MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61...SA8.0
5MULTIPOLYGON (((-169.027 -83.619, -169.029 -83...AT5.0
6MULTIPOLYGON (((-135.117 68.473, -135.119 68.4...NA7.0
7MULTIPOLYGON (((-109.225 10.32, -109.227 10.32...NaN
\n", + "
" + ], + "text/plain": [ + " geometry region_id location\n", + "0 MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ... AS 4.0\n", + "1 MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,... AF 3.0\n", + "2 MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1... EU 6.0\n", + "3 MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ... WA 9.0\n", + "4 MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61... SA 8.0\n", + "5 MULTIPOLYGON (((-169.027 -83.619, -169.029 -83... AT 5.0\n", + "6 MULTIPOLYGON (((-135.117 68.473, -135.119 68.4... NA 7.0\n", + "7 MULTIPOLYGON (((-109.225 10.32, -109.227 10.32... NaN" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge the regions data with the locations code data\n", + "regions_df = gdf.merge(locations_code, how=\"left\", left_on=\"region_id\", right_on=\"code\").drop(columns=[\"code\"])\n", + "\n", + "regions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometryregion_idlocationnamename_esname_fr
0MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ...AS4.0Asia & PacificAsia y PacíficoAsie et Pacifique
1MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,...AF3.0AfricaÁfricaAfrique
2MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1...EU6.0EuropeEuropaEurope
3MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ...WA9.0PolarPolarPolaire
4MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ...WA9.0West AsiaAsia OccidentalAsie occidentale
5MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61...SA8.0Latin America & CaribbeanAmérica Latina y el CaribeAmérique latine et Caraïbes
6MULTIPOLYGON (((-169.027 -83.619, -169.029 -83...AT5.0AntarticaAntártidaAntarctique
7MULTIPOLYGON (((-135.117 68.473, -135.119 68.4...NA7.0North AmericaAmérica del NorteAmérique du Nord
8MULTIPOLYGON (((-109.225 10.32, -109.227 10.32...NaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " geometry region_id location \\\n", + "0 MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ... AS 4.0 \n", + "1 MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,... AF 3.0 \n", + "2 MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1... EU 6.0 \n", + "3 MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ... WA 9.0 \n", + "4 MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ... WA 9.0 \n", + "5 MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61... SA 8.0 \n", + "6 MULTIPOLYGON (((-169.027 -83.619, -169.029 -83... AT 5.0 \n", + "7 MULTIPOLYGON (((-135.117 68.473, -135.119 68.4... NA 7.0 \n", + "8 MULTIPOLYGON (((-109.225 10.32, -109.227 10.32... NaN \n", + "\n", + " name name_es \\\n", + "0 Asia & Pacific Asia y Pacífico \n", + "1 Africa África \n", + "2 Europe Europa \n", + "3 Polar Polar \n", + "4 West Asia Asia Occidental \n", + "5 Latin America & Caribbean América Latina y el Caribe \n", + "6 Antartica Antártida \n", + "7 North America América del Norte \n", + "8 NaN NaN \n", + "\n", + " name_fr \n", + "0 Asie et Pacifique \n", + "1 Afrique \n", + "2 Europe \n", + "3 Polaire \n", + "4 Asie occidentale \n", + "5 Amérique latine et Caraïbes \n", + "6 Antarctique \n", + "7 Amérique du Nord \n", + "8 NaN " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge the regions data with the translations data\n", + "regions_df = regions_df.merge(translations_df, how=\"left\", on='location')\n", + "regions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 34, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "upload: data/gadm/processed/tiles/gadm_simplified.mbtiles to s3://tilestream-tilesets-production/96/_pending/y008s4k96pt1elm0ek7for1mc/skytruth\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:41<00:00, 1.61s/it]\n" + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:698: UserWarning: 'crs' was not provided. The output dataset will not have projection information defined and may not be usable in other systems.\n", + " warnings.warn(\n" ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "uploadToMapbox(\n", - " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.mbtiles\"),\n", - " collection_name,\n", - " mysettings.MAPBOX_USER,\n", - " mysettings.MAPBOX_TOKEN,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Gadm regions" + "# Drop row with location nan and save a geojson with extension json\n", + "regions_df = regions_df.dropna(subset=['location'])\n", + "regions_df.to_file(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(), driver=\"GeoJSON\")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3525114/3234969863.py:30: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", - " ).to_file(\n", - "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", - " ogr_write(\n", - "Allocating 16 GB of heap memory\n", - "[dissolve2] Dissolved 204 features into 8 features\n", - "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json\n", "For layer 0, using name \"gadm_regions\"\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json:3: Found 
] at top level: \n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json:2: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"features\":[]}\n", - "8 features, 99192107 bytes of geometry, 107 bytes of string pool\n", - "Choosing a maxzoom of -z0 for features typically 34570296 feet (10537026 meters) apart, and at least 22231463 feet (6776150 meters) apart\n", - "Choosing a maxzoom of -z9 for resolution of about 562 feet (171 meters) within features\n", - " 99.9% 9/403/254 \n" + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json:2: Found ] at top level: \n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json:5: Reached EOF without all containers being closed: in JSON object {\"type\":\"FeatureCollection\",\"name\":\"gadm_regions\",\"features\":[]}\n", + "8 features, 100618534 bytes of geometry, 705 bytes of string pool\n", + "Choosing a maxzoom of -z0 for features typically 25606995 feet (7805012 meters) apart, and at least 12085379 feet (3683624 meters) apart\n", + "Choosing a maxzoom of -z9 for resolution of about 557 feet (170 meters) within features\n", + " 99.9% 9/439/254 \n" ] }, { @@ -389,83 +1291,33 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.mbtiles')" ] }, - "execution_count": 14, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "collection_name = \"gadm_regions\"\n", - "\n", - "# load the EEZ file & the regions file\n", - "gadm_data = gpd.read_file(gadm_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", - "\n", - "with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", - " regions = json.load(f)\n", - "\n", - "\n", - "regions_df = pd.DataFrame(\n", - " [\n", - " {\"region_id\": data[\"region_iso\"], \"location_id\": iso}\n", - " for data in load_regions().get(\"data\", [])\n", - " for 
iso in data[\"country_iso_3s\"]\n", - " ]\n", - ")\n", - "\n", - "# merge the two files\n", - "gpd.GeoDataFrame(\n", - " pd.merge(\n", - " gadm_data,\n", - " regions_df,\n", - " how=\"left\",\n", - " left_on=\"GID_0\",\n", - " right_on=\"location_id\",\n", - " sort=True,\n", - " copy=True,\n", - " ),\n", - " crs=gadm_data.crs,\n", - ").to_file(\n", - " filename=gadm_dir.get_processed_step_path(prev_step)\n", - " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", - " .as_posix(),\n", - " driver=\"ESRI Shapefile\",\n", - ")\n", - "\n", - "# dissolve by region_id\n", - "\n", - "Mapshaper(16).input(\n", - " [\n", - " gadm_dir.get_processed_step_path(prev_step)\n", - " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", - " .as_posix()\n", - " ]\n", - ").dissolve2(fields=\"region_id\").output(\n", - " gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(),\n", - " force=True,\n", - " format=\"geojson\",\n", - ").execute()\n", - "\n", "# generate the mbtiles\n", "mbtileGeneration(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"))" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "upload: data/gadm/processed/tiles/gadm_regions.mbtiles to s3://tilestream-tilesets-production/41/_pending/345ipuxzuqu1eqn09bs6tr1mc/skytruth\n" + "upload: data/gadm/processed/tiles/gadm_regions.mbtiles to s3://tilestream-tilesets-production/4d/_pending/849nr2jzhpt1m5c3rt55232mc/skytruth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Linking tileset to Mapbox: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:19<00:00, 1.40s/it]\n" + "Linking tileset to Mapbox: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:25<00:00, 
1.45s/it]\n" ] }, { @@ -474,7 +1326,7 @@ "True" ] }, - "execution_count": 15, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1414,7 +2266,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1652,6 +2504,251 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Terrestrial habitats" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"terrestrial-habitats\"\n", + "collection_name = \"terrestrial_habitats\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"jung_etal_1km_reclassed.tif\")\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"terrestrial_habitats_tiles.mbtiles\")\n", + "\n", + "# Download the protected seas layers from the bucket && unzip it\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"terrestrial/jung_etal_2020/jung_etal_1km_reclassed.tif\",\n", + " file=input_file,\n", + " operation=\"r\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the land cover classes and their corresponding colors (as hex strings)\n", + "land_cover_classes = {\n", + " 1: \"#01550E\", # Forest\n", + " 2: \"#ffe399\", # Savanna\n", + " 3: \"#C6FF53\", # Shrubland\n", + " 4: \"#1D931D\", # Grassland\n", + " 5: \"#5BB5FF\", # Wetlands/open water\n", + " 6: \"#79685a\", # Rocky/mountains\n", + " 7: \"#FBF8D6\", # Desert\n", + " 8: \"#cecece\", # Artificial\n", + " 255: \"#D3D3D3\" # Other\n", + "}\n", + "\n", + "# Function to convert hex color codes to RGB tuples\n", + "def hex_to_rgb(hex_color):\n", + " hex_color = hex_color.lstrip(\"#\") # Remove the '#' symbol\n", + " return tuple(int(hex_color[i:i+2], 
16) for i in (0, 2, 4))\n", + "\n", + "# Create a color map from the land cover classes\n", + "def create_color_map(land_cover_classes):\n", + " color_map = np.zeros((256, 3), dtype=np.uint8) # 256 possible values (0-255)\n", + " for class_value, hex_color in land_cover_classes.items():\n", + " color_map[class_value] = hex_to_rgb(hex_color) # Convert hex to RGB\n", + " return color_map" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Styled raster saved at: /home/sofia/dev/skytruth-30x30/data/data/terrestrial-habitats/processed/preprocess/jung_etal_1km_styled.tif\n" + ] + } + ], + "source": [ + "# Open the raster file\n", + "with rio.open(input_file) as src:\n", + " band = src.read(1) # Read the first band\n", + " profile = src.profile # Get the metadata\n", + " crs = src.crs # Get the CRS from the original file\n", + "\n", + "# Replace 255 values with NaN\n", + "band = band.astype(np.float32) # Change dtype to float32 to allow NaN values\n", + "band[band == 255] = np.nan # Set 255 values to NaN\n", + "\n", + "# Create the color map\n", + "color_map = create_color_map(land_cover_classes)\n", + "\n", + "# Create a colored image based on the band values, setting NaNs to a transparent color (for visualization only)\n", + "colored_image = np.zeros((band.shape[0], band.shape[1], 3), dtype=np.uint8)\n", + "\n", + "for class_value, hex_color in land_cover_classes.items():\n", + " if class_value != 255: # Exclude the 'Other' category\n", + " rgb_color = hex_to_rgb(hex_color)\n", + " colored_image[band == class_value] = rgb_color\n", + "\n", + "# Save the styled raster as a new GeoTIFF\n", + "styled_raster_path = pipe_dir.get_processed_step_path(prev_step).joinpath(\"jung_etal_1km_styled.tif\") # Update with desired output path\n", + "\n", + "# Update profile for the RGB image\n", + "profile.update({\n", + " 'count': 3, # Number of bands for RGB\n", + " 'dtype': 
'uint8', # Change to uint8 for RGB\n", + " 'driver': 'GTiff', # Format\n", + " 'crs': crs, # Add the CRS to the profile\n", + " 'compress': 'deflate', # Apply Deflate compression\n", + " 'predictor': 2 # Use horizontal differencing predictor\n", + "})\n", + "\n", + "with rio.open(styled_raster_path, 'w', **profile) as dst:\n", + " # Write each color channel to a separate band\n", + " dst.write(colored_image[:, :, 0], 1) # Red channel\n", + " dst.write(colored_image[:, :, 1], 2) # Green channel\n", + " dst.write(colored_image[:, :, 2], 3) # Blue channel\n", + "\n", + " # Set color interpretation for each band\n", + " dst.colorinterp = [ColorInterp.red, ColorInterp.green, ColorInterp.blue]\n", + "\n", + "print(f\"Styled raster saved at: {styled_raster_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/habitats.tif'\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=styled_raster_path,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "upload: data/terrestrial-habitats/processed/preprocess/jung_etal_1km_styled.tif to s3://tilestream-tilesets-production/e1/_pending/hnc0pcuglmm184g2tzay632mc/skytruth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:14<00:00, 1.35s/it]\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uploadToMapbox(\n", + " styled_raster_path,\n", + " \"terrestrial_habitats\",\n", + " 
mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Terrestrial recommendations" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"terrestrial-recommendations\"\n", + "collection_name = \"terrestrial_recommendations\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"terrestrial-jung.mbtiles\")\n", + "\n", + "# Download the protected seas layers from the bucket && unzip it\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"terrestrial/jung_etal_2021/terrestial-jung.mbtiles\",\n", + " file=input_file,\n", + " operation=\"r\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "upload: data/terrestrial-recommendations/processed/tiles/terrestrial-jung.mbtiles to s3://tilestream-tilesets-production/3a/_pending/h47pn0cfeow1b0l2gw41502mc/skytruth\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Linking tileset to Mapbox: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00, 4.81it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uploadToMapbox(\n", + " input_file,\n", + " \"terrestrial_recommendations\",\n", + " mysettings.MAPBOX_USER,\n", + " mysettings.MAPBOX_TOKEN,\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1721,18 +2818,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - 
"name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" } }, "nbformat": 4, From 4c656b4658fa6b16b898b02390e97d54b3a6bcce Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 23 Oct 2024 11:42:09 +0200 Subject: [PATCH 13/16] add data commons files --- .../data/dependency_to_parent.json | 78 +++++++++++++++++++ data/src/data_commons/data/iso_map.json | 5 +- data/src/data_commons/data/locations.json | 1 + data/src/data_commons/data/locations_code.csv | 44 +++++++++++ data/src/data_commons/data/regions_data.json | 7 +- 5 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 data/src/data_commons/data/dependency_to_parent.json create mode 100644 data/src/data_commons/data/locations.json diff --git a/data/src/data_commons/data/dependency_to_parent.json b/data/src/data_commons/data/dependency_to_parent.json new file mode 100644 index 00000000..841c8681 --- /dev/null +++ b/data/src/data_commons/data/dependency_to_parent.json @@ -0,0 +1,78 @@ +{ + "ABW": ["NLD", "Netherlands"], + "AIA": ["GBR", "United Kingdom"], + "ALA": ["FIN", "Finland"], + "ASM": ["USA", "United States"], + "ATF": ["FRA", "France"], + "BES": ["NLD", "Netherlands"], + "BLM": ["FRA", "France"], + "BMU": ["GBR", "United Kingdom"], + "BVT": ["NOR", "Norway"], + "CCK": ["AUS", "Australia"], + "COK": ["NZL", "New Zealand"], + "CUW": ["NLD", "Netherlands"], + "CXR": ["AUS", "Australia"], + "CYM": ["GBR", "United Kingdom"], + "FLK": ["GBR", "United Kingdom"], + "FRO": ["DNK", "Denmark"], + "GGY": ["GBR", "United Kingdom"], + "GIB": ["GBR", "United Kingdom"], + "GLP": ["FRA", "France"], + "GRL": ["DNK", "Denmark"], + "GUF": ["FRA", "France"], + "GUM": ["USA", "United States"], + "HMD": ["AUS", "Australia"], + "IMN": ["GBR", "United Kingdom"], + "IOT": ["GBR", "United Kingdom"], + "JEY": ["GBR", "United Kingdom"], + "KGZ": ["KGZ", "Kyrgyzstan"], + "LAO": 
["LAO", "Laos"], + "LIE": ["LIE", "Liechtenstein"], + "LSO": ["LSO", "Lesotho"], + "LUX": ["LUX", "Luxembourg"], + "MAF": ["FRA", "France"], + "MDA": ["MDA", "Moldova"], + "MKD": ["MKD", "North Macedonia"], + "MLI": ["MLI", "Mali"], + "MNG": ["MNG", "Mongolia"], + "MNP": ["USA", "United States"], + "MSR": ["GBR", "United Kingdom"], + "MTQ": ["FRA", "France"], + "MWI": ["MWI", "Malawi"], + "MYT": ["FRA", "France"], + "NCL": ["FRA", "France"], + "NER": ["NER", "Niger"], + "NFK": ["AUS", "Australia"], + "NIU": ["NZL", "New Zealand"], + "NPL": ["NPL", "Nepal"], + "PCN": ["GBR", "United Kingdom"], + "PRI": ["USA", "United States"], + "PRY": ["PRY", "Paraguay"], + "PYF": ["FRA", "France"], + "REU": ["FRA", "France"], + "RWA": ["RWA", "Rwanda"], + "SGS": ["GBR", "United Kingdom"], + "SHN": ["GBR", "United Kingdom"], + "SJM": ["NOR", "Norway"], + "SMR": ["SMR", "San Marino"], + "SPM": ["FRA", "France"], + "SRB": ["SRB", "Serbia"], + "SSD": ["SSD", "South Sudan"], + "SVK": ["SVK", "Slovakia"], + "SWZ": ["SWZ", "Eswatini"], + "SXM": ["NLD", "Netherlands"], + "TCA": ["GBR", "United Kingdom"], + "TCD": ["TCD", "Chad"], + "TJK": ["TJK", "Tajikistan"], + "TKL": ["NZL", "New Zealand"], + "UGA": ["UGA", "Uganda"], + "UMI": ["USA", "United States"], + "UZB": ["UZB", "Uzbekistan"], + "VAT": ["VAT", "Vatican City"], + "VGB": ["GBR", "United Kingdom"], + "VIR": ["USA", "United States"], + "WLF": ["FRA", "France"], + "XAD": ["GBR", "United Kingdom"], + "ZMB": ["ZMB", "Zambia"], + "ZWE": ["ZWE", "Zimbabwe"] +} \ No newline at end of file diff --git a/data/src/data_commons/data/iso_map.json b/data/src/data_commons/data/iso_map.json index 37eb3868..703f6266 100644 --- a/data/src/data_commons/data/iso_map.json +++ b/data/src/data_commons/data/iso_map.json @@ -207,5 +207,8 @@ "UZB": "Uzbekistan", "VAT": "Vatican City", "ZMB": "Zambia", - "ZWE": "Zimbabwe" + "ZWE": "Zimbabwe", + "XCA": "Caspian Sea", + "XKO": "Kosovo", + "ZNC": "Northern Cyprus" } diff --git 
a/data/src/data_commons/data/locations.json b/data/src/data_commons/data/locations.json new file mode 100644 index 00000000..250736da --- /dev/null +++ b/data/src/data_commons/data/locations.json @@ -0,0 +1 @@ +{"version": 2, "data": {"api::location.location": {"1": {"total_marine_area": 361000000, "id": 1, "marine_bounds": [-180.0, -85.5625, 180.0, 90.0], "code": "GLOB", "total_terrestrial_area": 134954835, "terrestrial_bounds": [-180.0, -90.0, 180.0, 83.65833], "type": "worldwide", "groups": [], "name": "Global", "name_es": "Global", "name_fr": "Global", "marine_target": 30, "marine_target_year": 2030}, "2": {"total_marine_area": 212881389, "id": 2, "marine_bounds": [-180.0, -76.80012, 180.0, 90.0], "code": "ABNJ", "total_terrestrial_area": 0, "terrestrial_bounds": null, "type": "highseas", "groups": [], "name": "Areas Beyond National Jurisdiction", "name_es": "\u00c1reas fuera de la jurisdicci\u00f3n nacional", "name_fr": "Zones au-del\u00e0 de la juridiction nationale", "marine_target": null, "marine_target_year": null}, "3": {"total_marine_area": 14878058, "id": 3, "marine_bounds": [-28.84709, -50.31506, 75.85287, 38.80087], "code": "AF", "total_terrestrial_area": 29993095, "terrestrial_bounds": [-25.3618, -34.83514, 63.50347, 37.55986], "type": "region", "groups": [], "name": "Africa", "name_es": "\u00c1frica", "name_fr": "Afrique", "marine_target": null, "marine_target_year": null}, "4": {"total_marine_area": 54088687, "id": 4, "marine_bounds": [-180.0, -58.44947, 180.0, 47.73081], "code": "AS", "total_terrestrial_area": 31625556, "terrestrial_bounds": [-180.0, -55.11694, 180.0, 53.56086], "type": "region", "groups": [], "name": "Asia & Pacific", "name_es": "Asia y Pac\u00edfico", "name_fr": "Asie et Pacifique", "marine_target": null, "marine_target_year": null}, "5": {"total_marine_area": 9618978, "id": 5, "marine_bounds": [-180.0, -85.5625, 180.0, -57.18865], "code": "AT", "total_terrestrial_area": 12088230, "terrestrial_bounds": [-180.0, -90.0, 180.0, 
-59.59375], "type": "region", "groups": [], "name": "Antarctica", "name_es": "Ant\u00e1rtida", "name_fr": "Antarctique", "marine_target": null, "marine_target_year": null}, "6": {"total_marine_area": 35210435, "id": 6, "marine_bounds": [-180.0, -62.78834, 180.0, 86.99401], "code": "EU", "total_terrestrial_area": 30037571, "terrestrial_bounds": [-180.0, -59.48428, 180.0, 83.65833], "type": "region", "groups": [], "name": "Europe", "name_es": "Europa", "name_fr": "Europe", "marine_target": null, "marine_target_year": null}, "7": {"total_marine_area": 17971235, "id": 7, "marine_bounds": [-180.0, -17.55527, 180.0, 86.43187], "code": "NA", "total_terrestrial_area": 19371152, "terrestrial_bounds": [-179.15056, -14.37324, 179.77341, 83.11042], "type": "region", "groups": [], "name": "North America", "name_es": "Am\u00e9rica del Norte", "name_fr": "Am\u00e9rique du Nord", "marine_target": null, "marine_target_year": null}, "8": {"total_marine_area": 20993094, "id": 8, "marine_bounds": [-122.1791, -62.78834, -19.89991, 32.62694], "code": "SA", "total_terrestrial_area": 20405996, "terrestrial_bounds": [-118.36648, -55.98, -28.84764, 32.71863], "type": "region", "groups": [], "name": "Latin America & Caribbean", "name_es": "Am\u00e9rica Latina y Caribe", "name_fr": "Am\u00e9rique latine et Cara\u00efbes", "marine_target": null, "marine_target_year": null}, "9": {"total_marine_area": 1449233, "id": 9, "marine_bounds": [33.69465, 8.95275, 63.36954, 36.0625], "code": "WA", "total_terrestrial_area": 3521433, "terrestrial_bounds": [34.22903, 12.10819, 59.83931, 37.37804], "type": "region", "groups": [], "name": "West Asia", "name_es": "Asia Occidental", "name_fr": "Asie occidentale", "marine_target": null, "marine_target_year": null}, "10": {"total_marine_area": 495866, "id": 10, "marine_bounds": [8.19586, -17.27214, 13.86517, -5.02988], "code": "AGO", "total_terrestrial_area": 1251701, "terrestrial_bounds": [11.6687, -18.04208, 24.08007, -4.37259], "type": "country", "groups": 
[3], "name": "Angola", "name_es": "Angola", "name_fr": "Angola", "marine_target": null, "marine_target_year": null}, "11": {"total_marine_area": 12165, "id": 11, "marine_bounds": [18.32149, 39.64039, 20.02083, 42.0112], "code": "ALB", "total_terrestrial_area": 28690, "terrestrial_bounds": [19.26416, 39.6507, 21.04909, 42.66043], "type": "country", "groups": [6], "name": "Albania", "name_es": "Albania", "name_fr": "Albanie", "marine_target": null, "marine_target_year": null}, "12": {"total_marine_area": 57973, "id": 12, "marine_bounds": [51.43556, 23.95901, 57.12739, 26.41293], "code": "ARE", "total_terrestrial_area": 71259, "terrestrial_bounds": [51.49798, 22.63162, 56.38116, 26.06933], "type": "country", "groups": [9], "name": "United Arab Emirates", "name_es": "Emiratos \u00c1rabes Unidos", "name_fr": "\u00c9mirats Arabes Unis", "marine_target": null, "marine_target_year": null}, "13": {"total_marine_area": 2901816, "id": 13, "marine_bounds": [-69.60084, -62.78834, -19.89991, -31.52703], "code": "ARG", "total_terrestrial_area": 2779262, "terrestrial_bounds": [-73.56056, -55.06153, -53.59184, -21.78137], "type": "country", "groups": [8], "name": "Argentina", "name_es": "Argentina", "name_fr": "Argentine", "marine_target": null, "marine_target_year": null}, "14": {"total_marine_area": 9618978, "id": 14, "marine_bounds": [-180.0, -85.5625, 180.0, -57.18865], "code": "ATA", "total_terrestrial_area": 12088230, "terrestrial_bounds": [-180.0, -90.0, 180.0, -59.59375], "type": "country", "groups": [5], "name": "Antarctica", "name_es": "Ant\u00e1rtida", "name_fr": "Antarctique", "marine_target": null, "marine_target_year": null}, "15": {"total_marine_area": 111568, "id": 15, "marine_bounds": [-62.75096, 16.61163, -58.36828, 20.92249], "code": "ATG", "total_terrestrial_area": 437, "terrestrial_bounds": [-62.34903, 16.93153, -61.65653, 17.72958], "type": "country", "groups": [8], "name": "Antigua and Barbuda", "name_es": "Antigua y Barbuda", "name_fr": "Antigua-et-Barbuda", 
"marine_target": null, "marine_target_year": null}, "16": {"total_marine_area": 8994341, "id": 16, "marine_bounds": [67.05406, -58.44947, 171.80111, -8.47174], "code": "AUS", "total_terrestrial_area": 7703345, "terrestrial_bounds": [72.58265, -55.11694, 167.99806, -9.14218], "type": "country", "groups": [4], "name": "Australia", "name_es": "Australia", "name_fr": "Australie", "marine_target": 30, "marine_target_year": 2030}, "17": {"total_marine_area": 80614, "id": 17, "marine_bounds": [48.59387, 38.28529, 51.81735, 42.60552], "code": "AZE", "total_terrestrial_area": 86138, "terrestrial_bounds": [44.7726, 38.39705, 50.60783, 41.90748], "type": "country", "groups": [6], "name": "Azerbaijan", "name_es": "Azerbaiy\u00e1n", "name_fr": "Azerba\u00efdjan", "marine_target": null, "marine_target_year": null}, "18": {"total_marine_area": 3495, "id": 18, "marine_bounds": [2.23833, 51.08931, 3.3704, 51.87611], "code": "BEL", "total_terrestrial_area": 30599, "terrestrial_bounds": [2.55536, 49.49722, 6.40787, 51.50382], "type": "country", "groups": [6], "name": "Belgium", "name_es": "B\u00e9lgica", "name_fr": "Belgique", "marine_target": 30, "marine_target_year": 2030}, "19": {"total_marine_area": 35493, "id": 19, "marine_bounds": [1.632, 2.96992, 2.98419, 6.5121], "code": "BEN", "total_terrestrial_area": 115759, "terrestrial_bounds": [0.77435, 6.23491, 3.8517, 12.41835], "type": "country", "groups": [3], "name": "Benin", "name_es": "Ben\u00edn", "name_fr": "B\u00e9nin", "marine_target": null, "marine_target_year": null}, "20": {"total_marine_area": 112166, "id": 20, "marine_bounds": [88.92678, 17.86338, 92.42944, 24.37767], "code": "BGD", "total_terrestrial_area": 139704, "terrestrial_bounds": [88.01057, 20.74111, 92.67366, 26.63407], "type": "country", "groups": [4], "name": "Bangladesh", "name_es": "Banglad\u00e9s", "name_fr": "Bangladesh", "marine_target": null, "marine_target_year": null}, "21": {"total_marine_area": 34745, "id": 21, "marine_bounds": [27.44371, 41.97819, 
31.33082, 43.73847], "code": "BGR", "total_terrestrial_area": 111398, "terrestrial_bounds": [22.34378, 41.23481, 28.60903, 44.21268], "type": "country", "groups": [6], "name": "Bulgaria", "name_es": "Bulgaria", "name_fr": "Bulgarie", "marine_target": null, "marine_target_year": null}, "22": {"total_marine_area": 7516, "id": 22, "marine_bounds": [50.26963, 25.535, 51.12189, 27.16667], "code": "BHR", "total_terrestrial_area": 716, "terrestrial_bounds": [50.27467, 25.55625, 50.82486, 26.28875], "type": "country", "groups": [9], "name": "Bahrain", "name_es": "Bar\u00e9in", "name_fr": "Bahre\u00efn", "marine_target": null, "marine_target_year": null}, "23": {"total_marine_area": 619785, "id": 23, "marine_bounds": [-81.23148, 20.37354, -70.51049, 30.37239], "code": "BHS", "total_terrestrial_area": 13418, "terrestrial_bounds": [-80.47597, 20.91208, -72.71208, 27.27139], "type": "country", "groups": [8], "name": "The Bahamas", "name_es": "Bahamas", "name_fr": "Bahamas", "marine_target": null, "marine_target_year": null}, "24": {"total_marine_area": 13, "id": 24, "marine_bounds": [17.542, 42.88342, 17.64799, 42.93836], "code": "BIH", "total_terrestrial_area": 50965, "terrestrial_bounds": [15.72739, 42.56531, 19.61471, 45.27468], "type": "country", "groups": [6], "name": "Bosnia and Herzegovina", "name_es": "Bosnia y Herzegovina", "name_fr": "Bosnie-Herz\u00e9govine", "marine_target": null, "marine_target_year": null}, "25": {"total_marine_area": 34312, "id": 25, "marine_bounds": [-88.94537, 15.88746, -86.17251, 18.48793], "code": "BLZ", "total_terrestrial_area": 22115, "terrestrial_bounds": [-89.22417, 15.89266, -87.48597, 18.49666], "type": "country", "groups": [8], "name": "Belize", "name_es": "Belice", "name_fr": "Belize", "marine_target": 30, "marine_target_year": 2030}, "26": {"total_marine_area": 3677664, "id": 26, "marine_bounds": [-54.58321, -35.78779, -25.29396, 7.04507], "code": "BRA", "total_terrestrial_area": 8541468, "terrestrial_bounds": [-73.98971, -33.74632, 
-28.84764, 5.26488], "type": "country", "groups": [8], "name": "Brazil", "name_es": "Brasil", "name_fr": "Br\u00e9sil", "marine_target": 30, "marine_target_year": 2030}, "27": {"total_marine_area": 185074, "id": 27, "marine_bounds": [-60.38158, 10.60444, -56.00158, 16.01923], "code": "BRB", "total_terrestrial_area": 436, "terrestrial_bounds": [-59.6507, 13.04458, -59.4193, 13.33514], "type": "country", "groups": [8], "name": "Barbados", "name_es": "Barbados", "name_fr": "Barbade", "marine_target": null, "marine_target_year": null}, "28": {"total_marine_area": 43144, "id": 28, "marine_bounds": [111.72596, 4.55212, 115.25645, 7.58738], "code": "BRN", "total_terrestrial_area": 5803, "terrestrial_bounds": [114.12805, 4.0222, 115.37596, 5.04602], "type": "country", "groups": [4], "name": "Brunei", "name_es": "Brun\u00e9i", "name_fr": "Brunei", "marine_target": null, "marine_target_year": null}, "29": {"total_marine_area": 5765316, "id": 29, "marine_bounds": [-141.0, 40.05115, -47.69415, 86.43187], "code": "CAN", "total_terrestrial_area": 9903987, "terrestrial_bounds": [-141.00687, 41.67693, -52.61889, 83.11042], "type": "country", "groups": [7], "name": "Canada", "name_es": "Canad\u00e1", "name_fr": "Canada", "marine_target": 30, "marine_target_year": 2030}, "30": {"total_marine_area": 3668775, "id": 30, "marine_bounds": [-113.19655, -59.85268, -65.72667, -18.35012], "code": "CHL", "total_terrestrial_area": 752264, "terrestrial_bounds": [-109.45491, -55.98, -66.41821, -17.49859], "type": "country", "groups": [8], "name": "Chile", "name_es": "Chile", "name_fr": "Chili", "marine_target": 30, "marine_target_year": 2030}, "31": {"total_marine_area": 1376189, "id": 31, "marine_bounds": [107.15944, 6.10049, 126.1863, 41.15916], "code": "CHN", "total_terrestrial_area": 9384266, "terrestrial_bounds": [73.5577, 18.15931, 134.77393, 53.56086], "type": "country", "groups": [4], "name": "China", "name_es": "China", "name_fr": "Chine", "marine_target": 30, "marine_target_year": 
2030}, "32": {"total_marine_area": 171760, "id": 32, "marine_bounds": [-7.59449, 1.01343, -2.85022, 5.54478], "code": "CIV", "total_terrestrial_area": 322907, "terrestrial_bounds": [-8.5993, 4.36181, -2.49489, 10.73664], "type": "country", "groups": [3], "name": "Ivory Coast", "name_es": "Costa de Marfil", "name_fr": "C\u00f4te d'Ivoire", "marine_target": 50, "marine_target_year": 2020}, "33": {"total_marine_area": 15143, "id": 33, "marine_bounds": [8.32608, 2.23237, 10.12744, 4.93252], "code": "CMR", "total_terrestrial_area": 468059, "terrestrial_bounds": [8.49945, 1.65227, 16.19105, 13.07739], "type": "country", "groups": [3], "name": "Cameroon", "name_es": "Camer\u00fan", "name_fr": "Cameroun", "marine_target": null, "marine_target_year": null}, "34": {"total_marine_area": 13373, "id": 34, "marine_bounds": [9.00732, -7.06505, 13.6504, -5.52709], "code": "COD", "total_terrestrial_area": 2339058, "terrestrial_bounds": [12.20153, -13.45248, 31.3057, 5.3861], "type": "country", "groups": [3], "name": "Democratic Republic of the Congo", "name_es": "Rep\u00fablica Democr\u00e1tica del Congo", "name_fr": "R\u00e9publique d\u00e9mocratique du Congo", "marine_target": null, "marine_target_year": null}, "35": {"total_marine_area": 33807, "id": 35, "marine_bounds": [8.91135, -6.73445, 12.00958, -3.92522], "code": "COG", "total_terrestrial_area": 343258, "terrestrial_bounds": [11.20086, -5.03066, 18.65, 3.70308], "type": "country", "groups": [3], "name": "Republic of the Congo", "name_es": "Rep\u00fablica del Congo", "name_fr": "R\u00e9publique du Congo", "marine_target": null, "marine_target_year": null}, "36": {"total_marine_area": 775857, "id": 36, "marine_bounds": [-84.8098, 1.28917, -70.41768, 16.16944], "code": "COL", "total_terrestrial_area": 1142228, "terrestrial_bounds": [-81.84153, -4.22843, -66.83774, 15.91248], "type": "country", "groups": [8], "name": "Colombia", "name_es": "Colombia", "name_fr": "Colombie", "marine_target": 30, "marine_target_year": 2030}, 
"37": {"total_marine_area": 231214, "id": 37, "marine_bounds": [41.83518, -14.53065, 46.68598, -8.10518], "code": "COM", "total_terrestrial_area": 1680, "terrestrial_bounds": [43.22875, -12.42264, 44.54097, -11.36486], "type": "country", "groups": [3], "name": "Comoros", "name_es": "Comoras", "name_fr": "Comores", "marine_target": null, "marine_target_year": null}, "38": {"total_marine_area": 801936, "id": 38, "marine_bounds": [-28.84709, 11.45131, -19.535, 20.55551], "code": "CPV", "total_terrestrial_area": 4105, "terrestrial_bounds": [-25.3618, 14.80181, -22.6568, 17.20542], "type": "country", "groups": [3], "name": "Cape Verde", "name_es": "Cabo Verde", "name_fr": "Cap-Vert", "marine_target": null, "marine_target_year": null}, "39": {"total_marine_area": 599063, "id": 39, "marine_bounds": [-90.47111, 1.98306, -80.0, 11.60171], "code": "CRI", "total_terrestrial_area": 51387, "terrestrial_bounds": [-87.10184, 5.49858, -82.55232, 11.21976], "type": "country", "groups": [8], "name": "Costa Rica", "name_es": "Costa Rica", "name_fr": "Costa Rica", "marine_target": 30, "marine_target_year": 2030}, "40": {"total_marine_area": 352259, "id": 40, "marine_bounds": [-86.93966, 18.83224, -73.58236, 25.22457], "code": "CUB", "total_terrestrial_area": 111025, "terrestrial_bounds": [-84.95236, 19.82597, -74.13119, 23.27764], "type": "country", "groups": [8], "name": "Cuba", "name_es": "Cuba", "name_fr": "Cuba", "marine_target": null, "marine_target_year": null}, "41": {"total_marine_area": 98450, "id": 41, "marine_bounds": [29.84648, 32.88889, 35.1943, 36.21911], "code": "CYP", "total_terrestrial_area": 5720, "terrestrial_bounds": [32.26931, 34.62502, 34.0882, 35.1994], "type": "country", "groups": [6], "name": "Cyprus", "name_es": "Chipre", "name_fr": "Chypre", "marine_target": 30, "marine_target_year": 2030}, "42": {"total_marine_area": 56763, "id": 42, "marine_bounds": [3.35, 52.87112, 14.75, 55.91928], "code": "DEU", "total_terrestrial_area": 356527, "terrestrial_bounds": 
[5.86625, 47.2707, 15.04181, 55.05653], "type": "country", "groups": [6], "name": "Germany", "name_es": "Alemania", "name_fr": "Allemagne", "marine_target": 30, "marine_target_year": 2030}, "43": {"total_marine_area": 7468, "id": 43, "marine_bounds": [42.51346, 11.46101, 44.14265, 12.86159], "code": "DJI", "total_terrestrial_area": 22460, "terrestrial_bounds": [41.74823, 10.90872, 43.41764, 12.70678], "type": "country", "groups": [3], "name": "Djibouti", "name_es": "Yibuti", "name_fr": "Djibouti", "marine_target": null, "marine_target_year": null}, "44": {"total_marine_area": 28552, "id": 44, "marine_bounds": [-62.81389, 14.48861, -57.875, 16.50083], "code": "DMA", "total_terrestrial_area": 757, "terrestrial_bounds": [-61.48014, 15.20625, -61.24014, 15.64014], "type": "country", "groups": [8], "name": "Dominica", "name_es": "Dominica", "name_fr": "Dominique", "marine_target": null, "marine_target_year": null}, "45": {"total_marine_area": 2648521, "id": 45, "marine_bounds": [-75.0, 54.36483, 16.50737, 86.99401], "code": "DNK", "total_terrestrial_area": 2178809, "terrestrial_bounds": [-73.24323, 54.55903, 15.19306, 83.65833], "type": "country", "groups": [6], "name": "Denmark", "name_es": "Dinamarca", "name_fr": "Danemark", "marine_target": 30, "marine_target_year": 2030}, "46": {"total_marine_area": 384084, "id": 46, "marine_bounds": [-73.47475, 14.675, -65.82185, 24.08931], "code": "DOM", "total_terrestrial_area": 48240, "terrestrial_bounds": [-72.00388, 17.47014, -68.32264, 19.93236], "type": "country", "groups": [8], "name": "Dominican Republic", "name_es": "Rep\u00fablica Dominicana", "name_fr": "R\u00e9publique dominicaine", "marine_target": 30, "marine_target_year": 2030}, "47": {"total_marine_area": 131193, "id": 47, "marine_bounds": [-2.21233, 35.07067, 8.64205, 38.80087], "code": "DZA", "total_terrestrial_area": 2311455, "terrestrial_bounds": [-8.67387, 18.96023, 11.98737, 37.0887], "type": "country", "groups": [3], "name": "Algeria", "name_es": "Argelia", 
"name_fr": "Alg\u00e9rie", "marine_target": null, "marine_target_year": null}, "48": {"total_marine_area": 1129828, "id": 48, "marine_bounds": [-95.33883, -4.76335, -78.76202, 5.03164], "code": "ECU", "total_terrestrial_area": 257487, "terrestrial_bounds": [-92.00854, -5.0158, -75.18715, 1.68183], "type": "country", "groups": [8], "name": "Ecuador", "name_es": "Ecuador", "name_fr": "\u00c9quateur", "marine_target": null, "marine_target_year": null}, "49": {"total_marine_area": 263076, "id": 49, "marine_bounds": [25.14461, 21.88967, 37.82927, 33.82085], "code": "EGY", "total_terrestrial_area": 985528, "terrestrial_bounds": [24.6981, 21.72539, 36.24875, 31.66792], "type": "country", "groups": [3], "name": "Egypt", "name_es": "Egipto", "name_fr": "\u00c9gypte", "marine_target": null, "marine_target_year": null}, "50": {"total_marine_area": 78355, "id": 50, "marine_bounds": [38.5669, 12.68006, 43.30099, 18.10384], "code": "ERI", "total_terrestrial_area": 120884, "terrestrial_bounds": [36.43877, 12.35698, 43.13764, 18.00669], "type": "country", "groups": [3], "name": "Eritrea", "name_es": "Eritrea", "name_fr": "\u00c9rythr\u00e9e", "marine_target": null, "marine_target_year": null}, "51": {"total_marine_area": 284054, "id": 51, "marine_bounds": [-20.66692, 19.33012, -13.17315, 27.83362], "code": "ESH", "total_terrestrial_area": 267893, "terrestrial_bounds": [-17.10541, 20.76958, -8.67001, 27.6831], "type": "country", "groups": [3], "name": "Western Sahara", "name_es": "Sahara Occidental", "name_fr": "Sahara occidental", "marine_target": null, "marine_target_year": null}, "52": {"total_marine_area": 1011025, "id": 52, "marine_bounds": [-21.92039, 24.58474, 6.3, 46.874], "code": "ESP", "total_terrestrial_area": 505487, "terrestrial_bounds": [-18.16153, 27.63736, 4.3282, 43.79153], "type": "country", "groups": [6], "name": "Spain", "name_es": "Espa\u00f1a", "name_fr": "Espagne", "marine_target": 30, "marine_target_year": 2030}, "53": {"total_marine_area": 36451, "id": 53, 
"marine_bounds": [20.37133, 57.58638, 28.20899, 59.99492], "code": "EST", "total_terrestrial_area": 45249, "terrestrial_bounds": [21.76431, 57.51395, 28.20897, 59.82202], "type": "country", "groups": [6], "name": "Estonia", "name_es": "Estonia", "name_fr": "Estonie", "marine_target": 30, "marine_target_year": 2030}, "54": {"total_marine_area": 81553, "id": 54, "marine_bounds": [19.0832, 58.8445, 27.8312, 66.77516], "code": "FIN", "total_terrestrial_area": 336291, "terrestrial_bounds": [19.30469, 59.72097, 31.5828, 70.09145], "type": "country", "groups": [6], "name": "Finland", "name_es": "Finlandia", "name_fr": "Finlande", "marine_target": 30, "marine_target_year": 2030}, "55": {"total_marine_area": 1289978, "id": 55, "marine_bounds": [-180.0, -25.09711, 180.0, -9.78332], "code": "FJI", "total_terrestrial_area": 19025, "terrestrial_bounds": [-180.0, -21.0425, 180.0, -12.46172], "type": "country", "groups": [4], "name": "Fiji", "name_es": "Fiyi", "name_fr": "Fidji", "marine_target": 30, "marine_target_year": 2025}, "56": {"total_marine_area": 10113903, "id": 56, "marine_bounds": [-180.0, -53.17863, 180.0, 51.55778], "code": "FRA", "total_terrestrial_area": 668712, "terrestrial_bounds": [-178.1825, -50.01889, 172.09009, 51.0894], "type": "country", "groups": [6], "name": "France", "name_es": "Francia", "name_fr": "France", "marine_target": 30, "marine_target_year": 2030}, "57": {"total_marine_area": 3010644, "id": 57, "marine_bounds": [135.31244, -1.17311, 165.67653, 13.44543], "code": "FSM", "total_terrestrial_area": 778, "terrestrial_bounds": [137.42528, 1.02528, 163.03555, 10.09062], "type": "country", "groups": [4], "name": "Micronesia", "name_es": "Micronesia", "name_fr": "Micron\u00e9sie", "marine_target": 30, "marine_target_year": 2030}, "58": {"total_marine_area": 201759, "id": 58, "marine_bounds": [6.92245, -6.44726, 11.18054, 1.07377], "code": "GAB", "total_terrestrial_area": 265735, "terrestrial_bounds": [8.69903, -3.99069, 14.50235, 2.31564], "type": 
"country", "groups": [3], "name": "Gabon", "name_es": "Gab\u00f3n", "name_fr": "Gabon", "marine_target": null, "marine_target_year": null}, "59": {"total_marine_area": 5886843, "id": 59, "marine_bounds": [-133.43269, -62.78834, 3.4, 63.88748], "code": "GBR", "total_terrestrial_area": 263926, "terrestrial_bounds": [-130.75623, -59.48428, 72.4957, 60.84548], "type": "country", "groups": [6], "name": "United Kingdom", "name_es": "Reino Unido", "name_fr": "Royaume-Uni", "marine_target": 30, "marine_target_year": 2030}, "60": {"total_marine_area": 22944, "id": 60, "marine_bounds": [38.97585, 41.52049, 42.35496, 43.3857], "code": "GEO", "total_terrestrial_area": 69735, "terrestrial_bounds": [40.01111, 41.03851, 46.72136, 43.58454], "type": "country", "groups": [6], "name": "Georgia", "name_es": "Georgia", "name_fr": "G\u00e9orgie", "marine_target": null, "marine_target_year": null}, "61": {"total_marine_area": 227500, "id": 61, "marine_bounds": [-3.78808, 1.38688, 2.16694, 6.1223], "code": "GHA", "total_terrestrial_area": 240484, "terrestrial_bounds": [-3.26206, 4.73877, 1.20021, 11.17484], "type": "country", "groups": [3], "name": "Ghana", "name_es": "Ghana", "name_fr": "Ghana", "marine_target": null, "marine_target_year": null}, "62": {"total_marine_area": 388, "id": 62, "marine_bounds": [-5.39735, 36.01042, -4.96717, 36.15729], "code": "GIB", "total_terrestrial_area": 0, "terrestrial_bounds": null, "type": "country", "groups": [6], "name": "Gibraltar", "name_es": "Gibraltar", "name_fr": "Gibraltar", "marine_target": null, "marine_target_year": null}, "63": {"total_marine_area": 102163, "id": 63, "marine_bounds": [-17.9347, 7.47841, -13.09405, 11.3022], "code": "GIN", "total_terrestrial_area": 245806, "terrestrial_bounds": [-15.07625, 7.19355, -7.64107, 12.6915], "type": "country", "groups": [3], "name": "Guinea", "name_es": "Guinea", "name_fr": "Guin\u00e9e", "marine_target": null, "marine_target_year": null}, "64": {"total_marine_area": 23097, "id": 64, 
"marine_bounds": [-20.23731, 13.05583, -15.29988, 13.64283], "code": "GMB", "total_terrestrial_area": 10711, "terrestrial_bounds": [-16.81736, 13.06466, -13.79093, 13.82689], "type": "country", "groups": [3], "name": "Gambia", "name_es": "Gambia", "name_fr": "Gambie", "marine_target": null, "marine_target_year": null}, "65": {"total_marine_area": 170330, "id": 65, "marine_bounds": [-20.18917, 8.63918, -14.82274, 12.41352], "code": "GNB", "total_terrestrial_area": 34015, "terrestrial_bounds": [-16.71489, 10.86431, -13.63652, 12.68544], "type": "country", "groups": [3], "name": "Guinea-Bissau", "name_es": "Guinea-Bis\u00e1u", "name_fr": "Guin\u00e9e-Bissau", "marine_target": null, "marine_target_year": null}, "66": {"total_marine_area": 304133, "id": 66, "marine_bounds": [2.28586, -4.82016, 9.92008, 4.12341], "code": "GNQ", "total_terrestrial_area": 27080, "terrestrial_bounds": [5.61644, -1.46764, 11.33744, 3.78875], "type": "country", "groups": [3], "name": "Equatorial Guinea", "name_es": "Guinea Ecuatorial", "name_fr": "Guin\u00e9e \u00e9quatoriale", "marine_target": null, "marine_target_year": null}, "67": {"total_marine_area": 482910, "id": 67, "marine_bounds": [18.26167, 33.28103, 30.10359, 41.09133], "code": "GRC", "total_terrestrial_area": 132468, "terrestrial_bounds": [19.37236, 34.80069, 29.6457, 41.74801], "type": "country", "groups": [6], "name": "Greece", "name_es": "Grecia", "name_fr": "Gr\u00e8ce", "marine_target": 30, "marine_target_year": 2030}, "68": {"total_marine_area": 25571, "id": 68, "marine_bounds": [-63.26709, 11.3668, -60.78119, 13.35159], "code": "GRD", "total_terrestrial_area": 361, "terrestrial_bounds": [-61.80208, 11.98431, -61.37819, 12.54014], "type": "country", "groups": [8], "name": "Grenada", "name_es": "Granada", "name_fr": "Grenade", "marine_target": null, "marine_target_year": null}, "69": {"total_marine_area": 110695, "id": 69, "marine_bounds": [-94.30881, 10.58415, -88.21344, 16.07235], "code": "GTM", "total_terrestrial_area": 
109528, "terrestrial_bounds": [-92.22236, 13.73828, -88.22569, 17.81652], "type": "country", "groups": [8], "name": "Guatemala", "name_es": "Guatemala", "name_fr": "Guatemala", "marine_target": null, "marine_target_year": null}, "70": {"total_marine_area": 138739, "id": 70, "marine_bounds": [-59.84489, 5.99615, -55.76734, 10.9765], "code": "GUY", "total_terrestrial_area": 210663, "terrestrial_bounds": [-61.38692, 1.17677, -56.48025, 8.53092], "type": "country", "groups": [8], "name": "Guyana", "name_es": "Guyana", "name_fr": "Guyana", "marine_target": null, "marine_target_year": null}, "71": {"total_marine_area": 210711, "id": 71, "marine_bounds": [-88.2378, 12.98399, -80.22399, 19.5405], "code": "HND", "total_terrestrial_area": 112695, "terrestrial_bounds": [-89.35079, 12.98454, -82.40569, 17.41847], "type": "country", "groups": [8], "name": "Honduras", "name_es": "Honduras", "name_fr": "Honduras", "marine_target": null, "marine_target_year": null}, "72": {"total_marine_area": 55502, "id": 72, "marine_bounds": [13.00833, 41.6297, 18.54925, 45.56495], "code": "HRV", "total_terrestrial_area": 56951, "terrestrial_bounds": [13.48958, 42.38543, 19.43518, 46.55052], "type": "country", "groups": [6], "name": "Croatia", "name_es": "Croacia", "name_fr": "Croatie", "marine_target": 30, "marine_target_year": 2030}, "73": {"total_marine_area": 117375, "id": 73, "marine_bounds": [-75.83244, 14.87816, -71.75862, 20.72252], "code": "HTI", "total_terrestrial_area": 27190, "terrestrial_bounds": [-74.48125, 18.02181, -71.61815, 20.09042], "type": "country", "groups": [8], "name": "Haiti", "name_es": "Hait\u00ed", "name_fr": "Ha\u00efti", "marine_target": 30, "marine_target_year": 2030}, "74": {"total_marine_area": 6020917, "id": 74, "marine_bounds": [92.0506, -13.94214, 141.4, 7.78333], "code": "IDN", "total_terrestrial_area": 1898599, "terrestrial_bounds": [95.00971, -11.00754, 141.01939, 6.07694], "type": "country", "groups": [4], "name": "Indonesia", "name_es": "Indonesia", 
"name_fr": "Indon\u00e9sie", "marine_target": 30, "marine_target_year": 2045}, "75": {"total_marine_area": 2323935, "id": 75, "marine_bounds": [65.63915, 3.84066, 95.69667, 23.99004], "code": "IND", "total_terrestrial_area": 3158952, "terrestrial_bounds": [68.18625, 6.75426, 97.41516, 35.50133], "type": "country", "groups": [4], "name": "India", "name_es": "India", "name_fr": "Inde", "marine_target": null, "marine_target_year": null}, "76": {"total_marine_area": 427039, "id": 76, "marine_bounds": [-16.07389, 48.17871, -5.27233, 56.7], "code": "IRL", "total_terrestrial_area": 69978, "terrestrial_bounds": [-10.66284, 51.41991, -5.9945, 55.43514], "type": "country", "groups": [6], "name": "Ireland", "name_es": "Irlanda", "name_fr": "Irlande", "marine_target": 30, "marine_target_year": 2030}, "77": {"total_marine_area": 221505, "id": 77, "marine_bounds": [48.10217, 23.34667, 61.6123, 38.70502], "code": "IRN", "total_terrestrial_area": 1622723, "terrestrial_bounds": [44.04726, 25.05875, 63.31746, 39.77722], "type": "country", "groups": [4], "name": "Iran", "name_es": "Ir\u00e1n", "name_fr": "Iran", "marine_target": null, "marine_target_year": null}, "78": {"total_marine_area": 1187, "id": 78, "marine_bounds": [47.83492, 29.47813, 48.86193, 30.45285], "code": "IRQ", "total_terrestrial_area": 436671, "terrestrial_bounds": [38.79684, 29.05857, 48.56856, 37.37804], "type": "country", "groups": [9], "name": "Iraq", "name_es": "Irak", "name_fr": "Irak", "marine_target": null, "marine_target_year": null}, "79": {"total_marine_area": 810005, "id": 79, "marine_bounds": [-30.86792, 59.96894, -5.57341, 70.58333], "code": "ISL", "total_terrestrial_area": 101557, "terrestrial_bounds": [-24.53281, 63.09583, -12.05052, 66.56458], "type": "country", "groups": [6], "name": "Iceland", "name_es": "Islandia", "name_fr": "Islande", "marine_target": null, "marine_target_year": null}, "80": {"total_marine_area": 25748, "id": 80, "marine_bounds": [32.97222, 29.44996, 35.10561, 33.48099], 
"code": "ISR", "total_terrestrial_area": 22177, "terrestrial_bounds": [34.26801, 29.49708, 35.90094, 33.36403], "type": "country", "groups": [6], "name": "Israel", "name_es": "Israel", "name_fr": "Isra\u00ebl", "marine_target": null, "marine_target_year": null}, "81": {"total_marine_area": 536721, "id": 81, "marine_bounds": [5.88972, 35.06441, 18.99524, 45.80891], "code": "ITA", "total_terrestrial_area": 300227, "terrestrial_bounds": [6.63088, 35.49292, 18.52069, 47.09265], "type": "country", "groups": [6], "name": "Italy", "name_es": "Italia", "name_fr": "Italie", "marine_target": 30, "marine_target_year": 2030}, "82": {"total_marine_area": 286046, "id": 82, "marine_bounds": [-80.83306, 14.08333, -74.00943, 19.35781], "code": "JAM", "total_terrestrial_area": 11035, "terrestrial_bounds": [-78.36903, 17.02041, -75.96986, 18.52514], "type": "country", "groups": [8], "name": "Jamaica", "name_es": "Jamaica", "name_fr": "Jama\u00efque", "marine_target": null, "marine_target_year": null}, "83": {"total_marine_area": 97, "id": 83, "marine_bounds": [34.88535, 29.35736, 35.00137, 29.54252], "code": "JOR", "total_terrestrial_area": 89272, "terrestrial_bounds": [34.95764, 29.1834, 39.30209, 33.36817], "type": "country", "groups": [9], "name": "Jordan", "name_es": "Jordania", "name_fr": "Jordanie", "marine_target": null, "marine_target_year": null}, "84": {"total_marine_area": 4438264, "id": 84, "marine_bounds": [122.38639, 17.06508, 157.63789, 47.73081], "code": "JPN", "total_terrestrial_area": 372294, "terrestrial_bounds": [122.9332, 24.04542, 153.98692, 45.52271], "type": "country", "groups": [4], "name": "Japan", "name_es": "Jap\u00f3n", "name_fr": "Japon", "marine_target": 30, "marine_target_year": 2030}, "85": {"total_marine_area": 114383, "id": 85, "marine_bounds": [49.0, 41.22277, 53.84526, 47.13061], "code": "KAZ", "total_terrestrial_area": 2722562, "terrestrial_bounds": [46.49186, 40.55167, 87.31266, 55.43181], "type": "country", "groups": [6], "name": "Kazakhstan", 
"name_es": "Kazajist\u00e1n", "name_fr": "Kazakhstan", "marine_target": null, "marine_target_year": null}, "86": {"total_marine_area": 164062, "id": 86, "marine_bounds": [39.2213, -4.90032, 45.9452, -1.654], "code": "KEN", "total_terrestrial_area": 588601, "terrestrial_bounds": [33.90959, -4.72042, 41.92622, 5.06117], "type": "country", "groups": [3], "name": "Kenya", "name_es": "Kenia", "name_fr": "Kenya", "marine_target": 30, "marine_target_year": 2030}, "87": {"total_marine_area": 48697, "id": 87, "marine_bounds": [101.3026, 8.78188, 104.52001, 11.83098], "code": "KHM", "total_terrestrial_area": 182393, "terrestrial_bounds": [102.33383, 9.91362, 107.62768, 14.69027], "type": "country", "groups": [4], "name": "Cambodia", "name_es": "Camboya", "name_fr": "Cambodge", "marine_target": null, "marine_target_year": null}, "88": {"total_marine_area": 3440220, "id": 88, "marine_bounds": [-180.0, -13.83833, 180.0, 7.87906], "code": "KIR", "total_terrestrial_area": 1017, "terrestrial_bounds": [-174.5434, -17.85211, 176.84869, 4.69949], "type": "country", "groups": [4], "name": "Kiribati", "name_es": "Kiribati", "name_fr": "Kiribati", "marine_target": null, "marine_target_year": null}, "89": {"total_marine_area": 9502, "id": 89, "marine_bounds": [-63.62492, 16.34156, -62.37419, 17.66173], "code": "KNA", "total_terrestrial_area": 268, "terrestrial_bounds": [-62.86431, 17.09347, -62.53931, 17.41819], "type": "country", "groups": [8], "name": "Saint Kitts and Nevis", "name_es": "San Crist\u00f3bal y Nieves", "name_fr": "Saint-Christophe-et-Ni\u00e9v\u00e8s", "marine_target": null, "marine_target_year": null}, "90": {"total_marine_area": 431905, "id": 90, "marine_bounds": [122.89634, 28.6, 133.80609, 39.83969], "code": "KOR", "total_terrestrial_area": 100514, "terrestrial_bounds": [124.60972, 33.11236, 131.87147, 38.61177], "type": "country", "groups": [4], "name": "South Korea", "name_es": "Corea del Sur", "name_fr": "Cor\u00e9e du Sud", "marine_target": 30, 
"marine_target_year": 2030}, "91": {"total_marine_area": 11179, "id": 91, "marine_bounds": [47.70244, 28.53402, 49.55047, 30.03015], "code": "KWT", "total_terrestrial_area": 17450, "terrestrial_bounds": [46.54261, 28.52436, 48.65403, 30.08444], "type": "country", "groups": [9], "name": "Kuwait", "name_es": "Kuwait", "name_fr": "Kowe\u00eft", "marine_target": null, "marine_target_year": null}, "92": {"total_marine_area": 20184, "id": 92, "marine_bounds": [33.69465, 33.09507, 35.99164, 34.81245], "code": "LBN", "total_terrestrial_area": 10241, "terrestrial_bounds": [35.10263, 33.055, 36.62183, 34.69141], "type": "country", "groups": [9], "name": "Lebanon", "name_es": "L\u00edbano", "name_fr": "Liban", "marine_target": null, "marine_target_year": null}, "93": {"total_marine_area": 251781, "id": 93, "marine_bounds": [-13.56693, 1.00107, -7.33043, 6.93907], "code": "LBR", "total_terrestrial_area": 96332, "terrestrial_bounds": [-11.48569, 4.35292, -7.36511, 8.55179], "type": "country", "groups": [3], "name": "Liberia", "name_es": "Liberia", "name_fr": "Liberia", "marine_target": null, "marine_target_year": null}, "94": {"total_marine_area": 364524, "id": 94, "marine_bounds": [11.56139, 30.26705, 26.19226, 35.42515], "code": "LBY", "total_terrestrial_area": 1618818, "terrestrial_bounds": [9.39174, 19.50817, 25.14847, 33.16542], "type": "country", "groups": [3], "name": "Libya", "name_es": "Libia", "name_fr": "Libye", "marine_target": null, "marine_target_year": null}, "95": {"total_marine_area": 15413, "id": 95, "marine_bounds": [-62.86401, 13.24324, -59.99972, 14.2725], "code": "LCA", "total_terrestrial_area": 617, "terrestrial_bounds": [-61.08014, 13.70708, -60.86986, 14.11042], "type": "country", "groups": [8], "name": "Saint Lucia", "name_es": "Santa Luc\u00eda", "name_fr": "Sainte-Lucie", "marine_target": null, "marine_target_year": null}, "96": {"total_marine_area": 533559, "id": 96, "marine_bounds": [77.02333, 2.56649, 85.23292, 11.44882], "code": "LKA", 
"total_terrestrial_area": 66117, "terrestrial_bounds": [79.52181, 5.91847, 81.87875, 9.83597], "type": "country", "groups": [4], "name": "Sri Lanka", "name_es": "Sri Lanka", "name_fr": "Sri Lanka", "marine_target": 30, "marine_target_year": 2030}, "97": {"total_marine_area": 6832, "id": 97, "marine_bounds": [19.02348, 55.18488, 21.53884, 56.06913], "code": "LTU", "total_terrestrial_area": 64719, "terrestrial_bounds": [20.9506, 53.89011, 26.84722, 56.44703], "type": "country", "groups": [6], "name": "Lithuania", "name_es": "Lituania", "name_fr": "Lituanie", "marine_target": 30, "marine_target_year": 2030}, "98": {"total_marine_area": 28353, "id": 98, "marine_bounds": [19.09754, 56.04173, 24.41247, 58.02407], "code": "LVA", "total_terrestrial_area": 64392, "terrestrial_bounds": [20.97139, 55.66372, 28.24051, 58.08557], "type": "country", "groups": [6], "name": "Latvia", "name_es": "Letonia", "name_fr": "Lettonie", "marine_target": 30, "marine_target_year": 2030}, "99": {"total_marine_area": 564611, "id": 99, "marine_bounds": [-20.66692, 19.33012, -2.05752, 36.0051], "code": "MAR", "total_terrestrial_area": 413802, "terrestrial_bounds": [-13.16792, 27.67007, -0.99627, 35.92264], "type": "country", "groups": [3], "name": "Morocco", "name_es": "Marruecos", "name_fr": "Maroc", "marine_target": null, "marine_target_year": null}, "100": {"total_marine_area": 288, "id": 100, "marine_bounds": [7.41979, 42.94639, 7.75694, 43.74984], "code": "MCO", "total_terrestrial_area": 2, "terrestrial_bounds": [7.40953, 43.72264, 7.44014, 43.75088], "type": "country", "groups": [6], "name": "Monaco", "name_es": "M\u00f3naco", "name_fr": "Monaco", "marine_target": 30, "marine_target_year": 2030}, "101": {"total_marine_area": 1514346, "id": 101, "marine_bounds": [40.33437, -28.9513, 57.21419, -10.33732], "code": "MDG", "total_terrestrial_area": 593211, "terrestrial_bounds": [43.18819, -25.60625, 50.48653, -11.94875], "type": "country", "groups": [3], "name": "Madagascar", "name_es": 
"Madagascar", "name_fr": "Madagascar", "marine_target": 30, "marine_target_year": 2030}, "102": {"total_marine_area": 920739, "id": 102, "marine_bounds": [69.20593, -3.31164, 77.10595, 8.09389], "code": "MDV", "total_terrestrial_area": 301, "terrestrial_bounds": [72.63819, -0.70319, 73.76041, 7.10653], "type": "country", "groups": [4], "name": "Maldives", "name_es": "Maldivas", "name_fr": "Maldives", "marine_target": null, "marine_target_year": null}, "103": {"total_marine_area": 3187013, "id": 103, "marine_bounds": [-122.1791, 11.87329, -84.64185, 32.62694], "code": "MEX", "total_terrestrial_area": 1955607, "terrestrial_bounds": [-118.36648, 14.53507, -86.71074, 32.71863], "type": "country", "groups": [8], "name": "Mexico", "name_es": "M\u00e9xico", "name_fr": "Mexique", "marine_target": null, "marine_target_year": null}, "104": {"total_marine_area": 2001566, "id": 104, "marine_bounds": [157.46054, 1.77731, 175.52347, 17.94606], "code": "MHL", "total_terrestrial_area": 302, "terrestrial_bounds": [160.79527, 4.5725, 172.17261, 14.72319], "type": "country", "groups": [4], "name": "Marshall Islands", "name_es": "Islas Marshall", "name_fr": "\u00celes Marshall", "marine_target": null, "marine_target_year": null}, "105": {"total_marine_area": 52923, "id": 105, "marine_bounds": [13.41697, 34.21329, 17.49726, 36.5124], "code": "MLT", "total_terrestrial_area": 323, "terrestrial_bounds": [14.1836, 35.78597, 14.57653, 36.08236], "type": "country", "groups": [6], "name": "Malta", "name_es": "Malta", "name_fr": "Malte", "marine_target": 30, "marine_target_year": 2030}, "106": {"total_marine_area": 497460, "id": 106, "marine_bounds": [90.14458, 9.42939, 99.13974, 21.13243], "code": "MMR", "total_terrestrial_area": 671115, "terrestrial_bounds": [92.1725, 8.82445, 101.17678, 28.54326], "type": "country", "groups": [4], "name": "Myanmar", "name_es": "Myanmar", "name_fr": "Myanmar", "marine_target": null, "marine_target_year": null}, "107": {"total_marine_area": 6383, "id": 107, 
"marine_bounds": [18.0183, 41.44175, 19.38823, 42.5168], "code": "MNE", "total_terrestrial_area": 13314, "terrestrial_bounds": [18.45892, 41.84792, 20.31765, 43.56264], "type": "country", "groups": [6], "name": "Montenegro", "name_es": "Montenegro", "name_fr": "Mont\u00e9n\u00e9gro", "marine_target": null, "marine_target_year": null}, "108": {"total_marine_area": 566292, "id": 108, "marine_bounds": [32.43062, -27.71605, 43.0091, -10.09139], "code": "MOZ", "total_terrestrial_area": 790031, "terrestrial_bounds": [30.21944, -26.86869, 40.83931, -10.47125], "type": "country", "groups": [3], "name": "Mozambique", "name_es": "Mozambique", "name_fr": "Mozambique", "marine_target": null, "marine_target_year": null}, "109": {"total_marine_area": 173180, "id": 109, "marine_bounds": [-20.26901, 16.06375, -16.02022, 21.16636], "code": "MRT", "total_terrestrial_area": 1044148, "terrestrial_bounds": [-17.06652, 14.71555, -4.82996, 27.29807], "type": "country", "groups": [3], "name": "Mauritania", "name_es": "Mauritania", "name_fr": "Mauritanie", "marine_target": 30, "marine_target_year": 2030}, "110": {"total_marine_area": 2203541, "id": 110, "marine_bounds": [52.39061, -23.80956, 75.85287, -2.28834], "code": "MUS", "total_terrestrial_area": 2033, "terrestrial_bounds": [56.58569, -20.52569, 63.50347, -10.33708], "type": "country", "groups": [3], "name": "Mauritius", "name_es": "Mauricio", "name_fr": "Maurice", "marine_target": null, "marine_target_year": null}, "111": {"total_marine_area": 510804, "id": 111, "marine_bounds": [98.025, 1.2123, 119.49453, 8.99089], "code": "MYS", "total_terrestrial_area": 331036, "terrestrial_bounds": [99.64072, 0.85372, 119.26972, 7.38056], "type": "country", "groups": [4], "name": "Malaysia", "name_es": "Malasia", "name_fr": "Malaisie", "marine_target": null, "marine_target_year": null}, "112": {"total_marine_area": 562212, "id": 112, "marine_bounds": [8.24329, -30.65814, 16.82843, -17.24295], "code": "NAM", "total_terrestrial_area": 826139, 
"terrestrial_bounds": [11.73486, -28.96945, 25.26771, -16.95989], "type": "country", "groups": [3], "name": "Namibia", "name_es": "Namibia", "name_fr": "Namibie", "marine_target": null, "marine_target_year": null}, "113": {"total_marine_area": 213582, "id": 113, "marine_bounds": [2.68486, 1.15472, 8.65353, 6.62269], "code": "NGA", "total_terrestrial_area": 912130, "terrestrial_bounds": [2.66843, 4.27042, 14.67642, 13.89201], "type": "country", "groups": [3], "name": "Nigeria", "name_es": "Nigeria", "name_fr": "Nig\u00e9ria", "marine_target": null, "marine_target_year": null}, "114": {"total_marine_area": 213813, "id": 114, "marine_bounds": [-89.4242, 9.7231, -79.23975, 16.00875], "code": "NIC", "total_terrestrial_area": 128669, "terrestrial_bounds": [-87.69097, 10.70738, -81.99986, 15.02591], "type": "country", "groups": [8], "name": "Nicaragua", "name_es": "Nicaragua", "name_fr": "Nicaragua", "marine_target": null, "marine_target_year": null}, "115": {"total_marine_area": 145991, "id": 115, "marine_bounds": [-70.41768, 11.66667, 7.21028, 55.765], "code": "NLD", "total_terrestrial_area": 38525, "terrestrial_bounds": [-70.06347, 11.97819, 7.2271, 53.55459], "type": "country", "groups": [6], "name": "Netherlands", "name_es": "Pa\u00edses Bajos", "name_fr": "Pays-Bas", "marine_target": 30, "marine_target_year": 2030}, "116": {"total_marine_area": 2068669, "id": 116, "marine_bounds": [-13.62956, 56.08667, 38.0, 84.14578], "code": "NOR", "total_terrestrial_area": 382992, "terrestrial_bounds": [-9.07656, -54.46458, 36.80782, 82.5375], "type": "country", "groups": [6], "name": "Norway", "name_es": "Noruega", "name_fr": "Norv\u00e8ge", "marine_target": null, "marine_target_year": null}, "117": {"total_marine_area": 309261, "id": 117, "marine_bounds": [163.5788, -3.90731, 168.56024, 2.69385], "code": "NRU", "total_terrestrial_area": 22, "terrestrial_bounds": [166.90953, -0.55431, 166.95934, -0.50207], "type": "country", "groups": [4], "name": "Nauru", "name_es": "Nauru", 
"name_fr": "Nauru", "marine_target": null, "marine_target_year": null}, "118": {"total_marine_area": 6712792, "id": 118, "marine_bounds": [-180.0, -55.9493, 180.0, -5.79822], "code": "NZL", "total_terrestrial_area": 268886, "terrestrial_bounds": [-178.82695, -52.62083, 179.06583, -8.53194], "type": "country", "groups": [4], "name": "New Zealand", "name_es": "Nueva Zelanda", "name_fr": "Nouvelle-Z\u00e9lande", "marine_target": null, "marine_target_year": null}, "119": {"total_marine_area": 556490, "id": 119, "marine_bounds": [53.10934, 13.76417, 63.36954, 26.7375], "code": "OMN", "total_terrestrial_area": 310026, "terrestrial_bounds": [52.0, 16.64236, 59.83931, 26.50681], "type": "country", "groups": [9], "name": "Oman", "name_es": "Om\u00e1n", "name_fr": "Oman", "marine_target": null, "marine_target_year": null}, "120": {"total_marine_area": 224374, "id": 120, "marine_bounds": [61.41667, 20.98034, 68.90909, 25.59738], "code": "PAK", "total_terrestrial_area": 875180, "terrestrial_bounds": [60.89944, 23.70292, 77.84308, 37.09701], "type": "country", "groups": [4], "name": "Pakistan", "name_es": "Pakist\u00e1n", "name_fr": "Pakistan", "marine_target": null, "marine_target_year": null}, "121": {"total_marine_area": 331318, "id": 121, "marine_bounds": [-84.31667, 5.0, -77.05, 12.5], "code": "PAN", "total_terrestrial_area": 75769, "terrestrial_bounds": [-83.05189, 7.20236, -77.12928, 9.64736], "type": "country", "groups": [8], "name": "Panama", "name_es": "Panam\u00e1", "name_fr": "Panama", "marine_target": 30, "marine_target_year": 2030}, "122": {"total_marine_area": 870795, "id": 122, "marine_bounds": [-84.67071, -20.20814, -70.37955, -3.22444], "code": "PER", "total_terrestrial_area": 1297731, "terrestrial_bounds": [-81.3307, -18.3518, -68.65221, -0.03896], "type": "country", "groups": [8], "name": "Peru", "name_es": "Per\u00fa", "name_fr": "P\u00e9rou", "marine_target": null, "marine_target_year": null}, "123": {"total_marine_area": 1971033, "id": 123, 
"marine_bounds": [113.67772, 3.10545, 129.94384, 22.25356], "code": "PHL", "total_terrestrial_area": 297006, "terrestrial_bounds": [116.92834, 4.58694, 126.60535, 21.07014], "type": "country", "groups": [4], "name": "Philippines", "name_es": "Filipinas", "name_fr": "Philippines", "marine_target": null, "marine_target_year": null}, "124": {"total_marine_area": 614807, "id": 124, "marine_bounds": [129.5088, 1.62141, 136.9541, 11.55872], "code": "PLW", "total_terrestrial_area": 486, "terrestrial_bounds": [131.11986, 2.97138, 134.72139, 8.09417], "type": "country", "groups": [4], "name": "Palau", "name_es": "Palaos", "name_fr": "Palaos", "marine_target": 30, "marine_target_year": 2030}, "125": {"total_marine_area": 2403355, "id": 125, "marine_bounds": [139.20137, -14.74845, 162.80338, 2.59597], "code": "PNG", "total_terrestrial_area": 465612, "terrestrial_bounds": [140.8405, -11.65538, 157.03778, -0.75583], "type": "country", "groups": [4], "name": "Papua New Guinea", "name_es": "Pap\u00faa Nueva Guinea", "name_fr": "Papouasie-Nouvelle-Guin\u00e9e", "marine_target": null, "marine_target_year": null}, "126": {"total_marine_area": 29982, "id": 126, "marine_bounds": [14.2014, 52.65352, 19.80486, 55.92155], "code": "POL", "total_terrestrial_area": 311193, "terrestrial_bounds": [14.12288, 49.00204, 24.14578, 54.83642], "type": "country", "groups": [6], "name": "Poland", "name_es": "Polonia", "name_fr": "Pologne", "marine_target": null, "marine_target_year": null}, "127": {"total_marine_area": 114379, "id": 127, "marine_bounds": [123.56226, 36.96683, 133.18005, 42.42638], "code": "PRK", "total_terrestrial_area": 122573, "terrestrial_bounds": [124.18236, 37.67598, 130.67439, 43.00605], "type": "country", "groups": [4], "name": "North Korea", "name_es": "Corea del Norte", "name_fr": "Cor\u00e9e du Nord", "marine_target": null, "marine_target_year": null}, "128": {"total_marine_area": 1728718, "id": 128, "marine_bounds": [-35.58558, 29.24785, -7.25694, 43.06483], "code": "PRT", 
"total_terrestrial_area": 91790, "terrestrial_bounds": [-31.26818, 30.03019, -6.18914, 42.15432], "type": "country", "groups": [6], "name": "Portugal", "name_es": "Portugal", "name_fr": "Portugal", "marine_target": 30, "marine_target_year": 2030}, "129": {"total_marine_area": 1120, "id": 129, "marine_bounds": [34.00897, 31.33598, 34.4727, 31.78946], "code": "PSE", "total_terrestrial_area": 6225, "terrestrial_bounds": [34.22903, 31.22361, 35.57545, 32.55157], "type": "country", "groups": [9], "name": "Palestine", "name_es": "Palestina", "name_fr": "Palestine", "marine_target": null, "marine_target_year": null}, "130": {"total_marine_area": 31489, "id": 130, "marine_bounds": [50.56453, 24.50638, 53.03472, 27.04333], "code": "QAT", "total_terrestrial_area": 11607, "terrestrial_bounds": [50.73653, 24.47075, 51.64736, 26.18403], "type": "country", "groups": [9], "name": "Qatar", "name_es": "Catar", "name_fr": "Qatar", "marine_target": null, "marine_target_year": null}, "131": {"total_marine_area": 29606, "id": 131, "marine_bounds": [28.52782, 43.4398, 31.40972, 45.21284], "code": "ROU", "total_terrestrial_area": 237763, "terrestrial_bounds": [20.26354, 43.61933, 29.71986, 48.2656], "type": "country", "groups": [6], "name": "Romania", "name_es": "Rumania", "name_fr": "Roumanie", "marine_target": 30, "marine_target_year": 2030}, "132": {"total_marine_area": 7957053, "id": 132, "marine_bounds": [-180.0, 39.72669, 180.0, 85.178], "code": "RUS", "total_terrestrial_area": 16831214, "terrestrial_bounds": [-180.0, 41.18887, 180.0, 81.85625], "type": "country", "groups": [6], "name": "Russia", "name_es": "Rusia", "name_fr": "Russie", "marine_target": null, "marine_target_year": null}, "133": {"total_marine_area": 224345, "id": 133, "marine_bounds": [34.45589, 16.29, 51.72543, 29.37521], "code": "SAU", "total_terrestrial_area": 1927296, "terrestrial_bounds": [34.49441, 16.37953, 55.66669, 32.15434], "type": "country", "groups": [9], "name": "Saudi Arabia", "name_es": "Arabia 
Saudita", "name_fr": "Arabie Saoudite", "marine_target": null, "marine_target_year": null}, "134": {"total_marine_area": 82560, "id": 134, "marine_bounds": [35.61351, 18.02317, 39.73517, 23.33313], "code": "SDN", "total_terrestrial_area": 1878546, "terrestrial_bounds": [21.83895, 8.67912, 38.8493, 23.14515], "type": "country", "groups": [3], "name": "Sudan", "name_es": "Sud\u00e1n", "name_fr": "Soudan", "marine_target": null, "marine_target_year": null}, "135": {"total_marine_area": 221818, "id": 135, "marine_bounds": [-20.21423, 9.33944, -15.21077, 16.52891], "code": "SEN", "total_terrestrial_area": 197610, "terrestrial_bounds": [-17.54319, 12.30786, -11.34252, 16.69207], "type": "country", "groups": [3], "name": "Senegal", "name_es": "Senegal", "name_fr": "S\u00e9n\u00e9gal", "marine_target": null, "marine_target_year": null}, "136": {"total_marine_area": 714, "id": 136, "marine_bounds": [103.54738, 1.13036, 104.09857, 1.47797], "code": "SGP", "total_terrestrial_area": 700, "terrestrial_bounds": [103.60906, 1.16639, 104.0858, 1.47139], "type": "country", "groups": [4], "name": "Singapore", "name_es": "Singapur", "name_fr": "Singapour", "marine_target": null, "marine_target_year": null}, "137": {"total_marine_area": 1605325, "id": 137, "marine_bounds": [154.58556, -16.12694, 173.5934, -4.13994], "code": "SLB", "total_terrestrial_area": 28652, "terrestrial_bounds": [155.3925, -12.30833, 170.19251, -4.44522], "type": "country", "groups": [4], "name": "Solomon Islands", "name_es": "Islas Salom\u00f3n", "name_fr": "\u00celes Salomon", "marine_target": null, "marine_target_year": null}, "138": {"total_marine_area": 160584, "id": 138, "marine_bounds": [-16.55379, 4.18726, -11.44519, 9.13123], "code": "SLE", "total_terrestrial_area": 72905, "terrestrial_bounds": [-13.30351, 6.91762, -10.26575, 10.00043], "type": "country", "groups": [3], "name": "Sierra Leone", "name_es": "Sierra Leona", "name_fr": "Sierra Leone", "marine_target": null, "marine_target_year": null}, 
"139": {"total_marine_area": 95099, "id": 139, "marine_bounds": [-91.4401, 9.94488, -87.59715, 13.74772], "code": "SLV", "total_terrestrial_area": 20499, "terrestrial_bounds": [-90.12486, 13.15264, -87.68375, 14.45055], "type": "country", "groups": [8], "name": "El Salvador", "name_es": "El Salvador", "name_fr": "Salvador", "marine_target": null, "marine_target_year": null}, "140": {"total_marine_area": 832072, "id": 140, "marine_bounds": [41.56622, -3.55475, 54.44504, 13.52464], "code": "SOM", "total_terrestrial_area": 636254, "terrestrial_bounds": [40.9785, -1.64708, 51.4157, 11.98931], "type": "country", "groups": [3], "name": "Somalia", "name_es": "Somalia", "name_fr": "Somalie", "marine_target": null, "marine_target_year": null}, "141": {"total_marine_area": 165378, "id": 141, "marine_bounds": [3.20264, -1.48152, 8.55014, 3.03944], "code": "STP", "total_terrestrial_area": 1007, "terrestrial_bounds": [6.45986, -0.01403, 7.46264, 1.70153], "type": "country", "groups": [3], "name": "S\u00e3o Tom\u00e9 and Pr\u00edncipe", "name_es": "Santo Tom\u00e9 y Pr\u00edncipe", "name_fr": "S\u00e3o Tom\u00e9-et-Principe", "marine_target": null, "marine_target_year": null}, "142": {"total_marine_area": 133303, "id": 142, "marine_bounds": [-57.34074, 5.09766, -52.52102, 9.35309], "code": "SUR", "total_terrestrial_area": 147072, "terrestrial_bounds": [-58.08656, 1.83115, -53.97749, 6.01514], "type": "country", "groups": [8], "name": "Suriname", "name_es": "Surinam", "name_fr": "Suriname", "marine_target": 30, "marine_target_year": 2035}, "143": {"total_marine_area": 312, "id": 143, "marine_bounds": [13.21167, 45.39906, 13.74668, 45.63], "code": "SVN", "total_terrestrial_area": 19907, "terrestrial_bounds": [13.38212, 45.42828, 16.58431, 46.87822], "type": "country", "groups": [6], "name": "Slovenia", "name_es": "Eslovenia", "name_fr": "Slov\u00e9nie", "marine_target": 30, "marine_target_year": 2030}, "144": {"total_marine_area": 155487, "id": 144, "marine_bounds": [10.03003, 
54.96244, 24.18972, 67.08059], "code": "SWE", "total_terrestrial_area": 447344, "terrestrial_bounds": [10.96139, 55.33625, 24.1724, 69.05904], "type": "country", "groups": [6], "name": "Sweden", "name_es": "Suecia", "name_fr": "Su\u00e8de", "marine_target": 30, "marine_target_year": 2030}, "145": {"total_marine_area": 1341504, "id": 145, "marine_bounds": [43.17607, -12.7565, 59.63137, -0.3609], "code": "SYC", "total_terrestrial_area": 493, "terrestrial_bounds": [46.20368, -10.22736, 56.29569, -3.71264], "type": "country", "groups": [3], "name": "Seychelles", "name_es": "Seychelles", "name_fr": "Seychelles", "marine_target": 30, "marine_target_year": 2030}, "146": {"total_marine_area": 10269, "id": 146, "marine_bounds": [34.96334, 34.59925, 35.97254, 36.0625], "code": "SYR", "total_terrestrial_area": 186930, "terrestrial_bounds": [35.71625, 32.31068, 42.38504, 37.31915], "type": "country", "groups": [9], "name": "Syria", "name_es": "Siria", "name_fr": "Syrie", "marine_target": null, "marine_target_year": null}, "147": {"total_marine_area": 15447, "id": 147, "marine_bounds": [1.19951, 2.88401, 2.41623, 6.2357], "code": "TGO", "total_terrestrial_area": 57143, "terrestrial_bounds": [-0.14401, 6.11153, 1.80753, 11.13804], "type": "country", "groups": [3], "name": "Togo", "name_es": "Togo", "name_fr": "Togo", "marine_target": null, "marine_target_year": null}, "148": {"total_marine_area": 298683, "id": 148, "marine_bounds": [95.53, 5.95, 103.053, 13.72709], "code": "THA", "total_terrestrial_area": 515758, "terrestrial_bounds": [97.34519, 5.61604, 105.63913, 20.46321], "type": "country", "groups": [4], "name": "Thailand", "name_es": "Tailandia", "name_fr": "Tha\u00eflande", "marine_target": null, "marine_target_year": null}, "149": {"total_marine_area": 61226, "id": 149, "marine_bounds": [51.37653, 37.31889, 54.01155, 41.76313], "code": "TKM", "total_terrestrial_area": 489172, "terrestrial_bounds": [52.44143, 35.12981, 66.6843, 42.79555], "type": "country", "groups": [6], 
"name": "Turkmenistan", "name_es": "Turkmenist\u00e1n", "name_fr": "Turkm\u00e9nistan", "marine_target": null, "marine_target_year": null}, "150": {"total_marine_area": 77473, "id": 150, "marine_bounds": [124.03175, -11.40017, 128.50201, -8.10437], "code": "TLS", "total_terrestrial_area": 14978, "terrestrial_bounds": [124.04465, -9.50465, 127.3425, -8.12694], "type": "country", "groups": [4], "name": "Timor-Leste", "name_es": "Timor-Leste", "name_fr": "Timor-Leste", "marine_target": null, "marine_target_year": null}, "151": {"total_marine_area": 666052, "id": 151, "marine_bounds": [-179.08825, -25.69606, -171.30528, -14.15464], "code": "TON", "total_terrestrial_area": 766, "terrestrial_bounds": [-176.21384, -22.34972, -173.735, -15.5658], "type": "country", "groups": [4], "name": "Tonga", "name_es": "Tonga", "name_fr": "Tonga", "marine_target": 30, "marine_target_year": 2030}, "152": {"total_marine_area": 80174, "id": 152, "marine_bounds": [-62.08306, 9.83194, -57.11442, 12.35464], "code": "TTO", "total_terrestrial_area": 5180, "terrestrial_bounds": [-61.93014, 10.04292, -60.49208, 11.35958], "type": "country", "groups": [8], "name": "Trinidad and Tobago", "name_es": "Trinidad y Tobago", "name_fr": "Trinit\u00e9-et-Tobago", "marine_target": null, "marine_target_year": null}, "153": {"total_marine_area": 99701, "id": 153, "marine_bounds": [7.81667, 33.16147, 13.68389, 38.41], "code": "TUN", "total_terrestrial_area": 154911, "terrestrial_bounds": [7.53008, 30.23681, 11.59826, 37.55986], "type": "country", "groups": [3], "name": "Tunisia", "name_es": "T\u00fanez", "name_fr": "Tunisie", "marine_target": null, "marine_target_year": null}, "154": {"total_marine_area": 262233, "id": 154, "marine_bounds": [25.44175, 34.20923, 41.54703, 43.44972], "code": "TUR", "total_terrestrial_area": 780048, "terrestrial_bounds": [25.66514, 35.81543, 44.83499, 42.10666], "type": "country", "groups": [6], "name": "Turkey", "name_es": "Turqu\u00eda", "name_fr": "Turquie", "marine_target": 
null, "marine_target_year": null}, "155": {"total_marine_area": 753133, "id": 155, "marine_bounds": [-180.0, -13.24039, 180.0, -3.96556], "code": "TUV", "total_terrestrial_area": 42, "terrestrial_bounds": [176.05875, -10.79187, 179.87135, -5.6425], "type": "country", "groups": [4], "name": "Tuvalu", "name_es": "Tuvalu", "name_fr": "Tuvalu", "marine_target": null, "marine_target_year": null}, "156": {"total_marine_area": 429227, "id": 156, "marine_bounds": [114.23736, 17.26388, 125.67969, 28.31348], "code": "TWN", "total_terrestrial_area": 36490, "terrestrial_bounds": [116.71, 20.6975, 122.10848, 26.38542], "type": "country", "groups": [4], "name": "Taiwan", "name_es": "Taiw\u00e1n", "name_fr": "Ta\u00efwan", "marine_target": null, "marine_target_year": null}, "157": {"total_marine_area": 241567, "id": 157, "marine_bounds": [38.53763, -10.90985, 43.28224, -4.67832], "code": "TZA", "total_terrestrial_area": 944573, "terrestrial_bounds": [29.32717, -11.7457, 40.44514, -0.98579], "type": "country", "groups": [3], "name": "Tanzania", "name_es": "Tanzania", "name_fr": "Tanzanie", "marine_target": null, "marine_target_year": null}, "158": {"total_marine_area": 136198, "id": 158, "marine_bounds": [29.60404, 43.18806, 38.34363, 47.3103], "code": "UKR", "total_terrestrial_area": 599471, "terrestrial_bounds": [22.14045, 44.38597, 40.21807, 52.37503], "type": "country", "groups": [6], "name": "Ukraine", "name_es": "Ucrania", "name_fr": "Ukraine", "marine_target": null, "marine_target_year": null}, "159": {"total_marine_area": 184506, "id": 159, "marine_bounds": [-58.49435, -37.85991, -50.05938, -31.52344], "code": "URY", "total_terrestrial_area": 177696, "terrestrial_bounds": [-58.44057, -34.97403, -53.09425, -30.0854], "type": "country", "groups": [8], "name": "Uruguay", "name_es": "Uruguay", "name_fr": "Uruguay", "marine_target": null, "marine_target_year": null}, "160": {"total_marine_area": 12205919, "id": 160, "marine_bounds": [-180.0, -17.55527, 180.0, 74.70884], "code": 
"USA", "total_terrestrial_area": 9467165, "terrestrial_bounds": [-179.15056, -14.37324, 179.77341, 72.6875], "type": "country", "groups": [7], "name": "United States", "name_es": "Estados Unidos", "name_fr": "\u00c9tats-Unis", "marine_target": 30, "marine_target_year": 2030}, "161": {"total_marine_area": 36244, "id": 161, "marine_bounds": [-63.36908, 12.06203, -60.29506, 14.08032], "code": "VCT", "total_terrestrial_area": 399, "terrestrial_bounds": [-61.46097, 12.57875, -61.11403, 13.38347], "type": "country", "groups": [8], "name": "Saint Vincent and the Grenadines", "name_es": "San Vicente y las Granadinas", "name_fr": "Saint-Vincent-et-les-Grenadines", "marine_target": null, "marine_target_year": null}, "162": {"total_marine_area": 478140, "id": 162, "marine_bounds": [-72.76095, 8.30483, -57.11442, 16.74694], "code": "VEN", "total_terrestrial_area": 916244, "terrestrial_bounds": [-73.35214, 0.64876, -59.80701, 15.67292], "type": "country", "groups": [8], "name": "Venezuela", "name_es": "Venezuela", "name_fr": "Venezuela", "marine_target": null, "marine_target_year": null}, "163": {"total_marine_area": 750958, "id": 163, "marine_bounds": [102.20324, 6.09667, 112.8816, 21.54716], "code": "VNM", "total_terrestrial_area": 330365, "terrestrial_bounds": [102.14458, 8.38136, 109.46917, 23.39269], "type": "country", "groups": [4], "name": "Vietnam", "name_es": "Vietnam", "name_fr": "Vietnam", "marine_target": null, "marine_target_year": null}, "164": {"total_marine_area": 810608, "id": 164, "marine_bounds": [163.3086, -25.74671, 174.27569, -12.28215], "code": "VUT", "total_terrestrial_area": 12361, "terrestrial_bounds": [166.54143, -20.25324, 170.24028, -13.07248], "type": "country", "groups": [4], "name": "Vanuatu", "name_es": "Vanuatu", "name_fr": "Vanuatu", "marine_target": null, "marine_target_year": null}, "165": {"total_marine_area": 130480, "id": 165, "marine_bounds": [-174.51139, -15.87838, -170.54266, -10.96083], "code": "WSM", "total_terrestrial_area": 2864, 
"terrestrial_bounds": [-172.80412, -14.07722, -171.3977, -13.43981], "type": "country", "groups": [4], "name": "Samoa", "name_es": "Samoa", "name_fr": "Samoa", "marine_target": null, "marine_target_year": null}, "166": {"total_marine_area": 527384, "id": 166, "marine_bounds": [41.08194, 8.95275, 57.946, 16.64959], "code": "YEM", "total_terrestrial_area": 453741, "terrestrial_bounds": [41.81458, 12.10819, 54.53542, 19.0], "type": "country", "groups": [9], "name": "Yemen", "name_es": "Yemen", "name_fr": "Y\u00e9men", "marine_target": null, "marine_target_year": null}, "167": {"total_marine_area": 1547576, "id": 167, "marine_bounds": [13.34802, -50.31506, 42.8475, -26.86206], "code": "ZAF", "total_terrestrial_area": 1221328, "terrestrial_bounds": [16.45189, -34.83514, 32.89125, -22.12503], "type": "country", "groups": [3], "name": "South Africa", "name_es": "Sud\u00e1frica", "name_fr": "Afrique du Sud", "marine_target": null, "marine_target_year": null}, "168": {"total_marine_area": 0, "id": 168, "marine_bounds": null, "code": "AFG", "total_terrestrial_area": 644050, "terrestrial_bounds": [60.50487, 29.36157, 74.89413, 38.49041], "type": "country", "groups": [4], "name": "Afghanistan", "name_es": "Afganist\u00e1n", "name_fr": "Afghanistan", "marine_target": null, "marine_target_year": null}, "169": {"total_marine_area": 0, "id": 169, "marine_bounds": null, "code": "AND", "total_terrestrial_area": 450, "terrestrial_bounds": [1.41421, 42.42956, 1.78421, 42.65208], "type": "country", "groups": [6], "name": "Andorra", "name_es": "Andorra", "name_fr": "Andorre", "marine_target": null, "marine_target_year": null}, "170": {"total_marine_area": 0, "id": 170, "marine_bounds": null, "code": "ARM", "total_terrestrial_area": 29657, "terrestrial_bounds": [43.44978, 38.83052, 46.63004, 41.30184], "type": "country", "groups": [6], "name": "Armenia", "name_es": "Armenia", "name_fr": "Arm\u00e9nie", "marine_target": null, "marine_target_year": null}, "171": {"total_marine_area": 0, 
"id": 171, "marine_bounds": null, "code": "AUT", "total_terrestrial_area": 83709, "terrestrial_bounds": [9.53095, 46.37265, 17.16207, 49.02117], "type": "country", "groups": [6], "name": "Austria", "name_es": "Austria", "name_fr": "Autriche", "marine_target": null, "marine_target_year": null}, "172": {"total_marine_area": 0, "id": 172, "marine_bounds": null, "code": "BDI", "total_terrestrial_area": 27055, "terrestrial_bounds": [29.00035, -4.47, 30.85023, -2.30983], "type": "country", "groups": [3], "name": "Burundi", "name_es": "Burundi", "name_fr": "Burundi", "marine_target": null, "marine_target_year": null}, "173": {"total_marine_area": 0, "id": 173, "marine_bounds": null, "code": "BFA", "total_terrestrial_area": 274005, "terrestrial_bounds": [-5.51892, 9.40111, 2.4054, 15.08259], "type": "country", "groups": [3], "name": "Burkina Faso", "name_es": "Burkina Faso", "name_fr": "Burkina Faso", "marine_target": null, "marine_target_year": null}, "174": {"total_marine_area": 0, "id": 174, "marine_bounds": null, "code": "BLR", "total_terrestrial_area": 206203, "terrestrial_bounds": [23.17834, 51.26957, 32.79461, 56.16836], "type": "country", "groups": [6], "name": "Belarus", "name_es": "Bielorrusia", "name_fr": "Bi\u00e9lorussie", "marine_target": null, "marine_target_year": null}, "175": {"total_marine_area": 0, "id": 175, "marine_bounds": null, "code": "BOL", "total_terrestrial_area": 1088012, "terrestrial_bounds": [-69.64525, -22.90657, -57.45443, -9.67092], "type": "country", "groups": [8], "name": "Bolivia", "name_es": "Bolivia", "name_fr": "Bolivie", "marine_target": null, "marine_target_year": null}, "176": {"total_marine_area": 0, "id": 176, "marine_bounds": null, "code": "BTN", "total_terrestrial_area": 38827, "terrestrial_bounds": [88.74604, 26.70065, 92.12476, 28.24768], "type": "country", "groups": [4], "name": "Bhutan", "name_es": "But\u00e1n", "name_fr": "Bhoutan", "marine_target": null, "marine_target_year": null}, "177": {"total_marine_area": 0, "id": 
177, "marine_bounds": null, "code": "BWA", "total_terrestrial_area": 579375, "terrestrial_bounds": [19.99953, -26.90733, 29.36831, -17.78081], "type": "country", "groups": [3], "name": "Botswana", "name_es": "Botsuana", "name_fr": "Botswana", "marine_target": null, "marine_target_year": null}, "178": {"total_marine_area": 0, "id": 178, "marine_bounds": null, "code": "CAF", "total_terrestrial_area": 622867, "terrestrial_bounds": [14.41774, 2.22052, 27.46342, 11.00757], "type": "country", "groups": [3], "name": "Central African Republic", "name_es": "Rep\u00fablica Centroafricana", "name_fr": "R\u00e9publique centrafricaine", "marine_target": null, "marine_target_year": null}, "179": {"total_marine_area": 0, "id": 179, "marine_bounds": null, "code": "CHE", "total_terrestrial_area": 41126, "terrestrial_bounds": [5.95606, 45.81706, 10.49508, 47.80848], "type": "country", "groups": [6], "name": "Switzerland", "name_es": "Suiza", "name_fr": "Suisse", "marine_target": null, "marine_target_year": null}, "180": {"total_marine_area": 0, "id": 180, "marine_bounds": null, "code": "CZE", "total_terrestrial_area": 78575, "terrestrial_bounds": [12.09076, 48.55242, 18.85925, 51.05569], "type": "country", "groups": [6], "name": "Czechia", "name_es": "Chequia", "name_fr": "Tch\u00e9quie", "marine_target": null, "marine_target_year": null}, "181": {"total_marine_area": 0, "id": 181, "marine_bounds": null, "code": "ETH", "total_terrestrial_area": 1133713, "terrestrial_bounds": [33.00154, 3.39882, 47.95823, 14.84548], "type": "country", "groups": [3], "name": "Ethiopia", "name_es": "Etiop\u00eda", "name_fr": "\u00c9thiopie", "marine_target": null, "marine_target_year": null}, "182": {"total_marine_area": 0, "id": 182, "marine_bounds": null, "code": "HUN", "total_terrestrial_area": 92786, "terrestrial_bounds": [16.11384, 45.74783, 22.90558, 48.58638], "type": "country", "groups": [6], "name": "Hungary", "name_es": "Hungr\u00eda", "name_fr": "Hongrie", "marine_target": 30, 
"marine_target_year": 2030}, "183": {"total_marine_area": 0, "id": 183, "marine_bounds": null, "code": "KGZ", "total_terrestrial_area": 197173, "terrestrial_bounds": [69.27974, 39.17284, 80.22843, 43.23824], "type": "country", "groups": [6], "name": "Kyrgyzstan", "name_es": "Kirguist\u00e1n", "name_fr": "Kirghizistan", "marine_target": null, "marine_target_year": null}, "184": {"total_marine_area": 0, "id": 184, "marine_bounds": null, "code": "LAO", "total_terrestrial_area": 230590, "terrestrial_bounds": [100.08677, 13.90968, 107.63498, 22.5004], "type": "country", "groups": [4], "name": "Laos", "name_es": "Laos", "name_fr": "Laos", "marine_target": null, "marine_target_year": null}, "185": {"total_marine_area": 0, "id": 185, "marine_bounds": null, "code": "LIE", "total_terrestrial_area": 159, "terrestrial_bounds": [9.47248, 47.04738, 9.63592, 47.27027], "type": "country", "groups": [6], "name": "Liechtenstein", "name_es": "Liechtenstein", "name_fr": "Liechtenstein", "marine_target": null, "marine_target_year": null}, "186": {"total_marine_area": 0, "id": 186, "marine_bounds": null, "code": "LSO", "total_terrestrial_area": 30582, "terrestrial_bounds": [27.01126, -30.67558, 29.45571, -28.5708], "type": "country", "groups": [3], "name": "Lesotho", "name_es": "Lesoto", "name_fr": "Lesotho", "marine_target": null, "marine_target_year": null}, "187": {"total_marine_area": 0, "id": 187, "marine_bounds": null, "code": "LUX", "total_terrestrial_area": 2573, "terrestrial_bounds": [5.74414, 49.44781, 6.53077, 50.18162], "type": "country", "groups": [6], "name": "Luxembourg", "name_es": "Luxemburgo", "name_fr": "Luxembourg", "marine_target": null, "marine_target_year": null}, "188": {"total_marine_area": 0, "id": 188, "marine_bounds": null, "code": "MDA", "total_terrestrial_area": 33815, "terrestrial_bounds": [26.62131, 45.46676, 30.16374, 48.49017], "type": "country", "groups": [6], "name": "Moldova", "name_es": "Moldavia", "name_fr": "Moldavie", "marine_target": null, 
"marine_target_year": null}, "189": {"total_marine_area": 0, "id": 189, "marine_bounds": null, "code": "MKD", "total_terrestrial_area": 24874, "terrestrial_bounds": [20.45577, 40.85529, 23.00613, 42.33859], "type": "country", "groups": [6], "name": "North Macedonia", "name_es": "Macedonia del Norte", "name_fr": "Mac\u00e9doine du Nord", "marine_target": null, "marine_target_year": null}, "190": {"total_marine_area": 0, "id": 190, "marine_bounds": null, "code": "MLI", "total_terrestrial_area": 1255274, "terrestrial_bounds": [-12.23891, 10.15951, 4.24497, 25.0], "type": "country", "groups": [3], "name": "Mali", "name_es": "Mal\u00ed", "name_fr": "Mali", "marine_target": null, "marine_target_year": null}, "191": {"total_marine_area": 0, "id": 191, "marine_bounds": null, "code": "MNG", "total_terrestrial_area": 1562128, "terrestrial_bounds": [87.74966, 41.56769, 119.9243, 52.1543], "type": "country", "groups": [4], "name": "Mongolia", "name_es": "Mongolia", "name_fr": "Mongolie", "marine_target": null, "marine_target_year": null}, "192": {"total_marine_area": 0, "id": 192, "marine_bounds": null, "code": "MWI", "total_terrestrial_area": 118372, "terrestrial_bounds": [32.68984, -17.12721, 35.91505, -9.36381], "type": "country", "groups": [3], "name": "Malawi", "name_es": "Malaui", "name_fr": "Malawi", "marine_target": null, "marine_target_year": null}, "193": {"total_marine_area": 0, "id": 193, "marine_bounds": null, "code": "NER", "total_terrestrial_area": 1187533, "terrestrial_bounds": [0.16625, 11.69697, 15.99564, 23.52503], "type": "country", "groups": [3], "name": "Niger", "name_es": "N\u00edger", "name_fr": "Niger", "marine_target": null, "marine_target_year": null}, "194": {"total_marine_area": 0, "id": 194, "marine_bounds": null, "code": "NPL", "total_terrestrial_area": 147880, "terrestrial_bounds": [80.06014, 26.34752, 88.20401, 30.44702], "type": "country", "groups": [4], "name": "Nepal", "name_es": "Nepal", "name_fr": "N\u00e9pal", "marine_target": null, 
"marine_target_year": null}, "195": {"total_marine_area": 0, "id": 195, "marine_bounds": null, "code": "PRY", "total_terrestrial_area": 400238, "terrestrial_bounds": [-62.64242, -27.60586, -54.25863, -19.29517], "type": "country", "groups": [8], "name": "Paraguay", "name_es": "Paraguay", "name_fr": "Paraguay", "marine_target": null, "marine_target_year": null}, "196": {"total_marine_area": 0, "id": 196, "marine_bounds": null, "code": "RWA", "total_terrestrial_area": 25392, "terrestrial_bounds": [28.86171, -2.83997, 30.89907, -1.04745], "type": "country", "groups": [3], "name": "Rwanda", "name_es": "Ruanda", "name_fr": "Rwanda", "marine_target": null, "marine_target_year": null}, "197": {"total_marine_area": 0, "id": 197, "marine_bounds": null, "code": "SMR", "total_terrestrial_area": 61, "terrestrial_bounds": [12.40066, 43.89465, 12.51412, 43.99569], "type": "country", "groups": [6], "name": "San Marino", "name_es": "San Marino", "name_fr": "Saint-Marin", "marine_target": null, "marine_target_year": null}, "198": {"total_marine_area": 0, "id": 198, "marine_bounds": null, "code": "SRB", "total_terrestrial_area": 78072, "terrestrial_bounds": [18.80852, 42.22762, 22.97894, 46.1894], "type": "country", "groups": [6], "name": "Serbia", "name_es": "Serbia", "name_fr": "Serbie", "marine_target": null, "marine_target_year": null}, "199": {"total_marine_area": 0, "id": 199, "marine_bounds": null, "code": "SSD", "total_terrestrial_area": 630773, "terrestrial_bounds": [24.15193, 3.481, 35.86995, 12.219], "type": "country", "groups": [3], "name": "South Sudan", "name_es": "Sud\u00e1n del Sur", "name_fr": "Soudan du Sud", "marine_target": null, "marine_target_year": null}, "200": {"total_marine_area": 0, "id": 200, "marine_bounds": null, "code": "SVK", "total_terrestrial_area": 48944, "terrestrial_bounds": [16.83447, 47.73275, 22.56791, 49.6138], "type": "country", "groups": [6], "name": "Slovakia", "name_es": "Eslovaquia", "name_fr": "Slovaquie", "marine_target": null, 
"marine_target_year": null}, "201": {"total_marine_area": 0, "id": 201, "marine_bounds": null, "code": "SWZ", "total_terrestrial_area": 17415, "terrestrial_bounds": [30.7908, -27.31752, 32.1367, -25.71876], "type": "country", "groups": [3], "name": "Eswatini", "name_es": "Suazilandia", "name_fr": "Swaziland", "marine_target": null, "marine_target_year": null}, "202": {"total_marine_area": 0, "id": 202, "marine_bounds": null, "code": "TCD", "total_terrestrial_area": 1274403, "terrestrial_bounds": [13.47348, 7.44107, 24.00269, 23.45037], "type": "country", "groups": [3], "name": "Chad", "name_es": "Chad", "name_fr": "Tchad", "marine_target": null, "marine_target_year": null}, "203": {"total_marine_area": 0, "id": 203, "marine_bounds": null, "code": "TJK", "total_terrestrial_area": 142011, "terrestrial_bounds": [67.38713, 36.67208, 75.13722, 41.04224], "type": "country", "groups": [6], "name": "Tajikistan", "name_es": "Tayikist\u00e1n", "name_fr": "Tadjikistan", "marine_target": null, "marine_target_year": null}, "204": {"total_marine_area": 0, "id": 204, "marine_bounds": null, "code": "UGA", "total_terrestrial_area": 242520, "terrestrial_bounds": [29.5715, -1.48214, 35.00027, 4.23447], "type": "country", "groups": [3], "name": "Uganda", "name_es": "Uganda", "name_fr": "Ouganda", "marine_target": null, "marine_target_year": null}, "205": {"total_marine_area": 0, "id": 205, "marine_bounds": null, "code": "UZB", "total_terrestrial_area": 446913, "terrestrial_bounds": [55.99778, 37.17719, 73.1346, 45.57111], "type": "country", "groups": [6], "name": "Uzbekistan", "name_es": "Uzbekist\u00e1n", "name_fr": "Ouzb\u00e9kistan", "marine_target": null, "marine_target_year": null}, "206": {"total_marine_area": 0, "id": 206, "marine_bounds": null, "code": "VAT", "total_terrestrial_area": 1, "terrestrial_bounds": [12.44561, 41.90011, 12.45845, 41.90755], "type": "country", "groups": [6], "name": "Vatican City", "name_es": "Ciudad del Vaticano", "name_fr": "Cit\u00e9 du Vatican", 
"marine_target": null, "marine_target_year": null}, "207": {"total_marine_area": 0, "id": 207, "marine_bounds": null, "code": "XCA", "total_terrestrial_area": 371055, "terrestrial_bounds": [46.75388, 36.5723, 54.04378, 47.01562], "type": "country", "groups": [4], "name": "Caspian Sea", "name_es": "Mar Caspio", "name_fr": "Mer Caspienne", "marine_target": null, "marine_target_year": null}, "208": {"total_marine_area": 0, "id": 208, "marine_bounds": null, "code": "XKO", "total_terrestrial_area": 10819, "terrestrial_bounds": [19.97939, 41.84826, 21.79305, 43.24613], "type": "country", "groups": [6], "name": "Kosovo", "name_es": "Kosovo", "name_fr": "Kosovo", "marine_target": null, "marine_target_year": null}, "209": {"total_marine_area": 0, "id": 209, "marine_bounds": null, "code": "ZMB", "total_terrestrial_area": 753990, "terrestrial_bounds": [21.98004, -18.07918, 33.71244, -8.27198], "type": "country", "groups": [3], "name": "Zambia", "name_es": "Zambia", "name_fr": "Zambie", "marine_target": null, "marine_target_year": null}, "210": {"total_marine_area": 0, "id": 210, "marine_bounds": null, "code": "ZNC", "total_terrestrial_area": 3314, "terrestrial_bounds": [32.602, 35.00272, 34.60792, 35.71208], "type": "country", "groups": [6], "name": "Northern Cyprus", "name_es": "Chipre del Norte", "name_fr": "Chypre du Nord", "marine_target": null, "marine_target_year": null}, "211": {"total_marine_area": 0, "id": 211, "marine_bounds": null, "code": "ZWE", "total_terrestrial_area": 391235, "terrestrial_bounds": [25.23773, -22.41957, 33.05502, -15.60728], "type": "country", "groups": [3], "name": "Zimbabwe", "name_es": "Zimbabue", "name_fr": "Zimbabwe", "marine_target": null, "marine_target_year": null}}}} \ No newline at end of file diff --git a/data/src/data_commons/data/locations_code.csv b/data/src/data_commons/data/locations_code.csv index 64340484..77bb8c18 100644 --- a/data/src/data_commons/data/locations_code.csv +++ b/data/src/data_commons/data/locations_code.csv @@ 
-166,3 +166,47 @@ location,code 165,WSM 166,YEM 167,ZAF +168,AFG +169,AND +170,ARM +171,AUT +172,BDI +173,BFA +174,BLR +175,BOL +176,BTN +177,BWA +178,CAF +179,CHE +180,CZE +181,ETH +182,HUN +183,KGZ +184,LAO +185,LIE +186,LSO +187,LUX +188,MDA +189,MKD +190,MLI +191,MNG +192,MWI +193,NER +194,NPL +195,PRY +196,RWA +197,SMR +198,SRB +199,SSD +200,SVK +201,SWZ +202,TCD +203,TJK +204,UGA +205,UZB +206,VAT +207,XCA +208,XKO +209,ZMB +210,ZNC +211,ZWE diff --git a/data/src/data_commons/data/regions_data.json b/data/src/data_commons/data/regions_data.json index bafa42dc..26145569 100644 --- a/data/src/data_commons/data/regions_data.json +++ b/data/src/data_commons/data/regions_data.json @@ -59,7 +59,8 @@ "VNM", "VUT", "WLF", - "WSM" + "WSM", + "XCA" ] }, { @@ -191,7 +192,9 @@ "TUR", "UKR", "UZB", - "VAT" + "VAT", + "XKO", + "ZNC" ] }, { From b1672c56ec91defbd83f8aed678b546e03401c3b Mon Sep 17 00:00:00 2001 From: sofia Date: Mon, 28 Oct 2024 12:36:31 +0100 Subject: [PATCH 14/16] code cleaning --- data/notebooks/pipes_mock/locations.ipynb | 718 ++------- data/notebooks/pipes_mock/precalc_sofia.ipynb | 1354 +++-------------- data/notebooks/pipes_mock/tiles.ipynb | 682 ++------- .../data/regions_translations.json | 42 + data/src/data_commons/loader.py | 7 + data/src/pipelines/output_schemas.py | 58 +- data/src/pipelines/processors.py | 520 ++++++- 7 files changed, 1042 insertions(+), 2339 deletions(-) create mode 100644 data/src/data_commons/data/regions_translations.json diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb index d9181a95..8e79e5fe 100644 --- a/data/notebooks/pipes_mock/locations.ipynb +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -47,12 +47,7 @@ "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", "\n", "from pipelines.output_schemas import (\n", - " FPLSchema,\n", - " ProtectionLevelSchema,\n", - " MPAsSchema,\n", - " HabitatsSchema,\n", " LocationSchema,\n", - " 
ProtectedAreaExtentSchema,\n", ")\n", "from pipelines.processors import (\n", " add_envelope,\n", @@ -63,27 +58,13 @@ " add_bbox,\n", " add_groups_and_members,\n", " add_location_name,\n", - " output,\n", - " clean_geometries,\n", - " filter_by_exluding_propossed_mpas,\n", - " spatial_join,\n", - " process_mpa_data,\n", - " assign_iso3,\n", - " calculate_global_area,\n", - " separate_parent_iso,\n", - " calculate_stats_cov,\n", - " coverage_stats,\n", - " mpaatlas_filter_stablishment,\n", - " process_mpaatlas_data,\n", - " calculate_stats,\n", - " fix_monaco,\n", - " batch_export,\n", - " calculate_area,\n", - " define_is_child,\n", - " set_child_id,\n", - " add_child_parent_relationship,\n", - " columns_to_lower,\n", - " extract_wdpaid_mpaatlas,\n", + " add_translations,\n", + " calculate_gadm_area,\n", + " map_and_generate_ids,\n", + " drop_unnecessary_columns,\n", + " combine_and_clean_columns,\n", + " process_and_merge_commitments,\n", + " set_index_and_sort,\n", ")\n", "\n", "logging.basicConfig(level=logging.DEBUG)\n", @@ -153,11 +134,8 @@ "# # Download country translations\n", "working_folder = FileConventionHandler(pipe_gadm)\n", "input_path = working_folder.pipe_raw_path\n", - "input_path\n", - "\n", - "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", - "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", "\n", + "# # Download data from GCS if needed\n", "# writeReadGCP(\n", "# credentials=mysettings.GCS_KEYFILE_JSON,\n", "# bucket_name=mysettings.GCS_BUCKET,\n", @@ -166,581 +144,32 @@ "# operation=\"r\",\n", "# )\n", "\n", - "country_commitments_url = \"30x30 National Commitments - MPAtlas Country Targets.csv\"\n", - "country_commitments_output = input_path.joinpath(country_commitments_url.split(\"/\")[-1])\n", - "\n", "# writeReadGCP(\n", "# credentials=mysettings.GCS_KEYFILE_JSON,\n", "# bucket_name=mysettings.GCS_BUCKET,\n", "# blob_name=country_commitments_url,\n", 
"# file=country_commitments_output,\n", "# operation=\"r\",\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Functions" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List, Union\n", - "import pandera as pa\n", - "from pandera.typing import Index, Series\n", - "import pandas as pd\n", - "\n", - "class LocationSchemaAll(pa.DataFrameModel):\n", - " id: Index[int] = pa.Field(gt=0, coerce=True)\n", - " code: Series[str] = pa.Field(coerce=True)\n", - " name: Series[str] = pa.Field(coerce=True)\n", - " name_es: Series[str] = pa.Field(coerce=True)\n", - " name_fr: Series[str] = pa.Field(coerce=True)\n", - " total_marine_area: Series[int] = pa.Field(ge=0, coerce=True) # noqa: N815\n", - " total_terrestrial_area: Series[int] = pa.Field(ge=0, coerce=True) # noqa: N815\n", - " type: Series[str] = pa.Field(\n", - " unique_values_eq=[\"country\", \"worldwide\", \"region\", \"highseas\"], coerce=True\n", - " )\n", - " groups: Series[List[int]] = pa.Field(coerce=True)\n", - " marine_bounds: Series[List[float]] = pa.Field(coerce=True, nullable=True)\n", - " terrestrial_bounds: Series[List[float]] = pa.Field(coerce=True, nullable=True)\n", - " marine_target: Series[pd.Int64Dtype] = pa.Field(nullable=True, coerce=True)\n", - " marine_target_year: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", - "\n", - "def round_to_list(bounds):\n", - " return list(np.round(bounds, decimals=5))\n", - "\n", - "def add_bbox(df: gpd.GeoDataFrame, col_name: str = \"bounds\") -> gpd.GeoDataFrame:\n", - " return df.assign(**{col_name: df.geometry.bounds.apply(round_to_list, axis=1)})\n", - "\n", - "def add_translations(df, translations_csv_path):\n", - " translations_df = pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[])\n", - " \n", - " df = df.merge(translations_df[['code', 'name_es', 'name_fr']], left_on='iso', right_on='code', 
how='left')\n", - " \n", - " return df\n", - "\n", - "def calculate_gadm_area(df: pd.DataFrame) -> pd.DataFrame:\n", - " glob = gpd.GeoDataFrame(\n", - " {\n", - " \"iso\": \"GLOB\",\n", - " \"AREA_KM2\": 134954835,\n", - " \"location_type\": \"worldwide\",\n", - " \"region\": np.nan,\n", - " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n", - " },\n", - " crs=\"EPSG:4326\",\n", - " )\n", - "\n", - " terrestrial_areas = (\n", - " df\n", - " .dissolve(by=[\"iso\", \"region\"], aggfunc={\"AREA_KM2\": \"sum\"})\n", - " .reset_index()\n", - " .assign(location_type=\"country\")\n", - " )\n", - " regions_areas = (\n", - " df\n", - " .dissolve(by=[\"region\"], aggfunc={\"AREA_KM2\": \"sum\"})\n", - " .reset_index()\n", - " .rename(columns={\"region\": \"iso\"})\n", - " .assign(location_type=\"region\")\n", - " )\n", - " result = (\n", - " pd.concat(\n", - " [\n", - " glob,\n", - " regions_areas,\n", - " terrestrial_areas,\n", - " ],\n", - " ignore_index=True,\n", - " )\n", - " .dropna(subset=[\"iso\"])\n", - " .reset_index(drop=True)\n", - " )\n", - " result.index = result.index + 1\n", - " result.index.name = \"id\"\n", - "\n", - " # Round AREA_KM2 to integers\n", - " result[\"AREA_KM2\"] = result[\"AREA_KM2\"].round().astype(int)\n", - "\n", - " return result.assign(id=result.index)\n", - "\n", - "def add_groups_and_members_land(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame:\n", - " return df.assign(\n", - " groups=lambda row: row[[\"region\", \"location_type\"]].apply(\n", - " lambda x: (np.where(df.iso == x[\"region\"])[0] + 2).tolist()\n", - " if x[\"location_type\"] == \"country\"\n", - " else [],\n", - " axis=1,\n", - " )\n", - " )\n", - "\n", - "def combine_columns(df, col1, col2, new_col):\n", - " \"\"\"\n", - " Combine two columns in a DataFrame using combine_first and assign to a new column.\n", - "\n", - " Parameters:\n", - " df (pd.DataFrame): The DataFrame containing the columns to combine.\n", - " col1 
(str): The name of the first column.\n", - " col2 (str): The name of the second column.\n", - " new_col (str): The name of the new column to assign the combined result.\n", - "\n", - " Returns:\n", - " pd.DataFrame: The DataFrame with the new combined column.\n", - " \"\"\"\n", - " df[new_col] = df[col1].combine_first(df[col2])\n", - " return df\n", - "\n", - "\n", - "def add_region_iso_2(\n", - " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", - ") -> pd.DataFrame | gpd.GeoDataFrame:\n", - " regions = pd.read_json(scripts_dir.joinpath(\"data_commons/data/regions_data2.json\"))\n", - "\n", - " def find_region_iso(iso: str) -> Union[str, None]:\n", - " filtered_regions = list(filter(lambda x: iso in x[\"country_iso_3s\"], regions.get(\"data\")))\n", - " return filtered_regions[0][\"region_iso\"] if len(filtered_regions) > 0 else None\n", - "\n", - " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))\n", - "\n", - "def add_location_name_2(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame:\n", - " with open(scripts_dir.joinpath('data_commons/data/iso_map2.json'), 'r') as f:\n", - " iso_map = json.load(f)\n", - "\n", - " def get_name(iso):\n", - " test = iso_map.get(iso, np.nan)\n", - " return test\n", + "# )\n", "\n", - " return df.assign(name=df.iso.apply(get_name))\n" + "# Load translations and commitments\n", + "translations_csv_url = \"vizzuality_processed_data/gadm/preprocess/locations_translated.csv\"\n", + "translations_csv_output = input_path.joinpath(translations_csv_url.split(\"/\")[-1])\n", + "country_commitments_url = \"30x30 National Commitments - MPAtlas Country Targets.csv\"\n", + "country_commitments_output = input_path.joinpath(country_commitments_url.split(\"/\")[-1])\n", + "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Note: gadm includes some extra iso codes that had to be included in the regions_data.json (provided by protected planet) to process the terrestrial 
stats:\n", - "\n", - "'XCA': Caspian Sea, included in Asia & Pacific region\n", - "\n", - "'XKO': Kosovo, included in Europe region\n", - "\n", - "'ZNC': Northern Cyprus, included in Europe region" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_3639646/4293776084.py:43: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.\n", - " \"geometry\": gpd.GeoSeries([gpd.GeoSeries(df[\"geometry\"]).unary_union]),\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
total_marine_areaidmarine_boundscodetotal_terrestrial_areaterrestrial_boundstypegroupsnamename_esname_fr
02128813892[-180.0, -76.80012, 180.0, 90.0]ABNJ0NaNhighseas[]Areas Beyond National JurisdictionÁreas fuera de la jurisdicción nacionalZones au-delà de la juridiction nationale
1148780583[-28.84709, -50.31506, 75.85287, 38.80087]AF29993095[-25.3618, -34.83514, 63.50347, 37.55986]region[]AfricaÁfricaAfrique
20168NaNAFG644050[60.50487, 29.36157, 74.89413, 38.49041]country[4]AfghanistanAfganistánAfghanistan
349586610[8.19586, -17.27214, 13.86517, -5.02988]AGO1251701[11.6687, -18.04208, 24.08007, -4.37259]country[3]AngolaAngolaAngola
41216511[18.32149, 39.64039, 20.02083, 42.0112]ALB28690[19.26416, 39.6507, 21.04909, 42.66043]country[6]AlbaniaAlbaniaAlbanie
....................................
206527384166[41.08194, 8.95275, 57.946, 16.64959]YEM453741[41.81458, 12.10819, 54.53542, 19.0]country[9]YemenYemenYémen
2071547576167[13.34802, -50.31506, 42.8475, -26.86206]ZAF1221328[16.45189, -34.83514, 32.89125, -22.12503]country[3]South AfricaSudáfricaAfrique du Sud
2080209NaNZMB753990[21.98004, -18.07918, 33.71244, -8.27198]country[3]ZambiaZambiaZambie
2090210NaNZNC3314[32.602, 35.00272, 34.60792, 35.71208]country[6]Northern CyprusChipre del NorteChypre du Nord
2100211NaNZWE391235[25.23773, -22.41957, 33.05502, -15.60728]country[3]ZimbabweZimbabueZimbabwe
\n", - "

211 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " total_marine_area id marine_bounds code \\\n", - "0 212881389 2 [-180.0, -76.80012, 180.0, 90.0] ABNJ \n", - "1 14878058 3 [-28.84709, -50.31506, 75.85287, 38.80087] AF \n", - "2 0 168 NaN AFG \n", - "3 495866 10 [8.19586, -17.27214, 13.86517, -5.02988] AGO \n", - "4 12165 11 [18.32149, 39.64039, 20.02083, 42.0112] ALB \n", - ".. ... ... ... ... \n", - "206 527384 166 [41.08194, 8.95275, 57.946, 16.64959] YEM \n", - "207 1547576 167 [13.34802, -50.31506, 42.8475, -26.86206] ZAF \n", - "208 0 209 NaN ZMB \n", - "209 0 210 NaN ZNC \n", - "210 0 211 NaN ZWE \n", - "\n", - " total_terrestrial_area terrestrial_bounds \\\n", - "0 0 NaN \n", - "1 29993095 [-25.3618, -34.83514, 63.50347, 37.55986] \n", - "2 644050 [60.50487, 29.36157, 74.89413, 38.49041] \n", - "3 1251701 [11.6687, -18.04208, 24.08007, -4.37259] \n", - "4 28690 [19.26416, 39.6507, 21.04909, 42.66043] \n", - ".. ... ... \n", - "206 453741 [41.81458, 12.10819, 54.53542, 19.0] \n", - "207 1221328 [16.45189, -34.83514, 32.89125, -22.12503] \n", - "208 753990 [21.98004, -18.07918, 33.71244, -8.27198] \n", - "209 3314 [32.602, 35.00272, 34.60792, 35.71208] \n", - "210 391235 [25.23773, -22.41957, 33.05502, -15.60728] \n", - "\n", - " type groups name \\\n", - "0 highseas [] Areas Beyond National Jurisdiction \n", - "1 region [] Africa \n", - "2 country [4] Afghanistan \n", - "3 country [3] Angola \n", - "4 country [6] Albania \n", - ".. ... ... ... \n", - "206 country [9] Yemen \n", - "207 country [3] South Africa \n", - "208 country [3] Zambia \n", - "209 country [6] Northern Cyprus \n", - "210 country [3] Zimbabwe \n", - "\n", - " name_es \\\n", - "0 Áreas fuera de la jurisdicción nacional \n", - "1 África \n", - "2 Afganistán \n", - "3 Angola \n", - "4 Albania \n", - ".. ... 
\n", - "206 Yemen \n", - "207 Sudáfrica \n", - "208 Zambia \n", - "209 Chipre del Norte \n", - "210 Zimbabue \n", - "\n", - " name_fr \n", - "0 Zones au-delà de la juridiction nationale \n", - "1 Afrique \n", - "2 Afghanistan \n", - "3 Angola \n", - "4 Albanie \n", - ".. ... \n", - "206 Yémen \n", - "207 Afrique du Sud \n", - "208 Zambie \n", - "209 Chypre du Nord \n", - "210 Zimbabwe \n", - "\n", - "[211 rows x 11 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Process EEZ data (marine data)\n", - "locations = (\n", - " gpd.read_file(pipe_eez_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", - " .pipe(add_envelope)\n", - " .pipe(add_location_iso)\n", - " .pipe(expand_multiple_locations)\n", - " .pipe(add_region_iso, 'iso')\n", - " .pipe(calculate_eez_area)\n", - " .pipe(add_bbox)\n", - " .pipe(add_groups_and_members)\n", - " .pipe(add_location_name)\n", - " .pipe(add_translations, translations_csv_output)\n", - " .rename(\n", - " columns={\n", - " \n", - " \"AREA_KM2\": \"total_marine_area\",\n", - " \"location_type\": \"type\",\n", - " \"bounds\":'marine_bounds'\n", - " }\n", - " )\n", - ").reset_index(drop=True)\n", - "\n", - "locations.drop(\n", - " columns=list(\n", - " set(locations.columns) -\n", - " set([\"code\", \"name\", \"name_es\", \"name_fr\", \"total_marine_area\", \"marine_bounds\", \"type\", \"groups\", \"id\"])\n", - " ),\n", - " inplace=True,\n", - ")\n", - "\n", - "\n", - "id_lookup = locations.set_index('code')['id'].to_dict() # Create a lookup dictionary for IDs from EEZ data\n", - "\n", - "# Process GADM data \n", - "locations_land = (\n", - " gpd.read_file(pipe_gadm_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", - " .rename(columns={\"GID_0\": \"iso\", 'area_km2': 'AREA_KM2'})\n", - " .pipe(add_envelope)\n", - " .pipe(add_region_iso_2, 'iso') # add_region_iso_2 is used instead of add_region_iso because gadm includes new iso codes\n", - " 
.pipe(calculate_gadm_area)\n", - " .pipe(add_bbox)\n", - " .pipe(add_groups_and_members_land)\n", - " .pipe(add_location_name_2)\n", - " .pipe(add_translations, translations_csv_output)\n", - " .rename(\n", - " columns={\n", - " \"AREA_KM2\": \"total_terrestrial_area\",\n", - " \"location_type\": \"type\",\n", - " \"bounds\": \"terrestrial_bounds\"\n", - " }\n", - " )\n", - ").reset_index(drop=True)\n", - "\n", - "locations_land['id'] = locations_land['code'].map(id_lookup) # Apply the EEZ IDs to the GADM dataset\n", - "\n", - "nan_mask = locations_land['id'].isna() # Identify the NaN values in the id column\n", - "\n", - "new_ids = pd.Series(\n", - " range(max(id_lookup.values()) + 1, max(id_lookup.values()) + 1 + nan_mask.sum()),\n", - " index=locations_land[nan_mask].index\n", - ") # Generate new IDs for any GADM rows without an EEZ match\n", - "\n", - "locations_land['id'] = locations_land['id'].fillna(new_ids).astype(int) # Assign the new IDs to the NaN values in the id column\n", - "\n", - "locations_land.drop(\n", - " columns=list(\n", - " set(locations_land.columns) -\n", - " set([\"code\", \"name\", \"name_es\", \"name_fr\", \"total_terrestrial_area\", \"type\", \"groups\", \"terrestrial_bounds\", \"id\"])\n", - " ),\n", - " inplace=True,\n", - ")\n", - "\n", - "# Merge EEZ and GADM datasets\n", - "combined_locations = pd.merge(\n", - " locations, locations_land,\n", - " on=['code', 'id'],\n", - " suffixes=('_marine', '_land'),\n", - " how='outer' \n", - ")\n", - "\n", - "# Combine data from land and marine for each base column\n", - "base_columns = ['type', 'groups', 'name', 'name_es', 'name_fr']\n", - "for base_col in base_columns:\n", - " marine_col = f\"{base_col}_marine\"\n", - " land_col = f\"{base_col}_land\"\n", - " combined_locations = combine_columns(combined_locations, marine_col, land_col, base_col)\n", - "\n", - "\n", - "# Fill NaN values with 0 for each column\n", - "columns_to_fill = ['total_marine_area', 'total_terrestrial_area']\n", - "for 
col in columns_to_fill:\n", - " combined_locations[col] = combined_locations[col].fillna(0).astype(int)\n", - "\n", - "# Force the id column to be an integer\n", - "combined_locations['id'] = combined_locations['id'].astype(int)\n", - "\n", - "\n", - "# Drop unnecessary columns\n", - "combined_locations.drop(\n", - " columns=[col for col in combined_locations.columns if col.endswith('_marine') or col.endswith('_land')],\n", - " inplace=True\n", - ")\n", - "combined_locations = combined_locations.reset_index(drop=True)\n", - "\n", - "combined_locations\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Add the national commitments (only marine for now)\n", - "commit = pd.read_csv(country_commitments_output, header=1)\n", - "commit = commit.iloc[:, :6][commit['30% National Target'] == 'Y']\n", - "commit.drop(columns=[\"% Fully/Highly*\"], inplace=True)\n", - "commit['% National Target'] = commit['% National Target'].str.replace('%', '').astype(int)\n", - "\n", - "# When % National Target is 30, fill By Year with 2030\n", - "commit['By Year'] = commit['By Year'].fillna(commit['% National Target'].apply(lambda x: '2030' if x == 30 else None))\n" + "**Note:** gadm includes some extra iso codes that had to be included in the regions_data.json (provided by protected planet) to process the terrestrial stats: 'XCA': Caspian Sea, included in Asia & Pacific region, 'XKO': Kosovo, included in Europe region, 'ZNC': Northern Cyprus, included in Europe region" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1079,41 +508,83 @@ "[211 rows x 13 columns]" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Include the national commitments in the combined_locations table\n", - "combined_locations = combined_locations.merge(commit[['Iso Code', '% National Target', 'By Year']], \n", - " 
left_on='code', right_on='Iso Code', how='left')\n", - "\n", - "combined_locations.rename(columns={'% National Target': 'marine_target', 'By Year': 'marine_target_year'}, inplace=True)\n", - "\n", - "combined_locations.drop(columns=['Iso Code'], inplace=True)\n", - "\n", - "combined_locations['marine_target'] = combined_locations['marine_target'].astype(pd.Int64Dtype())\n", - "combined_locations['marine_target_year'] = combined_locations['marine_target_year'].astype(pd.Int64Dtype())\n", - "\n", - "combined_locations = combined_locations.reset_index(drop=True)\n", - "\n", - "# Add marine_target and marine_target_year to the combined_locations table for code 'GLOB'\n", - "combined_locations.loc[combined_locations['code'] == 'GLOB', 'marine_target'] = 30\n", - "combined_locations.loc[combined_locations['code'] == 'GLOB', 'marine_target_year'] = 2030 \n", + "# Process EEZ data (marine data)\n", + "locations_marine = (\n", + " gpd.read_file(pipe_eez_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", + " .pipe(add_envelope)\n", + " .pipe(add_location_iso)\n", + " .pipe(expand_multiple_locations)\n", + " .pipe(add_region_iso, 'iso')\n", + " .pipe(calculate_eez_area)\n", + " .pipe(add_bbox)\n", + " .pipe(add_groups_and_members, \"marine\")\n", + " .pipe(add_location_name)\n", + " .pipe(add_translations, translations_csv_output)\n", + " .rename(\n", + " columns={\n", + " \"AREA_KM2\": \"total_marine_area\",\n", + " \"location_type\": \"type\",\n", + " \"bounds\": 'marine_bounds'\n", + " }\n", + " )\n", + " .pipe(lambda df: df.drop(\n", + " columns=list(\n", + " set(df.columns) -\n", + " set([\"code\", \"name\", \"name_es\", \"name_fr\", \"total_marine_area\", \"marine_bounds\", \"type\", \"groups\", \"id\"])\n", + " )\n", + " ))\n", + " .reset_index(drop=True)\n", + ")\n", "\n", + "# Process GADM data (land data)\n", + "locations_land = (\n", + " gpd.read_file(pipe_gadm_dir.get_step_fmt_file_path(prev_step, \"shp\"))\n", + " .rename(columns={\"GID_0\": \"iso\", 'area_km2': 
'AREA_KM2'})\n", + " .pipe(add_envelope)\n", + " .pipe(add_region_iso, 'iso')\n", + " .pipe(calculate_gadm_area)\n", + " .pipe(add_bbox)\n", + " .pipe(add_groups_and_members, \"land\")\n", + " .pipe(add_location_name)\n", + " .pipe(add_translations, translations_csv_output)\n", + " .rename(\n", + " columns={\n", + " \"AREA_KM2\": \"total_terrestrial_area\",\n", + " \"location_type\": \"type\",\n", + " \"bounds\": \"terrestrial_bounds\"\n", + " }\n", + " )\n", + " .pipe(map_and_generate_ids, locations_marine)\n", + " .pipe(drop_unnecessary_columns, [\"code\", \"name\", \"name_es\", \"name_fr\", \"total_terrestrial_area\", \"type\", \"groups\", \"terrestrial_bounds\", \"id\"])\n", + " .reset_index(drop=True)\n", + ")\n", "\n", - "# Force the index to have the values in id column (so they follow the order they had in the previous table)\n", - "combined_locations['index'] = combined_locations['id']\n", - "combined_locations.set_index('index', inplace=True)\n", - "combined_locations.sort_index(inplace=True)\n", + "# Merge EEZ and GADM datasets and add marine national commitments\n", + "combined_locations = (\n", + " pd.merge(\n", + " locations_marine, locations_land,\n", + " on=['code', 'id'],\n", + " suffixes=('_marine', '_land'),\n", + " how='outer'\n", + " )\n", + " .pipe(combine_and_clean_columns)\n", + " .reset_index(drop=True)\n", + " .pipe(lambda df: process_and_merge_commitments(df, pd.read_csv(country_commitments_output, header=1)))\n", + " .pipe(set_index_and_sort)\n", + ")\n", "\n", "combined_locations" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -1121,7 +592,7 @@ "output_locations_combined = {\n", " \"version\": 2,\n", " \"data\": {\n", - " \"api::location.location\": LocationSchemaAll(pd.DataFrame(combined_locations)).to_dict(\n", + " \"api::location.location\": LocationSchema(pd.DataFrame(combined_locations)).to_dict(\n", " orient=\"index\"\n", " )\n", " },\n", @@ -1136,18 
+607,13 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "## Create locations_code (stored in gadm folder)\n", + "# Create locations_code and save in data_commons/data folder\n", "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", - " .to_csv(pipe_gadm_dir.get_processed_step_path(current_step)\n", - " .joinpath('locations_code_all.csv'), index=False))\n", - "\n", - "## Save locations_code in data_commons/data folder\n", - "(combined_locations[['id', 'code']].rename(columns={'id': 'location'})\n", - " .to_csv(scripts_dir.joinpath('data_commons/data/locations_code_all.csv'), index=False))" + " .to_csv(scripts_dir.joinpath('data_commons/data/locations_code.csv'), index=False))" ] }, { @@ -1173,7 +639,7 @@ " credentials=mysettings.GCS_KEYFILE_JSON,\n", " bucket_name=mysettings.GCS_BUCKET,\n", " blob_name=remote_path_code,\n", - " file=scripts_dir.joinpath('data_commons/data/locations_code_all.csv'),\n", + " file=scripts_dir.joinpath('data_commons/data/locations_code.csv'),\n", " operation=\"w\",\n", ")\n", "\n", @@ -1192,18 +658,6 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb index 01cba7b1..fe184acb 100644 --- a/data/notebooks/pipes_mock/precalc_sofia.ipynb +++ b/data/notebooks/pipes_mock/precalc_sofia.ipynb @@ -52,10 +52,12 @@ "from pipelines.output_schemas import (\n", " FPLSchema,\n", " ProtectionLevelSchema,\n", - " MPAsSchema,\n", + " PAsSchema,\n", " HabitatsSchema,\n", " LocationSchema,\n", " ProtectedAreaExtentSchema,\n", + " PAsSchemaChunk1,\n", + " 
PAsSchemaChunk2,\n", ")\n", "from pipelines.processors import (\n", " add_envelope,\n", @@ -88,10 +90,20 @@ " columns_to_lower,\n", " extract_wdpaid_mpaatlas,\n", " simplify_async,\n", - " process_tpa_data,\n", " get_matches,\n", " repair_geometry, \n", - " arrange_dimensions, \n", + " arrange_dimensions,\n", + " add_total_area, \n", + " change_ata_to_abnj,\n", + " calculate_padef_percentages,\n", + " calculate_coverage_percentage,\n", + " calculate_coverage_percentage_mpatlas,\n", + " calculate_global_contribution,\n", + " add_is_last_year,\n", + " add_environment,\n", + " cumulative_pa_def_counts, \n", + " process_final_coverage,\n", + " \n", ")\n", "from pipelines.utils import background\n", "\n", @@ -124,428 +136,6 @@ "# strapi.login(jwt=mysettings.STRAPI_JWT)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### General functions" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List, Dict\n", - "import pandera as pa\n", - "from pandera.typing import Index, Series\n", - "\n", - "def change_ata_to_abnj(df):\n", - " \"\"\"\n", - " Changes values in the parent_iso column from 'ATA' to 'ABNJ' as there is no 'ATA' stats in Protected Planet.\n", - " \"\"\"\n", - " # Count the occurrences of 'ATA'\n", - " count_changes = df['parent_iso'].value_counts().get('ATA', 0)\n", - " \n", - " # Replace 'ATA' with 'ABNJ'\n", - " df['parent_iso'] = df['parent_iso'].replace('ATA', 'ABNJ')\n", - "\n", - " return df\n", - "\n", - "\n", - "def add_total_marine_area(df):\n", - " # Read the JSON file\n", - " with open(scripts_dir.joinpath('data_commons/data/locations_all.json'), 'r') as f:\n", - " locations_data = json.load(f)\n", - " \n", - " # Access the nested dictionary\n", - " locations_dict = locations_data.get('data', {}).get('api::location.location', {})\n", - " \n", - " # Create a lookup dictionary from the nested dictionary\n", - " marine_area_lookup = {item['code']: 
item['total_marine_area'] for item in locations_dict.values()}\n", - " \n", - " # Identify the column that contains the word 'iso'\n", - " iso_column = [col for col in df.columns if 'iso' in col][0]\n", - "\n", - " # Perform the mapping using the identified column\n", - " df['total_marine_area'] = df[iso_column].map(marine_area_lookup)\n", - " \n", - " return df\n", - "\n", - "def add_total_terrestrial_area(df):\n", - " # Read the JSON file\n", - " with open(scripts_dir.joinpath('data_commons/data/locations_all.json'), 'r') as f:\n", - " locations_data = json.load(f)\n", - " \n", - " # Access the nested dictionary\n", - " locations_dict = locations_data.get('data', {}).get('api::location.location', {})\n", - " \n", - " # Create a lookup dictionary from the nested dictionary\n", - " marine_area_lookup = {item['code']: item['total_terrestrial_area'] for item in locations_dict.values()}\n", - " \n", - " # Identify the column that contains the word 'iso'\n", - " iso_column = [col for col in df.columns if 'iso' in col][0]\n", - "\n", - " # Perform the mapping using the identified column\n", - " df['total_terrestrial_area'] = df[iso_column].map(marine_area_lookup)\n", - " \n", - " return df\n", - "\n", - "def add_mpa_oecm_percentages(df):\n", - " # Calculate the total protectedAreasCount for each year and iso_3\n", - " total_counts = df.groupby(['year', 'iso_3'])['protectedAreasCount'].transform('sum')\n", - "\n", - " # Calculate the counts for PA_DEF == 0 and PA_DEF == 1\n", - " df['oecm_count'] = df['protectedAreasCount'].where(df['PA_DEF'] == 0, 0)\n", - " df['pa_count'] = df['protectedAreasCount'].where(df['PA_DEF'] == 1, 0)\n", - "\n", - " # Calculate the percentages\n", - " df['oecms'] = df.groupby(['year', 'iso_3'])['oecm_count'].transform('sum') / total_counts * 100\n", - " df['pas'] = df.groupby(['year', 'iso_3'])['pa_count'].transform('sum') / total_counts * 100\n", - "\n", - " # Aggregate the results and fill NaN values with 0\n", - " final_df = 
df.groupby(['year', 'iso_3']).agg(\n", - " area=('area', 'sum'),\n", - " protected_areas_count=('protectedAreasCount', 'sum'),\n", - " oecms=('oecms', 'first'),\n", - " pas=('pas', 'first')\n", - " ).reset_index().fillna(0)\n", - "\n", - " return final_df\n", - "\n", - "def calculate_pa_def_percentages(df: pd.DataFrame, iso_col: str = \"iso_3\") -> pd.DataFrame:\n", - " \"\"\"\n", - " Calculate the percentages for each PA_DEF value.\n", - "\n", - " Parameters:\n", - " df (pd.DataFrame): The DataFrame containing the cumulative counts of PA_DEF values.\n", - " iso_col (str): The column name for the iso_3 values. Default is \"iso_3\".\n", - "\n", - " Returns:\n", - " pd.DataFrame: A DataFrame with the percentages of PA_DEF values for each iso_3 and each year.\n", - " \"\"\"\n", - " \n", - " df['protected_areas_count'] = df['0'] + df['1']\n", - " df['oecms'] = (df['0'] / df['protected_areas_count']) * 100\n", - " df['pas'] = (df['1'] / df['protected_areas_count']) * 100\n", - "\n", - " df = df.drop(columns=['0', '1'], errors='ignore')\n", - "\n", - " return df\n", - "\n", - "def calculate_coverage_percentage_mpatlas(df):\n", - " df['percentage'] = (df['area_km2'] / df['total_marine_area']) * 100\n", - " return df\n", - "\n", - "def calculate_coverage_percentage_pa(df):\n", - " if 'total_marine_area' in df.columns:\n", - " df['coverage'] = (df['protected_area'] / df['total_marine_area']) * 100\n", - " elif 'total_terrestrial_area' in df.columns:\n", - " df['coverage'] = (df['protected_area'] / df['total_terrestrial_area']) * 100\n", - " else:\n", - " df['coverage'] = np.nan\n", - "\n", - " return df\n", - "\n", - "def calculate_global_contribution(df):\n", - " if 'total_marine_area' in df.columns:\n", - " df['global_contribution'] = (df['protected_area'] / 361000000) * 100\n", - " elif 'total_terrestrial_area' in df.columns:\n", - " df['global_contribution'] = (df['protected_area'] / 134954835) * 100\n", - " else:\n", - " df['global_contribution'] = np.nan\n", - " 
return df\n", - "\n", - "def add_is_last_year(df):\n", - " # Find the latest year for each iso_3\n", - " latest_years = df.groupby('iso_3')['year'].transform('max')\n", - " \n", - " # Create the is_last_year column\n", - " df['is_last_year'] = (df['year'] == latest_years).astype(int)\n", - " \n", - " return df\n", - "\n", - "def add_environment(df):\n", - " \"\"\"\n", - " Adds a column 'environment' based on the presence of 'totalMarineArea' or 'totalLandArea'.\n", - "\n", - " Parameters:\n", - " df (pd.DataFrame): The input DataFrame.\n", - "\n", - " Returns:\n", - " pd.DataFrame: The DataFrame with the 'environment' column added.\n", - " \"\"\"\n", - " if 'total_marine_area' in df.columns:\n", - " df['environment'] = 1\n", - " elif 'total_terrestrial_area' in df.columns:\n", - " df['environment'] = 2\n", - " else:\n", - " df['environment'] = 0\n", - " \n", - " return df\n", - "\n", - "def coverage_stats2(\n", - " df: pd.DataFrame,\n", - " area_col: str = \"area\",\n", - " sort_vals: List[str] = [\"iso_3\", \"year\"],\n", - ") -> pd.DataFrame:\n", - " \"\"\"only relevant to get the coverage numbers for mpa\"\"\"\n", - " return df.assign(\n", - " protected_area=(\n", - " df.sort_values(by=sort_vals)[area_col]\n", - " - df.sort_values(by=sort_vals)\n", - " .groupby(sort_vals)[area_col]\n", - " .shift(-1, fill_value=0)\n", - " .reset_index(drop=True)\n", - " ).round(2),\n", - " )\n", - "\n", - "def process_mpaatlas_data(gdf: gpd.GeoDataFrame) -> pd.DataFrame:\n", - " return (\n", - " gdf.dissolve(by=[\"protecti_1\", \"iso_3\"], aggfunc={\"name\": \"count\"})\n", - " .reset_index()\n", - " .pipe(calculate_area, \"area_km2\", None)\n", - " .drop(columns=[\"geometry\"])\n", - " )\n", - "\n", - "def separate_parent_iso(df: pd.DataFrame, iso_column=\"iso_3\", separator=\";\") -> pd.DataFrame:\n", - " df[iso_column] = (\n", - " df[iso_column].str.replace(\" \", \"\").str.replace(\":\", separator).str.split(separator)\n", - " )\n", - " return df.explode(iso_column)\n", - 
"\n", - "def output2(\n", - " df: pd.DataFrame, iso_column: str, rep_d: dict, rename: Dict[str, str], drop_cols: List[str]\n", - ") -> pd.DataFrame:\n", - " \"\"\"Output function formatter for the data.\n", - "\n", - " Args:\n", - " df (pd.DataFrame): The DataFrame to process.\n", - " iso_column (str): The column containing the ISO codes.\n", - " rep_d (dict): A dictionary of values to replace.\n", - " rename (Dict[str, str]): A dictionary of columns to rename.\n", - " drop_cols (List[str]): A list of columns to drop.\n", - "\n", - " Returns:\n", - " pd.DataFrame: The processed DataFrame.\n", - " \"\"\"\n", - " if iso_column:\n", - " locations_code = pd.read_csv(\n", - " scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"),\n", - " keep_default_na=False,\n", - " na_values=[]\n", - " )\n", - " df = df.join(locations_code.set_index(\"code\"), on=iso_column, how=\"left\")\n", - " return (\n", - " df.replace(rep_d)\n", - " .rename(columns=rename)\n", - " .drop(columns=drop_cols)\n", - " .assign(\n", - " id=df.index + 1,\n", - " )\n", - " .set_index(\"id\")\n", - " )\n", - "\n", - "def set_child_id_pa(\n", - " df: pd.DataFrame | gpd.GeoDataFrame, columns: list[str] = [\"wdpa_pid\"]\n", - ") -> pd.DataFrame | gpd.GeoDataFrame:\n", - " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", - "\n", - "\n", - "def calculate_global_area_tpa(\n", - " df: pd.DataFrame,\n", - " gby_col: list,\n", - " agg_ops: Dict[str, str] = {\"protected_area\": \"sum\", \"1\": \"sum\", \"0\": \"sum\", \"protected_areas_count\": \"sum\"},\n", - " iso_column=\"iso_3\",\n", - ") -> pd.DataFrame:\n", - " global_area = df.groupby(gby_col).agg(agg_ops).reset_index().assign(**{iso_column: \"GLOB\"})\n", - " return pd.concat([global_area, df], ignore_index=True)\n", - "\n", - "def cumulative_pa_def_counts(df: pd.DataFrame, year_col: str = \"STATUS_YR\", pa_def_col: str = \"PA_DEF\", iso_col: str = \"iso_3\", start_year: int = 2010) -> pd.DataFrame:\n", - " \"\"\"\n", - " 
Calculate the cumulative number of PA_DEF values for each iso_3 and each year starting from a given year.\n", - "\n", - " Parameters:\n", - " df (pd.DataFrame): The DataFrame containing the data.\n", - " year_col (str): The column name for the year. Default is \"STATUS_YR\".\n", - " pa_def_col (str): The column name for the PA_DEF values. Default is \"PA_DEF\".\n", - " iso_col (str): The column name for the iso_3 values. Default is \"iso_3\".\n", - " start_year (int): The starting year for cumulative counts. Default is 2010.\n", - "\n", - " Returns:\n", - " pd.DataFrame: A DataFrame with cumulative counts of PA_DEF values for each iso_3 and each year.\n", - " \"\"\"\n", - " \n", - " results = []\n", - " years = sorted(df[year_col].unique())\n", - "\n", - " for year in years:\n", - " if year < start_year:\n", - " continue\n", - " cumulative_data = df[df[year_col] <= year]\n", - " pa_def_counts = cumulative_data.groupby([iso_col, pa_def_col]).size().unstack(fill_value=0)\n", - " pa_def_counts['year'] = year\n", - " results.append(pa_def_counts.reset_index())\n", - "\n", - " final_results = pd.concat(results, ignore_index=True)\n", - " final_results = final_results.fillna(0)\n", - " final_results = final_results.groupby([iso_col, 'year']).sum().reset_index()\n", - "\n", - " final_results['protected_areas_count'] = final_results['0'] + final_results['1']\n", - "\n", - " return final_results\n", - "\n", - "def calculate_global_area_tpa(\n", - " df: pd.DataFrame,\n", - " gby_col: list,\n", - " agg_ops: Dict[str, str] = {\"protected_area\": \"sum\", \"1\": \"sum\", \"0\": \"sum\", \"protected_areas_count\": \"sum\"},\n", - " iso_column=\"iso_3\",\n", - ") -> pd.DataFrame:\n", - " # Ensure the columns to be aggregated exist in the DataFrame\n", - " missing_cols = [col for col in agg_ops.keys() if col not in df.columns]\n", - " if missing_cols:\n", - " raise ValueError(f\"Missing columns in DataFrame: {missing_cols}\")\n", - " \n", - " # Group by the specified columns and 
aggregate using the provided operations\n", - " global_area = df.groupby(gby_col).agg(agg_ops).reset_index().assign(**{iso_column: \"GLOB\"})\n", - " \n", - " # Concatenate the global area DataFrame with the original DataFrame\n", - " return pd.concat([global_area, df], ignore_index=True)\n", - "\n", - "def calculate_stats_pa(\n", - " df: pd.DataFrame, gby_col: list, iso_column: str, ops: dict[str, str] = {\"protected_area\": \"sum\"}\n", - ") -> pd.DataFrame:\n", - " # Group by the specified columns and region, then aggregate\n", - " regions = (\n", - " df.groupby([*gby_col, \"region\"])\n", - " .agg(ops)\n", - " .reset_index()\n", - " .rename(columns={\"region\": iso_column})\n", - " )\n", - "\n", - " # Group by the specified columns and iso_column, then aggregate\n", - " countries = df.groupby([*gby_col, iso_column]).agg(ops).reset_index()\n", - "\n", - " # Concatenate the results\n", - " return pd.concat([regions, countries], ignore_index=True)\n", - "\n", - "def calculate_stats_cov_pa(df: pd.DataFrame, gby_col: list, iso_column: str):\n", - " return calculate_stats_pa(df, gby_col, iso_column, {\"protected_area\": \"sum\", \"protected_areas_count\": \"sum\", \"1\": \"sum\", \"0\": \"sum\"})\n", - "\n", - "\n", - "def add_region_iso2(\n", - " df: pd.DataFrame | gpd.GeoDataFrame, iso_column\n", - ") -> pd.DataFrame | gpd.GeoDataFrame:\n", - " \n", - " with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", - " regions = json.load(f)\n", - "\n", - " def find_region_iso(iso: str) -> Union[str, None]:\n", - " filtered_regions = list(filter(lambda x: iso in x[\"country_iso_3s\"], regions.get(\"data\")))\n", - " return filtered_regions[0][\"region_iso\"] if len(filtered_regions) > 0 else None\n", - "\n", - " return df.assign(region=lambda row: row[iso_column].apply(find_region_iso))\n", - "\n", - "def define_childs_ids(group) -> tuple:\n", - " if len(group) > 1:\n", - " parent_id = group[group.is_child.eq(False)].index.values[0]\n", - " 
children_ids = group[group.is_child.eq(True)].index.tolist()\n", - " return parent_id, children_ids\n", - " else:\n", - " return pd.NA, pd.NA\n", - "\n", - "def add_child_parent_relationship(\n", - " df: pd.DataFrame | gpd.GeoDataFrame,\n", - " gby: str = \"wdpaid\",\n", - " cols: list = [\"wdpaid\", \"wdpa_pid\", \"is_child\", \"data_source\"],\n", - ") -> pd.DataFrame | gpd.GeoDataFrame:\n", - " \n", - " # Get parent and children IDs for each group\n", - " groups = df.groupby(gby)[cols].apply(define_childs_ids)\n", - " \n", - " # Extract parent and children information\n", - " relationship_df = pd.DataFrame(\n", - " [[a, b] for a, b in groups.values], \n", - " columns=[\"parent\", \"children\"]\n", - " ).dropna(subset=[\"parent\"]).set_index(\"parent\")\n", - " \n", - " # Assign children IDs to the 'children' column\n", - " df[\"children\"] = pd.Series(relationship_df[\"children\"], index=relationship_df.index).reindex(df.index)\n", - " \n", - " # Assign parent IDs to the 'parent' column for the children\n", - " df[\"parent\"] = pd.NA \n", - " for parent, children in relationship_df.itertuples(index=True):\n", - " df.loc[children, \"parent\"] = parent\n", - " \n", - " return df\n", - "\n", - "\n", - "class NewProtectedAreaExtentSchema(pa.DataFrameModel):\n", - " id: Index[int] = pa.Field(gt=0, coerce=True)\n", - " location: Series[int] = pa.Field(gt=0, coerce=True)\n", - " protected_area: Series[float] = pa.Field(ge=0, coerce=True)\n", - " protected_areas_count: Series[int] = pa.Field(ge=0, coerce=True)\n", - " oecms: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", - " pas: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", - " coverage: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", - " global_contribution: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", - " year: Series[int] = pa.Field(ge=2000, coerce=True)\n", - " is_last_year: Series[int] = pa.Field(isin=[0, 1], coerce=True)\n", - " environment: Series[int] = pa.Field(isin=[1, 
2], coerce=True)\n", - "\n", - "class NewProtectionLevelSchema(pa.DataFrameModel):\n", - " id: Index[int] = pa.Field(gt=0, coerce=True)\n", - " location: Series[int] = pa.Field(gt=0, coerce=True)\n", - " mpaa_protection_level: Series[int] = pa.Field(ge=0, coerce=True)\n", - " year: Series[int] = pa.Field(gt=1900, coerce=True)\n", - " area: Series[float] = pa.Field(ge=0, coerce=True)\n", - " percentage: Series[float] = pa.Field(ge=0, le=100, coerce=True)\n", - "\n", - "class PAsSchema(pa.DataFrameModel):\n", - " id: Index[int] = pa.Field(gt=0, coerce=True)\n", - " wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", - " # child_id: Series[str] = pa.Field(coerce=True)\n", - " name: Series[str] = pa.Field(coerce=True)\n", - " year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True)\n", - " area: Series[float] = pa.Field(ge=0, coerce=True)\n", - " bbox: Series[List[float]] = pa.Field(coerce=True)\n", - " location: Series[int] = pa.Field(ge=0, coerce=True)\n", - " protection_status: Series[int] = pa.Field(ge=0, nullable=True)\n", - " mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", - " mpaa_protection_level: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", - " iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True)\n", - " designation: Series[str] = pa.Field(coerce=True, nullable=True)\n", - " parent: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", - " children: Series[List[int]] = pa.Field(coerce=True, nullable=True)\n", - " data_source: Series[int] = pa.Field(coerce=True)\n", - " coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True)\n", - " environment: Series[int] = pa.Field(isin=[1, 2], coerce=True)\n", - "\n", - "class PAsSchemaChunk1(pa.DataFrameModel):\n", - " id: Index[int] = pa.Field(gt=0, coerce=True)\n", - " wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)\n", - " # child_id: Series[str] = 
pa.Field(coerce=True)\n", - " name: Series[str] = pa.Field(coerce=True)\n", - " year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True)\n", - " area: Series[float] = pa.Field(ge=0, coerce=True)\n", - " bbox: Series[List[float]] = pa.Field(coerce=True)\n", - " location: Series[int] = pa.Field(ge=0, coerce=True)\n", - " protection_status: Series[int] = pa.Field(ge=0, nullable=True)\n", - " mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", - " mpaa_protection_level: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True)\n", - " iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True)\n", - " designation: Series[str] = pa.Field(coerce=True, nullable=True)\n", - " children: Series[List[int]] = pa.Field(coerce=True, nullable=True)\n", - " data_source: Series[int] = pa.Field(coerce=True)\n", - " coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True)\n", - " environment: Series[int] = pa.Field(isin=[1, 2], coerce=True)\n", - "\n", - "class PAsSchemaChunk2(pa.DataFrameModel):\n", - " id: Index[int] = pa.Field(gt=0, coerce=True)\n", - " parent: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -771,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -783,6 +373,16 @@ "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n" ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -798,65 +398,54 @@ "# Download the EEZ file && unzip it\n", "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", "# Download the mpas file && unzip it\n", - 
"download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)\n", - "\n", - "# Load the data\n", - "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)" + "download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)" ] }, { "cell_type": "code", - "execution_count": 131, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:11<00:00, 1.74s/it]\n" - ] - } - ], + "outputs": [], "source": [ - "eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas))" + "# # Load the data\n", + "# eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "# mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "\n", + "# # Join the eez data with the wdpa data\n", + "# eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas))" ] }, { "cell_type": "code", - "execution_count": 132, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [03:22<00:00, 14.49s/it]\n" - ] - } - ], + "outputs": [], "source": [ - "final_data = await process_mpa_data(\n", - " eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", - " range(2011, time.localtime().tm_year + 1),\n", - " [\"PA_DEF\", \"iso_3\"],\n", - " {\"protectedAreasCount\": \"sum\"},\n", - ")" + "# # Prepare the mpa data\n", + 
"# final_data = await process_mpa_data(\n", + "# eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", + "# range(2011, time.localtime().tm_year + 1),\n", + "# [\"PA_DEF\", \"iso_3\"],\n", + "# {\"protectedAreasCount\": \"sum\"},\n", + "# )\n", + "\n", + "# # Save the results\n", + "# final_data.to_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"), index=False)" ] }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "# save final data\n", - "final_data.to_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"), index=False)\n" + "# Load the results\n", + "final_data = pd.read_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"))" ] }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -903,7 +492,7 @@ " 29.0\n", " 0.00000\n", " 100.00000\n", - " 212881389.0\n", + " 212881389\n", " 996236.13\n", " 0.467977\n", " 0.275966\n", @@ -918,7 +507,7 @@ " 427.0\n", " 2.34192\n", " 97.65808\n", - " 14878058.0\n", + " 14878058\n", " 129790.94\n", " 0.872365\n", " 0.035953\n", @@ -935,24 +524,25 @@ "1 2010 AF 129790.939474 427.0 2.34192 97.65808 \n", "\n", " total_marine_area protected_area coverage global_contribution \\\n", - "0 212881389.0 996236.13 0.467977 0.275966 \n", - "1 14878058.0 129790.94 0.872365 0.035953 \n", + "0 212881389 996236.13 0.467977 0.275966 \n", + "1 14878058 129790.94 0.872365 0.035953 \n", "\n", " is_last_year environment \n", "0 0 1 \n", "1 0 1 " ] }, - "execution_count": 134, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Create coverage stats table\n", "final_data2 = final_data.copy()\n", "\n", "coverage = (\n", - " final_data2.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], {\"area\": \"sum\"}, \"iso_3\")\n", + " final_data2.pipe(calculate_global_area, 
[\"year\", \"PA_DEF\"], \"marine\", {\"area\": \"sum\"}, \"iso_3\")\n", " .pipe(separate_parent_iso, \"iso_3\")\n", " .pipe(add_region_iso, \"iso_3\")\n", " .replace(\n", @@ -970,25 +560,18 @@ " }\n", " }\n", " )\n", - " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\").astype({\"PA_DEF\": int})\n", - " .pipe(add_mpa_oecm_percentages)\n", - " .pipe(add_total_marine_area)\n", - " .pipe(coverage_stats2)\n", - " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\", environment='marine').astype({\"PA_DEF\": int})\n", + " .pipe(calculate_padef_percentages, 'marine')\n", + " .pipe(add_total_area, 'marine')\n", + " .pipe(coverage_stats)\n", + " .pipe(calculate_coverage_percentage)\n", " .pipe(calculate_global_contribution)\n", " .pipe(add_is_last_year)\n", " .pipe(add_environment)\n", ")\n", - "coverage.head(2)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "metadata": {}, - "outputs": [], - "source": [ - "NewProtectedAreaExtentSchema(\n", + "\n", + "# Create the output and save it\n", + "ProtectedAreaExtentSchema(\n", " coverage.pipe(\n", " output,\n", " \"iso_3\",\n", @@ -999,7 +582,10 @@ ").to_csv(\n", " output_file,\n", " index=True,\n", - ")" + ")\n", + "\n", + "\n", + "coverage.head(2)" ] }, { @@ -1016,6 +602,7 @@ } ], "source": [ + "# Upload the results to GCS\n", "remote_path = 'vizzuality_processed_data/strapi_tables/mpa_coverage.csv'\n", "\n", "writeReadGCP(\n", @@ -1076,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -1095,7 +682,7 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" ] }, - "execution_count": 118, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1226,7 +813,7 @@ } ], "source": [ - "# # test that we have not produce duplicates\n", + "# # Test existence of duplicates\n", "# 
sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", "# \"WDPA_PID\"\n", "# )" @@ -1236,53 +823,29 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "289352" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ + "# # Exclude \"proposed\" protected areas\n", "# sjoin_gdf = filter_by_exluding_propossed_mpas(sjoin_gdf)\n", - "# len(sjoin_gdf)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pyogrio._io:Created 289,352 records\n" - ] - } - ], - "source": [ - "# # Save the spatial join\n", + "\n", + "# # Save the results of the spatial join\n", "# sjoin_gdf.to_file(output_file_sjoin, driver=\"ESRI Shapefile\")" ] }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "# Load the data\n", "sjoin_gdf = gpd.read_file(output_file_sjoin)\n", "sjoin_gdf[\"STATUS_YR\"] = sjoin_gdf[\"STATUS_YR\"].astype(\"Int64\")" ] }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1424,13 +987,13 @@ "[2889 rows x 5 columns]" ] }, - "execution_count": 120, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# # Calculate wdpa cumulative counts and pa and oecm percentages\n", + "# Calculate wdpa cumulative counts and pa and oecm percentages\n", "cumulative_counts = cumulative_pa_def_counts(sjoin_gdf)\n", "cumulative_counts" ] @@ -1442,44 +1005,20 @@ "outputs": [], "source": [ "# # Dissolve geometries to calculate the coverage\n", - "# data = await process_grid(sjoin_gdf)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "# data = await process_grid(sjoin_gdf)\n", "# tpa = pd.concat(data, 
ignore_index=True).drop(columns=['STATUS_YR', 'index']).rename(columns={'area': 'protected_area'})\n", - "# tpa.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "\n", "# # Group by 'iso_3' and 'year' and sum the 'area'\n", "# tpa_grouped = tpa.groupby(['iso_3', 'year'], as_index=False)['protected_area'].sum()\n", "# tpa_grouped.reset_index(drop=True, inplace=True)\n", - "# tpa_grouped.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "\n", "# # save to csv\n", "# tpa_grouped.to_csv(output_file_dissolve, index=False)" ] }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -1552,19 +1091,20 @@ "4 AFG 2014 1078.918622" ] }, - "execution_count": 122, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Load dissolved data\n", "tpa_grouped = pd.read_csv(output_file_dissolve)\n", "tpa_grouped.head(5)" ] }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -1610,7 +1150,7 @@ " 7272.0\n", " 0.0\n", " 100.0\n", - " 29993094.71\n", + " 29993095\n", " 12.123827\n", " 2.694465\n", " 0\n", @@ -1624,7 +1164,7 @@ " 24782.0\n", " 0.0\n", " 100.0\n", - " 31625555.58\n", + " 31625556\n", " 6.486481\n", " 1.520053\n", " 0\n", @@ -1638,7 +1178,7 @@ " 2.0\n", " 0.0\n", " 100.0\n", - " 12088229.65\n", + " 12088230\n", " 0.000917\n", " 0.000082\n", " 0\n", @@ -1652,7 +1192,7 @@ " 116128.0\n", " 0.0\n", " 100.0\n", - " 30037571.37\n", + " 30037571\n", " 14.335645\n", " 3.190756\n", " 0\n", @@ -1666,7 +1206,7 @@ " 52176.0\n", " 0.0\n", " 100.0\n", - " 19371151.92\n", + " 19371152\n", " 10.357127\n", " 1.486642\n", " 0\n", @@ -1694,7 +1234,7 @@ " 15.0\n", " 0.0\n", " 100.0\n", - " 453741.18\n", + " 453741\n", " 1.133994\n", " 0.003813\n", " 1\n", @@ -1708,8 +1248,8 @@ " 1631.0\n", " 0.0\n", " 
100.0\n", - " 1221327.52\n", - " 9.365631\n", + " 1221328\n", + " 9.365627\n", " 0.084758\n", " 1\n", " 2\n", @@ -1722,8 +1262,8 @@ " 557.0\n", " 0.0\n", " 100.0\n", - " 753990.33\n", - " 38.857330\n", + " 753990\n", + " 38.857347\n", " 0.217095\n", " 1\n", " 2\n", @@ -1736,8 +1276,8 @@ " 8.0\n", " 0.0\n", " 100.0\n", - " 3314.08\n", - " 0.083884\n", + " 3314\n", + " 0.083886\n", " 0.000002\n", " 1\n", " 2\n", @@ -1750,8 +1290,8 @@ " 229.0\n", " 0.0\n", " 100.0\n", - " 391234.88\n", - " 28.019803\n", + " 391235\n", + " 28.019795\n", " 0.081230\n", " 1\n", " 2\n", @@ -1776,17 +1316,17 @@ "3008 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", "\n", " total_terrestrial_area coverage global_contribution is_last_year \\\n", - "0 29993094.71 12.123827 2.694465 0 \n", - "1 31625555.58 6.486481 1.520053 0 \n", - "2 12088229.65 0.000917 0.000082 0 \n", - "3 30037571.37 14.335645 3.190756 0 \n", - "4 19371151.92 10.357127 1.486642 0 \n", + "0 29993095 12.123827 2.694465 0 \n", + "1 31625556 6.486481 1.520053 0 \n", + "2 12088230 0.000917 0.000082 0 \n", + "3 30037571 14.335645 3.190756 0 \n", + "4 19371152 10.357127 1.486642 0 \n", "... ... ... ... ... 
\n", - "3004 453741.18 1.133994 0.003813 1 \n", - "3005 1221327.52 9.365631 0.084758 1 \n", - "3006 753990.33 38.857330 0.217095 1 \n", - "3007 3314.08 0.083884 0.000002 1 \n", - "3008 391234.88 28.019803 0.081230 1 \n", + "3004 453741 1.133994 0.003813 1 \n", + "3005 1221328 9.365627 0.084758 1 \n", + "3006 753990 38.857347 0.217095 1 \n", + "3007 3314 0.083886 0.000002 1 \n", + "3008 391235 28.019795 0.081230 1 \n", "\n", " environment \n", "0 2 \n", @@ -1804,29 +1344,29 @@ "[3009 rows x 11 columns]" ] }, - "execution_count": 123, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Add pa and oecm counts to the coverage table\n", + "# Create coverage stats table\n", "coverage = (\n", " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", - " .pipe(calculate_global_area_tpa, [\"year\"])\n", - " .pipe(add_region_iso2, \"iso_3\")\n", - " .pipe(calculate_stats_cov_pa, [\"year\"], \"iso_3\")\n", - " .pipe(calculate_pa_def_percentages)\n", - " .pipe(add_total_terrestrial_area)\n", - " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(calculate_global_area, [\"year\"], environment='terrestrial')\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .pipe(calculate_stats_cov, [\"year\"], \"iso_3\", environment= \"terrestrial\")\n", + " .pipe(calculate_padef_percentages, \"terrestrial\")\n", + " .pipe(add_total_area, \"terrestrial\")\n", + " .pipe(calculate_coverage_percentage)\n", " .pipe(calculate_global_contribution)\n", " .pipe(add_is_last_year)\n", " .pipe(add_environment)\n", ")\n", "\n", - "NewProtectedAreaExtentSchema(\n", + "ProtectedAreaExtentSchema(\n", " coverage.pipe(\n", - " output2,\n", + " output,\n", " \"iso_3\",\n", " {},\n", " {},\n", @@ -1854,6 +1394,7 @@ } ], "source": [ + "# Save the results in GCS\n", "remote_path = 'vizzuality_processed_data/strapi_tables/tpa_coverage.csv'\n", "\n", "writeReadGCP(\n", @@ -1869,12 +1410,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - 
"### Coverage stats - all" + "### Coverage stats - concatenate marine & terrestrial" ] }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -1895,17 +1436,7 @@ }, { "cell_type": "code", - "execution_count": 125, - "metadata": {}, - "outputs": [], - "source": [ - "tpa = pd.read_csv(input_path_tpas)\n", - "mpa = pd.read_csv(input_path_mpas)" - ] - }, - { - "cell_type": "code", - "execution_count": 126, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1969,26 +1500,26 @@ "1 2.694465 0 2 3 " ] }, - "execution_count": 126, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# concatenate the two dataframes\n", - "final_data = pd.concat([tpa, mpa], ignore_index=True)\n", - "final_data.index = range(1, len(final_data) + 1)\n", - "final_data['id'] = final_data.index\n", + "# Concatenate the marine and terrestrial data\n", + "final_data = process_final_coverage(input_path_tpas, input_path_mpas)\n", + "\n", + "# Filter the DataFrame to get the row where 'id' is 1\n", "final_data[final_data['id'] == 1]" ] }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "NewProtectedAreaExtentSchema(final_data).to_csv(output_file, index=True)" + "ProtectedAreaExtentSchema(final_data).to_csv(output_file, index=True)" ] }, { @@ -2005,6 +1536,7 @@ } ], "source": [ + "# Save the results in GCS\n", "remote_path = 'vizzuality_processed_data/strapi_tables/protection_coverage_stats.csv'\n", "\n", "writeReadGCP(\n", @@ -2040,7 +1572,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -2078,9 +1610,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:28<00:00, 9.89it/s]\n" + ] + } + ], "source": [ "eez_mpaatlas_data_join = await spatial_join(\n", " eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", @@ -2102,14 +1642,14 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:pyogrio._io:Created 54 records\n" + "INFO:pyogrio._io:Created 55 records\n" ] } ], @@ -2122,18 +1662,20 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ + "eez_mpaatlas_data_join2 = eez_mpaatlas_data_join.copy()\n", + "\n", "result = (\n", - " eez_mpaatlas_data_join.rename(columns={\"location_i\": \"iso_3\"})\n", + " eez_mpaatlas_data_join2.rename(columns={\"location_i\": \"iso_3\"})\n", " .pipe(process_mpaatlas_data) \n", - " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", - " .pipe(separate_parent_iso)\n", + " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\", environment = \"marine\")\n", + " .pipe(separate_parent_iso, iso_column=\"iso_3\")\n", " .replace(\n", " {\n", - " \"location_i\": {\n", + " \"iso_3\": {\n", " \"COK\": \"NZL\",\n", " \"IOT\": \"GBR\",\n", " \"NIU\": \"NZL\",\n", @@ -2148,7 +1690,7 @@ " .pipe(calculate_stats, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", " .query('protecti_1 != \"less protected or unknown\"')\n", " .pipe(fix_monaco, iso_column=\"iso_3\", area_column=\"area_km2\")\n", - " .pipe(add_total_marine_area)\n", + " .pipe(add_total_area, 'marine')\n", " .pipe(calculate_coverage_percentage_mpatlas)\n", " .pipe(\n", " output,\n", @@ -2163,7 +1705,7 @@ " )\n", ")\n", "\n", - "NewProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", + 
"ProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", " output_file, index=True\n", ")" ] @@ -2182,6 +1724,7 @@ } ], "source": [ + "# Save the results in GCS\n", "remote_path = 'vizzuality_processed_data/strapi_tables/mpaatlas_protection_level.csv'\n", "\n", "writeReadGCP(\n", @@ -2532,28 +2075,32 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Country mpas detail table data" + "### Country detail table data\n", + "The country detail table is done for marine and terrestrial independently and the results are concatenated." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - " 1- lower case the columns \n", - "2- separate location that its regime is in dispute or on join regime \n", - "3- calcualte area for mpaatlas data \n", - "4- rename columns for merge \n", - "5- merge maaatlas and mpa data identifying the source \n", - "6- identify child resources and set them as childs \n", - "7- calculate bbox \n", - "8- set child resources \n", - "9- prepare output for batch export \n", - "10- upload data to strapi " + "Methodology for marine:\n", + "\n", + "1- lower case the columns \n", + "2- separate location that its regime is in dispute or on join regime \n", + "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", + "4- calculate area for mpaatlas data \n", + "5- rename columns for merge \n", + "6- merge maaatlas and mpa data identifying the source \n", + "7- identify child resources and set them as childs \n", + "8- calculate bbox \n", + "9- set child resources \n", + "10- Add coverage percentage\n", + "11- Add environment marine\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -2565,16 +2112,6 @@ "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" ] - }, - { - "data": { - "text/plain": [ - 
"PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -2588,15 +2125,8 @@ "# Download the protected atlas file && unzip it\n", "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", + "\n", "# Load the data\n", "mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\")).pipe(\n", " clean_geometries\n", @@ -2608,10 +2138,28 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ + "# Load iucn categories\n", + "# iucn_cat = pd.DataFrame(\n", + "# {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", + "# index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", + "# )\n", + "# iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", + "iucn_cat = pd.read_csv(\n", + " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess marine tables (mpa and mpaatlas) and concatenate them\n", "init_table = (\n", " pd.concat(\n", " [\n", @@ -2664,34 +2212,16 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# to be run if things change a lot in the future\n", - "# iucn_cat = pd.DataFrame(\n", - "# {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", - 
"# index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", - "# )\n", - "# iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", - "\n", - "iucn_cat = pd.read_csv(\n", - " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:706: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1026: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", - "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:731: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1179: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", " df.replace(rep_d)\n" ] } @@ -2700,12 +2230,12 @@ "mpa_table = (\n", " init_table.pipe(add_bbox, \"bbox\")\n", " .pipe(define_is_child)\n", - " .pipe(set_child_id)\n", + " .pipe(set_child_id, 'marine')\n", " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", " .reset_index(drop=True)\n", - " .pipe(add_total_marine_area)\n", + " .pipe(add_total_area, 'marine')\n", " .rename(columns={\"area_km2\": \"protected_area\"})\n", - " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(calculate_coverage_percentage)\n", " .pipe(add_environment)\n", " .pipe(\n", " output,\n", @@ -2774,262 +2304,27 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
wdpaidwdpa_pidprotection_statusnamedesignationiucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxis_childchild_idcoverageenvironmentlocation
id
181701701Isla del CocoNational Park2202254819.0426323NaNNaN[-88.987016503, 4.529014728999982, -86.3670124...False1709.150798139.0
191701701Isla del Coco - Zona Minima IntervencionNational Park<NA>19781950.50000014.03.0[-87.29513967897267, 5.298053442111269, -86.82...True1700.325592139.0
201701701Isla del Coco - Zona Media IntervencionNational Park<NA>19780.93000014.03.0[-87.1038528170242, 5.492165352309547, -87.030...True1700.000155139.0
211701701Isla del Coco - Zona Baja IntervencionNational Park<NA>197870.72000014.03.0[-87.11119966572133, 5.482019746658279, -86.95...True1700.011805139.0
22170170.01Isla del Coco - 2022 ExpansionNational Park<NA>202155081.21000015.08.0[-88.987, 4.529, -86.367, 6.237]True170.09.194561139.0
\n", - "
" - ], - "text/plain": [ - " wdpaid wdpa_pid protection_status \\\n", - "id \n", - "18 170 170 1 \n", - "19 170 170 1 \n", - "20 170 170 1 \n", - "21 170 170 1 \n", - "22 170 170.0 1 \n", - "\n", - " name designation iucn_category \\\n", - "id \n", - "18 Isla del Coco National Park 2 \n", - "19 Isla del Coco - Zona Minima Intervencion National Park \n", - "20 Isla del Coco - Zona Media Intervencion National Park \n", - "21 Isla del Coco - Zona Baja Intervencion National Park \n", - "22 Isla del Coco - 2022 Expansion National Park \n", - "\n", - " year area data_source mpaa_establishment_stage \\\n", - "id \n", - "18 2022 54819.042632 3 NaN \n", - "19 1978 1950.500000 1 4.0 \n", - "20 1978 0.930000 1 4.0 \n", - "21 1978 70.720000 1 4.0 \n", - "22 2021 55081.210000 1 5.0 \n", - "\n", - " mpaa_protection_level bbox \\\n", - "id \n", - "18 NaN [-88.987016503, 4.529014728999982, -86.3670124... \n", - "19 3.0 [-87.29513967897267, 5.298053442111269, -86.82... \n", - "20 3.0 [-87.1038528170242, 5.492165352309547, -87.030... \n", - "21 3.0 [-87.11119966572133, 5.482019746658279, -86.95... 
\n", - "22 8.0 [-88.987, 4.529, -86.367, 6.237] \n", - "\n", - " is_child child_id coverage environment location \n", - "id \n", - "18 False 170 9.150798 1 39.0 \n", - "19 True 170 0.325592 1 39.0 \n", - "20 True 170 0.000155 1 39.0 \n", - "21 True 170 0.011805 1 39.0 \n", - "22 True 170.0 9.194561 1 39.0 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mpa_table[mpa_table[\"wdpaid\"] == 170]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Validate and save\n", - "# PAsSchema(mpa_table[mpa_table.location.notna()]).to_csv(output_file_mpas, index=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Country pas - detail table data" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ + "Methodology for terrestrial:\n", + "\n", "1- lower case the columns \n", "2- separate location that its regime is in dispute or on join regime \n", - "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", + "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", "4- rename columns for merge \n", "5- identify child resources and set them as childs \n", "6- calculate bbox \n", "7- set child resources \n", - "8- prepare output for batch export \n", - "9- upload data to strapi " + "8- Add coverage percentage \n", + "9- Add environment terrestrial \n", + "10- Add marine fields with nan " ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -3048,7 +2343,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -3057,7 +2352,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -3068,7 +2363,7 @@ }, { "cell_type": 
"code", - "execution_count": 15, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -3109,14 +2404,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3690547/1736513570.py:202: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1179: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", " df.replace(rep_d)\n" ] } @@ -3125,14 +2420,14 @@ "tpa_table = (\n", " init_table.pipe(add_bbox, \"bbox\")\n", " .pipe(define_is_child)\n", - " .pipe(set_child_id_pa)\n", + " .pipe(set_child_id, 'terrestrial')\n", " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", " .reset_index(drop=True)\n", - " .pipe(add_total_terrestrial_area)\n", - " .pipe(calculate_coverage_percentage_pa)\n", + " .pipe(add_total_area, 'terrestrial')\n", + " .pipe(calculate_coverage_percentage)\n", " .pipe(add_environment)\n", " .pipe(\n", - " output2,\n", + " output,\n", " iso_column=\"iso\",\n", " rep_d={\n", " \"pa_def\": {\"0\": 2, \"1\": 1},\n", @@ -3168,7 +2463,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -3177,48 +2472,37 @@ "tpa_table['mpaa_establishment_stage'] = np.nan" ] }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# # Validate and save\n", - "# 
PAsSchema(tpa_table[tpa_table.location.notna()]).to_csv(output_file_tpas, index=True)" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Country marine and terrestrial - Detail table" + "Concatenate marine and terrestrial tables\n", + "\n", + "1- Concatenate tables \n", + "2- Add parent and children columns \n", + "3- Sort by parent \n", + "4- Create batch export for all columns by parent (to handle relations when uploading in Strapi) \n", + "5- Create batch export only for column parent (to handle relations when uploading in Strapi)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "pipe_mar = \"mpa\"\n", - "pipe_ter = \"mpa-terrestrial\"\n", "pipe_pa = \"pa\"\n", "step = \"preprocess\"\n", "strapi_collection_pas = \"pa\"\n", "\n", - "\n", - "pipe_dir_mar = FileConventionHandler(pipe_mar)\n", - "pipe_dir_ter = FileConventionHandler(pipe_ter)\n", "pipe_dir_pa = FileConventionHandler(pipe_pa)\n", "\n", - "input_path_mar = pipe_dir_mar.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", - "input_path_ter = pipe_dir_ter.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", "output_file_pa = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -3230,50 +2514,7 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "306123" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(final_table)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note! When uploading the tables the schema doesn't work. I need to run the code to generate them and then it works." 
- ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], - "source": [ - "# # Create final table with all the data\n", - "# mpa_table2 = pd.read_csv(input_path_mar)\n", - "# tpa_table2 = pd.read_csv(input_path_ter)\n", - "# final_table = pd.concat([mpa_table2, tpa_table2])\n", - "# final_table.index = range(1, len(final_table) + 1)\n", - "# final_table.index.name = 'id'\n", - "# final_table.drop(columns=['id'], inplace=True)\n", - "# final_table.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -3282,7 +2523,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -3293,29 +2534,30 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "# batch_export(\n", - "# final_table1[final_table1.area.notna()],\n", - "# 4000,\n", - "# PAsSchemaChunk1,\n", - "# pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"),\n", - "# \"pa_detail\",\n", - "# format=\"json\",\n", - "# strapi_colection=strapi_collection_pas,\n", - "# )\n", - "\n", + "# Divide output in chunks to be uploaded to strapi\n", "batch_export(\n", - " final_table2,\n", - " 10000,\n", - " PAsSchemaChunk2,\n", - " pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"),\n", + " final_table1[final_table1.area.notna()],\n", + " 4000,\n", + " PAsSchemaChunk1,\n", + " pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"),\n", " \"pa_detail\",\n", " format=\"json\",\n", " strapi_colection=strapi_collection_pas,\n", - ")" + ")\n", + "\n", + "# batch_export(\n", + "# final_table2,\n", + "# 10000,\n", + "# PAsSchemaChunk2,\n", + "# pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"),\n", + "# \"pa_detail\",\n", + "# format=\"json\",\n", + "# strapi_colection=strapi_collection_pas,\n", + 
"# )" ] }, { @@ -3344,8 +2586,7 @@ } ], "source": [ - "# LOAD\n", - "## load zipped file to GCS\n", + "# Save zipped file in GCS\n", "writeReadGCP(\n", " credentials=mysettings.GCS_KEYFILE_JSON,\n", " bucket_name=mysettings.GCS_BUCKET,\n", @@ -3385,105 +2626,6 @@ "# )" ] }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "76" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Retrive the ids left out in the batch process\n", - "left_out_ids = range(4000, 306124, 4000)\n", - "left_out_rows = final_table.loc[left_out_ids]\n", - "len(left_out_rows)" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "# Import all cols but parent\n", - "left_out_rows1 = left_out_rows.drop(columns=['parent'])\n", - "\n", - "left_out_rows1 = left_out_rows1.reset_index()\n", - "left_out_rows1.index = left_out_rows1['id']\n", - "\n", - "output_file = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.json\")\n", - "left_out_rows1.to_json(output_file, orient=\"index\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "# Import all cols but parent\n", - "left_out_rows2 = left_out_rows[['parent']]\n", - "\n", - "left_out_rows2 = left_out_rows2.reset_index()\n", - "left_out_rows2.index = left_out_rows2['id']\n", - "\n", - "output_file = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.json\")\n", - "left_out_rows2.to_json(output_file, orient=\"index\")" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "# zip data\n", - "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.json\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.zip\"))\n", - 
"make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.json\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.zip\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "## load zipped file to GCS\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name='vizzuality_processed_data/strapi_tables/pa_left_out_rows1.zip',\n", - " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows1.zip\"),\n", - " operation=\"w\",\n", - ")\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name='vizzuality_processed_data/strapi_tables/pa_left_out_rows2.zip',\n", - " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"left_out_rows2.zip\"),\n", - " operation=\"w\",\n", - ")" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/data/notebooks/pipes_mock/tiles.ipynb b/data/notebooks/pipes_mock/tiles.ipynb index f687dfe2..0835ee3c 100644 --- a/data/notebooks/pipes_mock/tiles.ipynb +++ b/data/notebooks/pipes_mock/tiles.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -44,12 +44,12 @@ "from helpers.file_handler import FileConventionHandler\n", "from helpers.utils import download_and_unzip_if_needed, writeReadGCP\n", "\n", - "from pipelines.processors import clean_geometries" + "from pipelines.processors import clean_geometries, add_names_and_translations_to_regions, split_n_parts, create_color_map, 
hex_to_rgb" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -58,60 +58,6 @@ "current_step = \"tiles\"" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "regions_translation = {\n", - " 'Asia & Pacific': {\n", - " 'Spanish': 'Asia y Pacífico',\n", - " 'French': 'Asie et Pacifique',\n", - " 'LocationCode': 4\n", - " },\n", - " 'Africa': {\n", - " 'Spanish': 'África',\n", - " 'French': 'Afrique',\n", - " 'LocationCode': 3\n", - " },\n", - " 'Europe': {\n", - " 'Spanish': 'Europa',\n", - " 'French': 'Europe',\n", - " 'LocationCode': 6\n", - " },\n", - " 'Latin America & Caribbean': {\n", - " 'Spanish': 'América Latina y el Caribe',\n", - " 'French': 'Amérique latine et Caraïbes',\n", - " 'LocationCode': 8\n", - " },\n", - " 'Polar': {\n", - " 'Spanish': 'Polar',\n", - " 'French': 'Polaire',\n", - " 'LocationCode': 9\n", - " },\n", - " 'North America': {\n", - " 'Spanish': 'América del Norte',\n", - " 'French': 'Amérique du Nord',\n", - " 'LocationCode': 7\n", - " },\n", - " 'West Asia': {\n", - " 'Spanish': 'Asia Occidental',\n", - " 'French': 'Asie occidentale',\n", - " 'LocationCode': 9\n", - " },\n", - " 'Antartica': {\n", - " 'Spanish': 'Antártida',\n", - " 'French': 'Antarctique',\n", - " 'LocationCode': 5\n", - " }\n", - "}\n", - "\n", - "# Create a DataFrame from the translations dictionary\n", - "translations_df = pd.DataFrame.from_dict(regions_translation, orient='index').reset_index()\n", - "translations_df.columns = ['name', 'name_es', 'name_fr', 'location']" - ] - }, { "cell_type": "markdown", "metadata": { @@ -123,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -148,7 +94,7 @@ "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/eez_v11.mbtiles')" ] }, - "execution_count": 40, + "execution_count": 4, "metadata": {}, "output_type": 
"execute_result" } @@ -191,19 +137,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### EEZs: wdpa Regions" + "#### EEZs: Regions" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3609107/3960908248.py:34: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + "/tmp/ipykernel_3808474/1407804161.py:34: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", " ).to_file(\n", "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", " ogr_write(\n", @@ -220,7 +166,7 @@ "CompletedProcess(args='mapshaper-xl 16gb -i /home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess/eez_preprocess_regions.shp -dissolve2 fields=region_id -o /home/sofia/dev/skytruth-30x30/data/data/eez/processed/tiles/regions.json force format=geojson', returncode=0)" ] }, - "execution_count": 62, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -232,7 +178,7 @@ "eez_data = gpd.read_file(eez_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix())\n", "\n", "\n", - "with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + "with open(scripts_dir.joinpath('data_commons/data/regions_data.json'), 'r') as f:\n", " regions = json.load(f)\n", "\n", "regions_df = pd.DataFrame(\n", @@ -284,107 +230,7 @@ }, { "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometryregion_id
0MULTIPOLYGON (((-155.43933 -11.35762, -155.440...
1MULTIPOLYGON (((8.26354 -17.25, 8.25715 -17.25...AF
2MULTIPOLYGON (((19.38155 41.99554, 19.38144 41...EU
3MULTIPOLYGON (((56.37383 24.98043, 56.38053 24...WA
4MULTIPOLYGON (((-56.77653 -36.29604, -56.7764 ...SA
5POLYGON ((-180 -70.32232, -180 -84.36012, -179...AT
6MULTIPOLYGON (((102.56807 -8.87455, 102.56638 ...AS
7MULTIPOLYGON (((-141 73.39761, -141.01268 73.3...NA
\n", - "
" - ], - "text/plain": [ - " geometry region_id\n", - "0 MULTIPOLYGON (((-155.43933 -11.35762, -155.440... \n", - "1 MULTIPOLYGON (((8.26354 -17.25, 8.25715 -17.25... AF\n", - "2 MULTIPOLYGON (((19.38155 41.99554, 19.38144 41... EU\n", - "3 MULTIPOLYGON (((56.37383 24.98043, 56.38053 24... WA\n", - "4 MULTIPOLYGON (((-56.77653 -36.29604, -56.7764 ... SA\n", - "5 POLYGON ((-180 -70.32232, -180 -84.36012, -179... AT\n", - "6 MULTIPOLYGON (((102.56807 -8.87455, 102.56638 ... AS\n", - "7 MULTIPOLYGON (((-141 73.39761, -141.01268 73.3... NA" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with open(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"), 'r') as f:\n", - " data = json.load(f)\n", - "\n", - "gdf = gpd.GeoDataFrame.from_features(data['features'])\n", - "gdf" - ] - }, - { - "cell_type": "code", - "execution_count": 65, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -525,31 +371,28 @@ "8 Amérique du Nord " ] }, - "execution_count": 65, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Load the locations code CSV\n", - "locations_code = pd.read_csv(\n", - " scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"),\n", - " na_values=[\"\", \"NULL\", \"N/A\", \"NaN\"], # Exclude \"NA\" from being treated as NaN\n", - " keep_default_na=False # Prevent pandas from treating \"NA\" as NaN\n", - ")\n", + "# Load the JSON data\n", + "with open(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"), 'r') as f:\n", + " data = json.load(f)\n", + "\n", + "# Create a GeoDataFrame from the JSON data\n", + "gdf = gpd.GeoDataFrame.from_features(data['features'])\n", "\n", - "# Merge the regions data with the locations code\n", - "regions_df = gdf.merge(locations_code, how=\"left\", left_on=\"region_id\", right_on=\"code\").drop(columns=[\"code\"])\n", + "# Add names and translations to 
regions\n", + "gdf = add_names_and_translations_to_regions(gdf, scripts_dir)\n", "\n", - "# Merge the regions data with the translations\n", - "regions_df = regions_df.merge(translations_df, how=\"left\", on='location')\n", - "regions_df = regions_df.dropna(subset=['location'])\n", - "regions_df" + "gdf" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -563,7 +406,7 @@ ], "source": [ "# Save a geojson with extension json\n", - "regions_df.to_file(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(), driver=\"GeoJSON\")" + "gdf.to_file(eez_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(), driver=\"GeoJSON\")" ] }, { @@ -759,18 +602,41 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3808474/4011450268.py:35: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " ).to_file(\n", + "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", + " ogr_write(\n", + "Allocating 16 GB of heap memory\n", + "[dissolve2] Dissolved 204 features into 8 features\n", + "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json\n" + ] + }, + { + "data": { + "text/plain": [ + "CompletedProcess(args='mapshaper-xl 16gb -i /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess/gadm_preprocess_gadm_regions.shp -dissolve2 fields=region_id -o /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json force format=geojson', returncode=0)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipe = \"gadm\"\n", 
"gadm_dir = FileConventionHandler(pipe)\n", "collection_name = \"gadm_regions\"\n", "\n", - "# load the EEZ file & the regions file\n", "gadm_data = gpd.read_file(gadm_dir.get_step_fmt_file_path(prev_step, \"shp\").as_posix()).drop(columns=['name_es', 'name_fr'])\n", "\n", - "with open(scripts_dir.joinpath('data_commons/data/regions_data2.json'), 'r') as f:\n", + "with open(scripts_dir.joinpath('data_commons/data/regions_data.json'), 'r') as f:\n", " regions = json.load(f)\n", "\n", "\n", @@ -786,31 +652,6 @@ " ]\n", ")\n", "\n", - "# Load the locations code CSV\n", - "locations_code = pd.read_csv(\n", - " scripts_dir.joinpath(\"data_commons/data/locations_code_all.csv\"),\n", - " na_values=[\"\", \"NULL\", \"N/A\", \"NaN\"], # Exclude \"NA\" from being treated as NaN\n", - " keep_default_na=False # Prevent pandas from treating \"NA\" as NaN\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_3609107/976430064.py:13: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", - " ).to_file(\n", - "/home/sofia/miniforge3/envs/skytruth/lib/python3.12/site-packages/pyogrio/raw.py:709: RuntimeWarning: Normalized/laundered field name: 'location_id' to 'location_i'\n", - " ogr_write(\n" - ] - } - ], - "source": [ "# merge the two files\n", "gpd.GeoDataFrame(\n", " pd.merge(\n", @@ -828,36 +669,9 @@ " .joinpath(f\"{pipe}_{prev_step}_{collection_name}.shp\")\n", " .as_posix(),\n", " driver=\"ESRI Shapefile\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Allocating 16 GB of heap memory\n", - "[dissolve2] Dissolved 204 features into 8 features\n", - "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json\n" - ] - }, - { - "data": { - 
"text/plain": [ - "CompletedProcess(args='mapshaper-xl 16gb -i /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess/gadm_preprocess_gadm_regions.shp -dissolve2 fields=region_id -o /home/sofia/dev/skytruth-30x30/data/data/gadm/processed/tiles/gadm_regions.json force format=geojson', returncode=0)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# dissolve by region_id keeping the location, region_id, name, name_es, name_fr\n", + ")\n", + "\n", + "# dissolve by region_id \n", "Mapshaper(16).input(\n", " [\n", " gadm_dir.get_processed_step_path(prev_step)\n", @@ -873,217 +687,7 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometryregion_id
0MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ...AS
1MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,...AF
2MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1...EU
3MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ...WA
4MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61...SA
5MULTIPOLYGON (((-169.027 -83.619, -169.029 -83...AT
6MULTIPOLYGON (((-135.117 68.473, -135.119 68.4...NA
7MULTIPOLYGON (((-109.225 10.32, -109.227 10.32...
\n", - "
" - ], - "text/plain": [ - " geometry region_id\n", - "0 MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ... AS\n", - "1 MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,... AF\n", - "2 MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1... EU\n", - "3 MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ... WA\n", - "4 MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61... SA\n", - "5 MULTIPOLYGON (((-169.027 -83.619, -169.029 -83... AT\n", - "6 MULTIPOLYGON (((-135.117 68.473, -135.119 68.4... NA\n", - "7 MULTIPOLYGON (((-109.225 10.32, -109.227 10.32... " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import json\n", - "\n", - "with open(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"), 'r') as f:\n", - " data = json.load(f)\n", - "\n", - "gdf = gpd.GeoDataFrame.from_features(data['features'])\n", - "gdf\n" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometryregion_idlocation
0MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ...AS4.0
1MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,...AF3.0
2MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1...EU6.0
3MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ...WA9.0
4MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61...SA8.0
5MULTIPOLYGON (((-169.027 -83.619, -169.029 -83...AT5.0
6MULTIPOLYGON (((-135.117 68.473, -135.119 68.4...NA7.0
7MULTIPOLYGON (((-109.225 10.32, -109.227 10.32...NaN
\n", - "
" - ], - "text/plain": [ - " geometry region_id location\n", - "0 MULTIPOLYGON (((61.283 35.609, 61.277 35.613, ... AS 4.0\n", - "1 MULTIPOLYGON (((11.786 -16.78, 11.789 -16.775,... AF 3.0\n", - "2 MULTIPOLYGON (((19.278 40.505, 19.276 40.51, 1... EU 6.0\n", - "3 MULTIPOLYGON (((52.444 24.107, 52.444 24.108, ... WA 9.0\n", - "4 MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61... SA 8.0\n", - "5 MULTIPOLYGON (((-169.027 -83.619, -169.029 -83... AT 5.0\n", - "6 MULTIPOLYGON (((-135.117 68.473, -135.119 68.4... NA 7.0\n", - "7 MULTIPOLYGON (((-109.225 10.32, -109.227 10.32... NaN" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Merge the regions data with the locations code data\n", - "regions_df = gdf.merge(locations_code, how=\"left\", left_on=\"region_id\", right_on=\"code\").drop(columns=[\"code\"])\n", - "\n", - "regions_df" - ] - }, - { - "cell_type": "code", - "execution_count": 30, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1188,15 +792,6 @@ " América del Norte\n", " Amérique du Nord\n", " \n", - " \n", - " 8\n", - " MULTIPOLYGON (((-109.225 10.32, -109.227 10.32...\n", - " \n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", " \n", "\n", "" @@ -1211,7 +806,6 @@ "5 MULTIPOLYGON (((-68.688 -52.61, -68.688 -52.61... SA 8.0 \n", "6 MULTIPOLYGON (((-169.027 -83.619, -169.029 -83... AT 5.0 \n", "7 MULTIPOLYGON (((-135.117 68.473, -135.119 68.4... NA 7.0 \n", - "8 MULTIPOLYGON (((-109.225 10.32, -109.227 10.32... 
NaN \n", "\n", " name name_es \\\n", "0 Asia & Pacific Asia y Pacífico \n", @@ -1222,7 +816,6 @@ "5 Latin America & Caribbean América Latina y el Caribe \n", "6 Antartica Antártida \n", "7 North America América del Norte \n", - "8 NaN NaN \n", "\n", " name_fr \n", "0 Asie et Pacifique \n", @@ -1232,24 +825,31 @@ "4 Asie occidentale \n", "5 Amérique latine et Caraïbes \n", "6 Antarctique \n", - "7 Amérique du Nord \n", - "8 NaN " + "7 Amérique du Nord " ] }, - "execution_count": 30, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Merge the regions data with the translations data\n", - "regions_df = regions_df.merge(translations_df, how=\"left\", on='location')\n", - "regions_df" + "# Load the JSON data\n", + "with open(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\"), 'r') as f:\n", + " data = json.load(f)\n", + "\n", + "# Create a GeoDataFrame from the JSON data\n", + "gdf = gpd.GeoDataFrame.from_features(data['features'])\n", + "\n", + "# Add names and translations to regions\n", + "gdf = add_names_and_translations_to_regions(gdf, scripts_dir)\n", + "\n", + "gdf" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1262,9 +862,8 @@ } ], "source": [ - "# Drop row with location nan and save a geojson with extension json\n", - "regions_df = regions_df.dropna(subset=['location'])\n", - "regions_df.to_file(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(), driver=\"GeoJSON\")" + "# Save a geojson with extension json\n", + "gdf.to_file(gadm_dir.get_processed_step_path(current_step).joinpath(f\"{collection_name}.json\").as_posix(), driver=\"GeoJSON\")" ] }, { @@ -1426,20 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def split_n_parts(gdf: gpd.GeoDataFrame, folder: Path, n:int) -> None:\n", - " \n", - " for i in 
range(n):\n", - " path = folder.joinpath(f\"part{i}.shp\")\n", - " gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver=\"ESRI Shapefile\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -1451,7 +1037,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -1473,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -1490,7 +1076,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -1499,7 +1085,7 @@ "text": [ "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 5,748 intersections; 20 intersections could not be repaired\n", + "[simplify] Repaired 5,750 intersections; 20 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part0.json\n", "Allocating 32 GB of heap memory\n", @@ -1514,7 +1100,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part11.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 527 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 528 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part12.json\n", "Allocating 32 GB of heap memory\n", @@ -1549,12 +1135,12 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part18.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,405 intersections; 8 
intersections could not be repaired\n", + "[simplify] Repaired 3,403 intersections; 8 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part19.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 9,028 intersections; 56 intersections could not be repaired\n", + "[simplify] Repaired 9,030 intersections; 56 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part1.json\n", "Allocating 32 GB of heap memory\n", @@ -1584,7 +1170,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part24.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 977 intersections; 11 intersections could not be repaired\n", + "[simplify] Repaired 976 intersections; 11 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part25.json\n", "Allocating 32 GB of heap memory\n", @@ -1599,7 +1185,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part27.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,924 intersections; 110 intersections could not be repaired\n", + "[simplify] Repaired 2,922 intersections; 110 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part28.json\n", "Allocating 32 GB of heap memory\n", @@ -1649,12 +1235,12 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part36.json\n", 
"Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 702 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 700 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part37.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,473 intersections; 12 intersections could not be repaired\n", + "[simplify] Repaired 1,476 intersections; 12 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part38.json\n", "Allocating 32 GB of heap memory\n", @@ -1704,12 +1290,12 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part46.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,333 intersections; 205 intersections could not be repaired\n", + "[simplify] Repaired 2,332 intersections; 205 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part47.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,801 intersections; 92 intersections could not be repaired\n", + "[simplify] Repaired 1,803 intersections; 92 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part48.json\n", "Allocating 32 GB of heap memory\n", @@ -1724,7 +1310,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part4.json\n", "Allocating 32 GB of heap 
memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,880 intersections; 108 intersections could not be repaired\n", + "[simplify] Repaired 2,879 intersections; 108 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part50.json\n", "Allocating 32 GB of heap memory\n", @@ -1764,7 +1350,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part57.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 15,941 intersections; 148 intersections could not be repaired\n", + "[simplify] Repaired 15,944 intersections; 148 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part58.json\n", "Allocating 32 GB of heap memory\n", @@ -1774,7 +1360,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part59.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,661 intersections; 75 intersections could not be repaired\n", + "[simplify] Repaired 2,659 intersections; 75 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part5.json\n", "Allocating 32 GB of heap memory\n", @@ -1789,7 +1375,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part61.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 6,005 intersections; 396 intersections could not be repaired\n", + "[simplify] Repaired 6,003 intersections; 396 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 
features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part62.json\n", "Allocating 32 GB of heap memory\n", @@ -1809,7 +1395,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part65.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 13,933 intersections; 154 intersections could not be repaired\n", + "[simplify] Repaired 13,934 intersections; 154 intersections could not be repaired\n", "[clean] Retained 2,921 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part66.json\n", "Allocating 32 GB of heap memory\n", @@ -1824,17 +1410,17 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part68.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,373 intersections\n", + "[simplify] Repaired 1,372 intersections\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part69.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,617 intersections\n", + "[simplify] Repaired 1,615 intersections\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part6.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 6,889 intersections; 33 intersections could not be repaired\n", + "[simplify] Repaired 6,887 intersections; 33 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part70.json\n", "Allocating 32 GB of heap memory\n", @@ -1849,7 +1435,7 @@ "[o] Wrote 
/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part72.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,443 intersections; 80 intersections could not be repaired\n", + "[simplify] Repaired 1,445 intersections; 80 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part73.json\n", "Allocating 32 GB of heap memory\n", @@ -1859,7 +1445,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part74.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,008 intersections; 3 intersections could not be repaired\n", + "[simplify] Repaired 1,006 intersections; 3 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part75.json\n", "Allocating 32 GB of heap memory\n", @@ -1874,7 +1460,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part77.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,587 intersections; 48 intersections could not be repaired\n", + "[simplify] Repaired 2,585 intersections; 48 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part78.json\n", "Allocating 32 GB of heap memory\n", @@ -1884,7 +1470,7 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part79.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,223 intersections\n", + "[simplify] Repaired 1,226 intersections\n", 
"[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part7.json\n", "Allocating 32 GB of heap memory\n", @@ -1934,12 +1520,12 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part88.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 1,271 intersections; 92 intersections could not be repaired\n", + "[simplify] Repaired 1,275 intersections; 92 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part89.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 2,455 intersections; 12 intersections could not be repaired\n", + "[simplify] Repaired 2,453 intersections; 12 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part8.json\n", "Allocating 32 GB of heap memory\n", @@ -1949,12 +1535,12 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part90.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,454 intersections; 52 intersections could not be repaired\n", + "[simplify] Repaired 1,452 intersections; 52 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part91.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 747 intersections; 14 intersections could not be repaired\n", + "[simplify] Repaired 749 intersections; 14 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 
features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part92.json\n", "Allocating 32 GB of heap memory\n", @@ -1964,22 +1550,22 @@ "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part93.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 2,157 intersections; 35 intersections could not be repaired\n", + "[simplify] Repaired 2,153 intersections; 35 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part94.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 1,459 intersections; 16 intersections could not be repaired\n", + "[simplify] Repaired 1,461 intersections; 16 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part95.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 37,172 intersections; 7,581 intersections could not be repaired\n", + "[simplify] Repaired 37,173 intersections; 7,581 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part96.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,922 of 2,922 features\n", - "[simplify] Repaired 9,139 intersections; 1,597 intersections could not be repaired\n", + "[simplify] Repaired 9,138 intersections; 1,597 intersections could not be repaired\n", "[clean] Retained 2,922 of 2,922 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part97.json\n", "Allocating 32 GB of heap memory\n", @@ -1994,7 +1580,7 @@ "[o] 
Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part99.json\n", "Allocating 32 GB of heap memory\n", "[clean] Retained 2,923 of 2,923 features\n", - "[simplify] Repaired 3,395 intersections; 20 intersections could not be repaired\n", + "[simplify] Repaired 3,394 intersections; 20 intersections could not be repaired\n", "[clean] Retained 2,923 of 2,923 features\n", "[o] Wrote /home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/tiles/parts/part9.json\n" ] @@ -2007,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -2513,7 +2099,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -2536,9 +2122,17 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Styled raster saved at: /home/sofia/dev/skytruth-30x30/data/data/terrestrial-habitats/processed/preprocess/jung_etal_1km_styled.tif\n" + ] + } + ], "source": [ "# Define the land cover classes and their corresponding colors (as hex strings)\n", "land_cover_classes = {\n", @@ -2553,34 +2147,6 @@ " 255: \"#D3D3D3\" # Other\n", "}\n", "\n", - "# Function to convert hex color codes to RGB tuples\n", - "def hex_to_rgb(hex_color):\n", - " hex_color = hex_color.lstrip(\"#\") # Remove the '#' symbol\n", - " return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))\n", - "\n", - "# Create a color map from the land cover classes\n", - "def create_color_map(land_cover_classes):\n", - " color_map = np.zeros((256, 3), dtype=np.uint8) # 256 possible values (0-255)\n", - " for class_value, hex_color in land_cover_classes.items():\n", - " color_map[class_value] = hex_to_rgb(hex_color) # Convert hex to RGB\n", - " return color_map" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, 
- "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Styled raster saved at: /home/sofia/dev/skytruth-30x30/data/data/terrestrial-habitats/processed/preprocess/jung_etal_1km_styled.tif\n" - ] - } - ], - "source": [ - "# Open the raster file\n", "with rio.open(input_file) as src:\n", " band = src.read(1) # Read the first band\n", " profile = src.profile # Get the metadata\n", @@ -2591,7 +2157,7 @@ "band[band == 255] = np.nan # Set 255 values to NaN\n", "\n", "# Create the color map\n", - "color_map = create_color_map(land_cover_classes)\n", + "color_map = create_color_map(land_cover_classes, value_to_rgb_func=hex_to_rgb)\n", "\n", "# Create a colored image based on the band values, setting NaNs to a transparent color (for visualization only)\n", "colored_image = np.zeros((band.shape[0], band.shape[1], 3), dtype=np.uint8)\n", @@ -2818,6 +2384,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/data/src/data_commons/data/regions_translations.json b/data/src/data_commons/data/regions_translations.json new file mode 100644 index 00000000..69920d81 --- /dev/null +++ b/data/src/data_commons/data/regions_translations.json @@ -0,0 +1,42 @@ +{ + "Asia & Pacific": { + "name_es": "Asia y Pacífico", + "name_fr": "Asie et Pacifique", + "location": 4 + }, + "Africa": { + "name_es": "África", + "name_fr": "Afrique", + "location": 3 + }, + "Europe": { + "name_es": "Europa", + "name_fr": "Europe", + "location": 6 + }, + "Latin America & Caribbean": { + "name_es": "América Latina y el Caribe", + "name_fr": "Amérique latine et Caraïbes", + "location": 8 + }, + "Polar": { + "name_es": "Polar", + "name_fr": "Polaire", + "location": 9 + }, + 
"North America": { + "name_es": "América del Norte", + "name_fr": "Amérique du Nord", + "location": 7 + }, + "West Asia": { + "name_es": "Asia Occidental", + "name_fr": "Asie occidentale", + "location": 9 + }, + "Antartica": { + "name_es": "Antártida", + "name_fr": "Antarctique", + "location": 5 + } +} \ No newline at end of file diff --git a/data/src/data_commons/loader.py b/data/src/data_commons/loader.py index 699fc839..e329b93b 100644 --- a/data/src/data_commons/loader.py +++ b/data/src/data_commons/loader.py @@ -32,3 +32,10 @@ def load_regions(): def load_locations_code(): base = os.path.dirname(os.path.abspath(__file__)) return pd.read_csv(f"{base}/data/locations_code.csv", keep_default_na=False) + + +@lru_cache() +def load_locations_data() -> dict: + base = os.path.dirname(os.path.abspath(__file__)) + with open(f"{base}/data/locations.json", 'r') as f: + return json.load(f) diff --git a/data/src/pipelines/output_schemas.py b/data/src/pipelines/output_schemas.py index 364bcb7e..bf9638c6 100644 --- a/data/src/pipelines/output_schemas.py +++ b/data/src/pipelines/output_schemas.py @@ -20,11 +20,15 @@ class LocationSchema(pa.DataFrameModel): class ProtectedAreaExtentSchema(pa.DataFrameModel): id: Index[int] = pa.Field(gt=0, coerce=True) location: Series[int] = pa.Field(gt=0, coerce=True) - protection_status: Series[int] = pa.Field(gt=0, coerce=True) - cumSumProtectedArea: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815 - protectedArea: Series[float] = pa.Field(ge=0, coerce=True) # noqa: N815 - protectedAreasCount: Series[int] = pa.Field(ge=0, coerce=True) # noqa: N815 + protected_area: Series[float] = pa.Field(ge=0, coerce=True) + protected_areas_count: Series[int] = pa.Field(ge=0, coerce=True) + oecms: Series[float] = pa.Field(ge=0, le=100, coerce=True) + pas: Series[float] = pa.Field(ge=0, le=100, coerce=True) + coverage: Series[float] = pa.Field(ge=0, le=100, coerce=True) + global_contribution: Series[float] = pa.Field(ge=0, le=100, coerce=True) year: 
Series[int] = pa.Field(ge=2000, coerce=True) + is_last_year: Series[int] = pa.Field(isin=[0, 1], coerce=True) + environment: Series[int] = pa.Field(isin=[1, 2], coerce=True) class ProtectionLevelSchema(pa.DataFrameModel): @@ -33,6 +37,7 @@ class ProtectionLevelSchema(pa.DataFrameModel): mpaa_protection_level: Series[int] = pa.Field(ge=0, coerce=True) year: Series[int] = pa.Field(gt=1900, coerce=True) area: Series[float] = pa.Field(ge=0, coerce=True) + percentage: Series[float] = pa.Field(ge=0, le=100, coerce=True) class FPLSchema(pa.DataFrameModel): @@ -71,6 +76,51 @@ class MPAsSchema(pa.DataFrameModel): data_source: Series[int] = pa.Field(coerce=True) +class PAsSchema(pa.DataFrameModel): + id: Index[int] = pa.Field(gt=0, coerce=True) + wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True) + # child_id: Series[str] = pa.Field(coerce=True) + name: Series[str] = pa.Field(coerce=True) + year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True) + area: Series[float] = pa.Field(ge=0, coerce=True) + bbox: Series[List[float]] = pa.Field(coerce=True) + location: Series[int] = pa.Field(ge=0, coerce=True) + protection_status: Series[int] = pa.Field(ge=0, nullable=True) + mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True) + mpaa_protection_level: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True) + iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True) + designation: Series[str] = pa.Field(coerce=True, nullable=True) + parent: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True) + children: Series[List[int]] = pa.Field(coerce=True, nullable=True) + data_source: Series[int] = pa.Field(coerce=True) + coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True) + environment: Series[int] = pa.Field(isin=[1, 2], coerce=True) + + +class PAsSchemaChunk1(pa.DataFrameModel): + id: Index[int] = pa.Field(gt=0, coerce=True) + wdpaid: Series[pd.Int64Dtype] = pa.Field(coerce=True, 
nullable=True) + # child_id: Series[str] = pa.Field(coerce=True) + name: Series[str] = pa.Field(coerce=True) + year: Series[pd.Int32Dtype] = pa.Field(gt=1700, nullable=True) + area: Series[float] = pa.Field(ge=0, coerce=True) + bbox: Series[List[float]] = pa.Field(coerce=True) + location: Series[int] = pa.Field(ge=0, coerce=True) + protection_status: Series[int] = pa.Field(ge=0, nullable=True) + mpaa_establishment_stage: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True) + mpaa_protection_level: Series[pd.Int32Dtype] = pa.Field(ge=0, nullable=True, coerce=True) + iucn_category: Series[pd.Int32Dtype] = pa.Field(coerce=True, nullable=True) + designation: Series[str] = pa.Field(coerce=True, nullable=True) + children: Series[List[int]] = pa.Field(coerce=True, nullable=True) + data_source: Series[int] = pa.Field(coerce=True) + coverage: Series[float] = pa.Field(ge=0, le=100, nullable=True) + environment: Series[int] = pa.Field(isin=[1, 2], coerce=True) + +class PAsSchemaChunk2(pa.DataFrameModel): + id: Index[int] = pa.Field(gt=0, coerce=True) + parent: Series[pd.Int64Dtype] = pa.Field(coerce=True, nullable=True) + + class MPAsTableOTFSchema(pa.DataFrameModel): MRGID: Index[pd.Int64Dtype] = pa.Field(coerce=True) GEONAME: Series[str] = pa.Field(coerce=True) diff --git a/data/src/pipelines/processors.py b/data/src/pipelines/processors.py index 9b7fff42..5f1d7b48 100644 --- a/data/src/pipelines/processors.py +++ b/data/src/pipelines/processors.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, List, Union, Dict +from typing import Callable, List, Union, Dict, Tuple, Literal import pandas as pd import geopandas as gpd import numpy as np @@ -16,6 +16,7 @@ load_locations_code, load_iso_mapping, load_country_mapping, + load_locations_data, ) from pipelines.utils import background @@ -132,6 +133,13 @@ def split_by_year( return [prior_2010, after_2010] +def split_n_parts(gdf: gpd.GeoDataFrame, folder: Path, n:int) -> None: + + for i in 
range(n): + path = folder.joinpath(f"part{i}.shp") + gdf.iloc[i * len(gdf) // n : (i + 1) * len(gdf) // n].to_file(path, driver="ESRI Shapefile") + + def get_mpas(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame: mask1 = df["wdpa_id"].notna() mask2 = df["wdpa_id"] != "0" @@ -145,6 +153,154 @@ def set_fps_classes(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.G expand=False, ) +def add_total_area(df: pd.DataFrame, area_type: Literal['marine', 'terrestrial']) -> pd.DataFrame: + """ + Add total marine or terrestrial area to the DataFrame. + + Parameters: + - df (pd.DataFrame): The input DataFrame. + - area_type (Literal['marine', 'terrestrial']): The type of area to add ('marine' or 'terrestrial'). + + Returns: + - pd.DataFrame: The DataFrame with the added area column. + """ + # Load the locations data + locations_data = load_locations_data() + + # Access the nested dictionary + locations_dict = locations_data.get('data', {}).get('api::location.location', {}) + + # Create a lookup dictionary from the nested dictionary + if area_type == 'marine': + area_lookup = {item['code']: item['total_marine_area'] for item in locations_dict.values()} + area_column = 'total_marine_area' + elif area_type == 'terrestrial': + area_lookup = {item['code']: item['total_terrestrial_area'] for item in locations_dict.values()} + area_column = 'total_terrestrial_area' + else: + raise ValueError("Invalid area_type. Must be 'marine' or 'terrestrial'.") + + # Identify the column that contains the word 'iso' + iso_column = [col for col in df.columns if 'iso' in col][0] + + # Perform the mapping using the identified column + df[area_column] = df[iso_column].map(area_lookup) + + return df + +def change_ata_to_abnj(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame: + """ + Changes values in the parent_iso column from 'ATA' to 'ABNJ' as there is no 'ATA' stats in Protected Planet. 
+ """ + # Count the occurrences of 'ATA' + count_changes = df['parent_iso'].value_counts().get('ATA', 0) + + # Replace 'ATA' with 'ABNJ' + df['parent_iso'] = df['parent_iso'].replace('ATA', 'ABNJ') + + return df + +def calculate_padef_percentages(df: pd.DataFrame, environment: Literal['marine', 'terrestrial'], iso_col: str = "iso_3") -> pd.DataFrame: + """ + Calculate the percentages for each PA_DEF value based on the area type. + """ + if environment == 'marine': + # Calculate the total protectedAreasCount for each year and iso_3 + total_counts = df.groupby(['year', iso_col])['protectedAreasCount'].transform('sum') + + # Calculate the counts for PA_DEF == 0 and PA_DEF == 1 + df['oecm_count'] = df['protectedAreasCount'].where(df['PA_DEF'] == 0, 0) + df['pa_count'] = df['protectedAreasCount'].where(df['PA_DEF'] == 1, 0) + + # Calculate the percentages + df['oecms'] = df.groupby(['year', iso_col])['oecm_count'].transform('sum') / total_counts * 100 + df['pas'] = df.groupby(['year', iso_col])['pa_count'].transform('sum') / total_counts * 100 + + # Aggregate the results and fill NaN values with 0 + final_df = df.groupby(['year', iso_col]).agg( + area=('area', 'sum'), + protected_areas_count=('protectedAreasCount', 'sum'), + oecms=('oecms', 'first'), + pas=('pas', 'first') + ).reset_index().fillna(0) + + elif environment == 'terrestrial': + # Calculate the total protected_areas_count as the sum of '0' and '1' + df['protected_areas_count'] = df['0'] + df['1'] + + # Calculate the percentages + df['oecms'] = (df['0'] / df['protected_areas_count']) * 100 + df['pas'] = (df['1'] / df['protected_areas_count']) * 100 + + # Drop the columns '0' and '1' + df = df.drop(columns=['0', '1'], errors='ignore') + + # Fill NaN values with 0 + df = df.fillna(0) + + final_df = df + + else: + raise ValueError("Invalid area_type. 
Must be 'marine' or 'terrestrial'.") + + return final_df + +def calculate_coverage_percentage(df: pd.DataFrame) -> pd.DataFrame: + """ + Calculate the coverage percentage for protected areas. + """ + if 'total_marine_area' in df.columns: + df['coverage'] = (df['protected_area'] / df['total_marine_area']) * 100 + elif 'total_terrestrial_area' in df.columns: + df['coverage'] = (df['protected_area'] / df['total_terrestrial_area']) * 100 + else: + df['coverage'] = np.nan + + return df + +def calculate_coverage_percentage_mpatlas(df: pd.DataFrame) -> pd.DataFrame: + """ + Calculate the coverage percentage for MPAtlas. + """ + df['percentage'] = (df['area_km2'] / df['total_marine_area']) * 100 + return df + +def calculate_global_contribution(df: pd.DataFrame) -> pd.DataFrame: + """ + Calculate the global contribution for protected areas. + """ + if 'total_marine_area' in df.columns: + df['global_contribution'] = (df['protected_area'] / 361000000) * 100 + elif 'total_terrestrial_area' in df.columns: + df['global_contribution'] = (df['protected_area'] / 134954835) * 100 + else: + df['global_contribution'] = np.nan + return df + +def add_is_last_year(df: pd.DataFrame) -> pd.DataFrame: + """ + Add a column indicating if the row corresponds to the latest year for each iso_3. + """ + # Find the latest year for each iso_3 + latest_years = df.groupby('iso_3')['year'].transform('max') + + # Create the is_last_year column + df['is_last_year'] = (df['year'] == latest_years).astype(int) + + return df + +def add_environment(df: pd.DataFrame) -> pd.DataFrame: + """ + Add a column 'environment' based on the presence of 'total_marine_area' or 'total_terrestrial_area'. 
+ """ + if 'total_marine_area' in df.columns: + df['environment'] = 1 + elif 'total_terrestrial_area' in df.columns: + df['environment'] = 2 + else: + df['environment'] = 0 + + return df ### Iso processors def set_location_iso(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame: @@ -212,6 +368,7 @@ def find_region_iso(iso: str) -> Union[str, None]: return df.assign(region=lambda row: row[iso_column].apply(find_region_iso)) + def add_location_name(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame: iso_map = load_iso_mapping() @@ -222,16 +379,43 @@ def get_name(iso): return df.assign(name=df.iso.apply(get_name)) -def add_groups_and_members(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDataFrame: +def add_groups_and_members(df: pd.DataFrame | gpd.GeoDataFrame, location_type: str) -> pd.DataFrame | gpd.GeoDataFrame: + increment = 2 if location_type == "land" else 1 return df.assign( groups=lambda row: row[["region", "location_type"]].apply( - lambda x: (np.where(df.iso == x["region"])[0] + 1).tolist() + lambda x: (np.where(df.iso == x["region"])[0] + increment).tolist() if x["location_type"] == "country" else [], axis=1, ) ) +def add_names_and_translations_to_regions(gdf, scripts_dir): + # Load the locations code CSV + locations_code = pd.read_csv( + scripts_dir.joinpath("data_commons/data/locations_code.csv"), + na_values=["", "NULL", "N/A", "NaN"], # Exclude "NA" from being treated as NaN + keep_default_na=False # Prevent pandas from treating "NA" as NaN + ) + + # Load the regions translation JSON + with open(scripts_dir.joinpath('data_commons/data/regions_translations.json'), 'r') as f: + regions_translations = json.load(f) + + # Convert the JSON data into a DataFrame + translations_df = pd.DataFrame.from_dict(regions_translations, orient='index').reset_index() + translations_df.columns = ['name', 'name_es', 'name_fr', 'location'] + + # Merge the regions data with the locations code to add the 'location' 
field + regions_df = gdf.merge(locations_code, how="left", left_on="region_id", right_on="code").drop(columns=["code"]) + + # Merge the regions data with the translations + regions_df = regions_df.merge(translations_df[['location', 'name', 'name_es', 'name_fr']], on='location', how='left') + + # Drop rows where 'location' is NaN + regions_df = regions_df.dropna(subset=['location']) + + return regions_df ## Geometry processors @@ -263,9 +447,11 @@ def add_envelope(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return df.assign(geometry=lambda row: row["geometry"].envelope) -def add_bbox(df: gpd.GeoDataFrame, col_name: str = "bounds") -> gpd.GeoDataFrame: - return df.assign(**{col_name: df.geometry.bounds.apply(list, axis=1)}) +def round_to_list(bounds): + return list(np.round(bounds, decimals=5)) +def add_bbox(df: gpd.GeoDataFrame, col_name: str = "bounds") -> gpd.GeoDataFrame: + return df.assign(**{col_name: df.geometry.bounds.apply(round_to_list, axis=1)}) def calculate_area( df: gpd.GeoDataFrame, output_area_column="area_km2", round: None | int = 2 @@ -410,11 +596,28 @@ async def simplify_async(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: def calculate_global_area( df: pd.DataFrame, - gby_col: list, - agg_ops: Dict[str, str] = {"area_km2": "sum"}, - iso_column="location_i", + gby_col: List[str], + environment: Literal['marine', 'terrestrial'], + agg_ops: Union[Dict[str, str], None] = None, + iso_column: Union[str, None] = None, ) -> pd.DataFrame: - global_area = df.groupby([*gby_col]).agg(agg_ops).reset_index().assign(**{iso_column: "GLOB"}) + """ + Calculate the global area for marine or terrestrial areas. 
+ """ + if environment == 'marine': + if agg_ops is None: + agg_ops = {"area_km2": "sum"} + if iso_column is None: + iso_column = "location_i" + elif environment == 'terrestrial': + if agg_ops is None: + agg_ops = {"protected_area": "sum", "1": "sum", "0": "sum", "protected_areas_count": "sum"} + if iso_column is None: + iso_column = "iso_3" + else: + raise ValueError("Invalid environment. Must be 'marine' or 'terrestrial'.") + + global_area = df.groupby(gby_col).agg(agg_ops).reset_index().assign(**{iso_column: "GLOB"}) return pd.concat([global_area, df], ignore_index=True) @@ -468,6 +671,52 @@ def calculate_eez_area(df: pd.DataFrame) -> pd.DataFrame: return result.assign(id=result.index) +def calculate_gadm_area(df: pd.DataFrame) -> pd.DataFrame: + glob = gpd.GeoDataFrame( + { + "iso": "GLOB", + "AREA_KM2": 134954835, + "location_type": "worldwide", + "region": np.nan, + "geometry": gpd.GeoSeries([gpd.GeoSeries(df["geometry"]).unary_union]), + }, + crs="EPSG:4326", + ) + + terrestrial_areas = ( + df + .dissolve(by=["iso", "region"], aggfunc={"AREA_KM2": "sum"}) + .reset_index() + .assign(location_type="country") + ) + regions_areas = ( + df + .dissolve(by=["region"], aggfunc={"AREA_KM2": "sum"}) + .reset_index() + .rename(columns={"region": "iso"}) + .assign(location_type="region") + ) + result = ( + pd.concat( + [ + glob, + regions_areas, + terrestrial_areas, + ], + ignore_index=True, + ) + .dropna(subset=["iso"]) + .reset_index(drop=True) + ) + result.index = result.index + 1 + result.index.name = "id" + + # Round AREA_KM2 to integers + result["AREA_KM2"] = result["AREA_KM2"].round().astype(int) + + return result.assign(id=result.index) + + # TODO: check if this is still needed as we also have calculate_global_area def calculate_global_coverage(df: pd.DataFrame) -> pd.DataFrame: global_area = ( @@ -523,19 +772,33 @@ def calculate_stats( ) -def calculate_stats_cov(df: pd.DataFrame, gby_col: list, iso_column: str): - return calculate_stats(df, gby_col, iso_column, 
{"area": "sum", "protectedAreasCount": "sum"}) +def calculate_stats_cov( + df: pd.DataFrame, + gby_col: List[str], + iso_column: str, + environment: Literal['marine', 'terrestrial'] +) -> pd.DataFrame: + """ + Calculate the statistics for coverage based on the environment. + """ + if environment == 'marine': + agg_ops = {"area": "sum", "protectedAreasCount": "sum"} + return calculate_stats(df, gby_col, iso_column, agg_ops) + elif environment == 'terrestrial': + agg_ops = {"protected_area": "sum", "protected_areas_count": "sum", "1": "sum", "0": "sum"} + return calculate_stats(df, gby_col, iso_column, agg_ops) + else: + raise ValueError("Invalid area_type. Must be 'marine' or 'terrestrial'.") def coverage_stats( df: pd.DataFrame, area_col: str = "area", - sort_vals: List[str] = ["iso_3", "year", "PA_DEF"], + sort_vals: List[str] = ["iso_3", "year"], ) -> pd.DataFrame: """only relevant to get the coverage numbers for mpa""" return df.assign( - cumSumProtectedArea=df[area_col].round(2), - protectedArea=( + protected_area=( df.sort_values(by=sort_vals)[area_col] - df.sort_values(by=sort_vals) .groupby(sort_vals)[area_col] @@ -544,6 +807,16 @@ def coverage_stats( ).round(2), ) +def process_final_coverage(input_path_tpas: str, input_path_mpas: str) -> pd.DataFrame: + """ + Read TPAs and MPAs from CSV files, concatenate them, and add an 'id' column. + """ + tpa = pd.read_csv(input_path_tpas) + mpa = pd.read_csv(input_path_mpas) + final_data = pd.concat([tpa, mpa], ignore_index=True) + final_data.index = range(1, len(final_data) + 1) + final_data['id'] = final_data.index + return final_data # # TODO: check if this is still needed as we also have calculate_stats # def calculate_region_coverage(df: pd.DataFrame): @@ -587,33 +860,37 @@ async def process_mpa_data( ) -async def process_tpa_data( - gdf: gpd.GeoDataFrame, loop: list[int], by: list[str], aggfunc: dict -) -> pd.DataFrame: - """process protected planet data. 
relevant for acc coverage extent by year indicator.""" - # we split the data by =< year so we can acumulate the coverage - base = split_by_year(gdf) - - result_to_iter = pd.concat(base, ignore_index=True).copy() - - with tqdm(total=len(loop)) as pbar: # we create a progress bar - new_df = await asyncio.gather( - *(spatial_dissolve_chunk(year, result_to_iter, pbar, by, aggfunc) for year in loop) - ) - return pd.concat( - [base[0].pipe(calculate_area, "area", None).drop(columns=["geometry"]), *new_df], - ignore_index=True, - ) - - def process_mpaatlas_data(gdf: gpd.GeoDataFrame) -> pd.DataFrame: return ( - gdf.dissolve(by=["protecti_1", "location_i"], aggfunc={"name": "count"}) + gdf.dissolve(by=["protecti_1", "iso_3"], aggfunc={"name": "count"}) .reset_index() .pipe(calculate_area, "area_km2", None) .drop(columns=["geometry"]) ) +def cumulative_pa_def_counts(df: pd.DataFrame, year_col: str = "STATUS_YR", pa_def_col: str = "PA_DEF", iso_col: str = "iso_3", start_year: int = 2010) -> pd.DataFrame: + """ + Calculate the cumulative number of PA_DEF values for each iso_3 and each year starting from a given year. 
+ """ + + results = [] + years = sorted(df[year_col].unique()) + + for year in years: + if year < start_year: + continue + cumulative_data = df[df[year_col] <= year] + pa_def_counts = cumulative_data.groupby([iso_col, pa_def_col]).size().unstack(fill_value=0) + pa_def_counts['year'] = year + results.append(pa_def_counts.reset_index()) + + final_results = pd.concat(results, ignore_index=True) + final_results = final_results.fillna(0) + final_results = final_results.groupby([iso_col, 'year']).sum().reset_index() + + final_results['protected_areas_count'] = final_results['0'] + final_results['1'] + + return final_results ## MISC @@ -690,21 +967,173 @@ def add_child_parent_relationship( gby: str = "wdpaid", cols: list = ["wdpaid", "wdpa_pid", "is_child", "data_source"], ) -> pd.DataFrame | gpd.GeoDataFrame: - groups: pd.Series = df.groupby(gby)[cols].apply(define_childs_ids) - df["children"] = ( - pd.DataFrame([[a, b] for a, b in groups.values], columns=["parent", "children"]) - .dropna(subset=["parent"]) - .set_index("parent") - ) - + + # Get parent and children IDs for each group + groups = df.groupby(gby)[cols].apply(define_childs_ids) + + # Extract parent and children information + relationship_df = pd.DataFrame( + [[a, b] for a, b in groups.values], + columns=["parent", "children"] + ).dropna(subset=["parent"]).set_index("parent") + + # Assign children IDs to the 'children' column + df["children"] = pd.Series(relationship_df["children"], index=relationship_df.index).reindex(df.index) + + # Assign parent IDs to the 'parent' column for the children + df["parent"] = pd.NA + for parent, children in relationship_df.itertuples(index=True): + df.loc[children, "parent"] = parent + return df def set_child_id( - df: pd.DataFrame | gpd.GeoDataFrame, columns: list[str] = ["wdpa_pid", "mpa_zone_i"] -) -> pd.DataFrame | gpd.GeoDataFrame: + df: Union[pd.DataFrame, gpd.GeoDataFrame], + environment: Literal['marine', 'terrestrial'], + marine_columns: List[str] = ["wdpa_pid", 
"mpa_zone_i"], + terrestrial_columns: List[str] = ["wdpa_pid"] +) -> Union[pd.DataFrame, gpd.GeoDataFrame]: + """ + Set the child_id column based on the environment. + """ + if environment == 'marine': + columns = marine_columns + elif environment == 'terrestrial': + columns = terrestrial_columns + else: + raise ValueError("Invalid environment. Must be 'marine' or 'terrestrial'.") + return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]]) +def add_translations(df: pd.DataFrame, translations_csv_path: str) -> pd.DataFrame: + translations_df = pd.read_csv(translations_csv_path, keep_default_na=False, na_values=[]) + df = df.merge(translations_df[['code', 'name_es', 'name_fr']], left_on='iso', right_on='code', how='left') + return df + +def map_and_generate_ids(locations_land: pd.DataFrame, locations_marine: pd.DataFrame) -> pd.DataFrame: + # Create a lookup dictionary for IDs from EEZ data + id_lookup = locations_marine.set_index('code')['id'].to_dict() + + # Apply the EEZ IDs to the GADM dataset + locations_land['id'] = locations_land['code'].map(id_lookup) + + # Identify the NaN values in the id column + nan_mask = locations_land['id'].isna() + + # Generate new IDs for any GADM rows without an EEZ match + new_ids = pd.Series( + range(max(id_lookup.values()) + 1, max(id_lookup.values()) + 1 + nan_mask.sum()), + index=locations_land[nan_mask].index + ) + + # Assign the new IDs to the NaN values in the id column + locations_land['id'] = locations_land['id'].fillna(new_ids).astype(int) + + return locations_land + +def drop_unnecessary_columns(df: pd.DataFrame, columns_to_keep: List[str]) -> pd.DataFrame: + return df.drop(columns=[col for col in df.columns if col not in columns_to_keep]) + +def combine_and_clean_columns(df: pd.DataFrame) -> pd.DataFrame: + # Combine data from land and marine for each base column + base_columns = ['type', 'groups', 'name', 'name_es', 'name_fr'] + for base_col in base_columns: + marine_col = f"{base_col}_marine" + land_col = 
f"{base_col}_land" + df[base_col] = df[marine_col].combine_first(df[land_col]) + + # Fill NaN values with 0 for each column + columns_to_fill = ['total_marine_area', 'total_terrestrial_area'] + for col in columns_to_fill: + df[col] = df[col].fillna(0).astype(int) + + # Force the id column to be an integer + df['id'] = df['id'].astype(int) + + # Drop unnecessary columns + df = df.drop(columns=[col for col in df.columns if col.endswith('_marine') or col.endswith('_land')]) + + return df + +def process_and_merge_commitments(df: pd.DataFrame, commit: pd.DataFrame) -> pd.DataFrame: + # Filter and process the commitments + commit = commit.iloc[:, :6][commit['30% National Target'] == 'Y'] + commit.drop(columns=["% Fully/Highly*"], inplace=True) + commit['% National Target'] = commit['% National Target'].str.replace('%', '').astype(int) + + # When % National Target is 30, fill By Year with 2030 + commit['By Year'] = commit['By Year'].fillna(commit['% National Target'].apply(lambda x: '2030' if x == 30 else None)) + + # Merge the commitments into the combined_locations table + df = df.merge(commit[['Iso Code', '% National Target', 'By Year']], + left_on='code', right_on='Iso Code', how='left') + + df.rename(columns={'% National Target': 'marine_target', 'By Year': 'marine_target_year'}, inplace=True) + df.drop(columns=['Iso Code'], inplace=True) + + df['marine_target'] = df['marine_target'].astype(pd.Int64Dtype()) + df['marine_target_year'] = df['marine_target_year'].astype(pd.Int64Dtype()) + + # Add marine_target and marine_target_year to the combined_locations table for code 'GLOB' + df.loc[df['code'] == 'GLOB', 'marine_target'] = 30 + df.loc[df['code'] == 'GLOB', 'marine_target_year'] = 2030 + + return df + +def set_index_and_sort(df: pd.DataFrame) -> pd.DataFrame: + # Force the index to have the values in id column and sort by index + df['index'] = df['id'] + df.set_index('index', inplace=True) + df.sort_index(inplace=True) + return df + +## RASTER VISUALIZATION + +# 
Function to convert hex color codes to RGB tuples +def hex_to_rgb(hex_color: str) -> Tuple[int, int, int]: + """ + Convert a hex color code to an RGB tuple. + + Parameters: + - hex_color (str): Hex color code (e.g., "#FFFFFF"). + + Returns: + - Tuple[int, int, int]: RGB tuple. + """ + hex_color = hex_color.lstrip("#") # Remove the '#' symbol + if len(hex_color) != 6: + raise ValueError(f"Invalid hex color code: {hex_color}") + return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) + +def create_color_map( + class_color_dict: Dict[int, str], + num_values: int = 256, + value_to_rgb_func: Callable[[str], Tuple[int, int, int]] = hex_to_rgb +) -> np.ndarray: + """ + Create a color map from a dictionary mapping class values to colors. + + Parameters: + - class_color_dict (Dict[int, str]): Dictionary mapping class values to colors. + - num_values (int): Number of possible values (default is 256). + - value_to_rgb_func (Callable[[str], Tuple[int, int, int]]): Function to convert dictionary values to RGB. + + Returns: + - np.ndarray: Color map array. 
+ """ + if value_to_rgb_func is None: + raise ValueError("A function to convert values to RGB must be provided.") + + color_map = np.zeros((num_values, 3), dtype=np.uint8) # num_values possible values (0 to num_values-1) + + for class_value, color_value in class_color_dict.items(): + if not (0 <= class_value < num_values): + raise ValueError(f"Class value {class_value} is out of range (0 to {num_values-1}).") + color_map[class_value] = value_to_rgb_func(color_value) # Convert value to RGB + + return color_map + ## OUTPUT FUNCTIONS @@ -751,7 +1180,7 @@ def batch_export( prev = 0 if format == "csv": for idx, size in enumerate(range(batch_size, len(df.index) + batch_size, batch_size)): - schema(df[(df.index > prev) & (df.index < size)]).to_csv( + schema(df[(df.index >= prev) & (df.index < size)]).to_csv( folder.joinpath(f"{filename}_{idx}.csv"), index=True, encoding="utf-8", @@ -764,7 +1193,7 @@ def batch_export( "version": 2, "data": { f"api::{strapi_colection}.{strapi_colection}": schema( - df[(df.index > prev) & (df.index < size)] + df[(df.index >= prev) & (df.index < size)] ).to_dict(orient="index", index=True) }, } @@ -773,3 +1202,4 @@ def batch_export( prev = size else: raise ValueError("Invalid format") + From a4ca9a902d2c12a77a0662c959e675b8fb3ef609 Mon Sep 17 00:00:00 2001 From: sofia Date: Tue, 26 Nov 2024 11:22:12 +0100 Subject: [PATCH 15/16] add precalculations notebook, update processors and remove old notebooks --- data/notebooks/habitat.ipynb | 2281 ++++++++ data/notebooks/pipes_mock/intermediate.ipynb | 15 +- data/notebooks/pipes_mock/locations.ipynb | 23 + .../pipes_mock/precalculations.ipynb | 5184 +++++++++++++++++ data/src/pipelines/processors.py | 322 +- 5 files changed, 7765 insertions(+), 60 deletions(-) create mode 100644 data/notebooks/habitat.ipynb create mode 100644 data/notebooks/pipes_mock/precalculations.ipynb diff --git a/data/notebooks/habitat.ipynb b/data/notebooks/habitat.ipynb new file mode 100644 index 00000000..13f168f3 --- 
/dev/null +++ b/data/notebooks/habitat.ipynb @@ -0,0 +1,2281 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: The directory '/home/mambauser/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", + "\u001b[0mDefaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: openpyxl in /opt/conda/lib/python3.12/site-packages (3.1.5)\n", + "Requirement already satisfied: et-xmlfile in /opt/conda/lib/python3.12/site-packages (from openpyxl) (1.1.0)\n" + ] + } + ], + "source": [ + "!pip install openpyxl" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "path_in = \"../data/raw/\"\n", + "path_out = \"../data/processed/\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Country stats**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "cold = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/coldwatercorals.csv\")\n", + "salt = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/saltmarshes.csv\")\n", + "sea = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/seagrasses.csv\")\n", + "warm = 
pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/warmwatercorals.csv\")\n", + "glob = pd.read_excel(path_in + \"Ocean+HabitatsDownload_Global/global-stats.xlsx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "cold2 = cold[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", + "salt2 = salt[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", + "sea2 = sea[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", + "warm2 = warm[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove rows with '-' in 'protected_area' or 'total_area'\n", + "cold2 = cold2[~cold2['protected_area'].str.contains('-') & ~cold2['total_area'].str.contains('-')]\n", + "salt2 = salt2[~salt2['protected_area'].str.contains('-') & ~salt2['total_area'].str.contains('-')]\n", + "sea2 = sea2[~sea2['protected_area'].str.contains('-') & ~sea2['total_area'].str.contains('-')]\n", + "warm2 = warm2[~warm2['protected_area'].str.contains('-') & ~warm2['total_area'].str.contains('-')]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Replace 'ATA' with 'ABNJ' in 'location_id'\n", + "cold2 = cold2.replace('ATA', 'ABNJ')\n", + "salt2 = salt2.replace('ATA', 'ABNJ')\n", + "sea2 = sea2.replace('ATA', 'ABNJ')\n", + "warm2 = warm2.replace('ATA', 'ABNJ')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n", + " 'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n", + " 'TERRITORY2', 'ISO_TER2', 'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n", + " 'TERRITORY3', 'ISO_TER3', 
'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n", + " 'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n", + " 'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n", + " dtype='object')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Bring the eez file to get iso3 and parent_iso equivalences\n", + "eez = gpd.read_file(path_in + \"World_EEZ_v11_20191118/eez_v11.shp\")\n", + "eez.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize an empty dictionary\n", + "territory_iso = {}\n", + "\n", + "# Iterate over each row in the DataFrame\n", + "for index, row in eez.iterrows():\n", + " # Use TERRITORY1 as the key and ISO_SOV1 as the value\n", + " territory = row['ISO_TER1']\n", + " iso_sov = row['ISO_SOV1']\n", + " \n", + " # Check if the territory is not null or NaN\n", + " if pd.notnull(territory) and pd.notnull(iso_sov):\n", + " territory_iso[territory] = iso_sov\n", + "\n", + "# Add ABNJ\n", + "territory_iso['ABNJ'] = 'ABNJ' # Replace 'ABNJ_DEFAULT' with the desired default value\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_area
120UMI9.387856853621669.38785685362166
\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area\n", + "120 UMI 9.38785685362166 9.38785685362166" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cold2[cold2['location_id']=='UMI']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a mapping dictionary for ISO3-PARENT_ISO pairs and modify the 'location_id' column in the habitats dataframes\n", + "cold2['location_id'] = cold2['location_id'].map(territory_iso)\n", + "salt2['location_id'] = salt2['location_id'].map(territory_iso)\n", + "sea2['location_id'] = sea2['location_id'].map(territory_iso)\n", + "warm2['location_id'] = warm2['location_id'].map(territory_iso)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_area
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [location_id, protected_area, total_area]\n", + "Index: []" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cold2[cold2['location_id']=='UMI']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the 'protected_area' and 'total_area' columns to numeric\n", + "dataframes = [cold2, salt2, sea2, warm2]\n", + "\n", + "for df in dataframes:\n", + " df['protected_area'] = pd.to_numeric(df['protected_area'])\n", + " df['total_area'] = pd.to_numeric(df['total_area'])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Group by 'location_id' and calculate the sum of 'protected_area' and 'total_area'\n", + "cold2_grouped = cold2.groupby('location_id').sum().reset_index()\n", + "salt2_grouped = salt2.groupby('location_id').sum().reset_index()\n", + "sea2_grouped = sea2.groupby('location_id').sum().reset_index()\n", + "warm2_grouped = warm2.groupby('location_id').sum().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Add the 'habitat_name' column\n", + "cold2_grouped['habitat_name'] = 'cold-water corals'\n", + "salt2_grouped['habitat_name'] = 'saltmarshes'\n", + "sea2_grouped['habitat_name'] = 'seagrasses'\n", + "warm2_grouped['habitat_name'] = 'warm-water corals'" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ427.0485241893.871282cold-water corals2024
1AGO0.0000003.395671cold-water corals2024
\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year\n", + "0 ABNJ 427.048524 1893.871282 cold-water corals 2024\n", + "1 AGO 0.000000 3.395671 cold-water corals 2024" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the dataframes\n", + "habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped])\n", + "habitats['year'] = datetime.now().year\n", + "habitats.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Global stats**" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarsh111638.252564224435.075094GLOB2024
1seagrass74787.449960314001.940600GLOB2024
2warmwater-corals63259.499130149886.974126GLOB2024
4coldwater-corals4400.14084215336.975280GLOB2024
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 saltmarsh 111638.252564 224435.075094 GLOB 2024\n", + "1 seagrass 74787.449960 314001.940600 GLOB 2024\n", + "2 warmwater-corals 63259.499130 149886.974126 GLOB 2024\n", + "4 coldwater-corals 4400.140842 15336.975280 GLOB 2024" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global stats for habitats\n", + "habitats_global = glob[['habitat','protected_area', 'total_area']].rename(columns={'habitat': 'habitat_name'})\n", + "habitats_global['location_id'] = 'GLOB'\n", + "habitats_global['year'] = datetime.now().year\n", + "habitats_global = habitats_global[habitats_global['habitat_name'] != 'mangroves'] # remove mangroves\n", + "habitats_global" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarshes111638.252564224435.075094GLOB2024
1seagrasses74787.449960314001.940600GLOB2024
2warm-water corals63259.499130149886.974126GLOB2024
4cold-water corals4400.14084215336.975280GLOB2024
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 saltmarshes 111638.252564 224435.075094 GLOB 2024\n", + "1 seagrasses 74787.449960 314001.940600 GLOB 2024\n", + "2 warm-water corals 63259.499130 149886.974126 GLOB 2024\n", + "4 cold-water corals 4400.140842 15336.975280 GLOB 2024" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Change the name of the habitats to match the ones in the habitats dataframe\n", + "habitat_name_mapping = {\n", + " 'saltmarsh': 'saltmarshes',\n", + " 'seagrass': 'seagrasses',\n", + " 'warmwater-corals': 'warm-water corals',\n", + " 'coldwater-corals': 'cold-water corals'\n", + "}\n", + "habitats_global['habitat_name'] = habitats_global['habitat_name'].replace(habitat_name_mapping)\n", + "habitats_global" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['cold-water corals', 'saltmarshes', 'seagrasses',\n", + " 'warm-water corals'], dtype=object)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the global stats to the habitats dataframe\n", + "habitats = pd.concat([habitats, habitats_global])\n", + "habitats['habitat_name'].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Regions stats**" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", + "regions_data = [\n", + " {\n", + " 'region_iso': 'AS',\n", + " 'region_name': 'Asia & Pacific',\n", + " 'country_iso_3s': [\n", + " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", + " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", 
\"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", + " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", + " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'AF',\n", + " 'region_name': 'Africa',\n", + " 'country_iso_3s': [\n", + " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", + " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", + " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", + " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'EU',\n", + " 'region_name': 'Europe',\n", + " 'country_iso_3s': [\n", + " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", + " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", + " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", + " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", + " \"UZB\", \"VAT\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'SA',\n", + " 'region_name': 'Latin America & Caribbean',\n", + " 'country_iso_3s': [\n", + " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", 
\"COL\", \"CRI\",\n", + " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", + " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", + " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'PO',\n", + " 'region_name': 'Polar',\n", + " 'country_iso_3s': [\n", + " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", + " ]\n", + " },\n", + " {\n", + " 'region_iso': 'NA',\n", + " 'region_name': 'North America',\n", + " 'country_iso_3s': [\n", + " \"CAN\", \"SPM\", \"USA\"\n", + " ]\n", + " },\n", + " \n", + " {\n", + " 'region_iso': 'WA',\n", + " 'region_name': 'West Asia',\n", + " 'country_iso_3s': [\n", + " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", + " ]\n", + " }\n", + "]\n", + "\n", + "# Convert the region data to a dictionary that maps each country to its region name\n", + "country_to_region = {}\n", + "for region in regions_data:\n", + " for country in region['country_iso_3s']:\n", + " country_to_region[country] = region['region_iso']" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFcold-water corals29.477984377.6059592024
1AFsaltmarshes6688.70287919847.7574982024
2AFseagrasses6319.09949163472.0687922024
3AFwarm-water corals6591.34008315615.1936292024
4AScold-water corals428.3579481714.5593842024
5ASsaltmarshes11965.69391044702.8051872024
6ASseagrasses29091.313202123320.7277982024
7ASwarm-water corals41328.384526100117.4157922024
8EUcold-water corals2665.9295177307.5011172024
9EUsaltmarshes11399.88231818450.5500922024
10EUseagrasses9767.76058116552.5895552024
11EUwarm-water corals4357.9310189873.2102372024
12NAcold-water corals438.7397162393.7043412024
13NAsaltmarshes57209.60317687048.1644942024
14NAseagrasses8800.52079415860.8997572024
15NAwarm-water corals3824.8169394717.3880572024
16SAcold-water corals225.3435131406.8634662024
17SAsaltmarshes22969.81590635983.3927442024
18SAseagrasses16517.09766745847.4594122024
19SAwarm-water corals5425.03653412697.4709202024
20WAcold-water corals0.00000012.9707052024
21WAsaltmarshes1402.95876218398.0335362024
22WAseagrasses1053.44867325348.6062582024
23WAwarm-water corals547.9289574903.2303952024
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF cold-water corals 29.477984 377.605959 2024\n", + "1 AF saltmarshes 6688.702879 19847.757498 2024\n", + "2 AF seagrasses 6319.099491 63472.068792 2024\n", + "3 AF warm-water corals 6591.340083 15615.193629 2024\n", + "4 AS cold-water corals 428.357948 1714.559384 2024\n", + "5 AS saltmarshes 11965.693910 44702.805187 2024\n", + "6 AS seagrasses 29091.313202 123320.727798 2024\n", + "7 AS warm-water corals 41328.384526 100117.415792 2024\n", + "8 EU cold-water corals 2665.929517 7307.501117 2024\n", + "9 EU saltmarshes 11399.882318 18450.550092 2024\n", + "10 EU seagrasses 9767.760581 16552.589555 2024\n", + "11 EU warm-water corals 4357.931018 9873.210237 2024\n", + "12 NA cold-water corals 438.739716 2393.704341 2024\n", + "13 NA saltmarshes 57209.603176 87048.164494 2024\n", + "14 NA seagrasses 8800.520794 15860.899757 2024\n", + "15 NA warm-water corals 3824.816939 4717.388057 2024\n", + "16 SA cold-water corals 225.343513 1406.863466 2024\n", + "17 SA saltmarshes 22969.815906 35983.392744 2024\n", + "18 SA seagrasses 16517.097667 45847.459412 2024\n", + "19 SA warm-water corals 5425.036534 12697.470920 2024\n", + "20 WA cold-water corals 0.000000 12.970705 2024\n", + "21 WA saltmarshes 1402.958762 18398.033536 2024\n", + "22 WA seagrasses 1053.448673 25348.606258 2024\n", + "23 WA warm-water corals 547.928957 4903.230395 2024" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add regions field\n", + "habitats_regions = habitats.copy()\n", + "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "habitats_regions['year'] = datetime.now().year\n", + 
"habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "habitats_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate region statistics to the habitats dataframe\n", + "habitats = pd.concat([habitats, habitats_regions])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241893.871282cold-water corals2024marine
1AGO0.0000003.395671cold-water corals2024marine
\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year \\\n", + "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", + "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", + "\n", + " environment \n", + "0 marine \n", + "1 marine " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add environment\n", + "habitats['environment'] = 'marine'\n", + "habitats.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "habitats.to_csv(path_out + \"habitats/ocean+_processed.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process seamounts from [UN WCMC](https://data.unep-wcmc.org/datasets/41)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Read required data\n", + "seamounts = gpd.read_file(path_in + \"Seamounts/seamounts.shp\")\n", + "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", + "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", + "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep relevant fields in eez and hs and merge them in one dataframe\n", + "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", + "hs = hs[['geometry']]\n", + "hs['SOVEREIGN1'] = 'High Seas'\n", + "hs['ISO_SOV1'] = 'ABNJ'\n", + "eez_hs = eez.merge(hs, how='outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Join eez/highseas info to seamounts falling within eez polygons and drop those not associated with any of them\n", + "seamounts_eez = gpd.sjoin(seamounts, eez_hs, 
how=\"left\", predicate=\"within\")\n", + "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# Create new column \"iso\" with the iso_sov codes from eez/hs data\n", + "def concatenate_iso(row):\n", + " iso_list = [row['ISO_SOV1']]\n", + " if not pd.isna(row['ISO_SOV2']):\n", + " iso_list.append(row['ISO_SOV2'])\n", + " if not pd.isna(row['ISO_SOV3']):\n", + " iso_list.append(row['ISO_SOV3'])\n", + " return ';'.join(iso_list)\n", + "\n", + "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# Check which seamounts are protected\n", + "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", + "seamounts_wdpa['protection'] = \"no\" \n", + "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n", + "\n", + "# Keep relevant fields\n", + "seamounts_wdpa = seamounts_wdpa[['PEAKID', 'protection']]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PEAKIDisoAREA2Dprotectiongeometry
026000DNK982.028337noPOINT (2.76250 84.97974)
126157ABNJ348.473055noPOINT (9.14306 84.93529)
226158ABNJ367.540380noPOINT (9.18333 84.93807)
326228ABNJ299.443636noPOINT (8.74861 84.90751)
426229ABNJ309.588492noPOINT (8.88750 84.91307)
\n", + "
" + ], + "text/plain": [ + " PEAKID iso AREA2D protection geometry\n", + "0 26000 DNK 982.028337 no POINT (2.76250 84.97974)\n", + "1 26157 ABNJ 348.473055 no POINT (9.14306 84.93529)\n", + "2 26158 ABNJ 367.540380 no POINT (9.18333 84.93807)\n", + "3 26228 ABNJ 299.443636 no POINT (8.74861 84.90751)\n", + "4 26229 ABNJ 309.588492 no POINT (8.88750 84.91307)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge information about protection with seamounts_eez and keep only relevant fields\n", + "seamounts_eez_protection = seamounts_eez.merge(seamounts_wdpa, how=\"left\", on='PEAKID')[['PEAKID', 'iso', 'AREA2D', 'protection', 'geometry']]\n", + "seamounts_eez_protection.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PEAKIDisoAREA2Dprotectiongeometry
\n", + "
" + ], + "text/plain": [ + "Empty GeoDataFrame\n", + "Columns: [PEAKID, iso, AREA2D, protection, geometry]\n", + "Index: []" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# All seamounts that have iso \"ATA\" should have iso \"ABNJ\"\n", + "seamounts_eez_protection.loc[seamounts_eez_protection['iso']=='ATA', 'iso'] = 'ABNJ'\n", + "seamounts_eez_protection[seamounts_eez_protection['iso']=='ATA']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Global stats**" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nametotal_areaprotected_arealocation_idyear
0seamounts2.690810e+073.426630e+06GLOB2011
\n", + "
" + ], + "text/plain": [ + " habitat_name total_area protected_area location_id year\n", + "0 seamounts 2.690810e+07 3.426630e+06 GLOB 2011" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate the total_area (sum of AREA2D for all seamounts)\n", + "total_area = seamounts_eez_protection['AREA2D'].sum()\n", + "\n", + "# Calculate the protected_area (sum of AREA2D for seamounts where 'protection' is 'yes')\n", + "protected_area = seamounts_eez_protection.loc[seamounts_eez_protection['protection'] == 'yes', 'AREA2D'].sum()\n", + "\n", + "# Create a DataFrame with the results\n", + "seamounts_global = pd.DataFrame({\n", + " 'habitat_name': ['seamounts'],\n", + " 'total_area': [total_area],\n", + " 'protected_area': [protected_area],\n", + " 'location_id': ['GLOB'],\n", + " 'year': [2011]\n", + "})\n", + "\n", + "seamounts_global\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Country stats**" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the 'iso_code' values and create separate rows only for those with multiple values\n", + "mask = seamounts_eez_protection['iso'].str.contains(';', na=False)\n", + "split_rows = seamounts_eez_protection[mask].copy()\n", + "split_rows['iso'] = split_rows['iso'].str.split(';')\n", + "split_rows = split_rows.explode('iso')\n", + "\n", + "# Keep rows with single values in 'iso_code'\n", + "single_value_rows = seamounts_eez_protection[~mask]\n", + "\n", + "# Concatenate the exploded rows with the single value rows\n", + "seamounts_eez_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate the total_area (sum of AREA2D for all seamounts)\n", + "total_area_iso = seamounts_eez_iso.groupby(['iso']).agg({'AREA2D': 
'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n", + "\n", + "# Calculate the protected_area_iso (sum of AREA2D for seamounts where 'protection' is 'yes' grouped by 'iso')\n", + "protected_area_iso = seamounts_eez_iso.loc[seamounts_eez_iso['protection'] == 'yes'].groupby('iso')['AREA2D'].sum().reset_index().rename(columns={'AREA2D': 'protected_area'})" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idtotal_areaprotected_areahabitat_nameyear
0ABNJ1.518615e+07308819.904730seamounts2011
1AGO9.556242e+03NaNseamounts2011
2ARG3.110730e+05303902.727433seamounts2011
3ATG6.215895e+03NaNseamounts2011
4AUS4.772977e+05250507.827932seamounts2011
..................
87VNM4.421338e+04NaNseamounts2011
88VUT1.199475e+0543501.694036seamounts2011
89WSM4.117997e+04NaNseamounts2011
90YEM6.294974e+042487.428050seamounts2011
91ZAF9.946306e+0441753.168421seamounts2011
\n", + "

92 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " location_id total_area protected_area habitat_name year\n", + "0 ABNJ 1.518615e+07 308819.904730 seamounts 2011\n", + "1 AGO 9.556242e+03 NaN seamounts 2011\n", + "2 ARG 3.110730e+05 303902.727433 seamounts 2011\n", + "3 ATG 6.215895e+03 NaN seamounts 2011\n", + "4 AUS 4.772977e+05 250507.827932 seamounts 2011\n", + ".. ... ... ... ... ...\n", + "87 VNM 4.421338e+04 NaN seamounts 2011\n", + "88 VUT 1.199475e+05 43501.694036 seamounts 2011\n", + "89 WSM 4.117997e+04 NaN seamounts 2011\n", + "90 YEM 6.294974e+04 2487.428050 seamounts 2011\n", + "91 ZAF 9.946306e+04 41753.168421 seamounts 2011\n", + "\n", + "[92 rows x 5 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge total_area_iso and protected_area_iso DataFrames on 'iso'\n", + "seamounts_iso = total_area_iso.merge(protected_area_iso, how='left', on='iso').rename(columns={'iso': 'location_id'})\n", + "seamounts_iso['habitat_name'] = 'seamounts'\n", + "seamounts_iso['year'] = 2011\n", + "seamounts_iso" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Regions stats**" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFseamounts1.733576e+056.162351e+052011
1ASseamounts1.114013e+065.433433e+062011
2EUseamounts7.472441e+052.641119e+062011
3NAseamounts5.544910e+051.664794e+062011
4SAseamounts8.474488e+051.655552e+062011
5WAseamounts2.487428e+039.384765e+042011
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF seamounts 1.733576e+05 6.162351e+05 2011\n", + "1 AS seamounts 1.114013e+06 5.433433e+06 2011\n", + "2 EU seamounts 7.472441e+05 2.641119e+06 2011\n", + "3 NA seamounts 5.544910e+05 1.664794e+06 2011\n", + "4 SA seamounts 8.474488e+05 1.655552e+06 2011\n", + "5 WA seamounts 2.487428e+03 9.384765e+04 2011" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seamounts_regions = seamounts_iso.copy()\n", + "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "seamounts_regions['year'] = 2011\n", + "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "seamounts_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate region and global stats to seamounts_iso2\n", + "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Add environment\n", + "seamounts_all['environment'] = 'marine'" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "seamounts_all.to_csv(path_out + \"habitats/seamounts_processed.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process mangroves from GMW" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "mangroves = pd.read_csv(path_in + \"mangroves/mangroves.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + 
"outputs": [], + "source": [ + "# Change location_id to match parent_iso from eez\n", + "mangroves['location_id'] = mangroves['location_id'].map(territory_iso)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "mangroves_iso = mangroves.groupby('location_id').agg({\n", + " 'habitat_name': 'first', \n", + " 'year': 'first', \n", + " 'protected_area': 'sum', \n", + " 'total_area': 'sum' \n", + "}).reset_index()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
habitat_nameprotected_areatotal_arealocation_idyear
0mangroves61287.20375147358.990971GLOB2020
\n", + "
" + ], + "text/plain": [ + " habitat_name protected_area total_area location_id year\n", + "0 mangroves 61287.20375 147358.990971 GLOB 2020" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate global stats for mangroves\n", + "mangroves_global = mangroves_iso.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "mangroves_global['location_id'] = 'GLOB'\n", + "mangroves_global['year'] = 2020\n", + "mangroves_global" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate the global stats to the mangroves dataframe\n", + "mangroves_all = pd.concat([mangroves_iso, mangroves_global])" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitat_nameprotected_areatotal_areayear
0AFmangroves10000.5300029337.6440452020
1ASmangroves21277.2200074292.6731462020
2EUmangroves732.143751246.1896772020
3NAmangroves2097.740002415.4185572020
4SAmangroves27151.7400039893.4446082020
5WAmangroves27.83000173.6209382020
\n", + "
" + ], + "text/plain": [ + " location_id habitat_name protected_area total_area year\n", + "0 AF mangroves 10000.53000 29337.644045 2020\n", + "1 AS mangroves 21277.22000 74292.673146 2020\n", + "2 EU mangroves 732.14375 1246.189677 2020\n", + "3 NA mangroves 2097.74000 2415.418557 2020\n", + "4 SA mangroves 27151.74000 39893.444608 2020\n", + "5 WA mangroves 27.83000 173.620938 2020" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mangroves_regions = mangroves_iso.copy()\n", + "mangroves_regions['region'] = mangroves_iso['location_id'].map(country_to_region)\n", + "\n", + "# Calculate stats for each region\n", + "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", + "mangroves_regions['year'] = 2020\n", + "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", + "mangroves_regions" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate stats for regions with mangroves\n", + "mangroves_all = pd.concat([mangroves_all, mangroves_regions])" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "# Add environment\n", + "mangroves_all['environment'] = 'marine'" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "# Save file\n", + "mangroves_all.to_csv(path_out + \"habitats/mangroves_processed.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Concatenate all habitats" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241893.871282cold-water corals2024marine
1AGO0.0000003.395671cold-water corals2024marine
2ALB0.0000005.986479cold-water corals2024marine
3ARG6.98422661.826344cold-water corals2024marine
4ATG0.0000000.997747cold-water corals2024marine
.....................
1AS21277.22000074292.673146mangroves2020marine
2EU732.1437501246.189677mangroves2020marine
3NA2097.7400002415.418557mangroves2020marine
4SA27151.74000039893.444608mangroves2020marine
5WA27.830000173.620938mangroves2020marine
\n", + "

614 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year \\\n", + "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", + "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", + "2 ALB 0.000000 5.986479 cold-water corals 2024 \n", + "3 ARG 6.984226 61.826344 cold-water corals 2024 \n", + "4 ATG 0.000000 0.997747 cold-water corals 2024 \n", + ".. ... ... ... ... ... \n", + "1 AS 21277.220000 74292.673146 mangroves 2020 \n", + "2 EU 732.143750 1246.189677 mangroves 2020 \n", + "3 NA 2097.740000 2415.418557 mangroves 2020 \n", + "4 SA 27151.740000 39893.444608 mangroves 2020 \n", + "5 WA 27.830000 173.620938 mangroves 2020 \n", + "\n", + " environment \n", + "0 marine \n", + "1 marine \n", + "2 marine \n", + "3 marine \n", + "4 marine \n", + ".. ... \n", + "1 marine \n", + "2 marine \n", + "3 marine \n", + "4 marine \n", + "5 marine \n", + "\n", + "[614 rows x 6 columns]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the dataframes\n", + "habitats_all = pd.concat([habitats, seamounts_all, mangroves_all])\n", + "habitats_all" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241.893871e+03cold-water corals2024marine
0ABNJ0.0000006.335727e+03seagrasses2024marine
0ABNJ308819.9047301.518615e+07seamounts2011marine
\n", + "
" + ], + "text/plain": [ + " location_id protected_area total_area habitat_name year \\\n", + "0 ABNJ 427.048524 1.893871e+03 cold-water corals 2024 \n", + "0 ABNJ 0.000000 6.335727e+03 seagrasses 2024 \n", + "0 ABNJ 308819.904730 1.518615e+07 seamounts 2011 \n", + "\n", + " environment \n", + "0 marine \n", + "0 marine \n", + "0 marine " + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitats_all[habitats_all['location_id'] == 'ABNJ']" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "habitats_all.to_csv(path_out + \"tables/habitats6.csv\", index=False, na_rep='NaN', encoding='utf-8', sep=',', decimal='.')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebooks/pipes_mock/intermediate.ipynb b/data/notebooks/pipes_mock/intermediate.ipynb index 1d699873..68c43d89 100644 --- a/data/notebooks/pipes_mock/intermediate.ipynb +++ b/data/notebooks/pipes_mock/intermediate.ipynb @@ -1,12 +1,19 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data download and preprocessing\n", + "\n", + "This notebook handles data downloading and preprocessing, preparing it for use in tiles.ipynb, locations.ipynb, and/or precalculations.ipynb." + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# TODO: should we save every output as a [geoparquet](https://geoparquet.org/) in the future to improve read performance (reduction 30% read time)?" 
+ "### Set up" ] }, { diff --git a/data/notebooks/pipes_mock/locations.ipynb b/data/notebooks/pipes_mock/locations.ipynb index 8e79e5fe..c23cc592 100644 --- a/data/notebooks/pipes_mock/locations.ipynb +++ b/data/notebooks/pipes_mock/locations.ipynb @@ -1,5 +1,21 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Locations table for marine and terrestrial territories\n", + "\n", + "This notebook gathers and organizes all necessary data to generate the locations table, encompassing both marine and terrestrial territories. To be run after intermediate.ipynb and before precalculations.ipynb." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, { "cell_type": "code", "execution_count": 42, @@ -84,6 +100,13 @@ "current_step = \"stats\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create locations from EEZ and GADM" + ] + }, { "cell_type": "code", "execution_count": 3, diff --git a/data/notebooks/pipes_mock/precalculations.ipynb b/data/notebooks/pipes_mock/precalculations.ipynb new file mode 100644 index 00000000..de0ec359 --- /dev/null +++ b/data/notebooks/pipes_mock/precalculations.ipynb @@ -0,0 +1,5184 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pre-calculations for 30x30 widgets and tables data \n", + "This notebook performs the calculations needed for the data displayed in the widgets and tables of the 30x30 platform. It should be run after executing the intermediate.ipynb and locations.ipynb notebooks." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "import time\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import json\n", + "import dotenv\n", + "import sys\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import asyncio\n", + "from tqdm.asyncio import tqdm\n", + "from itertools import product\n", + "from shapely.geometry import box\n", + "\n", + "dotenv.load_dotenv()\n", + "\n", + "scripts_dir = Path(\".\").joinpath(\"src\")\n", + "if scripts_dir not in sys.path:\n", + " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", + "\n", + "from helpers.strapi import Strapi\n", + "from helpers.settings import get_settings, Settings\n", + "from helpers.file_handler import FileConventionHandler\n", + "from helpers.utils import download_and_unzip_if_needed, writeReadGCP, make_archive\n", + "\n", + "from pipelines.output_schemas import (\n", + " FPLSchema,\n", + " ProtectionLevelSchema,\n", + " PAsSchema,\n", + " HabitatsSchema,\n", + " LocationSchema,\n", + " ProtectedAreaExtentSchema,\n", + " PAsSchemaChunk1,\n", + " PAsSchemaChunk2,\n", + ")\n", + "from pipelines.processors import (\n", + " add_envelope,\n", + " add_location_iso,\n", + " expand_multiple_locations,\n", + " add_region_iso,\n", + " calculate_eez_area,\n", + " add_bbox,\n", + " add_groups_and_members,\n", + " add_location_name,\n", + " output,\n", + " clean_geometries,\n", + " filter_by_exluding_propossed_mpas,\n", + " spatial_join,\n", + " process_mpa_data,\n", + " assign_iso3,\n", + " calculate_global_area,\n", + " 
separate_parent_iso,\n", + " calculate_stats_cov,\n", + " coverage_stats,\n", + " mpaatlas_filter_stablishment,\n", + " process_mpaatlas_data,\n", + " calculate_stats,\n", + " fix_monaco,\n", + " batch_export,\n", + " calculate_area,\n", + " define_is_child,\n", + " set_child_id,\n", + " add_child_parent_relationship,\n", + " columns_to_lower,\n", + " extract_wdpaid_mpaatlas,\n", + " simplify_async,\n", + " get_matches,\n", + " repair_geometry, \n", + " arrange_dimensions,\n", + " add_total_area, \n", + " change_ata_to_abnj,\n", + " calculate_padef_percentages,\n", + " calculate_coverage_percentage,\n", + " calculate_coverage_percentage_mpatlas,\n", + " calculate_global_contribution,\n", + " add_is_last_year,\n", + " add_environment,\n", + " cumulative_pa_def_counts, \n", + " process_final_coverage,\n", + " process_grid\n", + " \n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mysettings = get_settings()\n", + "prev_step = \"preprocess\"\n", + "current_step = \"stats\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# # Strapi setup\n", + "# strapi = Strapi(url=mysettings.STRAPI_URL)\n", + "# strapi.login(jwt=mysettings.STRAPI_JWT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coverage stats - Marine Protected Areas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", + "The steps are:\n", + "1. Load eez\n", + "2. Spatial inner Join the eez dataset with the Mpas one\n", + "3. Assign the location iso\n", + "4. dissolve by location iso and cumulative year\n", + "5. calculate the area for global regions and eez countries\n", + "6. prepare the data to be ingested in strapi\n", + "7. 
upload the data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa\"\n", + "strapi_collection = \"\"\n", + "\n", + "pipe_dir_eez = FileConventionHandler(\"eez\")\n", + "pipe_dir_mpas = FileConventionHandler(pipe)\n", + "output_file = pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\n", + " \"mpa_coverage.csv\"\n", + ")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", + "# Download the mpas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [08:27<00:00, 1.80s/it]\n" + ] + } + ], + "source": [ + "# Load the data\n", + "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "\n", + "# Join the eez data with the wdpa data\n", + "eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas), environment=\"marine\")" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare the mpa data\n", + "final_data = await process_mpa_data(\n", + " eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", + " range(2011, time.localtime().tm_year + 1),\n", + " [\"PA_DEF\", \"iso_3\"],\n", + " {\"protectedAreasCount\": \"sum\"},\n", + ")\n", + "\n", + "# Save the results\n", + "final_data.to_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PA_DEFiso_3protectedAreasCountyeararea
00COL220103295.358429
10ESP;MAR120100.641148
20MAR92010205.459059
30PHL24201031956.310702
41ABNJ292010996236.123210
..................
1451VNM3120103357.704625
1461VUT5201018.148840
1471WSM2201099.018821
1481YEM520101410.245095
1491ZAF8720104462.398392
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " PA_DEF iso_3 protectedAreasCount year area\n", + "0 0 COL 2 2010 3295.358429\n", + "1 0 ESP;MAR 1 2010 0.641148\n", + "2 0 MAR 9 2010 205.459059\n", + "3 0 PHL 24 2010 31956.310702\n", + "4 1 ABNJ 29 2010 996236.123210\n", + ".. ... ... ... ... ...\n", + "145 1 VNM 31 2010 3357.704625\n", + "146 1 VUT 5 2010 18.148840\n", + "147 1 WSM 2 2010 99.018821\n", + "148 1 YEM 5 2010 1410.245095\n", + "149 1 ZAF 87 2010 4462.398392\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the results\n", + "final_data = pd.read_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"))\n", + "final_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yeariso_3areaprotected_areas_countoecmspastotal_marine_areaprotected_areacoverageglobal_contributionis_last_yearenvironment
02010ABNJ996236.12321029.00.00000100.00000212881389996236.120.4679770.27596611
12010AF129790.939457427.02.3419297.6580814878058129790.940.8723650.03595311
\n", + "
" + ], + "text/plain": [ + " year iso_3 area protected_areas_count oecms pas \\\n", + "0 2010 ABNJ 996236.123210 29.0 0.00000 100.00000 \n", + "1 2010 AF 129790.939457 427.0 2.34192 97.65808 \n", + "\n", + " total_marine_area protected_area coverage global_contribution \\\n", + "0 212881389 996236.12 0.467977 0.275966 \n", + "1 14878058 129790.94 0.872365 0.035953 \n", + "\n", + " is_last_year environment \n", + "0 1 1 \n", + "1 1 1 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create coverage stats table\n", + "final_data2 = final_data.copy()\n", + "\n", + "coverage = (\n", + " final_data2.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], \"marine\", {\"area\": \"sum\"}, \"iso_3\")\n", + " .pipe(separate_parent_iso, \"iso_3\")\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .replace(\n", + " {\n", + " \"iso_3\": {\n", + " \"ATA\": \"ABNJ\",\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " \"GIB\": \"GBR\",\n", + " }\n", + " }\n", + " )\n", + " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\", environment='marine').astype({\"PA_DEF\": int})\n", + " .pipe(calculate_padef_percentages, 'marine')\n", + " .pipe(add_total_area, 'marine')\n", + " .pipe(coverage_stats)\n", + " .pipe(calculate_coverage_percentage)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", + "\n", + "# Create the output and save it\n", + "ProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"area\", \"iso_3\", 'total_marine_area'],\n", + " )\n", + ").to_csv(\n", + " output_file,\n", + " index=True,\n", + ")\n", + "\n", + "\n", + "coverage.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { 
+ "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Upload the results to GCS\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/mpa_coverage.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi_collection = \"protection-coverage-stat\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 2300)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coverage stats - Terrestrial Protected Areas" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" + ] + }, + "execution_count": 14, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "step = \"preprocess\"\n", + "strapi_collection_mpas = \"mpa-terrestrial\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", + "\n", + "working_folder = FileConventionHandler(pipe)\n", + "input_path = working_folder.pipe_raw_path\n", + "temp_working_path = working_folder.get_temp_file_path(step)\n", + "output_file_sjoin = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")\n", + "output_file_dissolve = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_dissolve.csv\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_coverage.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the data\n", + "wdpa = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", + " clean_geometries\n", + ")\n", + "gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "\n", + "gadm.sindex\n", + "wdpa.sindex" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:notebook:Processing 286305 elements\n", + "INFO:notebook:Grid created with 433 cells\n", + "INFO:notebook:Grid split into 392 chunks\n", + "100%|███████████████████████████████████████████████████████████████████████████████████████████| 392/392 [12:49<00:00, 1.96s/it]\n" + ] + } + ], 
+ "source": [ + "# Spatial join using overlay\n", + "wdpa_subset = wdpa[\n", + " ~(\n", + " (wdpa.bounds.minx < -181)\n", + " | (wdpa.bounds.miny < -91)\n", + " | (wdpa.bounds.maxx > 181)\n", + " | (wdpa.bounds.maxy > 91)\n", + " )\n", + "].reset_index(drop=True)\n", + "\n", + "sjoin_gdf = await spatial_join(wdpa_subset, gadm, environment=\"terrestrial\")\n", + "sjoin_gdf.rename(columns={\"GID_0\": \"iso_3\"}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOCOUNTRYiso_3area_km2name_esname_frgeometry
\n", + "
" + ], + "text/plain": [ + "Empty GeoDataFrame\n", + "Columns: [WDPAID, WDPA_PID, PA_DEF, NAME, DESIG_ENG, IUCN_CAT, MARINE, GIS_AREA, STATUS, STATUS_YR, PARENT_ISO, COUNTRY, iso_3, area_km2, name_es, name_fr, geometry]\n", + "Index: []" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test existence of duplicates\n", + "sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", + " \"WDPA_PID\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyogrio._io:Created 289,352 records\n" + ] + } + ], + "source": [ + "# Exclude \"proposed\" protected areas\n", + "sjoin_gdf = filter_by_exluding_propossed_mpas(sjoin_gdf)\n", + "\n", + "# Save the results of the spatial join\n", + "sjoin_gdf.to_file(output_file_sjoin, driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the data\n", + "sjoin_gdf = gpd.read_file(output_file_sjoin)\n", + "sjoin_gdf[\"STATUS_YR\"] = sjoin_gdf[\"STATUS_YR\"].astype(\"Int64\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PA_DEFiso_3year10protected_areas_count
0AFG2010100.010.0
1AFG2011100.010.0
2AFG2012100.010.0
3AFG2013100.010.0
4AFG2014100.010.0
..................
2884ZWE20202290.0229.0
2885ZWE20212290.0229.0
2886ZWE20222290.0229.0
2887ZWE20232290.0229.0
2888ZWE20242290.0229.0
\n", + "

2889 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + "PA_DEF iso_3 year 1 0 protected_areas_count\n", + "0 AFG 2010 10 0.0 10.0\n", + "1 AFG 2011 10 0.0 10.0\n", + "2 AFG 2012 10 0.0 10.0\n", + "3 AFG 2013 10 0.0 10.0\n", + "4 AFG 2014 10 0.0 10.0\n", + "... ... ... ... ... ...\n", + "2884 ZWE 2020 229 0.0 229.0\n", + "2885 ZWE 2021 229 0.0 229.0\n", + "2886 ZWE 2022 229 0.0 229.0\n", + "2887 ZWE 2023 229 0.0 229.0\n", + "2888 ZWE 2024 229 0.0 229.0\n", + "\n", + "[2889 rows x 5 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate wdpa cumulative counts and pa and oecm percentages\n", + "cumulative_counts = cumulative_pa_def_counts(sjoin_gdf)\n", + "cumulative_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dissolve geometries to calculate the coverage\n", + "data = await process_grid(sjoin_gdf, \"terrestrial\")\n", + "\n", + "# Concatenate the data in a single dataframe\n", + "tpa = pd.concat(data, ignore_index=True).drop(columns=['index']).rename(columns={'area': 'protected_area'})\n", + "\n", + "# Group by 'iso_3' and 'year' and sum the 'area'\n", + "tpa_grouped = tpa.groupby(['iso_3', 'year'], as_index=False)['protected_area'].sum()\n", + "tpa_grouped.reset_index(drop=True, inplace=True)\n", + "\n", + "# save to csv\n", + "tpa_grouped.to_csv(output_file_dissolve, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3yearprotected_area
0AFG20101078.918622
1AFG20111078.918622
2AFG20121078.918622
3AFG20131078.918622
4AFG20141078.918622
\n", + "
" + ], + "text/plain": [ + " iso_3 year protected_area\n", + "0 AFG 2010 1078.918622\n", + "1 AFG 2011 1078.918622\n", + "2 AFG 2012 1078.918622\n", + "3 AFG 2013 1078.918622\n", + "4 AFG 2014 1078.918622" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load dissolved data\n", + "tpa_grouped = pd.read_csv(output_file_dissolve)\n", + "tpa_grouped.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yeariso_3protected_areaprotected_areas_countoecmspastotal_terrestrial_areacoverageglobal_contributionis_last_yearenvironment
02010AF3.636311e+067272.00.0100.02999309512.1238272.69446502
12010AS2.051386e+0624782.00.0100.0316255566.4864811.52005302
22010AT1.108333e+022.00.0100.0120882300.0009170.00008202
32010EU4.306080e+06116128.00.0100.03003757114.3356453.19075602
42010NA2.044176e+0652176.00.0100.01937115210.5526831.51471102
....................................
30042024YEM5.145397e+0315.00.0100.04537411.1339940.00381312
30052024ZAF1.143850e+051631.00.0100.012213289.3656270.08475812
30062024ZMB2.929805e+05557.00.0100.075399038.8573470.21709512
30072024ZNC2.779983e+008.00.0100.033140.0838860.00000212
30082024ZWE1.096232e+05229.00.0100.039123528.0197950.08123012
\n", + "

3009 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " year iso_3 protected_area protected_areas_count oecms pas \\\n", + "0 2010 AF 3.636311e+06 7272.0 0.0 100.0 \n", + "1 2010 AS 2.051386e+06 24782.0 0.0 100.0 \n", + "2 2010 AT 1.108333e+02 2.0 0.0 100.0 \n", + "3 2010 EU 4.306080e+06 116128.0 0.0 100.0 \n", + "4 2010 NA 2.044176e+06 52176.0 0.0 100.0 \n", + "... ... ... ... ... ... ... \n", + "3004 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", + "3005 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", + "3006 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", + "3007 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", + "3008 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", + "\n", + " total_terrestrial_area coverage global_contribution is_last_year \\\n", + "0 29993095 12.123827 2.694465 0 \n", + "1 31625556 6.486481 1.520053 0 \n", + "2 12088230 0.000917 0.000082 0 \n", + "3 30037571 14.335645 3.190756 0 \n", + "4 19371152 10.552683 1.514711 0 \n", + "... ... ... ... ... \n", + "3004 453741 1.133994 0.003813 1 \n", + "3005 1221328 9.365627 0.084758 1 \n", + "3006 753990 38.857347 0.217095 1 \n", + "3007 3314 0.083886 0.000002 1 \n", + "3008 391235 28.019795 0.081230 1 \n", + "\n", + " environment \n", + "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 2 \n", + "4 2 \n", + "... ... 
\n", + "3004 2 \n", + "3005 2 \n", + "3006 2 \n", + "3007 2 \n", + "3008 2 \n", + "\n", + "[3009 rows x 11 columns]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create coverage stats table\n", + "coverage = (\n", + " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", + " .pipe(calculate_global_area, [\"year\"], environment='terrestrial')\n", + " .pipe(add_region_iso, \"iso_3\")\n", + " .pipe(calculate_stats_cov, [\"year\"], \"iso_3\", environment= \"terrestrial\")\n", + " .pipe(calculate_padef_percentages, \"terrestrial\")\n", + " .pipe(add_total_area, \"terrestrial\")\n", + " .pipe(calculate_coverage_percentage)\n", + " .pipe(calculate_global_contribution)\n", + " .pipe(add_is_last_year)\n", + " .pipe(add_environment)\n", + ")\n", + "\n", + "ProtectedAreaExtentSchema(\n", + " coverage.pipe(\n", + " output,\n", + " \"iso_3\",\n", + " {},\n", + " {},\n", + " [\"iso_3\", 'total_terrestrial_area'],\n", + " )\n", + ").to_csv(\n", + " output_file_tpas,\n", + " index=True,\n", + ")\n", + "\n", + "coverage" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Save the results in GCS\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/tpa_coverage.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file_tpas,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coverage stats - Concatenate Marine & Terrestrial" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"pa\"\n", + "pipe_tpa = 
\"mpa-terrestrial\"\n", + "pipe_mpa = \"mpa\"\n", + "step = \"preprocess\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_tpa = FileConventionHandler(pipe_tpa)\n", + "pipe_dir_mpa = FileConventionHandler(pipe_mpa)\n", + "\n", + "input_path_tpas = pipe_dir_tpa.get_processed_step_path(current_step).joinpath(\"tpa_coverage.csv\")\n", + "input_path_mpas = pipe_dir_mpa.get_processed_step_path(current_step).joinpath(\"mpa_coverage.csv\")\n", + "\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"protection_coverage_stats.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idyearprotected_areaprotected_areas_countoecmspascoverageglobal_contributionis_last_yearenvironmentlocation
1120103.636311e+0672720.0100.012.1238272.694465023
\n", + "
" + ], + "text/plain": [ + " id year protected_area protected_areas_count oecms pas coverage \\\n", + "1 1 2010 3.636311e+06 7272 0.0 100.0 12.123827 \n", + "\n", + " global_contribution is_last_year environment location \n", + "1 2.694465 0 2 3 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the marine and terrestrial data\n", + "final_data = process_final_coverage(input_path_tpas, input_path_mpas)\n", + "\n", + "# Filter the DataFrame to get the row where 'id' is 1\n", + "final_data[final_data['id'] == 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "ProtectedAreaExtentSchema(final_data).to_csv(output_file, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Save the results in GCS\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/protection_coverage_stats.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MPAtlas - Marine Conservation Protection Levels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", + "The steps are:\n", + "1. Load eez\n", + "2. Spatial inner Join the eez dataset with the Mpaatlas one\n", + "3. iso assign using the sovereign one provided by mpaatlas\n", + "4. dissolve by location\n", + "5. 
calculate the area for global regions and eez countries using the Mollweide projection\n", + "6. prepare the data to be ingested in strapi\n", + "7. upload the data to strapi" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + } + ], + "source": [ + "pipe = \"mpaatlas\"\n", + "strapi_collection = \"mpaa-protection-level-stat\"\n", + "\n", + "pipe_dir_eez = FileConventionHandler(\"eez\")\n", + "pipe_dir_mpaatlas = FileConventionHandler(pipe)\n", + "output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\n", + " \"mpaatlas_protection_level.csv\"\n", + ")\n", + "\n", + "# Download the EEZ file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", + "# Download the mpas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", + "\n", + "# Load the data\n", + "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:28<00:00, 9.89it/s]\n" + ] + } + ], + "source": [ + "eez_mpaatlas_data_join = await spatial_join(\n", + " eez, 
mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# To get an idea of the spatial join results\n", + "# eez_mpaatlas_data_join.to_file(\n", + "# pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin.shp\"),\n", + "# driver=\"ESRI Shapefile\",\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pyogrio._io:Created 55 records\n" + ] + } + ], + "source": [ + "eez_mpaatlas_data_join.dissolve(by=[\"protecti_1\", \"location_i\"], aggfunc={\"name\": \"count\"}).reset_index().to_file(\n", + "pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin_dissolved.shp\"),\n", + "driver=\"ESRI Shapefile\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "eez_mpaatlas_data_join2 = eez_mpaatlas_data_join.copy()\n", + "\n", + "result = (\n", + " eez_mpaatlas_data_join2.rename(columns={\"location_i\": \"iso_3\"})\n", + " .pipe(process_mpaatlas_data) \n", + " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\", environment = \"marine\")\n", + " .pipe(separate_parent_iso, iso_column=\"iso_3\")\n", + " .replace(\n", + " {\n", + " \"iso_3\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .pipe(add_region_iso, iso_column=\"iso_3\")\n", + " .pipe(calculate_stats, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", + " .query('protecti_1 != \"less protected or unknown\"')\n", + " .pipe(fix_monaco, iso_column=\"iso_3\", area_column=\"area_km2\")\n", + " .pipe(add_total_area, 'marine')\n", + " 
.pipe(calculate_coverage_percentage_mpatlas)\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso_3\",\n", + " rep_d={\n", + " \"protecti_1\": {\n", + " \"fully or highly protected\": 1,\n", + " }\n", + " },\n", + " rename={\"protecti_1\": \"mpaa_protection_level\", \"area_km2\": \"area\"},\n", + " drop_cols=[\"total_marine_area\", \"iso_3\"],\n", + " )\n", + ")\n", + "\n", + "ProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", + " output_file, index=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Save the results in GCS\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/mpaatlas_protection_level.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi_collection = \"mpaa-protection-level-stat\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Protected Seas - Level of Fishing Protection" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + 
"outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "pipe = \"protectedseas\"\n", + "strapi_collection = \"fishing-protection-level-stat\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"protectedseas_stats.xlsx\")\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"lfp.csv\")\n", + "\n", + "# Download the protected seas file && unzip it\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"vizzuality_processed_data/protectedseas/preprocess/protectedseas_stats.xlsx\",\n", + " file=input_file,\n", + " operation=\"r\",\n", + ")\n", + "\n", + "# Load the data\n", + "protectedseas_intermediate = pd.read_excel(input_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_teriso_sovincludes_multi_jurisdictional_areaslfparea_sqkmtotal_areapct_total
320NaNESPTrue5142.9730101011023.7760.014141
321NaNESPTrue41639.6820761011023.7760.162180
322NaNESPTrue3214532.8498001011023.77621.219367
323NaNESPTrue215064.1327701011023.7761.489988
324NaNESPTrue1779644.1388001011023.77677.114323
\n", + "
" + ], + "text/plain": [ + " iso_ter iso_sov includes_multi_jurisdictional_areas lfp area_sqkm \\\n", + "320 NaN ESP True 5 142.973010 \n", + "321 NaN ESP True 4 1639.682076 \n", + "322 NaN ESP True 3 214532.849800 \n", + "323 NaN ESP True 2 15064.132770 \n", + "324 NaN ESP True 1 779644.138800 \n", + "\n", + " total_area pct_total \n", + "320 1011023.776 0.014141 \n", + "321 1011023.776 0.162180 \n", + "322 1011023.776 21.219367 \n", + "323 1011023.776 1.489988 \n", + "324 1011023.776 77.114323 " + ] + }, + "execution_count": 159, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "protectedseas_intermediate[\n", + " (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " )\n", + " | (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", + " & ~protectedseas_intermediate.iso_sov.isin(\n", + " protectedseas_intermediate[\n", + " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " ].iso_sov.unique()\n", + " )\n", + " )\n", + "][protectedseas_intermediate.iso_sov.eq(\"ESP\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [], + "source": [ + "final = (\n", + " protectedseas_intermediate[\n", + " (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " )\n", + " | (\n", + " protectedseas_intermediate.iso_ter.isna()\n", + " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", + " & ~protectedseas_intermediate.iso_sov.isin(\n", + " protectedseas_intermediate[\n", + " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", + " ].iso_sov.unique()\n", + " )\n", + " )\n", + " ].replace(\n", + " {\n", + " \"lfp\": {\n", + " 5: \"highly\",\n", + " 4: \"highly\",\n", + " 3: 
\"moderately\",\n", + " 2: \"less\",\n", + " 1: \"less\",\n", + " },\n", + " }\n", + " ).groupby([\"iso_sov\", \"lfp\"]).agg({\"area_sqkm\": \"sum\", \"total_area\": \"max\"}).reset_index()\n", + " .pipe(\n", + " calculate_global_area,\n", + " gby_col=[\"lfp\"],\n", + " iso_column=\"iso_sov\",\n", + " agg_ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", + " )\n", + " .pipe(add_region_iso, iso_column=\"iso_sov\")\n", + " .pipe(\n", + " calculate_stats,\n", + " gby_col=[\"lfp\"],\n", + " ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", + " iso_column=\"iso_sov\",\n", + " )\n", + " .pipe(lambda x: x.assign(pct=round((x.area_sqkm / x.total_area)*100, 2)))\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso_sov\",\n", + " rep_d={\n", + " \"lfp\": {\n", + " \"highly\": 1,\n", + " \"moderately\": 2,\n", + " \"less\": 3,\n", + " }\n", + " },\n", + " rename={\"lfp\": \"fishing_protection_level\", \"area_sqkm\": \"area\"},\n", + " drop_cols=[\"iso_sov\", \"total_area\"],\n", + " )\n", + ")\n", + "FPLSchema(final[final.location.notna()]).to_csv(output_file, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "remote_path = 'vizzuality_processed_data/strapi_tables/lfp.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.deleteCollectionData(strapi_collection, list(range(1, 500)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
strapi.importCollectionData(\n", + "# strapi_collection,\n", + "# output_file,\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " 1- lower case the columns \n", + "2- separate locations whose regime is in dispute or under a joint regime \n", + "3- calculate area for mpaatlas data \n", + "4- rename columns for merge \n", + "5- merge mpaatlas and mpa data identifying the source \n", + "6- identify child resources and set them as children \n", + "7- calculate bbox \n", + "8- set child resources \n", + "9- prepare output for batch export \n", + "10- upload data to strapi " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Country Detail Table Data\n", + "The country detail table is done for marine and terrestrial independently and the results are concatenated." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Methodology for marine:\n", + "\n", + "1- lower case the columns \n", + "2- separate locations whose regime is in dispute or under a joint regime \n", + "3- remove ATA and ABNJ because Protected Planet doesn't include stats for ATA and ABNJ is marine \n", + "4- calculate area for mpaatlas data \n", + "5- rename columns for merge \n", + "6- merge mpaatlas and mpa data identifying the source \n", + "7- identify child resources and set them as children \n", + "8- calculate bbox \n", + "9- set child resources \n", + "10- Add coverage percentage\n", + "11- Add environment marine\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", + "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" + ] + } + ], + "source": [ + "pipe = 
\"mpa\"\n", + "strapi_collection_mpas = \"mpa\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", + "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", + "\n", + "# Download the protected atlas file && unzip it\n", + "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# Download the mpaatlas file \n", + "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", + "\n", + "# Load the data\n", + "mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\")).pipe(\n", + " clean_geometries\n", + ")\n", + "mpaatlas_intermediate = gpd.read_file(\n", + " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", + ").pipe(clean_geometries)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Load iucn categories\n", + "# iucn_cat = pd.DataFrame(\n", + "# {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", + "# index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", + "# )\n", + "# iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", + "iucn_cat = pd.read_csv(\n", + " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess marine tables (mpa and mpaatlas) and concatenate them\n", + "init_table = (\n", + " pd.concat(\n", + " [\n", + " (\n", + " mpa_intermediate.pipe(columns_to_lower)\n", + " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", + " .pipe(change_ata_to_abnj)\n", + " .rename(\n", + " columns={\n", + " \"parent_iso\": \"iso\",\n", + " \"status_yr\": \"year\",\n", + " \"gis_m_area\": \"area_km2\",\n", + " 
}\n", + " ).drop(columns=['status'])\n", + " ).assign(source=\"protected_planet\"),\n", + " (\n", + " mpaatlas_intermediate.pipe(calculate_area)\n", + " .pipe(extract_wdpaid_mpaatlas)\n", + " .pipe(separate_parent_iso, iso_column=\"location_i\")\n", + " .rename(\n", + " columns={\n", + " \"location_i\": \"iso\",\n", + " \"wdpa_id\": \"wdpa_pid\",\n", + " \"designatio\": \"desig_eng\",\n", + " }\n", + " )\n", + " ).assign(source=\"mpaatlas\")\n", + " .assign(pa_def=1)\n", + " .astype({\"mpa_zone_i\": \"Int64\"}),\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .reset_index(drop=True)\n", + " .replace(\n", + " {\n", + " \"iso\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1007: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1160: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " df.replace(rep_d)\n" + ] + } + ], + "source": [ + "mpa_table = (\n", + " init_table.pipe(add_bbox, \"bbox\")\n", + " .pipe(define_is_child)\n", + " .pipe(set_child_id, 'marine')\n", + " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", + " .reset_index(drop=True)\n", + " .pipe(add_total_area, 'marine')\n", + " .rename(columns={\"area_km2\": \"protected_area\"})\n", + " .pipe(calculate_coverage_percentage)\n", + " .pipe(add_environment)\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso\",\n", + " rep_d={\n", + " \"status\": {\n", + " \"Adopted\": 4,\n", + " \"implemented\": 6,\n", + " \"Established\": 6,\n", + " \"Designated\": 5,\n", + " \"Proposed\": 3,\n", + " \"Inscribed\": 3,\n", + " \"unknown\": 1,\n", + " },\n", + " \"pa_def\": {\"0\": 2, \"1\": 1},\n", + " \"year\": {0: pd.NA},\n", + " \"iucn_cat\": dict(\n", + " iucn_cat[[\"slug\"]]\n", + " .reset_index(drop=False)\n", + " .iloc[:, [1, 0]]\n", + " .to_dict(orient=\"tight\")[\"data\"]\n", + " ),\n", + " \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\n", + " \"protection\": {\n", + " \"full\": 3,\n", + " \"light\": 4,\n", + " \"incompatible\": 5,\n", + " \"high\": 6,\n", + " \"minimal\": 7,\n", + " \"unknown\": 8,\n", + " \"unknown/to be determined\": 8,\n", + " },\n", + " \"establishm\": {\n", + " \"actively managed\": 4,\n", + " \"implemented\": 6,\n", + " \"designated\": 5,\n", + " \"Designated\": 5,\n", + " \"proposed or committed\": 3,\n", + " \"Proposed\": 3,\n", + " \"Inscribed\": 3,\n", + " \"Established\": 5,\n", + " \"Adopted\": 5,\n", + " \"unknown\": 1,\n", + " },\n", + " },\n", + " rename={\n", + " \"pa_def\": \"protection_status\",\n", + " \"protected_area\": \"area\",\n", + " \"iucn_cat\": \"iucn_category\",\n", + " \"desig_eng\": \"designation\",\n", + " \"protection\": \"mpaa_protection_level\",\n", + " \"establishm\": \"mpaa_establishment_stage\",\n", + " 
\"source\": \"data_source\",\n", + " },\n", + " drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\n", + " )\n", + " .astype(\n", + " {\n", + " \"year\": \"Int32\",\n", + " \"iucn_category\": \"Int64\",\n", + " \"protection_status\": \"Int64\",\n", + " }\n", + " )\n", + " .query(\"coverage <= 100\") \n", + " .sort_index()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Methodology for terrestrial:\n", + "\n", + "1- lower case the columns \n", + "2- separate location that its regime is in dispute or on join regime \n", + "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", + "4- rename columns for merge \n", + "5- identify child resources and set them as childs \n", + "6- calculate bbox \n", + "7- set child resources \n", + "8- Add coverage percentage \n", + "9- Add environment terrestrial \n", + "10- Add marine fields with nan " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = \"mpa-terrestrial\"\n", + "strapi_collection_mpas = \"mpa-terrestrial\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", + "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", + "\n", + "# # Download the protected atlas file && unzip it\n", + "# download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", + "# # Download the gadm file \n", + "# download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "tpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "iucn_cat = pd.read_csv(\n", + " 
pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "init_table = (\n", + " pd.concat(\n", + " [\n", + " (\n", + " tpa_intermediate.pipe(columns_to_lower)\n", + " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", + " .query(\"parent_iso != 'ATA' and parent_iso != 'ABNJ'\")\n", + " .rename(\n", + " columns={\n", + " \"parent_iso\": \"iso\",\n", + " \"status_yr\": \"year\",\n", + " \"gis_area\": \"protected_area\",\n", + " }\n", + " ).drop(columns=['status'])\n", + " ).assign(source=\"protected_planet\"),\n", + " ],\n", + " ignore_index=True,\n", + " )\n", + " .reset_index(drop=True)\n", + " .replace(\n", + " {\n", + " \"iso\": {\n", + " \"COK\": \"NZL\",\n", + " \"IOT\": \"GBR\",\n", + " \"NIU\": \"NZL\",\n", + " \"SHN\": \"GBR\",\n", + " \"SJM\": \"NOR\",\n", + " \"UMI\": \"USA\",\n", + " \"NCL\": \"FRA\",\n", + " }\n", + " }\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1160: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " df.replace(rep_d)\n" + ] + } + ], + "source": [ + "tpa_table = (\n", + " init_table.pipe(add_bbox, \"bbox\")\n", + " .pipe(define_is_child)\n", + " .pipe(set_child_id, 'terrestrial')\n", + " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", + " .reset_index(drop=True)\n", + " .pipe(add_total_area, 'terrestrial')\n", + " .pipe(calculate_coverage_percentage)\n", + " .pipe(add_environment)\n", + " .pipe(\n", + " output,\n", + " iso_column=\"iso\",\n", + " rep_d={\n", + " \"pa_def\": {\"0\": 2, \"1\": 1},\n", + " \"year\": {0: pd.NA},\n", + " \"iucn_cat\": dict(\n", + " iucn_cat[[\"slug\"]]\n", + " .reset_index(drop=False)\n", + " .iloc[:, [1, 0]]\n", + " .to_dict(orient=\"tight\")[\"data\"]\n", + " ),\n", + " \"source\": {\"protected_planet\": 3},\n", + " },\n", + " rename={\n", + " \"pa_def\": \"protection_status\",\n", + " \"protected_area\": \"area\",\n", + " \"iucn_cat\": \"iucn_category\",\n", + " \"desig_eng\": \"designation\",\n", + " \"source\": \"data_source\",\n", + " },\n", + " drop_cols=[\"geometry\", \"iso\", \"marine\", \"total_terrestrial_area\"]\n", + " )\n", + " .astype(\n", + " {\n", + " \"year\": \"Int32\",\n", + " \"iucn_category\": \"Int64\",\n", + " \"protection_status\": \"Int64\",\n", + " }\n", + " )\n", + " .query(\"coverage <= 100\") \n", + " .sort_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Add columns mpaa_protection_level and mpaa_establishment_stage to the table to validate it\n", + "tpa_table['mpaa_protection_level'] = np.nan\n", + "tpa_table['mpaa_establishment_stage'] = np.nan" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concatenate marine and terrestrial tables\n", + "\n", + "1- Concatenate tables \n", + "2- Add parent and children columns \n", + "3- Sort by parent \n", + "4- Create batch export for 
all columns by parent (to handle relations when uploading in Strapi) \n", + "5- Create batch export only for column parent (to handle relations when uploading in Strapi)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "pipe_pa = \"pa\"\n", + "step = \"preprocess\"\n", + "strapi_collection_pas = \"pa\"\n", + "\n", + "pipe_dir_pa = FileConventionHandler(pipe_pa)\n", + "\n", + "output_file_pa = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "final_table = pd.concat([mpa_table, tpa_table], ignore_index=True)\n", + "final_table.index = final_table.index + 1\n", + "final_table.index.name = 'id'\n", + "final_table = final_table.pipe(add_child_parent_relationship).drop(columns=['wdpa_pid', 'is_child', 'child_id']).sort_values(by=['parent'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pa, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Divide table into two tables\n", + "final_table1 = final_table.drop(columns=['parent'])\n", + "final_table2 = final_table[['parent']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Divide output in chunks to be uploaded to strapi\n", + "batch_export(\n", + " final_table1[final_table1.area.notna()],\n", + " 4000,\n", + " PAsSchemaChunk1,\n", + " pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"),\n", + " \"pa_detail\",\n", + " format=\"json\",\n", + " strapi_colection=strapi_collection_pas,\n", + ")\n", + "\n", + "batch_export(\n", + " final_table2,\n", + " 10000,\n", + " PAsSchemaChunk2,\n", + " 
pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"),\n", + " \"pa_detail\",\n", + " format=\"json\",\n", + " strapi_colection=strapi_collection_pas,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# zip data\n", + "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1.zip\"))\n", + "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2.zip\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Save zipped file in GCS\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name='vizzuality_processed_data/strapi_tables/pa_chunks1.zip',\n", + " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1.zip\"),\n", + " operation=\"w\",\n", + ")\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name='vizzuality_processed_data/strapi_tables/pa_chunks2.zip',\n", + " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2.zip\"),\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# strapi.deleteCollectionData(\"pa\", list(range(1, 20914)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for i in 
range(0, 4):\n", + "# strapi.importCollectionData(\n", + "# strapi_collection_mpas,\n", + "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Habitats" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note:** The marine habitat data has already been processed in the habitats.ipynb notebook. This section imports the output from habitats.ipynb, processes the terrestrial data, and generates the final output table required for Strapi." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "pipe = \"terrestrial-habitats\"\n", + "collection_name = \"terrestrial_habitats\"\n", + "\n", + "pipe_dir = FileConventionHandler(pipe)\n", + "input_file_ter = pipe_dir.get_processed_step_path(prev_step).joinpath(\"master_data_protection_exact.csv\")\n", + "input_file_mar = pipe_dir.get_processed_step_path(prev_step).joinpath(\"habitats6.csv\")\n", + "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"habitats_all.csv\")\n", + "\n", + "# Download the terrestrial habitats table from the bucket\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=\"vizzuality_processed_data/habitats/preprocess/master_data_protection_exact.csv\",\n", + " file=input_file_ter,\n", + " operation=\"r\",\n", + ")\n", + "\n", + "# Download the marine habitats table from the bucket\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " 
blob_name=\"vizzuality_processed_data/processed_statistic_tables/habitats6.csv\",\n", + " file=input_file_mar,\n", + " operation=\"r\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationprotected_areatotal_areahabitatyearenvironment
0ABNJ427.0485241893.871282cold-water corals20241
1AGO0.0000003.395671cold-water corals20241
2ALB0.0000005.986479cold-water corals20241
3ARG6.98422661.826344cold-water corals20241
4ATG0.0000000.997747cold-water corals20241
.....................
609AS21277.22000074292.673146mangroves20201
610EU732.1437501246.189677mangroves20201
611NA2097.7400002415.418557mangroves20201
612SA27151.74000039893.444608mangroves20201
613WA27.830000173.620938mangroves20201
\n", + "

614 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " location protected_area total_area habitat year \\\n", + "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", + "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", + "2 ALB 0.000000 5.986479 cold-water corals 2024 \n", + "3 ARG 6.984226 61.826344 cold-water corals 2024 \n", + "4 ATG 0.000000 0.997747 cold-water corals 2024 \n", + ".. ... ... ... ... ... \n", + "609 AS 21277.220000 74292.673146 mangroves 2020 \n", + "610 EU 732.143750 1246.189677 mangroves 2020 \n", + "611 NA 2097.740000 2415.418557 mangroves 2020 \n", + "612 SA 27151.740000 39893.444608 mangroves 2020 \n", + "613 WA 27.830000 173.620938 mangroves 2020 \n", + "\n", + " environment \n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 1 \n", + "4 1 \n", + ".. ... \n", + "609 1 \n", + "610 1 \n", + "611 1 \n", + "612 1 \n", + "613 1 \n", + "\n", + "[614 rows x 6 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitat_mar = pd.read_csv(input_file_mar, na_values=['', 'NaN', 'NULL'])\n", + "habitat_mar['environment'] = 1\n", + "habitat_mar['location_id'] = habitat_mar['location_id'].fillna('NA')\n", + "habitat_mar.rename(columns={'location_id': 'location', 'habitat_name':'habitat'}, inplace=True)\n", + "habitat_mar['habitat'] = habitat_mar['habitat'].astype(str).str.lower()\n", + "habitat_mar" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitattotalprotected
0GLOBartificial2.814393e+091.709865e+08
1GLOBdesert1.103513e+107.445863e+08
2GLOBforest4.894422e+098.934822e+08
3GLOBgrassland3.532339e+094.938337e+08
4GLOBother3.129403e+071.039509e+07
...............
1502ZWEgrassland3.384890e+052.174229e+04
1503ZWErocky/mountains5.100000e+01NaN
1504ZWEsavanna2.287906e+078.742367e+06
1505ZWEshrubland5.252410e+062.875969e+05
1506ZWEwetlands/open water3.727666e+053.034355e+05
\n", + "

1507 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat total protected\n", + "0 GLOB artificial 2.814393e+09 1.709865e+08\n", + "1 GLOB desert 1.103513e+10 7.445863e+08\n", + "2 GLOB forest 4.894422e+09 8.934822e+08\n", + "3 GLOB grassland 3.532339e+09 4.938337e+08\n", + "4 GLOB other 3.129403e+07 1.039509e+07\n", + "... ... ... ... ...\n", + "1502 ZWE grassland 3.384890e+05 2.174229e+04\n", + "1503 ZWE rocky/mountains 5.100000e+01 NaN\n", + "1504 ZWE savanna 2.287906e+07 8.742367e+06\n", + "1505 ZWE shrubland 5.252410e+06 2.875969e+05\n", + "1506 ZWE wetlands/open water 3.727666e+05 3.034355e+05\n", + "\n", + "[1507 rows x 4 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitat_ter = pd.read_csv(input_file_ter).drop(columns=['frac', 'perc_extent', 'total_area'])\n", + "habitat_ter.rename(columns={'habitats': 'habitat'}, inplace=True)\n", + "habitat_ter['habitat'] = habitat_ter['habitat'].astype(str).str.lower()\n", + "habitat_ter" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitattotalprotectedtotal_area_country
0GLOBartificial2.814393e+091.709865e+082.658415e+10
1GLOBdesert1.103513e+107.445863e+082.658415e+10
2GLOBforest4.894422e+098.934822e+082.658415e+10
3GLOBgrassland3.532339e+094.938337e+082.658415e+10
5GLOBrocky/mountains3.550780e+084.447316e+072.658415e+10
..................
1502ZWEgrassland3.384890e+052.174229e+044.148695e+07
1503ZWErocky/mountains5.100000e+01NaN4.148695e+07
1504ZWEsavanna2.287906e+078.742367e+064.148695e+07
1505ZWEshrubland5.252410e+062.875969e+054.148695e+07
1506ZWEwetlands/open water3.727666e+053.034355e+054.148695e+07
\n", + "

1337 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat total protected \\\n", + "0 GLOB artificial 2.814393e+09 1.709865e+08 \n", + "1 GLOB desert 1.103513e+10 7.445863e+08 \n", + "2 GLOB forest 4.894422e+09 8.934822e+08 \n", + "3 GLOB grassland 3.532339e+09 4.938337e+08 \n", + "5 GLOB rocky/mountains 3.550780e+08 4.447316e+07 \n", + "... ... ... ... ... \n", + "1502 ZWE grassland 3.384890e+05 2.174229e+04 \n", + "1503 ZWE rocky/mountains 5.100000e+01 NaN \n", + "1504 ZWE savanna 2.287906e+07 8.742367e+06 \n", + "1505 ZWE shrubland 5.252410e+06 2.875969e+05 \n", + "1506 ZWE wetlands/open water 3.727666e+05 3.034355e+05 \n", + "\n", + " total_area_country \n", + "0 2.658415e+10 \n", + "1 2.658415e+10 \n", + "2 2.658415e+10 \n", + "3 2.658415e+10 \n", + "5 2.658415e+10 \n", + "... ... \n", + "1502 4.148695e+07 \n", + "1503 4.148695e+07 \n", + "1504 4.148695e+07 \n", + "1505 4.148695e+07 \n", + "1506 4.148695e+07 \n", + "\n", + "[1337 rows x 5 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove all rows where \"habitat_name\" is \"Other\"\n", + "habitat_ter = habitat_ter[habitat_ter['habitat'] != 'other'].copy()\n", + "\n", + "# calculate total_area by adding up \"total\" per iso_3\n", + "habitat_ter['total_area_country'] = habitat_ter.groupby('iso_3')['total'].transform('sum')\n", + "habitat_ter" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Assign territories to their soveraign countries\n", + "with open(scripts_dir.joinpath('data_commons/data/dependency_to_parent.json'), 'r') as json_file:\n", + " dependency_to_parent = json.load(json_file)\n", + "\n", + "mapping = {key: value[0] for key, value in dependency_to_parent.items()}\n", + "\n", + "habitat_ter['iso_3'] = habitat_ter['iso_3'].map(mapping).fillna(habitat_ter['iso_3'])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": 
{ + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitattotalprotectedtotal_area_country
0AFGartificial5.623284e+068.474413e+047.775693e+07
1AFGdesert2.726139e+073.217268e+057.775693e+07
2AFGforest3.825968e+052.001767e+047.775693e+07
3AFGgrassland2.910245e+078.725795e+057.775693e+07
4AFGrocky/mountains1.370481e+071.567462e+067.775693e+07
..................
1332ZWEgrassland3.384890e+052.174229e+044.148695e+07
1333ZWErocky/mountains5.100000e+010.000000e+004.148695e+07
1334ZWEsavanna2.287906e+078.742367e+064.148695e+07
1335ZWEshrubland5.252410e+062.875969e+054.148695e+07
1336ZWEwetlands/open water3.727666e+053.034355e+054.148695e+07
\n", + "

1337 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat total protected \\\n", + "0 AFG artificial 5.623284e+06 8.474413e+04 \n", + "1 AFG desert 2.726139e+07 3.217268e+05 \n", + "2 AFG forest 3.825968e+05 2.001767e+04 \n", + "3 AFG grassland 2.910245e+07 8.725795e+05 \n", + "4 AFG rocky/mountains 1.370481e+07 1.567462e+06 \n", + "... ... ... ... ... \n", + "1332 ZWE grassland 3.384890e+05 2.174229e+04 \n", + "1333 ZWE rocky/mountains 5.100000e+01 0.000000e+00 \n", + "1334 ZWE savanna 2.287906e+07 8.742367e+06 \n", + "1335 ZWE shrubland 5.252410e+06 2.875969e+05 \n", + "1336 ZWE wetlands/open water 3.727666e+05 3.034355e+05 \n", + "\n", + " total_area_country \n", + "0 7.775693e+07 \n", + "1 7.775693e+07 \n", + "2 7.775693e+07 \n", + "3 7.775693e+07 \n", + "4 7.775693e+07 \n", + "... ... \n", + "1332 4.148695e+07 \n", + "1333 4.148695e+07 \n", + "1334 4.148695e+07 \n", + "1335 4.148695e+07 \n", + "1336 4.148695e+07 \n", + "\n", + "[1337 rows x 5 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# groupby country and habitats and sum the pixeles\n", + "habitat_ter_grouped = habitat_ter.groupby(['iso_3', 'habitat']).sum().reset_index()\n", + "habitat_ter_grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitattotalprotectedtotal_area_countryprotected%habitat%
0AFGartificial5.623284e+068.474413e+047.775693e+071.5070227.231875
1AFGdesert2.726139e+073.217268e+057.775693e+071.18015635.059751
2AFGforest3.825968e+052.001767e+047.775693e+075.2320550.492042
3AFGgrassland2.910245e+078.725795e+057.775693e+072.99830337.427462
4AFGrocky/mountains1.370481e+071.567462e+067.775693e+0711.43731717.625187
........................
1332ZWEgrassland3.384890e+052.174229e+044.148695e+076.4233380.815893
1333ZWErocky/mountains5.100000e+010.000000e+004.148695e+070.0000000.000123
1334ZWEsavanna2.287906e+078.742367e+064.148695e+0738.21121855.147600
1335ZWEshrubland5.252410e+062.875969e+054.148695e+075.47552212.660390
1336ZWEwetlands/open water3.727666e+053.034355e+054.148695e+0781.4009410.898515
\n", + "

1337 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat total protected \\\n", + "0 AFG artificial 5.623284e+06 8.474413e+04 \n", + "1 AFG desert 2.726139e+07 3.217268e+05 \n", + "2 AFG forest 3.825968e+05 2.001767e+04 \n", + "3 AFG grassland 2.910245e+07 8.725795e+05 \n", + "4 AFG rocky/mountains 1.370481e+07 1.567462e+06 \n", + "... ... ... ... ... \n", + "1332 ZWE grassland 3.384890e+05 2.174229e+04 \n", + "1333 ZWE rocky/mountains 5.100000e+01 0.000000e+00 \n", + "1334 ZWE savanna 2.287906e+07 8.742367e+06 \n", + "1335 ZWE shrubland 5.252410e+06 2.875969e+05 \n", + "1336 ZWE wetlands/open water 3.727666e+05 3.034355e+05 \n", + "\n", + " total_area_country protected% habitat% \n", + "0 7.775693e+07 1.507022 7.231875 \n", + "1 7.775693e+07 1.180156 35.059751 \n", + "2 7.775693e+07 5.232055 0.492042 \n", + "3 7.775693e+07 2.998303 37.427462 \n", + "4 7.775693e+07 11.437317 17.625187 \n", + "... ... ... ... \n", + "1332 4.148695e+07 6.423338 0.815893 \n", + "1333 4.148695e+07 0.000000 0.000123 \n", + "1334 4.148695e+07 38.211218 55.147600 \n", + "1335 4.148695e+07 5.475522 12.660390 \n", + "1336 4.148695e+07 81.400941 0.898515 \n", + "\n", + "[1337 rows x 7 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate the percentage of protected pixels and the percentage of extent of the habitat\n", + "habitat_ter_grouped['protected%'] = habitat_ter_grouped['protected']/habitat_ter_grouped['total']*100\n", + "habitat_ter_grouped['habitat%'] = habitat_ter_grouped['total']/habitat_ter_grouped['total_area_country']*100\n", + "habitat_ter_grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitattotalprotectedtotal_area_countryprotected%habitat%total_terrestrial_area
0AFGartificial5.623284e+068.474413e+047.775693e+071.5070227.231875644050.0
1AFGdesert2.726139e+073.217268e+057.775693e+071.18015635.059751644050.0
2AFGforest3.825968e+052.001767e+047.775693e+075.2320550.492042644050.0
3AFGgrassland2.910245e+078.725795e+057.775693e+072.99830337.427462644050.0
4AFGrocky/mountains1.370481e+071.567462e+067.775693e+0711.43731717.625187644050.0
...........................
1332ZWEgrassland3.384890e+052.174229e+044.148695e+076.4233380.815893391235.0
1333ZWErocky/mountains5.100000e+010.000000e+004.148695e+070.0000000.000123391235.0
1334ZWEsavanna2.287906e+078.742367e+064.148695e+0738.21121855.147600391235.0
1335ZWEshrubland5.252410e+062.875969e+054.148695e+075.47552212.660390391235.0
1336ZWEwetlands/open water3.727666e+053.034355e+054.148695e+0781.4009410.898515391235.0
\n", + "

1337 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " iso_3 habitat total protected \\\n", + "0 AFG artificial 5.623284e+06 8.474413e+04 \n", + "1 AFG desert 2.726139e+07 3.217268e+05 \n", + "2 AFG forest 3.825968e+05 2.001767e+04 \n", + "3 AFG grassland 2.910245e+07 8.725795e+05 \n", + "4 AFG rocky/mountains 1.370481e+07 1.567462e+06 \n", + "... ... ... ... ... \n", + "1332 ZWE grassland 3.384890e+05 2.174229e+04 \n", + "1333 ZWE rocky/mountains 5.100000e+01 0.000000e+00 \n", + "1334 ZWE savanna 2.287906e+07 8.742367e+06 \n", + "1335 ZWE shrubland 5.252410e+06 2.875969e+05 \n", + "1336 ZWE wetlands/open water 3.727666e+05 3.034355e+05 \n", + "\n", + " total_area_country protected% habitat% total_terrestrial_area \n", + "0 7.775693e+07 1.507022 7.231875 644050.0 \n", + "1 7.775693e+07 1.180156 35.059751 644050.0 \n", + "2 7.775693e+07 5.232055 0.492042 644050.0 \n", + "3 7.775693e+07 2.998303 37.427462 644050.0 \n", + "4 7.775693e+07 11.437317 17.625187 644050.0 \n", + "... ... ... ... ... \n", + "1332 4.148695e+07 6.423338 0.815893 391235.0 \n", + "1333 4.148695e+07 0.000000 0.000123 391235.0 \n", + "1334 4.148695e+07 38.211218 55.147600 391235.0 \n", + "1335 4.148695e+07 5.475522 12.660390 391235.0 \n", + "1336 4.148695e+07 81.400941 0.898515 391235.0 \n", + "\n", + "[1337 rows x 8 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add country's terrestrial area\n", + "add_total_area(habitat_ter_grouped, 'terrestrial')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Estimate the total area and the protected area based on pixels proportions and the total terrestrial area\n", + "habitat_ter_grouped['total_area'] = habitat_ter_grouped['total_terrestrial_area']*habitat_ter_grouped['habitat%']/100\n", + "habitat_ter_grouped['protected_area'] = habitat_ter_grouped['total_area']*habitat_ter_grouped['protected%']/100" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iso_3habitattotalprotectedtotal_area_countryprotected%habitat%total_terrestrial_areatotal_areaprotected_area
68AUTartificial5.546380e+06414568.7793491.243977e+077.47458344.58587683709.037322.3911882789.693093
69AUTdesert7.359923e+0433653.6037871.243977e+0745.7254820.59164583709.0495.259844226.459952
70AUTforest3.889044e+06352118.4617571.243977e+079.05411431.26299083709.026169.9360402369.455901
71AUTgrassland1.604881e+06227355.7391861.243977e+0714.16651912.90121083709.010799.4738131529.909551
72AUTrocky/mountains1.492322e+0546986.2555631.243977e+0731.4853371.19963883709.01004.204873316.177289
73AUTshrubland1.074615e+0693825.4829071.243977e+078.7310788.63854683709.07231.240356631.365203
74AUTwetlands/open water1.020180e+0517607.4299951.243977e+0717.2591390.82009683709.0686.493886118.482935
\n", + "
" + ], + "text/plain": [ + " iso_3 habitat total protected \\\n", + "68 AUT artificial 5.546380e+06 414568.779349 \n", + "69 AUT desert 7.359923e+04 33653.603787 \n", + "70 AUT forest 3.889044e+06 352118.461757 \n", + "71 AUT grassland 1.604881e+06 227355.739186 \n", + "72 AUT rocky/mountains 1.492322e+05 46986.255563 \n", + "73 AUT shrubland 1.074615e+06 93825.482907 \n", + "74 AUT wetlands/open water 1.020180e+05 17607.429995 \n", + "\n", + " total_area_country protected% habitat% total_terrestrial_area \\\n", + "68 1.243977e+07 7.474583 44.585876 83709.0 \n", + "69 1.243977e+07 45.725482 0.591645 83709.0 \n", + "70 1.243977e+07 9.054114 31.262990 83709.0 \n", + "71 1.243977e+07 14.166519 12.901210 83709.0 \n", + "72 1.243977e+07 31.485337 1.199638 83709.0 \n", + "73 1.243977e+07 8.731078 8.638546 83709.0 \n", + "74 1.243977e+07 17.259139 0.820096 83709.0 \n", + "\n", + " total_area protected_area \n", + "68 37322.391188 2789.693093 \n", + "69 495.259844 226.459952 \n", + "70 26169.936040 2369.455901 \n", + "71 10799.473813 1529.909551 \n", + "72 1004.204873 316.177289 \n", + "73 7231.240356 631.365203 \n", + "74 686.493886 118.482935 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "habitat_ter_grouped[habitat_ter_grouped['iso_3'] == 'AUT']" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Add regions\n", + "habitat_ter_grouped = add_region_iso(habitat_ter_grouped, 'iso_3')\n", + "\n", + "regions = habitat_ter_grouped.groupby(['region', 'habitat']).agg({\n", + " 'total_area': 'sum',\n", + " 'protected_area': 'sum'\n", + "}).reset_index()\n", + "\n", + "regions.rename(columns={'region': 'location'}, inplace=True)\n", + "habitat_ter_grouped.drop(columns=['total', 'protected', 'total_area_country', 'protected%', 'habitat%', 'total_terrestrial_area', 'region'], inplace=True)\n", + "habitat_ter_grouped = habitat_ter_grouped.rename(columns 
= {'iso_3':'location'})" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Concatenate regions and habitat_ter_grouped dataframes\n", + "habitats_terrestrial = pd.concat([regions, habitat_ter_grouped], ignore_index=True)\n", + "\n", + "# fill protected_area and total_area with 0 if they are NaN\n", + "habitats_terrestrial['protected_area'] = habitats_terrestrial['protected_area'].fillna(0)\n", + "habitats_terrestrial['total_area'] = habitats_terrestrial['total_area'].fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationhabitattotal_areaprotected_areayearenvironment
0AFartificial2.925993e+06190150.53942520242
1AFdesert9.875738e+06658679.48501820242
2AFforest4.461370e+06919656.96000720242
3AFgrassland2.039020e+06192683.23452020242
4AFrocky/mountains2.384026e+0546963.72554320242
.....................
1381ZWEgrassland3.192058e+03205.03664220242
1382ZWErocky/mountains4.809460e-010.00000020242
1383ZWEsavanna2.157567e+0582443.26646820242
1384ZWEshrubland4.953188e+042712.12882120242
1385ZWEwetlands/open water3.515306e+032861.49223520242
\n", + "

1386 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " location habitat total_area protected_area year \\\n", + "0 AF artificial 2.925993e+06 190150.539425 2024 \n", + "1 AF desert 9.875738e+06 658679.485018 2024 \n", + "2 AF forest 4.461370e+06 919656.960007 2024 \n", + "3 AF grassland 2.039020e+06 192683.234520 2024 \n", + "4 AF rocky/mountains 2.384026e+05 46963.725543 2024 \n", + "... ... ... ... ... ... \n", + "1381 ZWE grassland 3.192058e+03 205.036642 2024 \n", + "1382 ZWE rocky/mountains 4.809460e-01 0.000000 2024 \n", + "1383 ZWE savanna 2.157567e+05 82443.266468 2024 \n", + "1384 ZWE shrubland 4.953188e+04 2712.128821 2024 \n", + "1385 ZWE wetlands/open water 3.515306e+03 2861.492235 2024 \n", + "\n", + " environment \n", + "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 2 \n", + "4 2 \n", + "... ... \n", + "1381 2 \n", + "1382 2 \n", + "1383 2 \n", + "1384 2 \n", + "1385 2 \n", + "\n", + "[1386 rows x 6 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add year and environment columns\n", + "habitats_terrestrial['year'] = 2024\n", + "habitats_terrestrial['environment'] = 2\n", + "habitats_terrestrial" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
locationhabitattotal_areaprotected_areayearenvironment
0AFartificial2.925993e+06190150.53942520242
1AFdesert9.875738e+06658679.48501820242
2AFforest4.461370e+06919656.96000720242
3AFgrassland2.039020e+06192683.23452020242
4AFrocky/mountains2.384026e+0546963.72554320242
.....................
1995ASmangroves7.429267e+0421277.22000020201
1996EUmangroves1.246190e+03732.14375020201
1997NAmangroves2.415419e+032097.74000020201
1998SAmangroves3.989344e+0427151.74000020201
1999WAmangroves1.736209e+0227.83000020201
\n", + "

2000 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " location habitat total_area protected_area year \\\n", + "0 AF artificial 2.925993e+06 190150.539425 2024 \n", + "1 AF desert 9.875738e+06 658679.485018 2024 \n", + "2 AF forest 4.461370e+06 919656.960007 2024 \n", + "3 AF grassland 2.039020e+06 192683.234520 2024 \n", + "4 AF rocky/mountains 2.384026e+05 46963.725543 2024 \n", + "... ... ... ... ... ... \n", + "1995 AS mangroves 7.429267e+04 21277.220000 2020 \n", + "1996 EU mangroves 1.246190e+03 732.143750 2020 \n", + "1997 NA mangroves 2.415419e+03 2097.740000 2020 \n", + "1998 SA mangroves 3.989344e+04 27151.740000 2020 \n", + "1999 WA mangroves 1.736209e+02 27.830000 2020 \n", + "\n", + " environment \n", + "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 2 \n", + "4 2 \n", + "... ... \n", + "1995 1 \n", + "1996 1 \n", + "1997 1 \n", + "1998 1 \n", + "1999 1 \n", + "\n", + "[2000 rows x 6 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate terrestrial and marine habitats\n", + "habitats_all = pd.concat([habitats_terrestrial, habitat_mar], ignore_index=True)\n", + "habitats_all" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1401056/3397704638.py:19: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " habitats_all['habitat'] = habitats_all['habitat'].replace(habitat_dict)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_idhabitattotal_areaprotected_areayearenvironment
0AF432.925993e+061.901505e+0520242
1AF449.875738e+066.586795e+0520242
2AF454.461370e+069.196570e+0520242
3AF462.039020e+061.926832e+0520242
4AF472.384026e+054.696373e+0420242
5AF488.384999e+061.697340e+0620242
6AF491.765385e+061.598750e+0520242
7AF503.021895e+055.023631e+0420242
8AS438.063010e+061.693832e+0520242
9AS443.536380e+062.998901e+0520242
\n", + "
" + ], + "text/plain": [ + " location_id habitat total_area protected_area year environment\n", + "0 AF 43 2.925993e+06 1.901505e+05 2024 2\n", + "1 AF 44 9.875738e+06 6.586795e+05 2024 2\n", + "2 AF 45 4.461370e+06 9.196570e+05 2024 2\n", + "3 AF 46 2.039020e+06 1.926832e+05 2024 2\n", + "4 AF 47 2.384026e+05 4.696373e+04 2024 2\n", + "5 AF 48 8.384999e+06 1.697340e+06 2024 2\n", + "6 AF 49 1.765385e+06 1.598750e+05 2024 2\n", + "7 AF 50 3.021895e+05 5.023631e+04 2024 2\n", + "8 AS 43 8.063010e+06 1.693832e+05 2024 2\n", + "9 AS 44 3.536380e+06 2.998901e+05 2024 2" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# change habitat to have the id of the habitat\n", + "habitat_dict = {\n", + " 'mangroves': 5,\n", + " 'seamounts': 6,\n", + " 'artificial': 43,\n", + " 'forest': 45,\n", + " 'grassland': 46,\n", + " 'wetlands/open water': 50,\n", + " 'seagrasses': 2,\n", + " 'cold-water corals': 4,\n", + " 'desert': 44,\n", + " 'rocky/mountains': 47,\n", + " 'savanna': 48,\n", + " 'shrubland': 49,\n", + " 'saltmarshes': 1,\n", + " 'warm-water corals': 3\n", + "}\n", + "\n", + "habitats_all['habitat'] = habitats_all['habitat'].replace(habitat_dict)\n", + "habitats_all.rename(columns={'location': 'location_id'}, inplace=True)\n", + "habitats_all.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "output(habitats_all, 'location_id', {}, {}, ['location_id']).to_csv(output_file, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhabitattotal_areaprotected_areayearenvironmentlocation
01432.925993e+06190150.539425202423.0
12449.875738e+06658679.485018202423.0
23454.461370e+06919656.960007202423.0
34462.039020e+06192683.234520202423.0
45472.384026e+0546963.725543202423.0
........................
1995199657.429267e+0421277.220000202014.0
1996199751.246190e+03732.143750202016.0
1997199852.415419e+032097.740000202017.0
1998199953.989344e+0427151.740000202018.0
1999200051.736209e+0227.830000202019.0
\n", + "

2000 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " id habitat total_area protected_area year environment location\n", + "0 1 43 2.925993e+06 190150.539425 2024 2 3.0\n", + "1 2 44 9.875738e+06 658679.485018 2024 2 3.0\n", + "2 3 45 4.461370e+06 919656.960007 2024 2 3.0\n", + "3 4 46 2.039020e+06 192683.234520 2024 2 3.0\n", + "4 5 47 2.384026e+05 46963.725543 2024 2 3.0\n", + "... ... ... ... ... ... ... ...\n", + "1995 1996 5 7.429267e+04 21277.220000 2020 1 4.0\n", + "1996 1997 5 1.246190e+03 732.143750 2020 1 6.0\n", + "1997 1998 5 2.415419e+03 2097.740000 2020 1 7.0\n", + "1998 1999 5 3.989344e+04 27151.740000 2020 1 8.0\n", + "1999 2000 5 1.736209e+02 27.830000 2020 1 9.0\n", + "\n", + "[2000 rows x 7 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = pd.read_csv(output_file)\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" + ] + } + ], + "source": [ + "# Upload csv to bucket\n", + "remote_path = 'vizzuality_processed_data/strapi_tables/habitats_exact.csv'\n", + "\n", + "writeReadGCP(\n", + " credentials=mysettings.GCS_KEYFILE_JSON,\n", + " bucket_name=mysettings.GCS_BUCKET,\n", + " blob_name=remote_path,\n", + " file=output_file,\n", + " operation=\"w\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'final_table1' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfinal_table1\u001b[49m\n", + "\u001b[0;31mNameError\u001b[0m: name 
'final_table1' is not defined" + ] + } + ], + "source": [ + "final_table1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing IDs: [4000, 8000, 12000, 16000, 20000, 24000, 28000, 32000, 36000, 40000, 44000, 48000, 52000, 56000, 60000, 64000, 68000, 72000, 76000, 80000, 84000, 88000, 92000, 96000, 100000, 104000, 108000, 112000, 116000, 120000, 124000, 128000, 132000, 136000, 140000, 144000, 148000, 152000, 156000, 160000, 164000, 168000, 172000, 176000, 180000, 184000, 188000, 192000, 196000, 200000, 204000, 208000, 212000, 216000, 220000, 224000, 228000, 232000, 236000, 240000, 244000, 248000, 252000, 256000, 260000, 264000, 268000, 272000, 276000, 280000, 284000, 288000, 292000, 296000, 300000, 304000]\n" + ] + } + ], + "source": [ + "import os\n", + "import json\n", + "from typing import List\n", + "\n", + "def find_missing_ids(folder_path: str) -> List[int]:\n", + " \"\"\"\n", + " Find missing IDs from JSON files in a folder.\n", + "\n", + " Parameters:\n", + " - folder_path (str): The path to the folder containing JSON files.\n", + "\n", + " Returns:\n", + " - List[int]: A list of missing IDs.\n", + " \"\"\"\n", + " ids = []\n", + "\n", + " # Iterate over each file in the folder\n", + " for filename in os.listdir(folder_path):\n", + " if filename.endswith(\".json\"):\n", + " file_path = os.path.join(folder_path, filename)\n", + " with open(file_path, 'r') as file:\n", + " data = json.load(file)\n", + " # Assuming the data is a dictionary with a nested structure\n", + " for entry_id in data.get(\"data\", {}).get(\"api::pa.pa\", {}).keys():\n", + " ids.append(int(entry_id))\n", + "\n", + " # Sort the IDs\n", + " ids.sort()\n", + "\n", + " # Find missing IDs\n", + " missing_ids = [i for i in range(ids[0], ids[-1] + 1) if i not in ids]\n", + "\n", + " return missing_ids\n", + "\n", + "\n", + "# Example usage\n", + "folder_path = 
\"/Users/sofia/Documents/SkyTruth/chunks1\"\n", + "missing_ids = find_missing_ids(folder_path)\n", + "print(\"Missing IDs:\", missing_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "missing_ids = list(range(4000, 304001, 4000)) # Ensure missing_ids is defined\n", + "missing_rows = final_table1[final_table1.index.isin(missing_ids)]\n", + "len(missing_rows)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wdpaidprotection_statusnamedesignationiucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxcoverageenvironmentlocationchildren
id
40001699631Lough FoyleArea of Special Scientific Interest8199817.2841223NaNNaN[-7.2615, 55.02512, -6.95054, 55.15644]2.936060e-04159.0[]
80009999691Offshore Island; Moke'ehinaSeabird Sanctuary4<NA>0.0260583NaNNaN[-156.5265, 20.98578, -156.52467, 20.9883]2.134853e-071160.0NaN
120005555653401Waitaki BridgeStewardship Area919870.3393193NaNNaN[171.09819, -44.93648, 171.10997, -44.92941]5.054808e-061118.0NaN
160005556555941Colvos Passage Marine PreserveMarine Protected Area320000.0090263NaNNaN[-122.55642, 47.34733, -122.55491, 47.34868]7.394379e-081160.0[]
200003241MochimaNational Park (PN)21973951.6236013NaNNaN[-64.76841, 10.15918, -64.22326, 10.43008]1.038614e-012162.0NaN
................................................
2880005557495991Twin Arch Business Park Section 4 Lots 8-19 & 35Park6<NA>0.0125843NaNNaN[-77.13543, 39.36138, -77.13257, 39.3645]1.329268e-072160.0NaN
2920005557579661ELKO PARKC - Park919580.2238913NaNNaN[-115.12089, 49.29449, -115.11511, 49.30275]2.260619e-06229.0NaN
2960005557681101LiechtbergForest Reserves620230.0377143NaNNaN[8.14625, 47.53048, 8.15053, 47.53406]9.170416e-052179.0NaN
3000005557832641Reserva Natural Dos RiosCivil Society Nature Reserve520230.2458953NaNNaN[-75.61825, 5.75218, -75.61308, 5.75883]2.152766e-05236.0NaN
3040005557877711Anse de Goulven, dunes de KeremmaSite of Community Importance (Habitats Directive)9200420.6380093NaNNaN[-4.31897, 48.62997, -4.19427, 48.67352]3.086233e-03256.0NaN
\n", + "

76 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " wdpaid protection_status \\\n", + "id \n", + "4000 169963 1 \n", + "8000 999969 1 \n", + "12000 555565340 1 \n", + "16000 555655594 1 \n", + "20000 324 1 \n", + "... ... ... \n", + "288000 555749599 1 \n", + "292000 555757966 1 \n", + "296000 555768110 1 \n", + "300000 555783264 1 \n", + "304000 555787771 1 \n", + "\n", + " name \\\n", + "id \n", + "4000 Lough Foyle \n", + "8000 Offshore Island; Moke'ehina \n", + "12000 Waitaki Bridge \n", + "16000 Colvos Passage Marine Preserve \n", + "20000 Mochima \n", + "... ... \n", + "288000 Twin Arch Business Park Section 4 Lots 8-19 & 35 \n", + "292000 ELKO PARK \n", + "296000 Liechtberg \n", + "300000 Reserva Natural Dos Rios \n", + "304000 Anse de Goulven, dunes de Keremma \n", + "\n", + " designation iucn_category \\\n", + "id \n", + "4000 Area of Special Scientific Interest 8 \n", + "8000 Seabird Sanctuary 4 \n", + "12000 Stewardship Area 9 \n", + "16000 Marine Protected Area 3 \n", + "20000 National Park (PN) 2 \n", + "... ... ... \n", + "288000 Park 6 \n", + "292000 C - Park 9 \n", + "296000 Forest Reserves 6 \n", + "300000 Civil Society Nature Reserve 5 \n", + "304000 Site of Community Importance (Habitats Directive) 9 \n", + "\n", + " year area data_source mpaa_establishment_stage \\\n", + "id \n", + "4000 1998 17.284122 3 NaN \n", + "8000 0.026058 3 NaN \n", + "12000 1987 0.339319 3 NaN \n", + "16000 2000 0.009026 3 NaN \n", + "20000 1973 951.623601 3 NaN \n", + "... ... ... ... ... 
\n", + "288000 0.012584 3 NaN \n", + "292000 1958 0.223891 3 NaN \n", + "296000 2023 0.037714 3 NaN \n", + "300000 2023 0.245895 3 NaN \n", + "304000 2004 20.638009 3 NaN \n", + "\n", + " mpaa_protection_level bbox \\\n", + "id \n", + "4000 NaN [-7.2615, 55.02512, -6.95054, 55.15644] \n", + "8000 NaN [-156.5265, 20.98578, -156.52467, 20.9883] \n", + "12000 NaN [171.09819, -44.93648, 171.10997, -44.92941] \n", + "16000 NaN [-122.55642, 47.34733, -122.55491, 47.34868] \n", + "20000 NaN [-64.76841, 10.15918, -64.22326, 10.43008] \n", + "... ... ... \n", + "288000 NaN [-77.13543, 39.36138, -77.13257, 39.3645] \n", + "292000 NaN [-115.12089, 49.29449, -115.11511, 49.30275] \n", + "296000 NaN [8.14625, 47.53048, 8.15053, 47.53406] \n", + "300000 NaN [-75.61825, 5.75218, -75.61308, 5.75883] \n", + "304000 NaN [-4.31897, 48.62997, -4.19427, 48.67352] \n", + "\n", + " coverage environment location children \n", + "id \n", + "4000 2.936060e-04 1 59.0 [] \n", + "8000 2.134853e-07 1 160.0 NaN \n", + "12000 5.054808e-06 1 118.0 NaN \n", + "16000 7.394379e-08 1 160.0 [] \n", + "20000 1.038614e-01 2 162.0 NaN \n", + "... ... ... ... ... 
\n", + "288000 1.329268e-07 2 160.0 NaN \n", + "292000 2.260619e-06 2 29.0 NaN \n", + "296000 9.170416e-05 2 179.0 NaN \n", + "300000 2.152766e-05 2 36.0 NaN \n", + "304000 3.086233e-03 2 56.0 NaN \n", + "\n", + "[76 rows x 15 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "missing_rows" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/src/pipelines/processors.py b/data/src/pipelines/processors.py index 5f1d7b48..6b3c15bb 100644 --- a/data/src/pipelines/processors.py +++ b/data/src/pipelines/processors.py @@ -10,6 +10,9 @@ import asyncio from tqdm.asyncio import tqdm +from itertools import product +from shapely.geometry import box + from data_commons.loader import ( load_regions, @@ -20,6 +23,13 @@ ) from pipelines.utils import background +import logging +logging.basicConfig(level=logging.DEBUG) +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("fiona").setLevel(logging.WARNING) +logger = logging.getLogger("notebook") + ## DATAFRAME PROCESSORS def filter_by_methodology(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -110,26 +120,44 @@ def create_year(df: pd.DataFrame | gpd.GeoDataFrame) -> pd.DataFrame | gpd.GeoDa def split_by_year( - gdf: gpd.GeoDataFrame, year_col: str = "STATUS_YR", year_val: int = 2010 + gdf: gpd.GeoDataFrame, + year_col: str = "STATUS_YR", + year_val: int = 2010, + environment: Literal['marine', 'terrestrial'] = 'marine' ) -> List[gpd.GeoDataFrame]: - """Split data by year. relevant for MPA data.(coverage indicator)""" - prior_2010 = ( - gdf[gdf[year_col] <= year_val] - .dissolve( - by=["PA_DEF", "iso_3"], - aggfunc={ - "PA_DEF": "count", - }, + """ + Split data by year. Relevant for MPA and terrestrial data (coverage indicator). 
+ """ + if environment == 'marine': + prior_2010 = ( + gdf[gdf[year_col] <= year_val] + .dissolve( + by=["PA_DEF", "iso_3"], + aggfunc={"PA_DEF": "count"}, + ) + .assign(year=2010) + .rename(columns={"PA_DEF": "protectedAreasCount"}) + .reset_index() ) - .assign(year=2010) - .rename(columns={"PA_DEF": "protectedAreasCount"}) - .reset_index() - ) - after_2010 = ( - gdf[gdf["STATUS_YR"] > 2010][["iso_3", "STATUS_YR", "PA_DEF", "geometry"]] - .assign(protectedAreasCount=1) - .rename(columns={"STATUS_YR": "year"}) - ) + after_2010 = ( + gdf[gdf[year_col] > year_val][["iso_3", "STATUS_YR", "PA_DEF", "geometry"]] + .assign(protectedAreasCount=1) + .rename(columns={"STATUS_YR": "year"}) + ) + elif environment == 'terrestrial': + prior_2010 = ( + gdf[gdf[year_col] <= year_val][["iso_3", "STATUS_YR", "geometry"]] + .dissolve(by=["iso_3"]) + .assign(year=2010) + .reset_index() + ) + after_2010 = ( + gdf[gdf[year_col] > year_val][["iso_3", "STATUS_YR", "geometry"]] + .rename(columns={"STATUS_YR": "year"}) + ) + else: + raise ValueError("Invalid environment. 
Must be 'marine' or 'terrestrial'.") + return [prior_2010, after_2010] @@ -480,6 +508,68 @@ def transform_points(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: else: return gdf +### Grid for more efficient processing (applied in terrestrial only) + +def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame: + """Create a grid of cells for a given GeoDataFrame""" + minx, miny, maxx, maxy = bounds + x = np.arange(minx, maxx, cell_size) + y = np.arange(miny, maxy, cell_size) + polygons = [ + { + "geometry": box(i, j, i + cell_size, j + cell_size), + "cell_id": f"{i}_{j}", + } + for i, j in product(x, y) + ] + return gpd.GeoDataFrame(polygons) + +def subdivide_grid( + grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int +) -> List: + subdivided_elements = [] + for grid_element in grid_gdf.geometry: + candidates = get_matches(grid_element, gdf) + density = len(candidates) + if density > max_complexity: + + subdivision_cellsize = max_cellsize / 2 + # Subdivide the grid element recursively + subgrid = create_grid(grid_element.bounds, subdivision_cellsize) + subdivided_elements.extend( + subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity) + ) + elif density > 0: + subdivided_elements.append(grid_element) + + return subdivided_elements + + +def create_density_based_grid( + gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000 +) -> gpd.GeoDataFrame: + # Get the bounds of the GeoDataFrame + minx, miny, maxx, maxy = gdf.total_bounds + + # Create an initial grid + grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize) + + # Subdivide grid elements based on density and complexity + subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity) + + return gpd.GeoDataFrame(geometry=subdivided_elements) + + +def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame): + result = [] + gdf["already_processed"] = False + for geometry 
in grid_gdf.geometry: + candidates = get_matches(geometry, gdf) + subset = gdf.loc[candidates.index][~gdf["already_processed"]] + gdf.loc[subset.index, "already_processed"] = True + if not subset.empty: + result.append(subset.drop(columns=["already_processed"]).reset_index(drop=True).copy()) + return result ### Spatial joins and dissolves @@ -505,33 +595,91 @@ def arrange_dimensions( ## TODO properly type this ## TODO: generalize the next operations to make them more reusable @background -def spatial_join_chunk(row_small, df_large, pbar): - test_row = gpd.GeoDataFrame([row_small], crs=df_large.crs) - candidates = get_matches(row_small.geometry, df_large.geometry) - if len(candidates) > 0: - subset = df_large.loc[candidates.index] - - result = subset.sjoin(test_row, how="inner").clip(test_row.geometry).reset_index(drop=True) - result.geometry = result.geometry.apply(repair_geometry) - else: - result = gpd.GeoDataFrame(columns=test_row.columns) - pbar.update(1) - return result +def spatial_join_chunk( + param: Union[int, gpd.GeoSeries], + gdf: gpd.GeoDataFrame, + pbar, + environment: Literal['marine', 'terrestrial'] +) -> gpd.GeoDataFrame: + """ + Perform a spatial join chunk based on the environment. 
+ """ + try: + if environment == 'marine': + test_row = gpd.GeoDataFrame([param], crs=gdf.crs) + candidates = get_matches(param.geometry, gdf.geometry) + if len(candidates) > 0: + subset = gdf.loc[candidates.index] + result = ( + gpd.overlay(test_row, subset, how="intersection") + .reset_index(drop=True) + .clip(test_row.geometry) + .reset_index(drop=True) + ) + result.geometry = result.geometry.apply(repair_geometry) + else: + result = gpd.GeoDataFrame(columns=test_row.columns) + elif environment == 'terrestrial': + bbox = param.total_bounds + candidates = get_matches(box(*bbox), gdf.geometry) + if len(candidates) > 0: + subset = gdf.loc[candidates.index].clip(box(*bbox)) + result = ( + gpd.overlay(param, subset, how="intersection") + .reset_index(drop=True) + .clip(subset.geometry) + .reset_index(drop=True) + ) + result.geometry = result.geometry.apply(repair_geometry) + else: + result = gpd.GeoDataFrame(columns=param.columns) + else: + raise ValueError("Invalid environment. Must be 'marine' or 'terrestrial'.") + return result + except Exception as e: + logging.error(e) + return gpd.GeoDataFrame() + finally: + pbar.update(1) async def spatial_join( - geodataframe_a: gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame + geodataframe_a: gpd.GeoDataFrame, + geodataframe_b: gpd.GeoDataFrame, + environment: Literal['marine', 'terrestrial'] ) -> gpd.GeoDataFrame: - """Create spatial join between two GeoDataFrames.""" - # we build the spatial index for the larger GeoDataFrame + """ + Create spatial join between two GeoDataFrames. 
+ """ + # Build the spatial index for the larger GeoDataFrame smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b) - with tqdm(total=smaller_dim.shape[0]) as pbar: # we create a progress bar - new_df = await asyncio.gather( - *( - spatial_join_chunk(row, larger_dim, pbar) - for row in smaller_dim.itertuples(index=False) + + if environment == 'marine': + with tqdm(total=smaller_dim.shape[0]) as pbar: # Create a progress bar + new_df = await asyncio.gather( + *( + spatial_join_chunk(row, larger_dim, pbar, environment) + for row in smaller_dim.itertuples(index=False) + ) ) - ) + elif environment == 'terrestrial': + logger.info(f"Processing {len(larger_dim)} elements") + + grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000) + + logger.info(f"Grid created with {len(grid)} cells") + + list_of_chunks = split_gdf_by_grid(larger_dim, grid) + + logger.info(f"Grid split into {len(list_of_chunks)} chunks") + + with tqdm(total=len(list_of_chunks)) as pbar: # Create a progress bar + new_df = await asyncio.gather( + *(spatial_join_chunk(chunk, smaller_dim, pbar, environment) for chunk in list_of_chunks) + ) + else: + raise ValueError("Invalid environment. Must be 'marine' or 'terrestrial'.") + return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs) @@ -561,17 +709,62 @@ async def create_difference(geodataframe1, geodataframe2): @background -def spatial_dissolve_chunk(i, gdf, pbar, _by, _aggfunc): - result = ( - gdf[gdf["year"] <= i] - .dissolve(by=_by, aggfunc=_aggfunc) - .assign(year=i) - .reset_index() - .pipe(calculate_area, "area", None) - .drop(columns=["geometry"]) - ) - pbar.update(1) - return result +def spatial_dissolve_chunk( + param: Union[int, gpd.GeoSeries], + gdf: gpd.GeoDataFrame, + pbar, + environment: Literal['marine', 'terrestrial'], + _by: list[str] = None, + _aggfunc: dict = None, +) -> pd.DataFrame: + """ + Perform a spatial dissolve chunk based on the environment. 
+ """ + try: + if environment == 'marine': + result = ( + gdf[gdf["year"] <= param] + .dissolve(by=_by, aggfunc=_aggfunc) + .assign(year=param) + .reset_index() + .pipe(calculate_area, "area", None) + .drop(columns=["geometry"]) + ) + elif environment == 'terrestrial': + candidates = get_matches(param, gdf.geometry) + subset = gdf.loc[candidates.index] + + result = pd.concat( + subset.clip(param).pipe(split_by_year, year_col="STATUS_YR"), ignore_index=True + ).copy() + + data_chunk = [ + ( + result[result["year"] <= 2010] + .reset_index() + .pipe(calculate_area, "area", None) + .drop(columns=["geometry"]) + ) + ] + for year in range(2011, 2025): + data_chunk.append( + result[result["year"] <= year] + .dissolve(by=["iso_3"]) + .assign(year=year) + .reset_index() + .pipe(calculate_area, "area", None) + .drop(columns=["geometry"]) + ) + + result = pd.concat(data_chunk, ignore_index=True) + else: + raise ValueError("Invalid environment. Must be 'marine' or 'terrestrial'.") + return result + except Exception as e: + logging.error(e) + return gpd.GeoDataFrame() + finally: + pbar.update(1) @background @@ -842,17 +1035,23 @@ def aggregate_area(df: pd.DataFrame) -> pd.DataFrame: async def process_mpa_data( - gdf: gpd.GeoDataFrame, loop: list[int], by: list[str], aggfunc: dict + gdf: gpd.GeoDataFrame, + loop: List[int], + by: List[str], + aggfunc: Dict, + environment: str = 'marine' ) -> pd.DataFrame: - """process protected planet data. relevant for acc coverage extent by year indicator.""" - # we split the data by =< year so we can acumulate the coverage - base = split_by_year(gdf) + """ + Process protected planet data. Relevant for acc coverage extent by year indicator. 
+ """ + # Split the data by year based on the environment + base = split_by_year(gdf, environment=environment) result_to_iter = pd.concat(base, ignore_index=True).copy() - with tqdm(total=len(loop)) as pbar: # we create a progress bar + with tqdm(total=len(loop)) as pbar: # Create a progress bar new_df = await asyncio.gather( - *(spatial_dissolve_chunk(year, result_to_iter, pbar, by, aggfunc) for year in loop) + *(spatial_dissolve_chunk(year, result_to_iter, pbar, by, aggfunc, environment) for year in loop) ) return pd.concat( [base[0].pipe(calculate_area, "area", None).drop(columns=["geometry"]), *new_df], @@ -860,6 +1059,17 @@ async def process_mpa_data( ) +async def process_grid(gdf: gpd.GeoDataFrame, environment: Literal['marine', 'terrestrial']) -> pd.DataFrame: + grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000) + logger.info(f"grid created with {grid_gdf.shape[0]} cells") + + with tqdm(total=grid_gdf.shape[0], desc="Processing grid elements") as pbar: + jobs = [spatial_dissolve_chunk(geometry, gdf, pbar, environment) for geometry in grid_gdf.geometry.values] + result = await asyncio.gather(*jobs) + return result + + + def process_mpaatlas_data(gdf: gpd.GeoDataFrame) -> pd.DataFrame: return ( gdf.dissolve(by=["protecti_1", "iso_3"], aggfunc={"name": "count"}) From 09568ca2c93c2683ab9434cb1734c941ad9f5fa0 Mon Sep 17 00:00:00 2001 From: sofia Date: Tue, 26 Nov 2024 11:31:02 +0100 Subject: [PATCH 16/16] update habitat section in precalculations notebook --- data/notebooks/habitat2.ipynb | 2295 --------- data/notebooks/habitats.ipynb | 2003 -------- data/notebooks/pipes_mock/precalc_sofia.ipynb | 4355 ----------------- .../pipes_mock/precalculations.ipynb | 870 +--- 4 files changed, 36 insertions(+), 9487 deletions(-) delete mode 100644 data/notebooks/habitat2.ipynb delete mode 100644 data/notebooks/habitats.ipynb delete mode 100644 data/notebooks/pipes_mock/precalc_sofia.ipynb diff --git a/data/notebooks/habitat2.ipynb 
b/data/notebooks/habitat2.ipynb deleted file mode 100644 index d65f3f71..00000000 --- a/data/notebooks/habitat2.ipynb +++ /dev/null @@ -1,2295 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set up" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING: The directory '/home/mambauser/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mDefaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: openpyxl in /opt/conda/lib/python3.12/site-packages (3.1.5)\n", - "Requirement already satisfied: et-xmlfile in /opt/conda/lib/python3.12/site-packages (from openpyxl) (1.1.0)\n" - ] - } - ], - "source": [ - "!pip install openpyxl" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "from datetime import datetime" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "path_in = \"../data/raw/\"\n", - "path_out = \"../data/processed/\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Country stats**" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "cold = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/coldwatercorals.csv\")\n", - "salt = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/saltmarshes.csv\")\n", - "sea = 
pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/seagrasses.csv\")\n", - "warm = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/warmwatercorals.csv\")\n", - "glob = pd.read_excel(path_in + \"Ocean+HabitatsDownload_Global/global-stats.xlsx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "cold2 = cold[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", - "salt2 = salt[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", - "sea2 = sea[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", - "warm2 = warm[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove rows with '-' in 'protected_area' or 'total_area'\n", - "cold2 = cold2[~cold2['protected_area'].str.contains('-') & ~cold2['total_area'].str.contains('-')]\n", - "salt2 = salt2[~salt2['protected_area'].str.contains('-') & ~salt2['total_area'].str.contains('-')]\n", - "sea2 = sea2[~sea2['protected_area'].str.contains('-') & ~sea2['total_area'].str.contains('-')]\n", - "warm2 = warm2[~warm2['protected_area'].str.contains('-') & ~warm2['total_area'].str.contains('-')]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Replace 'ATA' with 'ABNJ' in 'location_id'\n", - "cold2 = cold2.replace('ATA', 'ABNJ')\n", - "salt2 = salt2.replace('ATA', 'ABNJ')\n", - "sea2 = sea2.replace('ATA', 'ABNJ')\n", - "warm2 = warm2.replace('ATA', 'ABNJ')" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n", - " 'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n", - " 'TERRITORY2', 'ISO_TER2', 
'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n", - " 'TERRITORY3', 'ISO_TER3', 'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n", - " 'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n", - " 'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n", - " dtype='object')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Bring the eez file to get iso3 and parent_iso equivalences\n", - "eez = gpd.read_file(path_in + \"World_EEZ_v11_20191118/eez_v11.shp\")\n", - "eez.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize an empty dictionary\n", - "territory_iso = {}\n", - "\n", - "# Iterate over each row in the DataFrame\n", - "for index, row in eez.iterrows():\n", - " # Use TERRITORY1 as the key and ISO_SOV1 as the value\n", - " territory = row['ISO_TER1']\n", - " iso_sov = row['ISO_SOV1']\n", - " \n", - " # Check if the territory is not null or NaN\n", - " if pd.notnull(territory) and pd.notnull(iso_sov):\n", - " territory_iso[territory] = iso_sov\n", - "\n", - "# Add ABNJ\n", - "territory_iso['ABNJ'] = 'ABNJ' # Replace 'ABNJ_DEFAULT' with the desired default value\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_area
120UMI9.387856853621669.38785685362166
\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area\n", - "120 UMI 9.38785685362166 9.38785685362166" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cold2[cold2['location_id']=='UMI']" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a mapping dictionary for ISO3-PARENT_ISO pairs and modify the 'location_id' column in the habitats dataframes\n", - "cold2['location_id'] = cold2['location_id'].map(territory_iso)\n", - "salt2['location_id'] = salt2['location_id'].map(territory_iso)\n", - "sea2['location_id'] = sea2['location_id'].map(territory_iso)\n", - "warm2['location_id'] = warm2['location_id'].map(territory_iso)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_area
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [location_id, protected_area, total_area]\n", - "Index: []" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cold2[cold2['location_id']=='UMI']" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# Convert the 'protected_area' and 'total_area' columns to numeric\n", - "dataframes = [cold2, salt2, sea2, warm2]\n", - "\n", - "for df in dataframes:\n", - " df['protected_area'] = pd.to_numeric(df['protected_area'])\n", - " df['total_area'] = pd.to_numeric(df['total_area'])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# Group by 'location_id' and calculate the sum of 'protected_area' and 'total_area'\n", - "cold2_grouped = cold2.groupby('location_id').sum().reset_index()\n", - "salt2_grouped = salt2.groupby('location_id').sum().reset_index()\n", - "sea2_grouped = sea2.groupby('location_id').sum().reset_index()\n", - "warm2_grouped = warm2.groupby('location_id').sum().reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# Add the 'habitat_name' column\n", - "cold2_grouped['habitat_name'] = 'cold-water corals'\n", - "salt2_grouped['habitat_name'] = 'saltmarshes'\n", - "sea2_grouped['habitat_name'] = 'seagrasses'\n", - "warm2_grouped['habitat_name'] = 'warm-water corals'" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ427.0485241893.871282cold-water corals2024
1AGO0.0000003.395671cold-water corals2024
\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 427.048524 1893.871282 cold-water corals 2024\n", - "1 AGO 0.000000 3.395671 cold-water corals 2024" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the dataframes\n", - "habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped])\n", - "habitats['year'] = datetime.now().year\n", - "habitats.head(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Global stats**" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarsh111638.252564224435.075094GLOB2024
1seagrass74787.449960314001.940600GLOB2024
2warmwater-corals63259.499130149886.974126GLOB2024
4coldwater-corals4400.14084215336.975280GLOB2024
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 saltmarsh 111638.252564 224435.075094 GLOB 2024\n", - "1 seagrass 74787.449960 314001.940600 GLOB 2024\n", - "2 warmwater-corals 63259.499130 149886.974126 GLOB 2024\n", - "4 coldwater-corals 4400.140842 15336.975280 GLOB 2024" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global stats for habitats\n", - "habitats_global = glob[['habitat','protected_area', 'total_area']].rename(columns={'habitat': 'habitat_name'})\n", - "habitats_global['location_id'] = 'GLOB'\n", - "habitats_global['year'] = datetime.now().year\n", - "habitats_global = habitats_global[habitats_global['habitat_name'] != 'mangroves'] # remove mangroves\n", - "habitats_global" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarshes111638.252564224435.075094GLOB2024
1seagrasses74787.449960314001.940600GLOB2024
2warm-water corals63259.499130149886.974126GLOB2024
4cold-water corals4400.14084215336.975280GLOB2024
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 saltmarshes 111638.252564 224435.075094 GLOB 2024\n", - "1 seagrasses 74787.449960 314001.940600 GLOB 2024\n", - "2 warm-water corals 63259.499130 149886.974126 GLOB 2024\n", - "4 cold-water corals 4400.140842 15336.975280 GLOB 2024" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Change the name of the habitats to match the ones in the habitats dataframe\n", - "habitat_name_mapping = {\n", - " 'saltmarsh': 'saltmarshes',\n", - " 'seagrass': 'seagrasses',\n", - " 'warmwater-corals': 'warm-water corals',\n", - " 'coldwater-corals': 'cold-water corals'\n", - "}\n", - "habitats_global['habitat_name'] = habitats_global['habitat_name'].replace(habitat_name_mapping)\n", - "habitats_global" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['cold-water corals', 'saltmarshes', 'seagrasses',\n", - " 'warm-water corals'], dtype=object)" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the global stats to the habitats dataframe\n", - "habitats = pd.concat([habitats, habitats_global])\n", - "habitats['habitat_name'].unique()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Regions stats**" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", - "regions_data = [\n", - " {\n", - " 'region_iso': 'AS',\n", - " 'region_name': 'Asia & Pacific',\n", - " 'country_iso_3s': [\n", - " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", - " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", 
\"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", - " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", - " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AF',\n", - " 'region_name': 'Africa',\n", - " 'country_iso_3s': [\n", - " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", - " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", - " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", - " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'EU',\n", - " 'region_name': 'Europe',\n", - " 'country_iso_3s': [\n", - " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", - " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", - " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", - " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", - " \"UZB\", \"VAT\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'SA',\n", - " 'region_name': 'Latin America & Caribbean',\n", - " 'country_iso_3s': [\n", - " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", 
\"COL\", \"CRI\",\n", - " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", - " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", - " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'PO',\n", - " 'region_name': 'Polar',\n", - " 'country_iso_3s': [\n", - " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'NA',\n", - " 'region_name': 'North America',\n", - " 'country_iso_3s': [\n", - " \"CAN\", \"SPM\", \"USA\"\n", - " ]\n", - " },\n", - " \n", - " {\n", - " 'region_iso': 'WA',\n", - " 'region_name': 'West Asia',\n", - " 'country_iso_3s': [\n", - " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", - " ]\n", - " }\n", - "]\n", - "\n", - "# Convert the region data to a dictionary that maps each country to its region name\n", - "country_to_region = {}\n", - "for region in regions_data:\n", - " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_iso']" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitat_nameprotected_areatotal_areayear
0AFcold-water corals29.477984377.6059592024
1AFsaltmarshes6688.70287919847.7574982024
2AFseagrasses6319.09949163472.0687922024
3AFwarm-water corals6591.34008315615.1936292024
4AScold-water corals428.3579481714.5593842024
5ASsaltmarshes11965.69391044702.8051872024
6ASseagrasses29091.313202123320.7277982024
7ASwarm-water corals41328.384526100117.4157922024
8EUcold-water corals2665.9295177307.5011172024
9EUsaltmarshes11399.88231818450.5500922024
10EUseagrasses9767.76058116552.5895552024
11EUwarm-water corals4357.9310189873.2102372024
12NAcold-water corals438.7397162393.7043412024
13NAsaltmarshes57209.60317687048.1644942024
14NAseagrasses8800.52079415860.8997572024
15NAwarm-water corals3824.8169394717.3880572024
16SAcold-water corals225.3435131406.8634662024
17SAsaltmarshes22969.81590635983.3927442024
18SAseagrasses16517.09766745847.4594122024
19SAwarm-water corals5425.03653412697.4709202024
20WAcold-water corals0.00000012.9707052024
21WAsaltmarshes1402.95876218398.0335362024
22WAseagrasses1053.44867325348.6062582024
23WAwarm-water corals547.9289574903.2303952024
\n", - "
" - ], - "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF cold-water corals 29.477984 377.605959 2024\n", - "1 AF saltmarshes 6688.702879 19847.757498 2024\n", - "2 AF seagrasses 6319.099491 63472.068792 2024\n", - "3 AF warm-water corals 6591.340083 15615.193629 2024\n", - "4 AS cold-water corals 428.357948 1714.559384 2024\n", - "5 AS saltmarshes 11965.693910 44702.805187 2024\n", - "6 AS seagrasses 29091.313202 123320.727798 2024\n", - "7 AS warm-water corals 41328.384526 100117.415792 2024\n", - "8 EU cold-water corals 2665.929517 7307.501117 2024\n", - "9 EU saltmarshes 11399.882318 18450.550092 2024\n", - "10 EU seagrasses 9767.760581 16552.589555 2024\n", - "11 EU warm-water corals 4357.931018 9873.210237 2024\n", - "12 NA cold-water corals 438.739716 2393.704341 2024\n", - "13 NA saltmarshes 57209.603176 87048.164494 2024\n", - "14 NA seagrasses 8800.520794 15860.899757 2024\n", - "15 NA warm-water corals 3824.816939 4717.388057 2024\n", - "16 SA cold-water corals 225.343513 1406.863466 2024\n", - "17 SA saltmarshes 22969.815906 35983.392744 2024\n", - "18 SA seagrasses 16517.097667 45847.459412 2024\n", - "19 SA warm-water corals 5425.036534 12697.470920 2024\n", - "20 WA cold-water corals 0.000000 12.970705 2024\n", - "21 WA saltmarshes 1402.958762 18398.033536 2024\n", - "22 WA seagrasses 1053.448673 25348.606258 2024\n", - "23 WA warm-water corals 547.928957 4903.230395 2024" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Add regions field\n", - "habitats_regions = habitats.copy()\n", - "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n", - "\n", - "# Calculate stats for each region\n", - "habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "habitats_regions['year'] = datetime.now().year\n", - 
"habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "habitats_regions" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate region statistics to the habitats dataframe\n", - "habitats = pd.concat([habitats, habitats_regions])" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241893.871282cold-water corals2024marine
1AGO0.0000003.395671cold-water corals2024marine
\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area habitat_name year \\\n", - "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", - "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", - "\n", - " environment \n", - "0 marine \n", - "1 marine " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Add environment\n", - "habitats['environment'] = 'marine'\n", - "habitats.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "habitats.to_csv(path_out + \"habitats/ocean+_processed.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process seamounts from [UN WCMC](https://data.unep-wcmc.org/datasets/41)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "# Read required data\n", - "seamounts = gpd.read_file(path_in + \"Seamounts/seamounts.shp\")\n", - "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", - "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", - "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "# Keep relevant fields in eez and hs and merge them in one dataframe\n", - "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", - "hs = hs[['geometry']]\n", - "hs['SOVEREIGN1'] = 'High Seas'\n", - "hs['ISO_SOV1'] = 'ABNJ'\n", - "eez_hs = eez.merge(hs, how='outer')" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "# Join eez/highseas info to seamounts falling within eez polygons and drop those not associated with any of them\n", - "seamounts_eez = gpd.sjoin(seamounts, eez_hs, 
how=\"left\", predicate=\"within\")\n", - "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "# Create new column \"iso\" with the iso_sov codes from eez/hs data\n", - "def concatenate_iso(row):\n", - " iso_list = [row['ISO_SOV1']]\n", - " if not pd.isna(row['ISO_SOV2']):\n", - " iso_list.append(row['ISO_SOV2'])\n", - " if not pd.isna(row['ISO_SOV3']):\n", - " iso_list.append(row['ISO_SOV3'])\n", - " return ';'.join(iso_list)\n", - "\n", - "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "# Check which seamounts are protected\n", - "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", - "seamounts_wdpa['protection'] = \"no\" \n", - "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n", - "\n", - "# Keep relevant fields\n", - "seamounts_wdpa = seamounts_wdpa[['PEAKID', 'protection']]" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PEAKIDisoAREA2Dprotectiongeometry
026000DNK982.028337noPOINT (2.76250 84.97974)
126157ABNJ348.473055noPOINT (9.14306 84.93529)
226158ABNJ367.540380noPOINT (9.18333 84.93807)
326228ABNJ299.443636noPOINT (8.74861 84.90751)
426229ABNJ309.588492noPOINT (8.88750 84.91307)
\n", - "
" - ], - "text/plain": [ - " PEAKID iso AREA2D protection geometry\n", - "0 26000 DNK 982.028337 no POINT (2.76250 84.97974)\n", - "1 26157 ABNJ 348.473055 no POINT (9.14306 84.93529)\n", - "2 26158 ABNJ 367.540380 no POINT (9.18333 84.93807)\n", - "3 26228 ABNJ 299.443636 no POINT (8.74861 84.90751)\n", - "4 26229 ABNJ 309.588492 no POINT (8.88750 84.91307)" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Merge information about protection with seamounts_eez and keep only relevant fields\n", - "seamounts_eez_protection = seamounts_eez.merge(seamounts_wdpa, how=\"left\", on='PEAKID')[['PEAKID', 'iso', 'AREA2D', 'protection', 'geometry']]\n", - "seamounts_eez_protection.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PEAKIDisoAREA2Dprotectiongeometry
\n", - "
" - ], - "text/plain": [ - "Empty GeoDataFrame\n", - "Columns: [PEAKID, iso, AREA2D, protection, geometry]\n", - "Index: []" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# All seamounts that have iso \"ATA\" should have iso \"ABNJ\"\n", - "seamounts_eez_protection.loc[seamounts_eez_protection['iso']=='ATA', 'iso'] = 'ABNJ'\n", - "seamounts_eez_protection[seamounts_eez_protection['iso']=='ATA']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Global stats**" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nametotal_areaprotected_arealocation_idyear
0seamounts2.690810e+073.426630e+06GLOB2011
\n", - "
" - ], - "text/plain": [ - " habitat_name total_area protected_area location_id year\n", - "0 seamounts 2.690810e+07 3.426630e+06 GLOB 2011" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate the total_area (sum of AREA2D for all seamounts)\n", - "total_area = seamounts_eez_protection['AREA2D'].sum()\n", - "\n", - "# Calculate the protected_area (sum of AREA2D for seamounts where 'protection' is 'yes')\n", - "protected_area = seamounts_eez_protection.loc[seamounts_eez_protection['protection'] == 'yes', 'AREA2D'].sum()\n", - "\n", - "# Create a DataFrame with the results\n", - "seamounts_global = pd.DataFrame({\n", - " 'habitat_name': ['seamounts'],\n", - " 'total_area': [total_area],\n", - " 'protected_area': [protected_area],\n", - " 'location_id': ['GLOB'],\n", - " 'year': [2011]\n", - "})\n", - "\n", - "seamounts_global\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Country stats**" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "# Split the 'iso_code' values and create separate rows only for those with multiple values\n", - "mask = seamounts_eez_protection['iso'].str.contains(';', na=False)\n", - "split_rows = seamounts_eez_protection[mask].copy()\n", - "split_rows['iso'] = split_rows['iso'].str.split(';')\n", - "split_rows = split_rows.explode('iso')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_eez_protection[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_eez_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the total_area (sum of AREA2D for all seamounts)\n", - "total_area_iso = seamounts_eez_iso.groupby(['iso']).agg({'AREA2D': 
'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n", - "\n", - "# Calculate the protected_area_iso (sum of AREA2D for seamounts where 'protection' is 'yes' grouped by 'iso')\n", - "protected_area_iso = seamounts_eez_iso.loc[seamounts_eez_iso['protection'] == 'yes'].groupby('iso')['AREA2D'].sum().reset_index().rename(columns={'AREA2D': 'protected_area'})" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idtotal_areaprotected_areahabitat_nameyear
0ABNJ1.518615e+07308819.904730seamounts2011
1AGO9.556242e+03NaNseamounts2011
2ARG3.110730e+05303902.727433seamounts2011
3ATG6.215895e+03NaNseamounts2011
4AUS4.772977e+05250507.827932seamounts2011
..................
87VNM4.421338e+04NaNseamounts2011
88VUT1.199475e+0543501.694036seamounts2011
89WSM4.117997e+04NaNseamounts2011
90YEM6.294974e+042487.428050seamounts2011
91ZAF9.946306e+0441753.168421seamounts2011
\n", - "

92 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " location_id total_area protected_area habitat_name year\n", - "0 ABNJ 1.518615e+07 308819.904730 seamounts 2011\n", - "1 AGO 9.556242e+03 NaN seamounts 2011\n", - "2 ARG 3.110730e+05 303902.727433 seamounts 2011\n", - "3 ATG 6.215895e+03 NaN seamounts 2011\n", - "4 AUS 4.772977e+05 250507.827932 seamounts 2011\n", - ".. ... ... ... ... ...\n", - "87 VNM 4.421338e+04 NaN seamounts 2011\n", - "88 VUT 1.199475e+05 43501.694036 seamounts 2011\n", - "89 WSM 4.117997e+04 NaN seamounts 2011\n", - "90 YEM 6.294974e+04 2487.428050 seamounts 2011\n", - "91 ZAF 9.946306e+04 41753.168421 seamounts 2011\n", - "\n", - "[92 rows x 5 columns]" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Merge total_area_iso and protected_area_iso DataFrames on 'iso'\n", - "seamounts_iso = total_area_iso.merge(protected_area_iso, how='left', on='iso').rename(columns={'iso': 'location_id'})\n", - "seamounts_iso['habitat_name'] = 'seamounts'\n", - "seamounts_iso['year'] = 2011\n", - "seamounts_iso" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Regions stats**" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitat_nameprotected_areatotal_areayear
0AFseamounts1.733576e+056.162351e+052011
1ASseamounts1.114013e+065.433433e+062011
2EUseamounts7.472441e+052.641119e+062011
3NAseamounts5.544910e+051.664794e+062011
4SAseamounts8.474488e+051.655552e+062011
5WAseamounts2.487428e+039.384765e+042011
\n", - "
" - ], - "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF seamounts 1.733576e+05 6.162351e+05 2011\n", - "1 AS seamounts 1.114013e+06 5.433433e+06 2011\n", - "2 EU seamounts 7.472441e+05 2.641119e+06 2011\n", - "3 NA seamounts 5.544910e+05 1.664794e+06 2011\n", - "4 SA seamounts 8.474488e+05 1.655552e+06 2011\n", - "5 WA seamounts 2.487428e+03 9.384765e+04 2011" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_regions = seamounts_iso.copy()\n", - "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n", - "\n", - "# Calculate stats for each region\n", - "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "seamounts_regions['year'] = 2011\n", - "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "seamounts_regions" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate region and global stats to seamounts_iso2\n", - "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "# Add environment\n", - "seamounts_all['environment'] = 'marine'" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "seamounts_all.to_csv(path_out + \"habitats/seamounts_processed.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process mangroves from GMW" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [], - "source": [ - "mangroves = pd.read_csv(path_in + \"mangroves/mangroves.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - 
"outputs": [], - "source": [ - "# Change location_id to match parent_iso from eez\n", - "mangroves['location_id'] = mangroves['location_id'].map(territory_iso)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "mangroves_iso = mangroves.groupby('location_id').agg({\n", - " 'habitat_name': 'first', \n", - " 'year': 'first', \n", - " 'protected_area': 'sum', \n", - " 'total_area': 'sum' \n", - "}).reset_index()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_areatotal_arealocation_idyear
0mangroves61287.20375147358.990971GLOB2020
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 mangroves 61287.20375 147358.990971 GLOB 2020" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global stats for mangroves\n", - "mangroves_global = mangroves_iso.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "mangroves_global['location_id'] = 'GLOB'\n", - "mangroves_global['year'] = 2020\n", - "mangroves_global" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate the global stats to the mangroves dataframe\n", - "mangroves_all = pd.concat([mangroves_iso, mangroves_global])" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitat_nameprotected_areatotal_areayear
0AFmangroves10000.5300029337.6440452020
1ASmangroves21277.2200074292.6731462020
2EUmangroves732.143751246.1896772020
3NAmangroves2097.740002415.4185572020
4SAmangroves27151.7400039893.4446082020
5WAmangroves27.83000173.6209382020
\n", - "
" - ], - "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF mangroves 10000.53000 29337.644045 2020\n", - "1 AS mangroves 21277.22000 74292.673146 2020\n", - "2 EU mangroves 732.14375 1246.189677 2020\n", - "3 NA mangroves 2097.74000 2415.418557 2020\n", - "4 SA mangroves 27151.74000 39893.444608 2020\n", - "5 WA mangroves 27.83000 173.620938 2020" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mangroves_regions = mangroves_iso.copy()\n", - "mangroves_regions['region'] = mangroves_iso['location_id'].map(country_to_region)\n", - "\n", - "# Calculate stats for each region\n", - "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "mangroves_regions['year'] = 2020\n", - "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "mangroves_regions" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate stats for regions with mangroves\n", - "mangroves_all = pd.concat([mangroves_all, mangroves_regions])" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "# Add environment\n", - "mangroves_all['environment'] = 'marine'" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "# Save file\n", - "mangroves_all.to_csv(path_out + \"habitats/mangroves_processed.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Concatenate all habitats" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241893.871282cold-water corals2024marine
1AGO0.0000003.395671cold-water corals2024marine
2ALB0.0000005.986479cold-water corals2024marine
3ARG6.98422661.826344cold-water corals2024marine
4ATG0.0000000.997747cold-water corals2024marine
.....................
1AS21277.22000074292.673146mangroves2020marine
2EU732.1437501246.189677mangroves2020marine
3NA2097.7400002415.418557mangroves2020marine
4SA27151.74000039893.444608mangroves2020marine
5WA27.830000173.620938mangroves2020marine
\n", - "

614 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area habitat_name year \\\n", - "0 ABNJ 427.048524 1893.871282 cold-water corals 2024 \n", - "1 AGO 0.000000 3.395671 cold-water corals 2024 \n", - "2 ALB 0.000000 5.986479 cold-water corals 2024 \n", - "3 ARG 6.984226 61.826344 cold-water corals 2024 \n", - "4 ATG 0.000000 0.997747 cold-water corals 2024 \n", - ".. ... ... ... ... ... \n", - "1 AS 21277.220000 74292.673146 mangroves 2020 \n", - "2 EU 732.143750 1246.189677 mangroves 2020 \n", - "3 NA 2097.740000 2415.418557 mangroves 2020 \n", - "4 SA 27151.740000 39893.444608 mangroves 2020 \n", - "5 WA 27.830000 173.620938 mangroves 2020 \n", - "\n", - " environment \n", - "0 marine \n", - "1 marine \n", - "2 marine \n", - "3 marine \n", - "4 marine \n", - ".. ... \n", - "1 marine \n", - "2 marine \n", - "3 marine \n", - "4 marine \n", - "5 marine \n", - "\n", - "[614 rows x 6 columns]" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the dataframes\n", - "habitats_all = pd.concat([habitats, seamounts_all, mangroves_all])\n", - "habitats_all" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_areahabitat_nameyearenvironment
0ABNJ427.0485241.893871e+03cold-water corals2024marine
0ABNJ0.0000006.335727e+03seagrasses2024marine
0ABNJ308819.9047301.518615e+07seamounts2011marine
\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area habitat_name year \\\n", - "0 ABNJ 427.048524 1.893871e+03 cold-water corals 2024 \n", - "0 ABNJ 0.000000 6.335727e+03 seagrasses 2024 \n", - "0 ABNJ 308819.904730 1.518615e+07 seamounts 2011 \n", - "\n", - " environment \n", - "0 marine \n", - "0 marine \n", - "0 marine " - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitats_all[habitats_all['location_id'] == 'ABNJ']" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "habitats_all.to_csv(path_out + \"tables/habitats6.csv\", index=False, na_rep='NaN', encoding='utf-8', sep=',', decimal='.')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Terrestrial habitats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/data/notebooks/habitats.ipynb b/data/notebooks/habitats.ipynb deleted file mode 100644 index ce49ba1a..00000000 --- a/data/notebooks/habitats.ipynb +++ /dev/null @@ -1,2003 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set up" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting openpyxl\n", - " Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)\n", - "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.0/250.0 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n", - "\u001b[?25hCollecting et-xmlfile (from openpyxl)\n", - " Downloading 
et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n", - "Installing collected packages: et-xmlfile, openpyxl\n", - "Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2\n" - ] - } - ], - "source": [ - "!pip install openpyxl" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "import openpyxl\n", - "from datetime import datetime" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "path_in = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/\"\n", - "path_out = \"/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "cold = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/coldwatercorals.csv\")\n", - "salt = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/saltmarshes.csv\")\n", - "sea = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/seagrasses.csv\")\n", - "warm = pd.read_csv(path_in + \"Ocean+HabitatsDownload_Global/warmwatercorals.csv\")\n", - "glob = pd.read_excel(path_in + \"Ocean+HabitatsDownload_Global/global-stats.xlsx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "cold2 = cold[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", - "salt2 = salt[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", - "sea2 = sea[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})\n", - "warm2 = warm[['ISO3', 'protected_area', 'total_area']].rename(columns={'ISO3': 'location_id'})" - ] - }, - { - "cell_type": "code", - "execution_count": 
5, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove rows with '-' in 'protected_area' or 'total_area'\n", - "cold2 = cold2[~cold2['protected_area'].str.contains('-') & ~cold2['total_area'].str.contains('-')]\n", - "salt2 = salt2[~salt2['protected_area'].str.contains('-') & ~salt2['total_area'].str.contains('-')]\n", - "sea2 = sea2[~sea2['protected_area'].str.contains('-') & ~sea2['total_area'].str.contains('-')]\n", - "warm2 = warm2[~warm2['protected_area'].str.contains('-') & ~warm2['total_area'].str.contains('-')]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Bring the wdpa file to get the iso3 and parent_iso equivalences\n", - "wdpa = gpd.read_file(path_out + \"wdpa/merged_mpa.shp\")\n", - "\n", - "# Filter out rows with multiple values in either 'ISO3' or 'PARENT_ISO'\n", - "wdpa = wdpa[~wdpa['ISO3'].str.contains(';') & ~wdpa['PARENT_ISO'].str.contains(';')]\n", - "\n", - "# Extract unique ISO3-PARENT_ISO pairs\n", - "unique_pairs = wdpa[['ISO3', 'PARENT_ISO']].drop_duplicates()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a mapping dictionary for ISO3-PARENT_ISO pairs and modify the 'location_id' column in the habitats dataframes\n", - "mapping_dict = dict(zip(unique_pairs['ISO3'], unique_pairs['PARENT_ISO']))\n", - "cold2['location_id'] = cold2['location_id'].map(mapping_dict)\n", - "salt2['location_id'] = salt2['location_id'].map(mapping_dict)\n", - "sea2['location_id'] = sea2['location_id'].map(mapping_dict)\n", - "warm2['location_id'] = warm2['location_id'].map(mapping_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Group by 'location_id' and calculate the sum of 'protected_area' and 'total_area'\n", - "cold2_grouped = cold2.groupby('location_id').sum().reset_index()\n", - "salt2_grouped = 
salt2.groupby('location_id').sum().reset_index()\n", - "sea2_grouped = sea2.groupby('location_id').sum().reset_index()\n", - "warm2_grouped = warm2.groupby('location_id').sum().reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Add the 'habitat_name' column\n", - "cold2_grouped['habitat_name'] = 'cold-water corals'\n", - "salt2_grouped['habitat_name'] = 'saltmarshes'\n", - "sea2_grouped['habitat_name'] = 'seagrasses'\n", - "warm2_grouped['habitat_name'] = 'warm-water corals'" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293726799041874.98221422617cold-water corals2023
1AGO03.39567053773998cold-water corals2023
\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n", - "1 AGO 0 3.39567053773998 cold-water corals 2023" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the dataframes\n", - "habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped])\n", - "habitats['year'] = datetime.now().year\n", - "habitats.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarsh111638.252564224435.075094GLOB2023
1seagrass74787.449960314001.940600GLOB2023
2warmwater-corals63259.499130149886.974126GLOB2023
4coldwater-corals4400.14084215336.975280GLOB2023
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 saltmarsh 111638.252564 224435.075094 GLOB 2023\n", - "1 seagrass 74787.449960 314001.940600 GLOB 2023\n", - "2 warmwater-corals 63259.499130 149886.974126 GLOB 2023\n", - "4 coldwater-corals 4400.140842 15336.975280 GLOB 2023" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global stats for habitats\n", - "habitats_global = glob[['habitat','protected_area', 'total_area']].rename(columns={'habitat': 'habitat_name'})\n", - "habitats_global['location_id'] = 'GLOB'\n", - "habitats_global['year'] = datetime.now().year\n", - "habitats_global = habitats_global[habitats_global['habitat_name'] != 'mangroves'] # remove mangroves\n", - "habitats_global" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_areatotal_arealocation_idyear
0saltmarshes111638.252564224435.075094GLOB2023
1seagrasses74787.449960314001.940600GLOB2023
2warm-water corals63259.499130149886.974126GLOB2023
4cold-water corals4400.14084215336.975280GLOB2023
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 saltmarshes 111638.252564 224435.075094 GLOB 2023\n", - "1 seagrasses 74787.449960 314001.940600 GLOB 2023\n", - "2 warm-water corals 63259.499130 149886.974126 GLOB 2023\n", - "4 cold-water corals 4400.140842 15336.975280 GLOB 2023" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Change the name of the habitats to match the ones in the habitats dataframe\n", - "habitat_name_mapping = {\n", - " 'saltmarsh': 'saltmarshes',\n", - " 'seagrass': 'seagrasses',\n", - " 'warmwater-corals': 'warm-water corals',\n", - " 'coldwater-corals': 'cold-water corals'\n", - "}\n", - "habitats_global['habitat_name'] = habitats_global['habitat_name'].replace(habitat_name_mapping)\n", - "habitats_global" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['cold-water corals', 'saltmarshes', 'seagrasses',\n", - " 'warm-water corals'], dtype=object)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the global stats to the habitats dataframe\n", - "habitats = pd.concat([habitats, habitats_global])\n", - "habitats['habitat_name'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n", - "regions_data = [\n", - " {\n", - " 'region_iso': 'AS',\n", - " 'region_name': 'Asia & Pacific',\n", - " 'country_iso_3s': [\n", - " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n", - " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n", - 
" \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n", - " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AF',\n", - " 'region_name': 'Africa',\n", - " 'country_iso_3s': [\n", - " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n", - " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n", - " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n", - " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'EU',\n", - " 'region_name': 'Europe',\n", - " 'country_iso_3s': [\n", - " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n", - " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n", - " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n", - " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n", - " \"UZB\", \"VAT\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'SA',\n", - " 'region_name': 'Latin America & Caribbean',\n", - " 'country_iso_3s': [\n", - " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n", - " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", 
\"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n", - " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n", - " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'PO',\n", - " 'region_name': 'Polar',\n", - " 'country_iso_3s': [\n", - " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'NA',\n", - " 'region_name': 'North America',\n", - " 'country_iso_3s': [\n", - " \"CAN\", \"SPM\", \"USA\"\n", - " ]\n", - " },\n", - " \n", - " {\n", - " 'region_iso': 'WA',\n", - " 'region_name': 'West Asia',\n", - " 'country_iso_3s': [\n", - " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n", - " ]\n", - " },\n", - " {\n", - " 'region_iso': 'AT', # this region is not in the Protected Planet database\n", - " 'region_name': 'Antartica',\n", - " 'country_iso_3s': [\n", - " \"ATA\"\n", - " ]\n", - " }\n", - "]\n", - "\n", - "# Convert the region data to a dictionary that maps each country to its region name\n", - "country_to_region = {}\n", - "for region in regions_data:\n", - " for country in region['country_iso_3s']:\n", - " country_to_region[country] = region['region_iso']" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitat_nameprotected_areatotal_areayear
0AFcold-water corals37.761626381.9932342023
1AFsaltmarshes6688.70287919845.9150002023
2AFseagrasses6319.09949161939.4849042023
3AFwarm-water corals6591.34008315216.3939472023
4AScold-water corals263.2514981332.2250802023
5ASsaltmarshes11721.43953939229.8888602023
6ASseagrasses28942.70566072666.4820522023
7ASwarm-water corals13895.87065967363.4866092023
8EUcold-water corals2183.0502666179.5264272023
9EUsaltmarshes7431.04371013274.3264782023
10EUseagrasses5840.37292510391.1899112023
11EUwarm-water corals0.6057630.7933572023
12NAcold-water corals22.960099204.2804332023
13NAsaltmarshes51092.64468368200.0819302023
14NAseagrasses70.012791301.9091412023
15NAwarm-water corals0.0000000.0000002023
16SAcold-water corals234.7313701416.2513232023
17SAsaltmarshes22969.81590635983.3927442023
18SAseagrasses16517.09766745847.4594122023
19SAwarm-water corals5597.36684512869.8012312023
20WAcold-water corals0.00000012.9707052023
21WAsaltmarshes1309.22573611798.8326192023
22WAseagrasses1053.44867325273.7274312023
23WAwarm-water corals547.9289574903.2303952023
\n", - "
" - ], - "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF cold-water corals 37.761626 381.993234 2023\n", - "1 AF saltmarshes 6688.702879 19845.915000 2023\n", - "2 AF seagrasses 6319.099491 61939.484904 2023\n", - "3 AF warm-water corals 6591.340083 15216.393947 2023\n", - "4 AS cold-water corals 263.251498 1332.225080 2023\n", - "5 AS saltmarshes 11721.439539 39229.888860 2023\n", - "6 AS seagrasses 28942.705660 72666.482052 2023\n", - "7 AS warm-water corals 13895.870659 67363.486609 2023\n", - "8 EU cold-water corals 2183.050266 6179.526427 2023\n", - "9 EU saltmarshes 7431.043710 13274.326478 2023\n", - "10 EU seagrasses 5840.372925 10391.189911 2023\n", - "11 EU warm-water corals 0.605763 0.793357 2023\n", - "12 NA cold-water corals 22.960099 204.280433 2023\n", - "13 NA saltmarshes 51092.644683 68200.081930 2023\n", - "14 NA seagrasses 70.012791 301.909141 2023\n", - "15 NA warm-water corals 0.000000 0.000000 2023\n", - "16 SA cold-water corals 234.731370 1416.251323 2023\n", - "17 SA saltmarshes 22969.815906 35983.392744 2023\n", - "18 SA seagrasses 16517.097667 45847.459412 2023\n", - "19 SA warm-water corals 5597.366845 12869.801231 2023\n", - "20 WA cold-water corals 0.000000 12.970705 2023\n", - "21 WA saltmarshes 1309.225736 11798.832619 2023\n", - "22 WA seagrasses 1053.448673 25273.727431 2023\n", - "23 WA warm-water corals 547.928957 4903.230395 2023" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Add regions field\n", - "habitats_regions = habitats.copy()\n", - "habitats_regions['region'] = habitats['location_id'].map(country_to_region)\n", - "\n", - "# Convert fields to numeric\n", - "habitats_regions['protected_area'] = pd.to_numeric(habitats_regions['protected_area'], errors='coerce')\n", - "habitats_regions['total_area'] = pd.to_numeric(habitats_regions['total_area'], errors='coerce')\n", - "\n", - "# Calculate stats for each region\n", - 
"habitats_regions = habitats_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "habitats_regions['year'] = datetime.now().year\n", - "habitats_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "habitats_regions\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate region statistics to the habitats dataframe\n", - "habitats = pd.concat([habitats, habitats_regions])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['ABNJ', 'AGO', 'ALB', 'ARG', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',\n", - " 'BRB', 'CAN', 'CHL', 'CHN', 'CIV', 'COK', 'COL', 'CPV', 'CRI',\n", - " 'CUB', 'CYP', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'ERI', 'ESP',\n", - " 'FJI', 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC',\n", - " 'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL',\n", - " 'ISL', 'ITA', 'JAM', 'JPN', 'KEN', 'KIR', 'KNA', 'LBR', 'LCA',\n", - " 'LKA', 'MAR', 'MDG', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE', 'MOZ',\n", - " 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR', 'NZL',\n", - " 'OMN', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRT', 'RUS', 'SAU',\n", - " 'SEN', 'SHN', 'SJM', 'STP', 'SUR', 'SWE', 'SYC', 'THA', 'TLS',\n", - " 'TON', 'TTO', 'TUN', 'TUV', 'UMI', 'URY', 'USA', 'VCT', 'VEN',\n", - " 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ARE', 'AZE', 'BEL', 'BGR',\n", - " 'BHR', 'DEU', 'DJI', 'EGY', 'EST', 'FIN', 'GEO', 'GMB', 'IRN',\n", - " 'KHM', 'KOR', 'KWT', 'LBY', 'LTU', 'LVA', 'PAK', 'POL', 'QAT',\n", - " 'ROU', 'SDN', 'SVN', 'TUR', 'TZA', 'UKR', 'COM', 'ISR', 'JOR',\n", - " 'MCO', 'MDV', 'SGP', 'SLB', 'SLE', 'BGD', 'BRN', 'NIU', 'GLOB',\n", - " 'AF', 'AS', 'EU', 'NA', 'SA', 'WA'], dtype=object)" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"habitats['location_id'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "habitats.to_csv(path_out + \"habitats/ocean+.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process seamounts from [UN WCMC](https://data.unep-wcmc.org/datasets/41)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "# Read required data\n", - "seamounts = gpd.read_file(path_in + \"Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp\")\n", - "eez = gpd.read_file(path_out + \"/administrative/eez_area_mollweide.shp\")\n", - "hs = gpd.read_file(path_in + \"/high_seas/high_seas.shp\")\n", - "protected_areas = gpd.read_file(path_out + \"wdpa/timeseries/protected_dissolved_2023.shp\").to_crs(\"EPSG:4326\")" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "# Keep relevant fields in eez and hs and merge then in one dataframe\n", - "eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]\n", - "hs = hs[['geometry']]\n", - "hs['SOVEREIGN1'] = 'High Seas'\n", - "hs['ISO_SOV1'] = 'ABNJ'\n", - "eez_hs = eez.merge(hs, how='outer')" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "# Join eez info to seamounts falling within eez polygons\n", - "seamounts_eez = gpd.sjoin(seamounts, eez_hs, how=\"left\", predicate=\"within\")\n", - "# Drop those not associated with an eez or hs\n", - "seamounts_eez = seamounts_eez.dropna(subset=['ISO_SOV1'])" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "# Create new column \"iso\" with the iso_sov codes\n", - "def concatenate_iso(row):\n", - " iso_list = [row['ISO_SOV1']]\n", - " if not 
pd.isna(row['ISO_SOV2']):\n", - " iso_list.append(row['ISO_SOV2'])\n", - " if not pd.isna(row['ISO_SOV3']):\n", - " iso_list.append(row['ISO_SOV3'])\n", - " return ';'.join(iso_list)\n", - "\n", - "seamounts_eez['iso'] = seamounts_eez.apply(concatenate_iso, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "# Check which seamounts are protectec\n", - "seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how=\"left\", predicate=\"within\")\n", - "seamounts_wdpa['protection'] = \"no\" \n", - "seamounts_wdpa.loc[~seamounts_wdpa['index_right'].isna(), 'protection'] = \"yes\"\n", - "# Remove rows in which protection is \"no\"\n", - "seamounts_wdpa = seamounts_wdpa[seamounts_wdpa['protection'] != \"no\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Global stats" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nametotal_arealocation_idyear
0seamounts2.690810e+07GLOB2011
\n", - "
" - ], - "text/plain": [ - " habitat_name total_area location_id year\n", - "0 seamounts 2.690810e+07 GLOB 2011" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global area of seamounts\n", - "seamounts_eez['habitat_name'] = 'seamounts'\n", - "seamounts_global = seamounts_eez.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'total_area'})\n", - "seamounts_global['location_id'] = 'GLOB'\n", - "seamounts_global['year'] = 2011\n", - "seamounts_global" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_area
0seamounts3.438552e+06
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area\n", - "0 seamounts 3.438552e+06" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global area of seamounts protected\n", - "seamounts_wdpa['habitat_name'] = 'seamounts'\n", - "seamounts_wdpa_global = seamounts_wdpa.groupby(['habitat_name']).agg({'AREA2D': 'sum'}).reset_index().rename(columns={'AREA2D': 'protected_area'})\n", - "seamounts_wdpa_global" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nametotal_arealocation_idyearprotected_area
0seamounts2.690810e+07GLOB20113.438552e+06
\n", - "
" - ], - "text/plain": [ - " habitat_name total_area location_id year protected_area\n", - "0 seamounts 2.690810e+07 GLOB 2011 3.438552e+06" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Bring 'protected_area' field from seamouts_wdpa_global to seamounts_global\n", - "seamounts_global = seamounts_global.merge(seamounts_wdpa_global[['habitat_name', 'protected_area']], how='left', on='habitat_name')\n", - "seamounts_global" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Country stats" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "# Split the 'iso_code' values and create separate rows only for those with multiple values\n", - "mask = seamounts_eez['iso'].str.contains(';', na=False)\n", - "split_rows = seamounts_eez[mask].copy()\n", - "split_rows['iso'] = split_rows['iso'].str.split(';')\n", - "split_rows = split_rows.explode('iso')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_eez[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idtotal_areahabitat_nameyear
0ABNJ1.483098e+07seamounts2011
1AGO9.556242e+03seamounts2011
2ARG3.110730e+05seamounts2011
3ATA3.551629e+05seamounts2011
4ATG6.215895e+03seamounts2011
...............
88VNM4.421338e+04seamounts2011
89VUT1.199475e+05seamounts2011
90WSM4.117997e+04seamounts2011
91YEM6.294974e+04seamounts2011
92ZAF9.946306e+04seamounts2011
\n", - "

93 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " location_id total_area habitat_name year\n", - "0 ABNJ 1.483098e+07 seamounts 2011\n", - "1 AGO 9.556242e+03 seamounts 2011\n", - "2 ARG 3.110730e+05 seamounts 2011\n", - "3 ATA 3.551629e+05 seamounts 2011\n", - "4 ATG 6.215895e+03 seamounts 2011\n", - ".. ... ... ... ...\n", - "88 VNM 4.421338e+04 seamounts 2011\n", - "89 VUT 1.199475e+05 seamounts 2011\n", - "90 WSM 4.117997e+04 seamounts 2011\n", - "91 YEM 6.294974e+04 seamounts 2011\n", - "92 ZAF 9.946306e+04 seamounts 2011\n", - "\n", - "[93 rows x 4 columns]" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get area of seamounts per iso\n", - "seamounts_iso = seamounts_eez_new.groupby(['iso']).agg({'AREA2D': 'sum'}).reset_index()\n", - "seamounts_iso = seamounts_iso.rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})\n", - "seamounts_iso['habitat_name'] = 'seamounts'\n", - "seamounts_iso['year'] = 2011\n", - "seamounts_iso " - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "# Split the 'iso_code' values in seamounts_wdpa and create separate rows only for those with multiple values\n", - "mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)\n", - "split_rows = seamounts_wdpa[mask].copy()\n", - "split_rows['PARENT_ISO'] = split_rows['PARENT_ISO'].str.split(';')\n", - "split_rows = split_rows.explode('PARENT_ISO')\n", - "\n", - "# Keep rows with single values in 'iso_code'\n", - "single_value_rows = seamounts_wdpa[~mask]\n", - "\n", - "# Concatenate the exploded rows with the single value rows\n", - "seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_area
0ABNJ226253.932283
1ARG38773.659962
\n", - "
" - ], - "text/plain": [ - " location_id protected_area\n", - "0 ABNJ 226253.932283\n", - "1 ARG 38773.659962" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate area protected per iso\n", - "seamounts_protected = seamounts_wdpa_new.groupby(['PARENT_ISO']).agg({'AREA2D': 'sum'}).reset_index()\n", - "seamounts_protected = seamounts_protected.rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})\n", - "seamounts_protected.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idtotal_areahabitat_nameyearprotected_area
0ABNJ1.483098e+07seamounts2011226253.932283
1AGO9.556242e+03seamounts2011NaN
\n", - "
" - ], - "text/plain": [ - " location_id total_area habitat_name year protected_area\n", - "0 ABNJ 1.483098e+07 seamounts 2011 226253.932283\n", - "1 AGO 9.556242e+03 seamounts 2011 NaN" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Join seamounts_iso and seamounts_protected\n", - "seamounts_iso = seamounts_iso.merge(seamounts_protected, how='left', on='location_id')\n", - "seamounts_iso.head(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Regions stats" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitat_nameprotected_areatotal_areayear
0AFseamounts94385.1789586.162351e+052011
1ASseamounts832497.7839375.433433e+062011
2ATseamounts0.0000003.551629e+052011
3EUseamounts894514.9102552.641119e+062011
4NAseamounts555588.2107251.664794e+062011
5SAseamounts581172.1543891.655552e+062011
6WAseamounts2487.4280509.384765e+042011
\n", - "
" - ], - "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF seamounts 94385.178958 6.162351e+05 2011\n", - "1 AS seamounts 832497.783937 5.433433e+06 2011\n", - "2 AT seamounts 0.000000 3.551629e+05 2011\n", - "3 EU seamounts 894514.910255 2.641119e+06 2011\n", - "4 NA seamounts 555588.210725 1.664794e+06 2011\n", - "5 SA seamounts 581172.154389 1.655552e+06 2011\n", - "6 WA seamounts 2487.428050 9.384765e+04 2011" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seamounts_regions = seamounts_iso.copy()\n", - "seamounts_regions['region'] = seamounts_regions['location_id'].map(country_to_region)\n", - "\n", - "# Calculate stats for each region\n", - "seamounts_regions = seamounts_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "seamounts_regions['year'] = 2011\n", - "seamounts_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "seamounts_regions" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate region and global stats to seamounts_iso2\n", - "seamounts_all = pd.concat([seamounts_iso, seamounts_regions, seamounts_global])" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [], - "source": [ - "seamounts_all.to_csv(path_out + \"habitats/seamounts.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process mangroves from GMW" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [], - "source": [ - "mangroves = pd.read_csv(path_out + \"habitats/mangroves.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
habitat_nameprotected_areatotal_arealocation_idyear
0mangroves61287.20375147358.990971GLOB2020
\n", - "
" - ], - "text/plain": [ - " habitat_name protected_area total_area location_id year\n", - "0 mangroves 61287.20375 147358.990971 GLOB 2020" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate global stats for mangroves\n", - "mangroves_global = mangroves.groupby(['habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "mangroves_global['location_id'] = 'GLOB'\n", - "mangroves_global['year'] = 2020\n", - "mangroves_global" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate the global stats to the mangroves dataframe\n", - "mangroves = pd.concat([mangroves, mangroves_global])" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitat_nameprotected_areatotal_areayear
0AFmangroves10006.9700029344.4043992020
1ASmangroves21378.7500074629.1944462020
2NAmangroves2055.400002329.1155052020
3POmangroves6.720006.7230182020
4SAmangroves27811.5337540875.9326662020
5WAmangroves27.83000173.6209382020
\n", - "
" - ], - "text/plain": [ - " location_id habitat_name protected_area total_area year\n", - "0 AF mangroves 10006.97000 29344.404399 2020\n", - "1 AS mangroves 21378.75000 74629.194446 2020\n", - "2 NA mangroves 2055.40000 2329.115505 2020\n", - "3 PO mangroves 6.72000 6.723018 2020\n", - "4 SA mangroves 27811.53375 40875.932666 2020\n", - "5 WA mangroves 27.83000 173.620938 2020" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mangroves_regions = mangroves.copy()\n", - "mangroves_regions['region'] = mangroves['location_id'].map(country_to_region)\n", - "\n", - "# Calculate stats for each region\n", - "mangroves_regions = mangroves_regions.groupby(['region', 'habitat_name']).agg({'protected_area': 'sum', 'total_area': 'sum'}).reset_index()\n", - "mangroves_regions['year'] = 2020\n", - "mangroves_regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "mangroves_regions" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate stats for regions with mangroves\n", - "mangroves = pd.concat([mangroves, mangroves_regions])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Concatenate all habitats" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idprotected_areatotal_areahabitat_nameyear
0ABNJ421.6293726799041874.98221422617cold-water corals2023
1AGO03.39567053773998cold-water corals2023
2ALB05.98647948252716cold-water corals2023
3ARG6.9842260206355761.8263440651753cold-water corals2023
4ATG00.997746538545076cold-water corals2023
..................
1AS21378.7574629.194446mangroves2020
2NA2055.42329.115505mangroves2020
3PO6.726.723018mangroves2020
4SA27811.5337540875.932666mangroves2020
5WA27.83173.620938mangroves2020
\n", - "

628 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " location_id protected_area total_area habitat_name year\n", - "0 ABNJ 421.629372679904 1874.98221422617 cold-water corals 2023\n", - "1 AGO 0 3.39567053773998 cold-water corals 2023\n", - "2 ALB 0 5.98647948252716 cold-water corals 2023\n", - "3 ARG 6.98422602063557 61.8263440651753 cold-water corals 2023\n", - "4 ATG 0 0.997746538545076 cold-water corals 2023\n", - ".. ... ... ... ... ...\n", - "1 AS 21378.75 74629.194446 mangroves 2020\n", - "2 NA 2055.4 2329.115505 mangroves 2020\n", - "3 PO 6.72 6.723018 mangroves 2020\n", - "4 SA 27811.53375 40875.932666 mangroves 2020\n", - "5 WA 27.83 173.620938 mangroves 2020\n", - "\n", - "[628 rows x 5 columns]" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the dataframes\n", - "habitats_all = pd.concat([habitats, seamounts_all, mangroves])\n", - "habitats_all" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [], - "source": [ - "habitats_all.to_csv(path_out + \"tables/habitats2.csv\", index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/data/notebooks/pipes_mock/precalc_sofia.ipynb b/data/notebooks/pipes_mock/precalc_sofia.ipynb deleted file mode 100644 index fe184acb..00000000 --- a/data/notebooks/pipes_mock/precalc_sofia.ipynb +++ /dev/null @@ -1,4355 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 194, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - 
"execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import sys\n", - "from pathlib import Path\n", - "import time\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "import json\n", - "import dotenv\n", - "import os\n", - "import logging\n", - "from typing import Tuple, List, Union\n", - "import sys\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "import asyncio\n", - "from tqdm.asyncio import tqdm\n", - "from itertools import product\n", - "from shapely.geometry import box\n", - "\n", - "dotenv.load_dotenv()\n", - "\n", - "scripts_dir = Path(\".\").joinpath(\"src\")\n", - "if scripts_dir not in sys.path:\n", - " sys.path.insert(0, scripts_dir.resolve().as_posix())\n", - "\n", - "from helpers.strapi import Strapi\n", - "from helpers.settings import get_settings, Settings\n", - "from helpers.file_handler import FileConventionHandler\n", - "from helpers.utils import download_and_unzip_if_needed, writeReadGCP, make_archive\n", - "\n", - "from pipelines.output_schemas import (\n", - " FPLSchema,\n", - " ProtectionLevelSchema,\n", - " PAsSchema,\n", - " HabitatsSchema,\n", - " LocationSchema,\n", - " ProtectedAreaExtentSchema,\n", - " PAsSchemaChunk1,\n", - " PAsSchemaChunk2,\n", - ")\n", - "from pipelines.processors import (\n", - " add_envelope,\n", - " add_location_iso,\n", - " expand_multiple_locations,\n", - " add_region_iso,\n", - " calculate_eez_area,\n", - " add_bbox,\n", - " add_groups_and_members,\n", - " add_location_name,\n", - " output,\n", - " clean_geometries,\n", - " filter_by_exluding_propossed_mpas,\n", - " spatial_join,\n", - " process_mpa_data,\n", - " assign_iso3,\n", - " calculate_global_area,\n", - " separate_parent_iso,\n", - " calculate_stats_cov,\n", - " coverage_stats,\n", - " mpaatlas_filter_stablishment,\n", - " process_mpaatlas_data,\n", - " calculate_stats,\n", - " fix_monaco,\n", - " 
batch_export,\n", - " calculate_area,\n", - " define_is_child,\n", - " set_child_id,\n", - " add_child_parent_relationship,\n", - " columns_to_lower,\n", - " extract_wdpaid_mpaatlas,\n", - " simplify_async,\n", - " get_matches,\n", - " repair_geometry, \n", - " arrange_dimensions,\n", - " add_total_area, \n", - " change_ata_to_abnj,\n", - " calculate_padef_percentages,\n", - " calculate_coverage_percentage,\n", - " calculate_coverage_percentage_mpatlas,\n", - " calculate_global_contribution,\n", - " add_is_last_year,\n", - " add_environment,\n", - " cumulative_pa_def_counts, \n", - " process_final_coverage,\n", - " \n", - ")\n", - "from pipelines.utils import background\n", - "\n", - "logging.basicConfig(level=logging.DEBUG)\n", - "logging.getLogger(\"requests\").setLevel(logging.WARNING)\n", - "logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n", - "logging.getLogger(\"fiona\").setLevel(logging.WARNING)\n", - "logger = logging.getLogger(\"notebook\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "mysettings = get_settings()\n", - "prev_step = \"preprocess\"\n", - "current_step = \"stats\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# # Strapi setup\n", - "# strapi = Strapi(url=mysettings.STRAPI_URL)\n", - "# strapi.login(jwt=mysettings.STRAPI_JWT)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Code for terrestrial processing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Code for pa terrestrial processing\n", - "\n", - "# def split_by_year(\n", - "# gdf: gpd.GeoDataFrame, year_col: str = \"STATUS_YR\", year_val: int = 2010\n", - "# ) -> List[gpd.GeoDataFrame]:\n", - "# \"\"\"Split data by year. 
relevant for MPA data.(coverage indicator)\"\"\"\n", - "# prior_2010 = (\n", - "# gdf[gdf[year_col] <= year_val][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", - "# .dissolve(\n", - "# by=[\"iso_3\"],\n", - "# )\n", - "# .assign(year=2010)\n", - "# .reset_index()\n", - "# )\n", - "\n", - "# after_2010 = (\n", - "# gdf[gdf[\"STATUS_YR\"] > 2010][[\"iso_3\", \"STATUS_YR\", \"geometry\"]]\n", - "# .rename(columns={\"STATUS_YR\": \"year\"})\n", - "# )\n", - "# return [prior_2010, after_2010]\n", - "\n", - "\n", - "# def create_grid(bounds: Tuple[float, float, float, float], cell_size: int = 1) -> gpd.GeoDataFrame:\n", - "# \"\"\"Create a grid of cells for a given GeoDataFrame\"\"\"\n", - "# minx, miny, maxx, maxy = bounds\n", - "# x = np.arange(minx, maxx, cell_size)\n", - "# y = np.arange(miny, maxy, cell_size)\n", - "# polygons = [\n", - "# {\n", - "# \"geometry\": box(i, j, i + cell_size, j + cell_size),\n", - "# \"cell_id\": f\"{i}_{j}\",\n", - "# }\n", - "# for i, j in product(x, y)\n", - "# ]\n", - "# return gpd.GeoDataFrame(polygons)\n", - "\n", - "\n", - "# def subdivide_grid(\n", - "# grid_gdf: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame, max_cellsize: float, max_complexity: int\n", - "# ) -> List:\n", - "# subdivided_elements = []\n", - "# for grid_element in grid_gdf.geometry:\n", - "# candidates = get_matches(grid_element, gdf)\n", - "# density = len(candidates)\n", - "# if density > max_complexity:\n", - " \n", - "# subdivision_cellsize = max_cellsize / 2\n", - "# # Subdivide the grid element recursively\n", - "# subgrid = create_grid(grid_element.bounds, subdivision_cellsize)\n", - "# subdivided_elements.extend(\n", - "# subdivide_grid(subgrid, gdf, subdivision_cellsize, max_complexity)\n", - "# )\n", - "# elif density > 0:\n", - "# subdivided_elements.append(grid_element)\n", - "\n", - "# return subdivided_elements\n", - "\n", - "\n", - "# def create_density_based_grid(\n", - "# gdf: gpd.GeoDataFrame, max_cellsize: int = 10, max_complexity: int = 10000\n", - 
"# ) -> gpd.GeoDataFrame:\n", - "# # Get the bounds of the GeoDataFrame\n", - "# minx, miny, maxx, maxy = gdf.total_bounds\n", - "\n", - "# # Create an initial grid\n", - "# grid_gdf = create_grid((minx, miny, maxx, maxy), max_cellsize)\n", - "\n", - "# # Subdivide grid elements based on density and complexity\n", - "# subdivided_elements = subdivide_grid(grid_gdf, gdf, max_cellsize, max_complexity)\n", - "\n", - "# return gpd.GeoDataFrame(geometry=subdivided_elements)\n", - "\n", - "\n", - "# # TODO: refactor this so old function mantains functionality for marine areas\n", - "\n", - "# def split_gdf_by_grid(gdf: gpd.GeoDataFrame, grid_gdf: gpd.GeoDataFrame):\n", - "# result = []\n", - "# gdf[\"already_processed\"] = False\n", - "# for geometry in grid_gdf.geometry:\n", - "# candidates = get_matches(geometry, gdf)\n", - "# subset = gdf.loc[candidates.index][~gdf[\"already_processed\"]]\n", - "# gdf.loc[subset.index, \"already_processed\"] = True\n", - "# if not subset.empty:\n", - "# result.append(subset.drop(columns=[\"already_processed\"]).reset_index(drop=True).copy())\n", - "# return result\n", - "\n", - "\n", - "# @background\n", - "# def spatial_join_chunk(df_large_chunk, df_small, pbar):\n", - "# try:\n", - "# bbox = df_large_chunk.total_bounds\n", - "\n", - "# candidates = get_matches(box(*bbox), df_small.geometry)\n", - "# if len(candidates) > 0:\n", - "# subset = df_small.loc[candidates.index].clip(box(*bbox))\n", - "\n", - "# result = (\n", - "# gpd.overlay(df_large_chunk, subset).reset_index(drop=True)\n", - "# .clip(subset.geometry)\n", - "# .reset_index(drop=True)\n", - "# )\n", - "# result.geometry = result.geometry.apply(repair_geometry)\n", - "# else:\n", - "# result = gpd.GeoDataFrame(columns=df_large_chunk.columns)\n", - "# return result\n", - "# except Exception as e:\n", - "# logging.error(e)\n", - "# return gpd.GeoDataFrame()\n", - "# finally:\n", - "# pbar.update(1)\n", - "\n", - "\n", - "# async def spatial_join(\n", - "# geodataframe_a: 
gpd.GeoDataFrame, geodataframe_b: gpd.GeoDataFrame\n", - "# ) -> gpd.GeoDataFrame:\n", - "# \"\"\"Create spatial join between two GeoDataFrames.\"\"\"\n", - "# # we build the spatial index for the larger GeoDataFrame\n", - "# smaller_dim, larger_dim = arrange_dimensions(geodataframe_a, geodataframe_b)\n", - "\n", - "# logger.info(f\"Processing {len(larger_dim)} elements\")\n", - "\n", - "# grid = create_density_based_grid(larger_dim, max_cellsize=10, max_complexity=5000)\n", - "\n", - "# logger.info(f\"grid created with {len(grid)} cells\")\n", - "\n", - "# list_of_chunks = split_gdf_by_grid(larger_dim, grid)\n", - "\n", - "# logger.info(f\"grid split into {len(list_of_chunks)} chunks\")\n", - "\n", - "# with tqdm(total=len(list_of_chunks)) as pbar: # we create a progress bar\n", - "# new_df = await asyncio.gather(\n", - "# *(spatial_join_chunk(chunk, smaller_dim, pbar) for chunk in list_of_chunks)\n", - "# )\n", - "\n", - "# return gpd.GeoDataFrame(pd.concat(new_df, ignore_index=True), crs=smaller_dim.crs)\n", - "\n", - "\n", - "# @background\n", - "# def spatial_dissolve_chunk(geometry, gdf, pbar):\n", - "# try:\n", - "# logger.info(\"Processing chunk\")\n", - "# candidates = get_matches(\n", - "# geometry,\n", - "# gdf.geometry,\n", - "# )\n", - "# subset = gdf.loc[candidates.index]\n", - "\n", - "# result = pd.concat(\n", - "# subset.clip(geometry).pipe(split_by_year, year_col=\"STATUS_YR\"), ignore_index=True\n", - "# ).copy()\n", - "\n", - "# data_chunk = [\n", - "# (\n", - "# result[result[\"year\"] <= 2010]\n", - "# .reset_index()\n", - "# .pipe(calculate_area, \"area\", None)\n", - "# .drop(columns=[\"geometry\"])\n", - "# )\n", - "# ]\n", - "# for year in range(2011, 2025):\n", - "# data_chunk.append(\n", - "# result[result[\"year\"] <= year]\n", - "# .dissolve(\n", - "# by=[\"iso_3\"],\n", - "# )\n", - "# .assign(year=year)\n", - "# .reset_index()\n", - "# .pipe(calculate_area, \"area\", None)\n", - "# .drop(columns=[\"geometry\"])\n", - "# )\n", - "\n", 
- "# return pd.concat(data_chunk, ignore_index=True)\n", - "# except Exception as e:\n", - "# logging.error(e)\n", - "# return gpd.GeoDataFrame()\n", - "# finally:\n", - "# pbar.update(1)\n", - "\n", - "# async def process_grid(gdf):\n", - "# grid_gdf = create_density_based_grid(gdf, max_cellsize=10, max_complexity=5000)\n", - "# logger.info(f\"grid created with {grid_gdf.shape[0]} cells\")\n", - "\n", - "# with tqdm(total=grid_gdf.shape[0], desc=\"Processing grid elements\") as pbar:\n", - "# jobs = [spatial_dissolve_chunk(geometry, gdf, pbar) for geometry in grid_gdf.geometry.values]\n", - "# result = await asyncio.gather(*jobs)\n", - "# return result" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Coverage stats - Mpas" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", - "The steps are:\n", - "1. Load eez\n", - "2. Spatial inner Join the eez dataset with the Mpas one\n", - "3. Assign the location iso\n", - "4. dissolve by location iso and cummulative year\n", - "5. calculate the area for global regions and eez countries\n", - "6. prepare the data to be ingested in strapi\n", - "7. 
upload the data to strapi" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipe = \"mpa\"\n", - "strapi_collection = \"\"\n", - "\n", - "pipe_dir_eez = FileConventionHandler(\"eez\")\n", - "pipe_dir_mpas = FileConventionHandler(pipe)\n", - "output_file = pipe_dir_mpas.get_processed_step_path(current_step).joinpath(\n", - " \"mpa_coverage.csv\"\n", - ")\n", - "\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", - "# Download the mpas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Load the data\n", - "# eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "# mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "\n", - "# # Join the eez data with the wdpa data\n", - "# eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Prepare the mpa data\n", - "# final_data = await process_mpa_data(\n", - "# 
eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),\n", - "# range(2011, time.localtime().tm_year + 1),\n", - "# [\"PA_DEF\", \"iso_3\"],\n", - "# {\"protectedAreasCount\": \"sum\"},\n", - "# )\n", - "\n", - "# # Save the results\n", - "# final_data.to_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"), index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the results\n", - "final_data = pd.read_csv(pipe_dir_mpas.get_processed_step_path(prev_step).joinpath(\"mpa_preprocessed.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yeariso_3areaprotected_areas_countoecmspastotal_marine_areaprotected_areacoverageglobal_contributionis_last_yearenvironment
02010ABNJ996236.12549829.00.00000100.00000212881389996236.130.4679770.27596601
12010AF129790.939474427.02.3419297.6580814878058129790.940.8723650.03595301
\n", - "
" - ], - "text/plain": [ - " year iso_3 area protected_areas_count oecms pas \\\n", - "0 2010 ABNJ 996236.125498 29.0 0.00000 100.00000 \n", - "1 2010 AF 129790.939474 427.0 2.34192 97.65808 \n", - "\n", - " total_marine_area protected_area coverage global_contribution \\\n", - "0 212881389 996236.13 0.467977 0.275966 \n", - "1 14878058 129790.94 0.872365 0.035953 \n", - "\n", - " is_last_year environment \n", - "0 0 1 \n", - "1 0 1 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create coverage stats table\n", - "final_data2 = final_data.copy()\n", - "\n", - "coverage = (\n", - " final_data2.pipe(calculate_global_area, [\"year\", \"PA_DEF\"], \"marine\", {\"area\": \"sum\"}, \"iso_3\")\n", - " .pipe(separate_parent_iso, \"iso_3\")\n", - " .pipe(add_region_iso, \"iso_3\")\n", - " .replace(\n", - " {\n", - " \"iso_3\": {\n", - " \"ATA\": \"ABNJ\",\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " \"GIB\": \"GBR\",\n", - " }\n", - " }\n", - " )\n", - " .pipe(calculate_stats_cov, [\"year\", \"PA_DEF\"], \"iso_3\", environment='marine').astype({\"PA_DEF\": int})\n", - " .pipe(calculate_padef_percentages, 'marine')\n", - " .pipe(add_total_area, 'marine')\n", - " .pipe(coverage_stats)\n", - " .pipe(calculate_coverage_percentage)\n", - " .pipe(calculate_global_contribution)\n", - " .pipe(add_is_last_year)\n", - " .pipe(add_environment)\n", - ")\n", - "\n", - "# Create the output and save it\n", - "ProtectedAreaExtentSchema(\n", - " coverage.pipe(\n", - " output,\n", - " \"iso_3\",\n", - " {},\n", - " {},\n", - " [\"area\", \"iso_3\", 'total_marine_area'],\n", - " )\n", - ").to_csv(\n", - " output_file,\n", - " index=True,\n", - ")\n", - "\n", - "\n", - "coverage.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "metadata": {}, - "outputs": [ - { 
- "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "# Upload the results to GCS\n", - "remote_path = 'vizzuality_processed_data/strapi_tables/mpa_coverage.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi_collection = \"protection-coverage-stat\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# strapi.deleteCollectionData(strapi_collection, list(range(1, 2300)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi.importCollectionData(\n", - "# strapi_collection,\n", - "# output_file,\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Coverage stats - terrestrial" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa-terrestrial/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" 
- } - ], - "source": [ - "pipe = \"mpa-terrestrial\"\n", - "step = \"preprocess\"\n", - "strapi_collection_mpas = \"mpa-terrestrial\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", - "\n", - "working_folder = FileConventionHandler(pipe)\n", - "input_path = working_folder.pipe_raw_path\n", - "temp_working_path = working_folder.get_temp_file_path(step)\n", - "output_file_sjoin = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_sjoin.shp\")\n", - "output_file_dissolve = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_dissolve.csv\")\n", - "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_coverage.csv\")\n", - "\n", - "# Download the protected atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# # Load the data\n", - "# wdpa = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\")).pipe(\n", - "# clean_geometries\n", - "# )\n", - "# gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "\n", - "# gadm.sindex\n", - "# wdpa.sindex" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Spatial join using overlay\n", - "# wdpa_subset = wdpa[\n", - "# ~(\n", - "# (wdpa.bounds.minx < -181)\n", - "# | (wdpa.bounds.miny < -91)\n", - "# | (wdpa.bounds.maxx > 181)\n", - "# | (wdpa.bounds.maxy > 91)\n", - "# )\n", - "# ].reset_index(drop=True)\n", - "\n", - "# sjoin_gdf = await spatial_join(wdpa_subset, gadm)\n", - "# sjoin_gdf.rename(columns={\"GID_0\": 
\"iso_3\"}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
WDPAIDWDPA_PIDPA_DEFNAMEDESIG_ENGIUCN_CATMARINEGIS_AREASTATUSSTATUS_YRPARENT_ISOCOUNTRYiso_3area_km2geometry
\n", - "
" - ], - "text/plain": [ - "Empty GeoDataFrame\n", - "Columns: [WDPAID, WDPA_PID, PA_DEF, NAME, DESIG_ENG, IUCN_CAT, MARINE, GIS_AREA, STATUS, STATUS_YR, PARENT_ISO, COUNTRY, iso_3, area_km2, geometry]\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# # Test existence of duplicates\n", - "# sjoin_gdf.loc[sjoin_gdf.duplicated(subset=[\"WDPA_PID\", \"iso_3\"], keep=False)].sort_values(\n", - "# \"WDPA_PID\"\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Exclude \"proposed\" protected areas\n", - "# sjoin_gdf = filter_by_exluding_propossed_mpas(sjoin_gdf)\n", - "\n", - "# # Save the results of the spatial join\n", - "# sjoin_gdf.to_file(output_file_sjoin, driver=\"ESRI Shapefile\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the data\n", - "sjoin_gdf = gpd.read_file(output_file_sjoin)\n", - "sjoin_gdf[\"STATUS_YR\"] = sjoin_gdf[\"STATUS_YR\"].astype(\"Int64\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PA_DEFiso_3year10protected_areas_count
0AFG2010100.010.0
1AFG2011100.010.0
2AFG2012100.010.0
3AFG2013100.010.0
4AFG2014100.010.0
..................
2884ZWE20202290.0229.0
2885ZWE20212290.0229.0
2886ZWE20222290.0229.0
2887ZWE20232290.0229.0
2888ZWE20242290.0229.0
\n", - "

2889 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - "PA_DEF iso_3 year 1 0 protected_areas_count\n", - "0 AFG 2010 10 0.0 10.0\n", - "1 AFG 2011 10 0.0 10.0\n", - "2 AFG 2012 10 0.0 10.0\n", - "3 AFG 2013 10 0.0 10.0\n", - "4 AFG 2014 10 0.0 10.0\n", - "... ... ... ... ... ...\n", - "2884 ZWE 2020 229 0.0 229.0\n", - "2885 ZWE 2021 229 0.0 229.0\n", - "2886 ZWE 2022 229 0.0 229.0\n", - "2887 ZWE 2023 229 0.0 229.0\n", - "2888 ZWE 2024 229 0.0 229.0\n", - "\n", - "[2889 rows x 5 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate wdpa cumulative counts and pa and oecm percentages\n", - "cumulative_counts = cumulative_pa_def_counts(sjoin_gdf)\n", - "cumulative_counts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # Dissolve geometries to calculate the coverage\n", - "# data = await process_grid(sjoin_gdf)\n", - "# tpa = pd.concat(data, ignore_index=True).drop(columns=['STATUS_YR', 'index']).rename(columns={'area': 'protected_area'})\n", - "\n", - "# # Group by 'iso_3' and 'year' and sum the 'area'\n", - "# tpa_grouped = tpa.groupby(['iso_3', 'year'], as_index=False)['protected_area'].sum()\n", - "# tpa_grouped.reset_index(drop=True, inplace=True)\n", - "\n", - "# # save to csv\n", - "# tpa_grouped.to_csv(output_file_dissolve, index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3yearprotected_area
0AFG20101078.918622
1AFG20111078.918622
2AFG20121078.918622
3AFG20131078.918622
4AFG20141078.918622
\n", - "
" - ], - "text/plain": [ - " iso_3 year protected_area\n", - "0 AFG 2010 1078.918622\n", - "1 AFG 2011 1078.918622\n", - "2 AFG 2012 1078.918622\n", - "3 AFG 2013 1078.918622\n", - "4 AFG 2014 1078.918622" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load dissolved data\n", - "tpa_grouped = pd.read_csv(output_file_dissolve)\n", - "tpa_grouped.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yeariso_3protected_areaprotected_areas_countoecmspastotal_terrestrial_areacoverageglobal_contributionis_last_yearenvironment
02010AF3.636311e+067272.00.0100.02999309512.1238272.69446502
12010AS2.051386e+0624782.00.0100.0316255566.4864811.52005302
22010AT1.108333e+022.00.0100.0120882300.0009170.00008202
32010EU4.306080e+06116128.00.0100.03003757114.3356453.19075602
42010NA2.006295e+0652176.00.0100.01937115210.3571271.48664202
....................................
30042024YEM5.145397e+0315.00.0100.04537411.1339940.00381312
30052024ZAF1.143850e+051631.00.0100.012213289.3656270.08475812
30062024ZMB2.929805e+05557.00.0100.075399038.8573470.21709512
30072024ZNC2.779983e+008.00.0100.033140.0838860.00000212
30082024ZWE1.096232e+05229.00.0100.039123528.0197950.08123012
\n", - "

3009 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " year iso_3 protected_area protected_areas_count oecms pas \\\n", - "0 2010 AF 3.636311e+06 7272.0 0.0 100.0 \n", - "1 2010 AS 2.051386e+06 24782.0 0.0 100.0 \n", - "2 2010 AT 1.108333e+02 2.0 0.0 100.0 \n", - "3 2010 EU 4.306080e+06 116128.0 0.0 100.0 \n", - "4 2010 NA 2.006295e+06 52176.0 0.0 100.0 \n", - "... ... ... ... ... ... ... \n", - "3004 2024 YEM 5.145397e+03 15.0 0.0 100.0 \n", - "3005 2024 ZAF 1.143850e+05 1631.0 0.0 100.0 \n", - "3006 2024 ZMB 2.929805e+05 557.0 0.0 100.0 \n", - "3007 2024 ZNC 2.779983e+00 8.0 0.0 100.0 \n", - "3008 2024 ZWE 1.096232e+05 229.0 0.0 100.0 \n", - "\n", - " total_terrestrial_area coverage global_contribution is_last_year \\\n", - "0 29993095 12.123827 2.694465 0 \n", - "1 31625556 6.486481 1.520053 0 \n", - "2 12088230 0.000917 0.000082 0 \n", - "3 30037571 14.335645 3.190756 0 \n", - "4 19371152 10.357127 1.486642 0 \n", - "... ... ... ... ... \n", - "3004 453741 1.133994 0.003813 1 \n", - "3005 1221328 9.365627 0.084758 1 \n", - "3006 753990 38.857347 0.217095 1 \n", - "3007 3314 0.083886 0.000002 1 \n", - "3008 391235 28.019795 0.081230 1 \n", - "\n", - " environment \n", - "0 2 \n", - "1 2 \n", - "2 2 \n", - "3 2 \n", - "4 2 \n", - "... ... 
\n", - "3004 2 \n", - "3005 2 \n", - "3006 2 \n", - "3007 2 \n", - "3008 2 \n", - "\n", - "[3009 rows x 11 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create coverage stats table\n", - "coverage = (\n", - " pd.merge(tpa_grouped, cumulative_counts, on=['iso_3', 'year'], how='left')\n", - " .pipe(calculate_global_area, [\"year\"], environment='terrestrial')\n", - " .pipe(add_region_iso, \"iso_3\")\n", - " .pipe(calculate_stats_cov, [\"year\"], \"iso_3\", environment= \"terrestrial\")\n", - " .pipe(calculate_padef_percentages, \"terrestrial\")\n", - " .pipe(add_total_area, \"terrestrial\")\n", - " .pipe(calculate_coverage_percentage)\n", - " .pipe(calculate_global_contribution)\n", - " .pipe(add_is_last_year)\n", - " .pipe(add_environment)\n", - ")\n", - "\n", - "ProtectedAreaExtentSchema(\n", - " coverage.pipe(\n", - " output,\n", - " \"iso_3\",\n", - " {},\n", - " {},\n", - " [\"iso_3\", 'total_terrestrial_area'],\n", - " )\n", - ").to_csv(\n", - " output_file_tpas,\n", - " index=True,\n", - ")\n", - "\n", - "coverage" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "# Save the results in GCS\n", - "remote_path = 'vizzuality_processed_data/strapi_tables/tpa_coverage.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file_tpas,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Coverage stats - concatenate marine & terrestrial" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "pipe = \"pa\"\n", - "pipe_tpa = 
\"mpa-terrestrial\"\n", - "pipe_mpa = \"mpa\"\n", - "step = \"preprocess\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_tpa = FileConventionHandler(pipe_tpa)\n", - "pipe_dir_mpa = FileConventionHandler(pipe_mpa)\n", - "\n", - "input_path_tpas = pipe_dir_tpa.get_processed_step_path(current_step).joinpath(\"tpa_coverage.csv\")\n", - "input_path_mpas = pipe_dir_mpa.get_processed_step_path(current_step).joinpath(\"mpa_coverage.csv\")\n", - "\n", - "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"protection_coverage_stats.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idyearprotected_areaprotected_areas_countoecmspascoverageglobal_contributionis_last_yearenvironmentlocation
1120103.636311e+0672720.0100.012.1238272.694465023
\n", - "
" - ], - "text/plain": [ - " id year protected_area protected_areas_count oecms pas coverage \\\n", - "1 1 2010 3.636311e+06 7272 0.0 100.0 12.123827 \n", - "\n", - " global_contribution is_last_year environment location \n", - "1 2.694465 0 2 3 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate the marine and terrestrial data\n", - "final_data = process_final_coverage(input_path_tpas, input_path_mpas)\n", - "\n", - "# Filter the DataFrame to get the row where 'id' is 1\n", - "final_data[final_data['id'] == 1]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "ProtectedAreaExtentSchema(final_data).to_csv(output_file, index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "# Save the results in GCS\n", - "remote_path = 'vizzuality_processed_data/strapi_tables/protection_coverage_stats.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Mpa atlas - country stats Fully or highly protected" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.\n", - "The steps are:\n", - "1. Load eez\n", - "2. Spatial inner Join the eez dataset with the Mpaatlas one\n", - "3. iso assign using the sovereign one provided by mpaatlas\n", - "4. dissolve by location\n", - "5. 
calculate the area for global regions and eez countries ussing mollwide projection\n", - "6. prepare the data to be ingested in strapi\n", - "7. upload the data to strapi" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" - ] - } - ], - "source": [ - "pipe = \"mpaatlas\"\n", - "strapi_collection = \"mpaa-protection-level-stat\"\n", - "\n", - "pipe_dir_eez = FileConventionHandler(\"eez\")\n", - "pipe_dir_mpaatlas = FileConventionHandler(pipe)\n", - "output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\n", - " \"mpaatlas_protection_level.csv\"\n", - ")\n", - "\n", - "# Download the EEZ file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)\n", - "# Download the mpas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", - "\n", - "# Load the data\n", - "eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, \"shp\")).pipe(clean_geometries)\n", - "mpaatlas_intermediate = gpd.read_file(\n", - " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", - ").pipe(clean_geometries)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [00:28<00:00, 9.89it/s]\n" - ] - } - ], - "source": [ - "eez_mpaatlas_data_join = await spatial_join(\n", - " eez, 
mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# To get an idea of the spatial join results\n", - "# eez_mpaatlas_data_join.to_file(\n", - "# pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin.shp\"),\n", - "# driver=\"ESRI Shapefile\",\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pyogrio._io:Created 55 records\n" - ] - } - ], - "source": [ - "eez_mpaatlas_data_join.dissolve(by=[\"protecti_1\", \"location_i\"], aggfunc={\"name\": \"count\"}).reset_index().to_file(\n", - "pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(\"mpaatlas_sjoin_dissolved.shp\"),\n", - "driver=\"ESRI Shapefile\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "eez_mpaatlas_data_join2 = eez_mpaatlas_data_join.copy()\n", - "\n", - "result = (\n", - " eez_mpaatlas_data_join2.rename(columns={\"location_i\": \"iso_3\"})\n", - " .pipe(process_mpaatlas_data) \n", - " .pipe(calculate_global_area, gby_col=[\"protecti_1\"], iso_column=\"iso_3\", environment = \"marine\")\n", - " .pipe(separate_parent_iso, iso_column=\"iso_3\")\n", - " .replace(\n", - " {\n", - " \"iso_3\": {\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " }\n", - " }\n", - " )\n", - " .pipe(add_region_iso, iso_column=\"iso_3\")\n", - " .pipe(calculate_stats, gby_col=[\"protecti_1\"], iso_column=\"iso_3\")\n", - " .query('protecti_1 != \"less protected or unknown\"')\n", - " .pipe(fix_monaco, iso_column=\"iso_3\", area_column=\"area_km2\")\n", - " .pipe(add_total_area, 'marine')\n", - " 
.pipe(calculate_coverage_percentage_mpatlas)\n", - " .pipe(\n", - " output,\n", - " iso_column=\"iso_3\",\n", - " rep_d={\n", - " \"protecti_1\": {\n", - " \"fully or highly protected\": 1,\n", - " }\n", - " },\n", - " rename={\"protecti_1\": \"mpaa_protection_level\", \"area_km2\": \"area\"},\n", - " drop_cols=[\"total_marine_area\", \"iso_3\"],\n", - " )\n", - ")\n", - "\n", - "ProtectionLevelSchema(result[~result.location.isna()].assign(year=2024)).to_csv(\n", - " output_file, index=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "# Save the results in GCS\n", - "remote_path = 'vizzuality_processed_data/strapi_tables/mpaatlas_protection_level.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi_collection = \"mpaa-protection-level-stat\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi.importCollectionData(\n", - "# strapi_collection,\n", - "# output_file,\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Protected seas - fishing protection level" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "metadata": {}, - 
"outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "pipe = \"protectedseas\"\n", - "strapi_collection = \"fishing-protection-level-stat\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "input_file = pipe_dir.get_processed_step_path(prev_step).joinpath(\"protectedseas_stats.xlsx\")\n", - "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"lfp.csv\")\n", - "\n", - "# Download the protected seas file && unzip it\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=\"vizzuality_processed_data/protectedseas/preprocess/protectedseas_stats.xlsx\",\n", - " file=input_file,\n", - " operation=\"r\",\n", - ")\n", - "\n", - "# Load the data\n", - "protectedseas_intermediate = pd.read_excel(input_file)" - ] - }, - { - "cell_type": "code", - "execution_count": 159, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_teriso_sovincludes_multi_jurisdictional_areaslfparea_sqkmtotal_areapct_total
320NaNESPTrue5142.9730101011023.7760.014141
321NaNESPTrue41639.6820761011023.7760.162180
322NaNESPTrue3214532.8498001011023.77621.219367
323NaNESPTrue215064.1327701011023.7761.489988
324NaNESPTrue1779644.1388001011023.77677.114323
\n", - "
" - ], - "text/plain": [ - " iso_ter iso_sov includes_multi_jurisdictional_areas lfp area_sqkm \\\n", - "320 NaN ESP True 5 142.973010 \n", - "321 NaN ESP True 4 1639.682076 \n", - "322 NaN ESP True 3 214532.849800 \n", - "323 NaN ESP True 2 15064.132770 \n", - "324 NaN ESP True 1 779644.138800 \n", - "\n", - " total_area pct_total \n", - "320 1011023.776 0.014141 \n", - "321 1011023.776 0.162180 \n", - "322 1011023.776 21.219367 \n", - "323 1011023.776 1.489988 \n", - "324 1011023.776 77.114323 " - ] - }, - "execution_count": 159, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "protectedseas_intermediate[\n", - " (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " )\n", - " | (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", - " & ~protectedseas_intermediate.iso_sov.isin(\n", - " protectedseas_intermediate[\n", - " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " ].iso_sov.unique()\n", - " )\n", - " )\n", - "][protectedseas_intermediate.iso_sov.eq(\"ESP\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "metadata": {}, - "outputs": [], - "source": [ - "final = (\n", - " protectedseas_intermediate[\n", - " (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " )\n", - " | (\n", - " protectedseas_intermediate.iso_ter.isna()\n", - " & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)\n", - " & ~protectedseas_intermediate.iso_sov.isin(\n", - " protectedseas_intermediate[\n", - " protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)\n", - " ].iso_sov.unique()\n", - " )\n", - " )\n", - " ].replace(\n", - " {\n", - " \"lfp\": {\n", - " 5: \"highly\",\n", - " 4: \"highly\",\n", - " 3: 
\"moderately\",\n", - " 2: \"less\",\n", - " 1: \"less\",\n", - " },\n", - " }\n", - " ).groupby([\"iso_sov\", \"lfp\"]).agg({\"area_sqkm\": \"sum\", \"total_area\": \"max\"}).reset_index()\n", - " .pipe(\n", - " calculate_global_area,\n", - " gby_col=[\"lfp\"],\n", - " iso_column=\"iso_sov\",\n", - " agg_ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", - " )\n", - " .pipe(add_region_iso, iso_column=\"iso_sov\")\n", - " .pipe(\n", - " calculate_stats,\n", - " gby_col=[\"lfp\"],\n", - " ops={\"area_sqkm\": \"sum\", \"total_area\": \"sum\"},\n", - " iso_column=\"iso_sov\",\n", - " )\n", - " .pipe(lambda x: x.assign(pct=round((x.area_sqkm / x.total_area)*100, 2)))\n", - " .pipe(\n", - " output,\n", - " iso_column=\"iso_sov\",\n", - " rep_d={\n", - " \"lfp\": {\n", - " \"highly\": 1,\n", - " \"moderately\": 2,\n", - " \"less\": 3,\n", - " }\n", - " },\n", - " rename={\"lfp\": \"fishing_protection_level\", \"area_sqkm\": \"area\"},\n", - " drop_cols=[\"iso_sov\", \"total_area\"],\n", - " )\n", - ")\n", - "FPLSchema(final[final.location.notna()]).to_csv(output_file, index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "remote_path = 'vizzuality_processed_data/strapi_tables/lfp.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi.deleteCollectionData(strapi_collection, list(range(1, 500)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 
strapi.importCollectionData(\n", - "# strapi_collection,\n", - "# output_file,\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " 1- lower case the columns \n", - "2- separate location that its regime is in dispute or on join regime \n", - "3- calcualte area for mpaatlas data \n", - "4- rename columns for merge \n", - "5- merge maaatlas and mpa data identifying the source \n", - "6- identify child resources and set them as childs \n", - "7- calculate bbox \n", - "8- set child resources \n", - "9- prepare output for batch export \n", - "10- upload data to strapi " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Country detail table data\n", - "The country detail table is done for marine and terrestrial independently and the results are concatenated." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Methodology for marine:\n", - "\n", - "1- lower case the columns \n", - "2- separate location that its regime is in dispute or on join regime \n", - "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", - "4- calculate area for mpaatlas data \n", - "5- rename columns for merge \n", - "6- merge maaatlas and mpa data identifying the source \n", - "7- identify child resources and set them as childs \n", - "8- calculate bbox \n", - "9- set child resources \n", - "10- Add coverage percentage\n", - "11- Add environment marine\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/mpa_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpa/processed/preprocess\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/mpaatlas_preprocess.zip\n", - "/home/sofia/dev/skytruth-30x30/data/data/mpaatlas/processed/preprocess\n" - ] - } - ], - "source": [ - "pipe = 
\"mpa\"\n", - "strapi_collection_mpas = \"mpa\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_mpaatlas = FileConventionHandler(\"mpaatlas\")\n", - "output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"mpa_detail.csv\")\n", - "\n", - "# Download the protected atlas file && unzip it\n", - "download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# Download the mpaatlas file \n", - "download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)\n", - "\n", - "# Load the data\n", - "mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"shp\")).pipe(\n", - " clean_geometries\n", - ")\n", - "mpaatlas_intermediate = gpd.read_file(\n", - " pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, \"shp\")\n", - ").pipe(clean_geometries)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Load iucn categories\n", - "# iucn_cat = pd.DataFrame(\n", - "# {\"slug\": init_table.iucn_cat.dropna().unique(), \"name\": init_table.iucn_cat.dropna().unique()},\n", - "# index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),\n", - "# )\n", - "# iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index=True)\n", - "iucn_cat = pd.read_csv(\n", - " pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Preprocess marine tables (mpa and mpaatlas) and concatenate them\n", - "init_table = (\n", - " pd.concat(\n", - " [\n", - " (\n", - " mpa_intermediate.pipe(columns_to_lower)\n", - " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", - " .pipe(change_ata_to_abnj)\n", - " .rename(\n", - " columns={\n", - " \"parent_iso\": \"iso\",\n", - " \"status_yr\": \"year\",\n", - " \"gis_m_area\": \"area_km2\",\n", - " 
}\n", - " ).drop(columns=['status'])\n", - " ).assign(source=\"protected_planet\"),\n", - " (\n", - " mpaatlas_intermediate.pipe(calculate_area)\n", - " .pipe(extract_wdpaid_mpaatlas)\n", - " .pipe(separate_parent_iso, iso_column=\"location_i\")\n", - " .rename(\n", - " columns={\n", - " \"location_i\": \"iso\",\n", - " \"wdpa_id\": \"wdpa_pid\",\n", - " \"designatio\": \"desig_eng\",\n", - " }\n", - " )\n", - " ).assign(source=\"mpaatlas\")\n", - " .assign(pa_def=1)\n", - " .astype({\"mpa_zone_i\": \"Int64\"}),\n", - " ],\n", - " ignore_index=True,\n", - " )\n", - " .reset_index(drop=True)\n", - " .replace(\n", - " {\n", - " \"iso\": {\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " }\n", - " }\n", - " )\n", - " .sort_values(by=[\"wdpa_pid\", \"wdpa_pid\", \"source\"], ascending=[True, True, False])\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1026: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " return df.assign(child_id=df[columns].bfill(axis=1)[columns[0]])\n", - "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1179: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " df.replace(rep_d)\n" - ] - } - ], - "source": [ - "mpa_table = (\n", - " init_table.pipe(add_bbox, \"bbox\")\n", - " .pipe(define_is_child)\n", - " .pipe(set_child_id, 'marine')\n", - " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", - " .reset_index(drop=True)\n", - " .pipe(add_total_area, 'marine')\n", - " .rename(columns={\"area_km2\": \"protected_area\"})\n", - " .pipe(calculate_coverage_percentage)\n", - " .pipe(add_environment)\n", - " .pipe(\n", - " output,\n", - " iso_column=\"iso\",\n", - " rep_d={\n", - " \"status\": {\n", - " \"Adopted\": 4,\n", - " \"implemented\": 6,\n", - " \"Established\": 6,\n", - " \"Designated\": 5,\n", - " \"Proposed\": 3,\n", - " \"Inscribed\": 3,\n", - " \"unknown\": 1,\n", - " },\n", - " \"pa_def\": {\"0\": 2, \"1\": 1},\n", - " \"year\": {0: pd.NA},\n", - " \"iucn_cat\": dict(\n", - " iucn_cat[[\"slug\"]]\n", - " .reset_index(drop=False)\n", - " .iloc[:, [1, 0]]\n", - " .to_dict(orient=\"tight\")[\"data\"]\n", - " ),\n", - " \"source\": {\"protected_planet\": 3, \"mpaatlas\": 1},\n", - " \"protection\": {\n", - " \"full\": 3,\n", - " \"light\": 4,\n", - " \"incompatible\": 5,\n", - " \"high\": 6,\n", - " \"minimal\": 7,\n", - " \"unknown\": 8,\n", - " \"unknown/to be determined\": 8,\n", - " },\n", - " \"establishm\": {\n", - " \"actively managed\": 4,\n", - " \"implemented\": 6,\n", - " \"designated\": 5,\n", - " \"Designated\": 5,\n", - " \"proposed or committed\": 3,\n", - " \"Proposed\": 3,\n", - " \"Inscribed\": 3,\n", - " \"Established\": 5,\n", - " \"Adopted\": 5,\n", - " \"unknown\": 1,\n", - " },\n", - " },\n", - " rename={\n", - " \"pa_def\": \"protection_status\",\n", - " \"protected_area\": \"area\",\n", - " \"iucn_cat\": \"iucn_category\",\n", - " \"desig_eng\": \"designation\",\n", - " \"protection\": \"mpaa_protection_level\",\n", - " \"establishm\": \"mpaa_establishment_stage\",\n", - " 
\"source\": \"data_source\",\n", - " },\n", - " drop_cols=[\"geometry\", \"protecti_1\",\"mpa_zone_i\", \"iso\", \"total_marine_area\"]\n", - " )\n", - " .astype(\n", - " {\n", - " \"year\": \"Int32\",\n", - " \"iucn_category\": \"Int64\",\n", - " \"protection_status\": \"Int64\",\n", - " }\n", - " )\n", - " .query(\"coverage <= 100\") \n", - " .sort_index()\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Methodology for terrestrial:\n", - "\n", - "1- lower case the columns \n", - "2- separate location that its regime is in dispute or on join regime \n", - "3- remove ATA and ABNJ because Protected planet doesn't include stats for ATA and ABNJ is marine \n", - "4- rename columns for merge \n", - "5- identify child resources and set them as childs \n", - "6- calculate bbox \n", - "7- set child resources \n", - "8- Add coverage percentage \n", - "9- Add environment terrestrial \n", - "10- Add marine fields with nan " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "pipe = \"mpa-terrestrial\"\n", - "strapi_collection_mpas = \"mpa-terrestrial\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "pipe_dir_gadm = FileConventionHandler(\"gadm\")\n", - "output_file_tpas = pipe_dir.get_processed_step_path(current_step).joinpath(\"tpa_detail.csv\")\n", - "\n", - "# # Download the protected atlas file && unzip it\n", - "# download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)\n", - "# # Download the gadm file \n", - "# download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "tpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, \"gpkg\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "iucn_cat = pd.read_csv(\n", - " 
pipe_dir.get_processed_step_path(current_step).joinpath(\"iucn_categories.csv\"), index_col=0\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "init_table = (\n", - " pd.concat(\n", - " [\n", - " (\n", - " tpa_intermediate.pipe(columns_to_lower)\n", - " .pipe(separate_parent_iso, iso_column=\"parent_iso\")\n", - " .query(\"parent_iso != 'ATA' and parent_iso != 'ABNJ'\")\n", - " .rename(\n", - " columns={\n", - " \"parent_iso\": \"iso\",\n", - " \"status_yr\": \"year\",\n", - " \"gis_area\": \"protected_area\",\n", - " }\n", - " ).drop(columns=['status'])\n", - " ).assign(source=\"protected_planet\"),\n", - " ],\n", - " ignore_index=True,\n", - " )\n", - " .reset_index(drop=True)\n", - " .replace(\n", - " {\n", - " \"iso\": {\n", - " \"COK\": \"NZL\",\n", - " \"IOT\": \"GBR\",\n", - " \"NIU\": \"NZL\",\n", - " \"SHN\": \"GBR\",\n", - " \"SJM\": \"NOR\",\n", - " \"UMI\": \"USA\",\n", - " \"NCL\": \"FRA\",\n", - " }\n", - " }\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/sofia/dev/skytruth-30x30/data/src/pipelines/processors.py:1179: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " df.replace(rep_d)\n" - ] - } - ], - "source": [ - "tpa_table = (\n", - " init_table.pipe(add_bbox, \"bbox\")\n", - " .pipe(define_is_child)\n", - " .pipe(set_child_id, 'terrestrial')\n", - " .sort_values(by=[\"wdpaid\", \"is_child\"], ascending=[True, True])\n", - " .reset_index(drop=True)\n", - " .pipe(add_total_area, 'terrestrial')\n", - " .pipe(calculate_coverage_percentage)\n", - " .pipe(add_environment)\n", - " .pipe(\n", - " output,\n", - " iso_column=\"iso\",\n", - " rep_d={\n", - " \"pa_def\": {\"0\": 2, \"1\": 1},\n", - " \"year\": {0: pd.NA},\n", - " \"iucn_cat\": dict(\n", - " iucn_cat[[\"slug\"]]\n", - " .reset_index(drop=False)\n", - " .iloc[:, [1, 0]]\n", - " .to_dict(orient=\"tight\")[\"data\"]\n", - " ),\n", - " \"source\": {\"protected_planet\": 3},\n", - " },\n", - " rename={\n", - " \"pa_def\": \"protection_status\",\n", - " \"protected_area\": \"area\",\n", - " \"iucn_cat\": \"iucn_category\",\n", - " \"desig_eng\": \"designation\",\n", - " \"source\": \"data_source\",\n", - " },\n", - " drop_cols=[\"geometry\", \"iso\", \"marine\", \"total_terrestrial_area\"]\n", - " )\n", - " .astype(\n", - " {\n", - " \"year\": \"Int32\",\n", - " \"iucn_category\": \"Int64\",\n", - " \"protection_status\": \"Int64\",\n", - " }\n", - " )\n", - " .query(\"coverage <= 100\") \n", - " .sort_index()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "# Add col mpaa_protection_level and mpa_establishment_stage to the table to validate it\n", - "tpa_table['mpaa_protection_level'] = np.nan\n", - "tpa_table['mpaa_establishment_stage'] = np.nan" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Concatenate marine and terrestrial tables\n", - "\n", - "1- Concatenate tables \n", - "2- Add parent and children columns \n", - "3- Sort by parent \n", - "4- Create batch export for 
all columns by parent (to handle relations when uploading in Strapi) \n", - "5- Create batch export only for column parent (to handle relations when uploading in Strapi)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "pipe_pa = \"pa\"\n", - "step = \"preprocess\"\n", - "strapi_collection_pas = \"pa\"\n", - "\n", - "pipe_dir_pa = FileConventionHandler(pipe_pa)\n", - "\n", - "output_file_pa = pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"pa_detail.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "final_table = pd.concat([mpa_table, tpa_table], ignore_index=True)\n", - "final_table.index = final_table.index + 1\n", - "final_table.index.name = 'id'\n", - "final_table = final_table.pipe(add_child_parent_relationship).drop(columns=['wdpa_pid', 'is_child', 'child_id']).sort_values(by=['parent'])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "PAsSchema(final_table[final_table.location.notna()]).to_csv(output_file_pa, index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# Divide table into two tables\n", - "final_table1 = final_table.drop(columns=['parent'])\n", - "final_table2 = final_table[['parent']]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# Divide output in chunks to be uploaded to strapi\n", - "batch_export(\n", - " final_table1[final_table1.area.notna()],\n", - " 4000,\n", - " PAsSchemaChunk1,\n", - " pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"),\n", - " \"pa_detail\",\n", - " format=\"json\",\n", - " strapi_colection=strapi_collection_pas,\n", - ")\n", - "\n", - "# batch_export(\n", - "# final_table2,\n", - "# 10000,\n", - "# PAsSchemaChunk2,\n", - "# 
pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"),\n", - "# \"pa_detail\",\n", - "# format=\"json\",\n", - "# strapi_colection=strapi_collection_pas,\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "# zip data\n", - "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1.zip\"))\n", - "make_archive(pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2\"), pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2.zip\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "# Save zipped file in GCS\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name='vizzuality_processed_data/strapi_tables/pa_chunks1.zip',\n", - " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks1.zip\"),\n", - " operation=\"w\",\n", - ")\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name='vizzuality_processed_data/strapi_tables/pa_chunks2.zip',\n", - " file=pipe_dir_pa.get_processed_step_path(current_step).joinpath(\"chunks2.zip\"),\n", - " operation=\"w\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# strapi.deleteCollectionData(\"pa\", list(range(1, 20914)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# for i 
in range(0, 4):\n", - "# strapi.importCollectionData(\n", - "# strapi_collection_mpas,\n", - "# mpa_folder.joinpath(f\"mpa_detail_{i}.csv\"),\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Habitats" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n", - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "pipe = \"terrestrial-habitats\"\n", - "collection_name = \"terrestrial_habitats\"\n", - "\n", - "pipe_dir = FileConventionHandler(pipe)\n", - "input_file_ter = pipe_dir.get_processed_step_path(prev_step).joinpath(\"master_data_protection.csv\")\n", - "input_file_mar = pipe_dir.get_processed_step_path(prev_step).joinpath(\"habitats6.csv\")\n", - "output_file = pipe_dir.get_processed_step_path(current_step).joinpath(\"habitats_all.csv\")\n", - "\n", - "# Download the terrestrial habitats table from the bucket\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=\"vizzuality_processed_data/habitats/preprocess/master_data_protection.csv\",\n", - " file=input_file_ter,\n", - " operation=\"r\",\n", - ")\n", - "\n", - "# Download the marine habitats table from the bucket\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=\"vizzuality_processed_data/processed_statistic_tables/habitats6.csv\",\n", - " file=input_file_mar,\n", - " operation=\"r\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "habitat_mar = pd.read_csv(input_file_mar, na_values=['', 'NaN', 'NULL'])\n", - "habitat_mar['environment'] = 1\n", - 
"habitat_mar['location_id'] = habitat_mar['location_id'].fillna('NA')" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitat_namepixel_habitatpixel_protected
0GLOBArtificial28259249.02776193.0
1GLOBDesert111106481.07778007.0
2GLOBForest49125087.010353320.0
3GLOBGrassland35459546.05491398.0
4GLOBOther864004.0291628.0
...............
1728ZWEOther0.00.0
1729ZWERocky/mountains0.00.0
1730ZWESavanna231134.097790.0
1731ZWEShrubland52656.04262.0
1732ZWEWetlands/open water3866.03238.0
\n", - "

1733 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " iso_3 habitat_name pixel_habitat pixel_protected\n", - "0 GLOB Artificial 28259249.0 2776193.0\n", - "1 GLOB Desert 111106481.0 7778007.0\n", - "2 GLOB Forest 49125087.0 10353320.0\n", - "3 GLOB Grassland 35459546.0 5491398.0\n", - "4 GLOB Other 864004.0 291628.0\n", - "... ... ... ... ...\n", - "1728 ZWE Other 0.0 0.0\n", - "1729 ZWE Rocky/mountains 0.0 0.0\n", - "1730 ZWE Savanna 231134.0 97790.0\n", - "1731 ZWE Shrubland 52656.0 4262.0\n", - "1732 ZWE Wetlands/open water 3866.0 3238.0\n", - "\n", - "[1733 rows x 4 columns]" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_ter = pd.read_csv(input_file_ter).drop(columns=['frac', 'perc_extent', 'total_area']).rename(columns ={'habitats':'habitat_name','total': 'pixel_habitat', 'protected': 'pixel_protected'})\n", - "habitat_ter" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixels
0GLOBArtificial28259249.02776193.0267352678.0
1GLOBDesert111106481.07778007.0267352678.0
2GLOBForest49125087.010353320.0267352678.0
3GLOBGrassland35459546.05491398.0267352678.0
5GLOBRocky/mountains3571486.0478102.0267352678.0
..................
1727ZWEGrassland3417.0263.0418294.0
1729ZWERocky/mountains0.00.0418294.0
1730ZWESavanna231134.097790.0418294.0
1731ZWEShrubland52656.04262.0418294.0
1732ZWEWetlands/open water3866.03238.0418294.0
\n", - "

1526 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " iso_3 habitat_name pixel_habitat pixel_protected total_pixels\n", - "0 GLOB Artificial 28259249.0 2776193.0 267352678.0\n", - "1 GLOB Desert 111106481.0 7778007.0 267352678.0\n", - "2 GLOB Forest 49125087.0 10353320.0 267352678.0\n", - "3 GLOB Grassland 35459546.0 5491398.0 267352678.0\n", - "5 GLOB Rocky/mountains 3571486.0 478102.0 267352678.0\n", - "... ... ... ... ... ...\n", - "1727 ZWE Grassland 3417.0 263.0 418294.0\n", - "1729 ZWE Rocky/mountains 0.0 0.0 418294.0\n", - "1730 ZWE Savanna 231134.0 97790.0 418294.0\n", - "1731 ZWE Shrubland 52656.0 4262.0 418294.0\n", - "1732 ZWE Wetlands/open water 3866.0 3238.0 418294.0\n", - "\n", - "[1526 rows x 5 columns]" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Remove all rows where \"habitat_name\" is \"Other\"\n", - "habitat_ter = habitat_ter[habitat_ter['habitat_name'] != 'Other'].copy()\n", - "\n", - "# calculate total_area by adding up \"total\" per iso_3\n", - "habitat_ter['total_pixels'] = habitat_ter.groupby('iso_3')['pixel_habitat'].transform('sum')\n", - "habitat_ter" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "# Assign territories to their soveraign countries\n", - "with open(scripts_dir.joinpath('data_commons/data/dependency_to_parent.json'), 'r') as json_file:\n", - " dependency_to_parent = json.load(json_file)\n", - "\n", - "mapping = {key: value[0] for key, value in dependency_to_parent.items()}\n", - "\n", - "habitat_ter['iso_3'] = habitat_ter['iso_3'].map(mapping).fillna(habitat_ter['iso_3'])" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixels
0AFGArtificial56625.0938.0782480.0
1AFGDesert274553.03639.0782480.0
2AFGForest3928.0219.0782480.0
3AFGGrassland292319.09275.0782480.0
4AFGRocky/mountains137716.016183.0782480.0
..................
1505ZWEGrassland3417.0263.0418294.0
1506ZWERocky/mountains0.00.0418294.0
1507ZWESavanna231134.097790.0418294.0
1508ZWEShrubland52656.04262.0418294.0
1509ZWEWetlands/open water3866.03238.0418294.0
\n", - "

1510 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " iso_3 habitat_name pixel_habitat pixel_protected total_pixels\n", - "0 AFG Artificial 56625.0 938.0 782480.0\n", - "1 AFG Desert 274553.0 3639.0 782480.0\n", - "2 AFG Forest 3928.0 219.0 782480.0\n", - "3 AFG Grassland 292319.0 9275.0 782480.0\n", - "4 AFG Rocky/mountains 137716.0 16183.0 782480.0\n", - "... ... ... ... ... ...\n", - "1505 ZWE Grassland 3417.0 263.0 418294.0\n", - "1506 ZWE Rocky/mountains 0.0 0.0 418294.0\n", - "1507 ZWE Savanna 231134.0 97790.0 418294.0\n", - "1508 ZWE Shrubland 52656.0 4262.0 418294.0\n", - "1509 ZWE Wetlands/open water 3866.0 3238.0 418294.0\n", - "\n", - "[1510 rows x 5 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# groupby country and habitats and sum the pixeles\n", - "habitat_ter_grouped = habitat_ter.groupby(['iso_3', 'habitat_name']).sum().reset_index()\n", - "habitat_ter_grouped" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixelsprotected_perchabitat_perc
0AFGArtificial56625.0938.0782480.01.6565127.236607
1AFGDesert274553.03639.0782480.01.32542735.087542
2AFGForest3928.0219.0782480.05.5753560.501994
3AFGGrassland292319.09275.0782480.03.17290437.358016
4AFGRocky/mountains137716.016183.0782480.011.75099517.599939
........................
1505ZWEGrassland3417.0263.0418294.07.6968100.816890
1506ZWERocky/mountains0.00.0418294.0NaN0.000000
1507ZWESavanna231134.097790.0418294.042.30879155.256351
1508ZWEShrubland52656.04262.0418294.08.09404412.588275
1509ZWEWetlands/open water3866.03238.0418294.083.7558200.924230
\n", - "

1510 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " iso_3 habitat_name pixel_habitat pixel_protected total_pixels \\\n", - "0 AFG Artificial 56625.0 938.0 782480.0 \n", - "1 AFG Desert 274553.0 3639.0 782480.0 \n", - "2 AFG Forest 3928.0 219.0 782480.0 \n", - "3 AFG Grassland 292319.0 9275.0 782480.0 \n", - "4 AFG Rocky/mountains 137716.0 16183.0 782480.0 \n", - "... ... ... ... ... ... \n", - "1505 ZWE Grassland 3417.0 263.0 418294.0 \n", - "1506 ZWE Rocky/mountains 0.0 0.0 418294.0 \n", - "1507 ZWE Savanna 231134.0 97790.0 418294.0 \n", - "1508 ZWE Shrubland 52656.0 4262.0 418294.0 \n", - "1509 ZWE Wetlands/open water 3866.0 3238.0 418294.0 \n", - "\n", - " protected_perc habitat_perc \n", - "0 1.656512 7.236607 \n", - "1 1.325427 35.087542 \n", - "2 5.575356 0.501994 \n", - "3 3.172904 37.358016 \n", - "4 11.750995 17.599939 \n", - "... ... ... \n", - "1505 7.696810 0.816890 \n", - "1506 NaN 0.000000 \n", - "1507 42.308791 55.256351 \n", - "1508 8.094044 12.588275 \n", - "1509 83.755820 0.924230 \n", - "\n", - "[1510 rows x 7 columns]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate the percentage of protected pixels and the percentage of extent of the habitat\n", - "habitat_ter_grouped['protected_perc'] = habitat_ter_grouped['pixel_protected']/habitat_ter_grouped['pixel_habitat']*100\n", - "habitat_ter_grouped['habitat_perc'] = habitat_ter_grouped['pixel_habitat']/habitat_ter_grouped['total_pixels']*100\n", - "habitat_ter_grouped" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixelsprotected_perchabitat_perctotal_terrestrial_area
0AFGArtificial56625.0938.0782480.01.6565127.236607644050.28
1AFGDesert274553.03639.0782480.01.32542735.087542644050.28
2AFGForest3928.0219.0782480.05.5753560.501994644050.28
3AFGGrassland292319.09275.0782480.03.17290437.358016644050.28
4AFGRocky/mountains137716.016183.0782480.011.75099517.599939644050.28
...........................
1505ZWEGrassland3417.0263.0418294.07.6968100.816890391234.88
1506ZWERocky/mountains0.00.0418294.0NaN0.000000391234.88
1507ZWESavanna231134.097790.0418294.042.30879155.256351391234.88
1508ZWEShrubland52656.04262.0418294.08.09404412.588275391234.88
1509ZWEWetlands/open water3866.03238.0418294.083.7558200.924230391234.88
\n", - "

1510 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " iso_3 habitat_name pixel_habitat pixel_protected total_pixels \\\n", - "0 AFG Artificial 56625.0 938.0 782480.0 \n", - "1 AFG Desert 274553.0 3639.0 782480.0 \n", - "2 AFG Forest 3928.0 219.0 782480.0 \n", - "3 AFG Grassland 292319.0 9275.0 782480.0 \n", - "4 AFG Rocky/mountains 137716.0 16183.0 782480.0 \n", - "... ... ... ... ... ... \n", - "1505 ZWE Grassland 3417.0 263.0 418294.0 \n", - "1506 ZWE Rocky/mountains 0.0 0.0 418294.0 \n", - "1507 ZWE Savanna 231134.0 97790.0 418294.0 \n", - "1508 ZWE Shrubland 52656.0 4262.0 418294.0 \n", - "1509 ZWE Wetlands/open water 3866.0 3238.0 418294.0 \n", - "\n", - " protected_perc habitat_perc total_terrestrial_area \n", - "0 1.656512 7.236607 644050.28 \n", - "1 1.325427 35.087542 644050.28 \n", - "2 5.575356 0.501994 644050.28 \n", - "3 3.172904 37.358016 644050.28 \n", - "4 11.750995 17.599939 644050.28 \n", - "... ... ... ... \n", - "1505 7.696810 0.816890 391234.88 \n", - "1506 NaN 0.000000 391234.88 \n", - "1507 42.308791 55.256351 391234.88 \n", - "1508 8.094044 12.588275 391234.88 \n", - "1509 83.755820 0.924230 391234.88 \n", - "\n", - "[1510 rows x 8 columns]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Add country's terrestrial area\n", - "add_total_terrestrial_area(habitat_ter_grouped)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "# Estimate the total area and the protected area based on pixels proportions and the total terrestrial area\n", - "habitat_ter_grouped['total_habitat_area'] = habitat_ter_grouped['total_terrestrial_area']*habitat_ter_grouped['habitat_perc']/100\n", - "habitat_ter_grouped['protected_habitat_area'] = habitat_ter_grouped['total_terrestrial_area']*habitat_ter_grouped['protected_perc']/100" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitat_namepixel_habitatpixel_protectedtotal_pixelsprotected_perchabitat_perctotal_terrestrial_areatotal_habitat_areaprotected_habitat_area
71AUTArtificial56023.017428.0126396.031.10865244.32339683709.4837102.88456926040.890660
72AUTDesert799.0703.0126396.087.9849810.63214083709.48529.16132373651.770263
73AUTForest39594.017631.0126396.044.52947431.32535883709.4826222.29462337275.391268
74AUTGrassland16498.09748.0126396.059.08595013.05262883709.4810926.28723349460.541341
75AUTRocky/mountains1534.01090.0126396.071.0560631.21364683709.481015.93675759480.660495
76AUTShrubland10911.04511.0126396.041.3435988.63239383709.487226.13165234608.511070
77AUTWetlands/open water1037.0870.0126396.083.8958530.82043783709.48686.78384470228.782642
\n", - "
" - ], - "text/plain": [ - " iso_3 habitat_name pixel_habitat pixel_protected total_pixels \\\n", - "71 AUT Artificial 56023.0 17428.0 126396.0 \n", - "72 AUT Desert 799.0 703.0 126396.0 \n", - "73 AUT Forest 39594.0 17631.0 126396.0 \n", - "74 AUT Grassland 16498.0 9748.0 126396.0 \n", - "75 AUT Rocky/mountains 1534.0 1090.0 126396.0 \n", - "76 AUT Shrubland 10911.0 4511.0 126396.0 \n", - "77 AUT Wetlands/open water 1037.0 870.0 126396.0 \n", - "\n", - " protected_perc habitat_perc total_terrestrial_area total_habitat_area \\\n", - "71 31.108652 44.323396 83709.48 37102.884569 \n", - "72 87.984981 0.632140 83709.48 529.161323 \n", - "73 44.529474 31.325358 83709.48 26222.294623 \n", - "74 59.085950 13.052628 83709.48 10926.287233 \n", - "75 71.056063 1.213646 83709.48 1015.936757 \n", - "76 41.343598 8.632393 83709.48 7226.131652 \n", - "77 83.895853 0.820437 83709.48 686.783844 \n", - "\n", - " protected_habitat_area \n", - "71 26040.890660 \n", - "72 73651.770263 \n", - "73 37275.391268 \n", - "74 49460.541341 \n", - "75 59480.660495 \n", - "76 34608.511070 \n", - "77 70228.782642 " - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_ter_grouped[habitat_ter_grouped['iso_3'] == 'AUT']" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [], - "source": [ - "# Add regions\n", - "habitat_ter_grouped = add_region_iso2(habitat_ter_grouped, 'iso_3')\n", - "\n", - "regions = habitat_ter_grouped.groupby(['region', 'habitat_name']).agg({\n", - " 'total_area': 'sum',\n", - " 'protected_area': 'sum'\n", - "}).reset_index()\n", - "\n", - "regions.rename(columns={'region': 'location_id'}, inplace=True)\n", - "habitat_ter_grouped.drop(columns=['pixel_habitat', 'pixel_protected', 'total_pixel_area', 'protect_perc', 'extent_perc', 'total_terrestrial_area', 'region'], inplace=True)\n", - "habitat_ter_grouped = habitat_ter_grouped.rename(columns = {'iso_3':'location_id'})" 
- ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [], - "source": [ - "# Concatenate regions and habitat_ter_grouped dataframes\n", - "habitats_terrestrial = pd.concat([regions, habitat_ter_grouped], ignore_index=True)\n", - "\n", - "# fill protected_area and total_area with 0 if they are NaN\n", - "habitats_terrestrial['protected_area'] = habitats_terrestrial['protected_area'].fillna(0)\n", - "habitats_terrestrial['total_area'] = habitats_terrestrial['total_area'].fillna(0)" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "# Add year and environment columns\n", - "habitats_terrestrial['year'] = 2024\n", - "habitats_terrestrial['environment'] = 2" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitattotal_areaprotected_areayearenvironment
0AFartificial2.924001e+062.331527e+0520242
1AFdesert9.872960e+066.726984e+0520242
2AFforest4.458009e+069.713040e+0520242
3AFgrassland2.035644e+062.283491e+0520242
4AFrocky/mountains2.384691e+054.774587e+0420242
5AFsavanna8.387535e+061.910888e+0620242
6AFshrubland1.766346e+062.037187e+0520242
7AFwetlands/open water3.101304e+055.931974e+0420242
8ASartificial8.041755e+062.699350e+0520242
9ASdesert3.538487e+063.150823e+0520242
\n", - "
" - ], - "text/plain": [ - " location_id habitat total_area protected_area year \\\n", - "0 AF artificial 2.924001e+06 2.331527e+05 2024 \n", - "1 AF desert 9.872960e+06 6.726984e+05 2024 \n", - "2 AF forest 4.458009e+06 9.713040e+05 2024 \n", - "3 AF grassland 2.035644e+06 2.283491e+05 2024 \n", - "4 AF rocky/mountains 2.384691e+05 4.774587e+04 2024 \n", - "5 AF savanna 8.387535e+06 1.910888e+06 2024 \n", - "6 AF shrubland 1.766346e+06 2.037187e+05 2024 \n", - "7 AF wetlands/open water 3.101304e+05 5.931974e+04 2024 \n", - "8 AS artificial 8.041755e+06 2.699350e+05 2024 \n", - "9 AS desert 3.538487e+06 3.150823e+05 2024 \n", - "\n", - " environment \n", - "0 2 \n", - "1 2 \n", - "2 2 \n", - "3 2 \n", - "4 2 \n", - "5 2 \n", - "6 2 \n", - "7 2 \n", - "8 2 \n", - "9 2 " - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concatenate terrestrial and marine habitats\n", - "habitats_all = pd.concat([habitats_terrestrial, habitat_mar], ignore_index=True).rename(columns={'habitat_name': 'habitat'})\n", - "habitats_all['habitat'] = habitats_all['habitat'].str.lower()\n", - "habitats_all.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_idhabitattotal_areaprotected_areayearenvironment
0AF432.924001e+062.331527e+0520242
1AF449.872960e+066.726984e+0520242
2AF454.458009e+069.713040e+0520242
3AF462.035644e+062.283491e+0520242
4AF472.384691e+054.774587e+0420242
5AF488.387535e+061.910888e+0620242
6AF491.766346e+062.037187e+0520242
7AF503.101304e+055.931974e+0420242
8AS438.041755e+062.699350e+0520242
9AS443.538487e+063.150823e+0520242
\n", - "
" - ], - "text/plain": [ - " location_id habitat total_area protected_area year environment\n", - "0 AF 43 2.924001e+06 2.331527e+05 2024 2\n", - "1 AF 44 9.872960e+06 6.726984e+05 2024 2\n", - "2 AF 45 4.458009e+06 9.713040e+05 2024 2\n", - "3 AF 46 2.035644e+06 2.283491e+05 2024 2\n", - "4 AF 47 2.384691e+05 4.774587e+04 2024 2\n", - "5 AF 48 8.387535e+06 1.910888e+06 2024 2\n", - "6 AF 49 1.766346e+06 2.037187e+05 2024 2\n", - "7 AF 50 3.101304e+05 5.931974e+04 2024 2\n", - "8 AS 43 8.041755e+06 2.699350e+05 2024 2\n", - "9 AS 44 3.538487e+06 3.150823e+05 2024 2" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# change habitat to have the id of the habitat\n", - "habitat_dict = {\n", - " 'mangroves': 5,\n", - " 'seamounts': 6,\n", - " 'artificial': 43,\n", - " 'forest': 45,\n", - " 'grassland': 46,\n", - " 'wetlands/open water': 50,\n", - " 'seagrasses': 2,\n", - " 'cold-water corals': 4,\n", - " 'desert': 44,\n", - " 'rocky/mountains': 47,\n", - " 'savanna': 48,\n", - " 'shrubland': 49,\n", - " 'saltmarshes': 1,\n", - " 'warm-water corals': 3\n", - "}\n", - "\n", - "habitats_all['habitat'] = habitats_all['habitat'].replace(habitat_dict)\n", - "habitats_all.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [], - "source": [ - "output2(habitats_all, 'location_id', {}, {}, ['location_id']).to_csv(output_file, index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idhabitattotal_areaprotected_areayearenvironmentlocation
01432.924001e+06233152.675055202423.0
12449.872960e+06672698.366583202423.0
23454.458009e+06971303.987441202423.0
34462.035644e+06228349.125359202423.0
45472.384691e+0547745.870360202423.0
........................
2168216957.429267e+0421277.220000202014.0
2169217051.246190e+03732.143750202016.0
2170217152.415419e+032097.740000202017.0
2171217253.989344e+0427151.740000202018.0
2172217351.736209e+0227.830000202019.0
\n", - "

2173 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id habitat total_area protected_area year environment location\n", - "0 1 43 2.924001e+06 233152.675055 2024 2 3.0\n", - "1 2 44 9.872960e+06 672698.366583 2024 2 3.0\n", - "2 3 45 4.458009e+06 971303.987441 2024 2 3.0\n", - "3 4 46 2.035644e+06 228349.125359 2024 2 3.0\n", - "4 5 47 2.384691e+05 47745.870360 2024 2 3.0\n", - "... ... ... ... ... ... ... ...\n", - "2168 2169 5 7.429267e+04 21277.220000 2020 1 4.0\n", - "2169 2170 5 1.246190e+03 732.143750 2020 1 6.0\n", - "2170 2171 5 2.415419e+03 2097.740000 2020 1 7.0\n", - "2171 2172 5 3.989344e+04 27151.740000 2020 1 8.0\n", - "2172 2173 5 1.736209e+02 27.830000 2020 1 9.0\n", - "\n", - "[2173 rows x 7 columns]" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = pd.read_csv(output_file)\n", - "a" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token\n" - ] - } - ], - "source": [ - "# Upload csv to bucket\n", - "remote_path = 'vizzuality_processed_data/strapi_tables/habitats.csv'\n", - "\n", - "writeReadGCP(\n", - " credentials=mysettings.GCS_KEYFILE_JSON,\n", - " bucket_name=mysettings.GCS_BUCKET,\n", - " blob_name=remote_path,\n", - " file=output_file,\n", - " operation=\"w\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/data/notebooks/pipes_mock/precalculations.ipynb 
b/data/notebooks/pipes_mock/precalculations.ipynb index de0ec359..4a4f1f1d 100644 --- a/data/notebooks/pipes_mock/precalculations.ipynb +++ b/data/notebooks/pipes_mock/precalculations.ipynb @@ -2619,7 +2619,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Note:** The marine habitat data has already been processed in the habitats.ipynb notebook. This section imports the output from habitats.ipynb, processes the terrestrial data, and generates the final output table required for Strapi." + "**Note:** The marine habitat data has already been processed in the habitats.ipynb notebook. The terrestrial data has been processed in another notebook (TBD). This section imports the output from habitats.ipynb and output generated for terrestrial habitats (stored in the bucket), and generates the final output table required for Strapi." ] }, { @@ -2849,7 +2849,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -2979,7 +2979,7 @@ "[1507 rows x 4 columns]" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -2993,7 +2993,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -3148,7 +3148,7 @@ "[1337 rows x 5 columns]" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -3164,7 +3164,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -3179,7 +3179,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -3334,7 +3334,7 @@ "[1337 rows x 5 columns]" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -3347,7 +3347,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -3526,7 +3526,7 @@ "[1337 rows x 7 
columns]" ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -3540,7 +3540,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -3731,7 +3731,7 @@ "[1337 rows x 8 columns]" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -3743,7 +3743,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -3754,179 +3754,7 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iso_3habitattotalprotectedtotal_area_countryprotected%habitat%total_terrestrial_areatotal_areaprotected_area
68AUTartificial5.546380e+06414568.7793491.243977e+077.47458344.58587683709.037322.3911882789.693093
69AUTdesert7.359923e+0433653.6037871.243977e+0745.7254820.59164583709.0495.259844226.459952
70AUTforest3.889044e+06352118.4617571.243977e+079.05411431.26299083709.026169.9360402369.455901
71AUTgrassland1.604881e+06227355.7391861.243977e+0714.16651912.90121083709.010799.4738131529.909551
72AUTrocky/mountains1.492322e+0546986.2555631.243977e+0731.4853371.19963883709.01004.204873316.177289
73AUTshrubland1.074615e+0693825.4829071.243977e+078.7310788.63854683709.07231.240356631.365203
74AUTwetlands/open water1.020180e+0517607.4299951.243977e+0717.2591390.82009683709.0686.493886118.482935
\n", - "
" - ], - "text/plain": [ - " iso_3 habitat total protected \\\n", - "68 AUT artificial 5.546380e+06 414568.779349 \n", - "69 AUT desert 7.359923e+04 33653.603787 \n", - "70 AUT forest 3.889044e+06 352118.461757 \n", - "71 AUT grassland 1.604881e+06 227355.739186 \n", - "72 AUT rocky/mountains 1.492322e+05 46986.255563 \n", - "73 AUT shrubland 1.074615e+06 93825.482907 \n", - "74 AUT wetlands/open water 1.020180e+05 17607.429995 \n", - "\n", - " total_area_country protected% habitat% total_terrestrial_area \\\n", - "68 1.243977e+07 7.474583 44.585876 83709.0 \n", - "69 1.243977e+07 45.725482 0.591645 83709.0 \n", - "70 1.243977e+07 9.054114 31.262990 83709.0 \n", - "71 1.243977e+07 14.166519 12.901210 83709.0 \n", - "72 1.243977e+07 31.485337 1.199638 83709.0 \n", - "73 1.243977e+07 8.731078 8.638546 83709.0 \n", - "74 1.243977e+07 17.259139 0.820096 83709.0 \n", - "\n", - " total_area protected_area \n", - "68 37322.391188 2789.693093 \n", - "69 495.259844 226.459952 \n", - "70 26169.936040 2369.455901 \n", - "71 10799.473813 1529.909551 \n", - "72 1004.204873 316.177289 \n", - "73 7231.240356 631.365203 \n", - "74 686.493886 118.482935 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_ter_grouped[habitat_ter_grouped['iso_3'] == 'AUT']" - ] - }, - { - "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -3945,7 +3773,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -3959,7 +3787,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -4126,7 +3954,7 @@ "[1386 rows x 6 columns]" ] }, - "execution_count": 18, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -4140,7 +3968,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 16, "metadata": {}, 
"outputs": [ { @@ -4307,7 +4135,7 @@ "[2000 rows x 6 columns]" ] }, - "execution_count": 33, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -4320,14 +4148,14 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_1401056/3397704638.py:19: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + "/tmp/ipykernel_1410963/3397704638.py:19: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", " habitats_all['habitat'] = habitats_all['habitat'].replace(habitat_dict)\n" ] }, @@ -4469,7 +4297,7 @@ "9 AS 44 3.536380e+06 2.998901e+05 2024 2" ] }, - "execution_count": 34, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -4500,7 +4328,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -4509,185 +4337,7 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idhabitattotal_areaprotected_areayearenvironmentlocation
01432.925993e+06190150.539425202423.0
12449.875738e+06658679.485018202423.0
23454.461370e+06919656.960007202423.0
34462.039020e+06192683.234520202423.0
45472.384026e+0546963.725543202423.0
........................
1995199657.429267e+0421277.220000202014.0
1996199751.246190e+03732.143750202016.0
1997199852.415419e+032097.740000202017.0
1998199953.989344e+0427151.740000202018.0
1999200051.736209e+0227.830000202019.0
\n", - "

2000 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id habitat total_area protected_area year environment location\n", - "0 1 43 2.925993e+06 190150.539425 2024 2 3.0\n", - "1 2 44 9.875738e+06 658679.485018 2024 2 3.0\n", - "2 3 45 4.461370e+06 919656.960007 2024 2 3.0\n", - "3 4 46 2.039020e+06 192683.234520 2024 2 3.0\n", - "4 5 47 2.384026e+05 46963.725543 2024 2 3.0\n", - "... ... ... ... ... ... ... ...\n", - "1995 1996 5 7.429267e+04 21277.220000 2020 1 4.0\n", - "1996 1997 5 1.246190e+03 732.143750 2020 1 6.0\n", - "1997 1998 5 2.415419e+03 2097.740000 2020 1 7.0\n", - "1998 1999 5 3.989344e+04 27151.740000 2020 1 8.0\n", - "1999 2000 5 1.736209e+02 27.830000 2020 1 9.0\n", - "\n", - "[2000 rows x 7 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = pd.read_csv(output_file)\n", - "a" - ] - }, - { - "cell_type": "code", - "execution_count": 38, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -4710,466 +4360,6 @@ " operation=\"w\",\n", ")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'final_table1' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfinal_table1\u001b[49m\n", - "\u001b[0;31mNameError\u001b[0m: name 'final_table1' is not defined" - ] - } - ], - "source": [ - "final_table1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Missing IDs: [4000, 8000, 12000, 16000, 20000, 24000, 28000, 32000, 36000, 40000, 44000, 48000, 52000, 56000, 60000, 64000, 68000, 72000, 76000, 80000, 84000, 88000, 92000, 96000, 100000, 104000, 
108000, 112000, 116000, 120000, 124000, 128000, 132000, 136000, 140000, 144000, 148000, 152000, 156000, 160000, 164000, 168000, 172000, 176000, 180000, 184000, 188000, 192000, 196000, 200000, 204000, 208000, 212000, 216000, 220000, 224000, 228000, 232000, 236000, 240000, 244000, 248000, 252000, 256000, 260000, 264000, 268000, 272000, 276000, 280000, 284000, 288000, 292000, 296000, 300000, 304000]\n" - ] - } - ], - "source": [ - "import os\n", - "import json\n", - "from typing import List\n", - "\n", - "def find_missing_ids(folder_path: str) -> List[int]:\n", - " \"\"\"\n", - " Find missing IDs from JSON files in a folder.\n", - "\n", - " Parameters:\n", - " - folder_path (str): The path to the folder containing JSON files.\n", - "\n", - " Returns:\n", - " - List[int]: A list of missing IDs.\n", - " \"\"\"\n", - " ids = []\n", - "\n", - " # Iterate over each file in the folder\n", - " for filename in os.listdir(folder_path):\n", - " if filename.endswith(\".json\"):\n", - " file_path = os.path.join(folder_path, filename)\n", - " with open(file_path, 'r') as file:\n", - " data = json.load(file)\n", - " # Assuming the data is a dictionary with a nested structure\n", - " for entry_id in data.get(\"data\", {}).get(\"api::pa.pa\", {}).keys():\n", - " ids.append(int(entry_id))\n", - "\n", - " # Sort the IDs\n", - " ids.sort()\n", - "\n", - " # Find missing IDs\n", - " missing_ids = [i for i in range(ids[0], ids[-1] + 1) if i not in ids]\n", - "\n", - " return missing_ids\n", - "\n", - "\n", - "# Example usage\n", - "folder_path = \"/Users/sofia/Documents/SkyTruth/chunks1\"\n", - "missing_ids = find_missing_ids(folder_path)\n", - "print(\"Missing IDs:\", missing_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "76" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "missing_ids = list(range(4000, 304001, 4000)) # Ensure 
missing_ids is defined\n", - "missing_rows = final_table1[final_table1.index.isin(missing_ids)]\n", - "len(missing_rows)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
wdpaidprotection_statusnamedesignationiucn_categoryyearareadata_sourcempaa_establishment_stagempaa_protection_levelbboxcoverageenvironmentlocationchildren
id
40001699631Lough FoyleArea of Special Scientific Interest8199817.2841223NaNNaN[-7.2615, 55.02512, -6.95054, 55.15644]2.936060e-04159.0[]
80009999691Offshore Island; Moke'ehinaSeabird Sanctuary4<NA>0.0260583NaNNaN[-156.5265, 20.98578, -156.52467, 20.9883]2.134853e-071160.0NaN
120005555653401Waitaki BridgeStewardship Area919870.3393193NaNNaN[171.09819, -44.93648, 171.10997, -44.92941]5.054808e-061118.0NaN
160005556555941Colvos Passage Marine PreserveMarine Protected Area320000.0090263NaNNaN[-122.55642, 47.34733, -122.55491, 47.34868]7.394379e-081160.0[]
200003241MochimaNational Park (PN)21973951.6236013NaNNaN[-64.76841, 10.15918, -64.22326, 10.43008]1.038614e-012162.0NaN
................................................
2880005557495991Twin Arch Business Park Section 4 Lots 8-19 & 35Park6<NA>0.0125843NaNNaN[-77.13543, 39.36138, -77.13257, 39.3645]1.329268e-072160.0NaN
2920005557579661ELKO PARKC - Park919580.2238913NaNNaN[-115.12089, 49.29449, -115.11511, 49.30275]2.260619e-06229.0NaN
2960005557681101LiechtbergForest Reserves620230.0377143NaNNaN[8.14625, 47.53048, 8.15053, 47.53406]9.170416e-052179.0NaN
3000005557832641Reserva Natural Dos RiosCivil Society Nature Reserve520230.2458953NaNNaN[-75.61825, 5.75218, -75.61308, 5.75883]2.152766e-05236.0NaN
3040005557877711Anse de Goulven, dunes de KeremmaSite of Community Importance (Habitats Directive)9200420.6380093NaNNaN[-4.31897, 48.62997, -4.19427, 48.67352]3.086233e-03256.0NaN
\n", - "

76 rows × 15 columns

\n", - "
" - ], - "text/plain": [ - " wdpaid protection_status \\\n", - "id \n", - "4000 169963 1 \n", - "8000 999969 1 \n", - "12000 555565340 1 \n", - "16000 555655594 1 \n", - "20000 324 1 \n", - "... ... ... \n", - "288000 555749599 1 \n", - "292000 555757966 1 \n", - "296000 555768110 1 \n", - "300000 555783264 1 \n", - "304000 555787771 1 \n", - "\n", - " name \\\n", - "id \n", - "4000 Lough Foyle \n", - "8000 Offshore Island; Moke'ehina \n", - "12000 Waitaki Bridge \n", - "16000 Colvos Passage Marine Preserve \n", - "20000 Mochima \n", - "... ... \n", - "288000 Twin Arch Business Park Section 4 Lots 8-19 & 35 \n", - "292000 ELKO PARK \n", - "296000 Liechtberg \n", - "300000 Reserva Natural Dos Rios \n", - "304000 Anse de Goulven, dunes de Keremma \n", - "\n", - " designation iucn_category \\\n", - "id \n", - "4000 Area of Special Scientific Interest 8 \n", - "8000 Seabird Sanctuary 4 \n", - "12000 Stewardship Area 9 \n", - "16000 Marine Protected Area 3 \n", - "20000 National Park (PN) 2 \n", - "... ... ... \n", - "288000 Park 6 \n", - "292000 C - Park 9 \n", - "296000 Forest Reserves 6 \n", - "300000 Civil Society Nature Reserve 5 \n", - "304000 Site of Community Importance (Habitats Directive) 9 \n", - "\n", - " year area data_source mpaa_establishment_stage \\\n", - "id \n", - "4000 1998 17.284122 3 NaN \n", - "8000 0.026058 3 NaN \n", - "12000 1987 0.339319 3 NaN \n", - "16000 2000 0.009026 3 NaN \n", - "20000 1973 951.623601 3 NaN \n", - "... ... ... ... ... 
\n", - "288000 0.012584 3 NaN \n", - "292000 1958 0.223891 3 NaN \n", - "296000 2023 0.037714 3 NaN \n", - "300000 2023 0.245895 3 NaN \n", - "304000 2004 20.638009 3 NaN \n", - "\n", - " mpaa_protection_level bbox \\\n", - "id \n", - "4000 NaN [-7.2615, 55.02512, -6.95054, 55.15644] \n", - "8000 NaN [-156.5265, 20.98578, -156.52467, 20.9883] \n", - "12000 NaN [171.09819, -44.93648, 171.10997, -44.92941] \n", - "16000 NaN [-122.55642, 47.34733, -122.55491, 47.34868] \n", - "20000 NaN [-64.76841, 10.15918, -64.22326, 10.43008] \n", - "... ... ... \n", - "288000 NaN [-77.13543, 39.36138, -77.13257, 39.3645] \n", - "292000 NaN [-115.12089, 49.29449, -115.11511, 49.30275] \n", - "296000 NaN [8.14625, 47.53048, 8.15053, 47.53406] \n", - "300000 NaN [-75.61825, 5.75218, -75.61308, 5.75883] \n", - "304000 NaN [-4.31897, 48.62997, -4.19427, 48.67352] \n", - "\n", - " coverage environment location children \n", - "id \n", - "4000 2.936060e-04 1 59.0 [] \n", - "8000 2.134853e-07 1 160.0 NaN \n", - "12000 5.054808e-06 1 118.0 NaN \n", - "16000 7.394379e-08 1 160.0 [] \n", - "20000 1.038614e-01 2 162.0 NaN \n", - "... ... ... ... ... \n", - "288000 1.329268e-07 2 160.0 NaN \n", - "292000 2.260619e-06 2 29.0 NaN \n", - "296000 9.170416e-05 2 179.0 NaN \n", - "300000 2.152766e-05 2 36.0 NaN \n", - "304000 3.086233e-03 2 56.0 NaN \n", - "\n", - "[76 rows x 15 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "missing_rows" - ] } ], "metadata": { @@ -5177,6 +4367,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4,