From 4dfec23447bf039a23a63b52d27872b59b7dc3e8 Mon Sep 17 00:00:00 2001 From: romainsacchi Date: Sun, 8 Oct 2023 15:43:53 +0200 Subject: [PATCH] Add material for custom scenarios generation. --- .github/workflows/main.yml | 65 ------------- tests/test_config_file.py | 189 ------------------------------------ tests/test_datapackage.py | 39 -------- tests/test_inventories.py | 40 -------- tests/test_scenario_data.py | 141 --------------------------- 5 files changed, 474 deletions(-) delete mode 100644 .github/workflows/main.yml delete mode 100644 tests/test_config_file.py delete mode 100644 tests/test_datapackage.py delete mode 100644 tests/test_inventories.py delete mode 100644 tests/test_scenario_data.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index a097f0e..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: Github Action -on: [push, pull_request] - -jobs: - - test: - runs-on: ${{matrix.os}} - strategy: - matrix: - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - - name: Install python version - uses: gabrielfalcao/pyenv-action@v7 - with: - default: 3.9.0 - command: pip install -U pip # upgrade pip after installing python - - - name: create environment - run: pyenv local 3.9.0 && python -mvenv .venv390 - - - name: Install dependencies - run: | - pip install -r requirements.txt --upgrade pip - pip install pytest - - - name: Run tests - run: pytest - - - release: - name: Create Release - runs-on: ubuntu-latest - - needs: test - - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - id: set_var - run: | - content=`cat ./datapackage.json` - # the following lines are only required for multi line json - content="${content//'%'/'%25'}" - content="${content//$'\n'/'%0A'}" - content="${content//$'\r'/'%0D'}" - # end of optional handling for multi line json - echo "::set-output name=packageJson::$content" - - run: | - echo "VERSION=${{fromJson(steps.set_var.outputs.packageJson).version}}" >> $GITHUB_ENV - - - name: Create Release - id: create_release - uses: softprops/action-gh-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ env.VERSION }} - name: ${{ env.VERSION }} - body: Scenario update - draft: false - prerelease: false diff --git a/tests/test_config_file.py b/tests/test_config_file.py deleted file mode 100644 index 56c4d46..0000000 --- a/tests/test_config_file.py +++ /dev/null @@ -1,189 +0,0 @@ -import yaml -from schema import And, Optional, Or, Schema, Use -from datapackage import Package - -from premise.utils import load_constants - -config = load_constants() -LIST_REMIND_REGIONS = config["LIST_REMIND_REGIONS"] -LIST_IMAGE_REGIONS = config["LIST_IMAGE_REGIONS"] -SUPPORTED_EI_VERSIONS = config["SUPPORTED_EI_VERSIONS"] - -FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json" - -def test_config_file(): - - dp = Package(FILEPATH_DATAPACKAGE_SCHEMA) - resource = dp.get_resource("config") - config_file = yaml.safe_load(resource.raw_read()) - - file_schema = Schema( - { - "production pathways": { - str: { - "production volume": { - "variable": str, - }, - "ecoinvent alias": { - "name": str, - "reference product": str, - Optional("exists in original database"): bool, - Optional("new dataset"): bool, - Optional("regionalize"): bool, - }, - Optional("efficiency"): [ - { - "variable": str, - Optional("reference year"): And( - Use(int), lambda n: 2005 <= n <= 2100 - ), - Optional("includes"): { - Optional("technosphere"): list, - Optional("biosphere"): list, - }, - } - ], - Optional("except regions"): And( - list, - Use(list), - lambda s: all( - i in LIST_REMIND_REGIONS + LIST_IMAGE_REGIONS for i in s - ), - ), - Optional("replaces"): [{ - "name": str, - "product": str, - Optional("location"): str, - Optional("operator"): str - } - ], - Optional("replaces in"): [ - { - Optional("name"): str, - Optional("reference product"): str, - Optional("location"): str, - Optional("operator"): str, - } - ], - Optional("replacement ratio"): float, - }, - }, - Optional("markets"): [ - { - "name": str, - "reference product": str, - "unit": str, - "includes": And( - list, - Use(list), - lambda s: all( - i in config_file["production pathways"] for i in s - ), - ), - Optional("add"): [ - { - Optional("name"): str, - Optional("reference product"): str, - Optional("categories"): str, - Optional("unit"): str, - Optional("amount"): float, - } - ], - Optional("except regions"): And( - list, - Use(list), - lambda s: all( - i in LIST_REMIND_REGIONS + LIST_IMAGE_REGIONS for i in s - ), - ), - Optional("replaces"): [ - { - "name": str, - "product": str, - Optional("location"): str, - Optional("operator"): str - } - ], - Optional("replaces in"): [ - { - Optional("name"): str, - Optional("reference product"): str, - Optional("location"): str, - Optional("operator"): str, - } - ], - Optional("replacement ratio"): float, - Optional("efficiency"): [ - { - "variable": str, - Optional("reference year"): And( - Use(int), lambda n: 2005 <= n <= 2100 - ), - Optional("includes"): { - Optional("technosphere"): list, - Optional("biosphere"): list, - }, - } - ], - } - ], - } - ) - - file_schema.validate(config_file) - - if "markets" in config_file: - # check that providers composing the market - # are listed - - if "markets" in config_file: - # check that providers composing the market - # are listed - - for market in config_file["markets"]: - - try: - market_providers = [ - ( - config_file["production pathways"][a]["ecoinvent alias"][ - "name" - ], - config_file["production pathways"][a]["ecoinvent alias"][ - "reference product" - ], - ) - for a in market["includes"] - ] - except KeyError: - raise ValueError( - "One of more providers listed under `markets/includes` is/are not listed " - "under `production pathways`." - ) - - -def get_recursively(search_dict, field): - """Takes a dict with nested lists and dicts, - and searches all dicts for a key of the field - provided. - """ - fields_found = [] - - for key, value in search_dict.items(): - - if key == field: - fields_found.append(value) - - elif isinstance(value, dict): - results = get_recursively(value, field) - for result in results: - fields_found.append(result) - - elif isinstance(value, list): - for item in value: - if isinstance(item, dict): - more_results = get_recursively(item, field) - for another_result in more_results: - fields_found.append(another_result) - - return fields_found - diff --git a/tests/test_datapackage.py b/tests/test_datapackage.py deleted file mode 100644 index c5bf866..0000000 --- a/tests/test_datapackage.py +++ /dev/null @@ -1,39 +0,0 @@ -from datapackage import Package -import pandas as pd -import yaml - -FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json" - -def test_datapackage(): - dp = Package(FILEPATH_DATAPACKAGE_SCHEMA) - assert dp.descriptor["profile"] == "data-package" - assert len(dp.resources) >= 2 - - mandatory_keys = [ - 'profile', - 'name', - 'title', - 'description', - 'version', - 'contributors', - 'dependencies', - 'ecoinvent', - 'scenarios', - 'licenses', - 'resources' - ] - - assert all(i in dp.descriptor for i in mandatory_keys), f"One of the mandatory" \ - f"descriptor fields in the" \ - f"datapackage file is missing. " \ - f"It must contain the following fields {mandatory_keys}" - - for resource in dp.resources: - if resource.name == "config": - resource = dp.get_resource("config") - config_file = yaml.safe_load(resource.raw_read()) - assert isinstance(config_file, dict) - - if resource.name == "scenario_data": - assert pd.read_csv(resource.descriptor["path"]).shape != (0, 0) - diff --git a/tests/test_inventories.py b/tests/test_inventories.py deleted file mode 100644 index ed37730..0000000 --- a/tests/test_inventories.py +++ /dev/null @@ -1,40 +0,0 @@ -import yaml -import bw2io -from datapackage import Package - -FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json" -dp = Package(FILEPATH_DATAPACKAGE_SCHEMA) - -if dp.get_resource("inventories"): - i = bw2io.CSVImporter(dp.get_resource("inventories").source) - - resource = dp.get_resource("config") - config_file = yaml.safe_load(resource.raw_read()) - -def test_length(): - if dp.get_resource("inventories"): - assert len(i.data) > 0 - - -def test_inventories(): - if dp.get_resource("inventories"): - for k, v in config_file["production pathways"].items(): - - name = v["ecoinvent alias"]["name"] - ref = v["ecoinvent alias"]["reference product"] - - if ( - len( - [ - a - for a in i.data - if (name, ref) == (a["name"], a["reference product"]) - ] - ) - == 0 - ) and not v["ecoinvent alias"].get("exists in original database")\ - and not v["ecoinvent alias"].get("new dataset"): - raise ValueError( - f"The inventories provided do not contain the activity: {name, ref}" - ) - diff --git a/tests/test_scenario_data.py b/tests/test_scenario_data.py deleted file mode 100644 index 38b8f8a..0000000 --- a/tests/test_scenario_data.py +++ /dev/null @@ -1,141 +0,0 @@ -import yaml -import pandas as pd -import numpy as np - -from premise.utils import load_constants - -config = load_constants() -LIST_REMIND_REGIONS = config["LIST_REMIND_REGIONS"] -LIST_IMAGE_REGIONS = config["LIST_IMAGE_REGIONS"] -SUPPORTED_EI_VERSIONS = config["SUPPORTED_EI_VERSIONS"] - -from premise.geomap import Geomap - -from datapackage import Package - -FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json" -dp = Package(FILEPATH_DATAPACKAGE_SCHEMA) - -resource = dp.get_resource("config") -config_file = yaml.safe_load(resource.raw_read()) - - -resource = dp.get_resource("scenario_data") -scenario_data = resource.read() -scenario_headers = resource.headers - -df = pd.DataFrame(scenario_data, columns=scenario_headers) - - - -def test_scenario_data_file(): - mandatory_fields = ["model", "pathway", "region", "variables", "unit"] - if not all(v in df.columns for v in mandatory_fields): - raise ValueError( - f"One or several mandatory column are missing " - f"in the scenario data file. Mandatory columns: {mandatory_fields}." - ) - - years_cols = [] - for h, header in enumerate(scenario_headers): - try: - years_cols.append(int(header)) - except ValueError: - continue - - if not all(2005 <= y <= 2100 for y in years_cols): - raise ValueError( - f"One or several of the years provided in the scenario data file are " - "out of boundaries (2005 - 2100)." - ) - - - if len(pd.isnull(df).sum()[pd.isnull(df).sum() > 0]) > 0: - raise ValueError( - f"The following columns in the scenario data file" - f"contains empty cells.\n{pd.isnull(df).sum()[pd.isnull(df).sum() > 0]}." - ) - - - d_regions = {"remind": LIST_REMIND_REGIONS, "image": LIST_IMAGE_REGIONS} - - list_ei_locs = [ - i if isinstance(i, str) else i[-1] - for i in list(Geomap(model="remind").geo.keys()) - ] - - for irow, r in df.iterrows(): - if ( - r["region"] not in d_regions[r["model"]] - and r["region"] not in list_ei_locs - ): - raise ValueError( - f"Region {r['region']} indicated " - f"in row {irow} is not a valid region for model {r['model'].upper()}" - f"and is not found within ecoinvent locations." - ) - - if not all( - v in df["variables"].unique() - for v in get_recursively(config_file, "variable") - ): - list_unfound_variables = [ - p - for p in get_recursively(config_file, "variable") - if p not in df["variables"].unique() - ] - - raise ValueError( - "The following variables from the configuration file " - f"cannot be found in the scenario file: {list_unfound_variables}" - ) - - if not all( - v in df["variables"].unique() - for v in get_recursively(config_file, "variable") - ): - missing_variables = [ - v - for v in get_recursively(config_file, "variable") - if v not in df["variables"].unique() - ] - raise ValueError( - f"One or several variable names in the configuration file " - f"cannot be found in the scenario data file: {missing_variables}." - ) - - try: - np.array_equal(df.iloc[:, 6:], df.iloc[:, 6:].astype(float)) - except ValueError as e: - raise TypeError( - f"All values provided in the time series must be numerical " - f"in the scenario data file." - ) from e - - - -def get_recursively(search_dict, field): - """Takes a dict with nested lists and dicts, - and searches all dicts for a key of the field - provided. - """ - fields_found = [] - - for key, value in search_dict.items(): - - if key == field: - fields_found.append(value) - - elif isinstance(value, dict): - results = get_recursively(value, field) - for result in results: - fields_found.append(result) - - elif isinstance(value, list): - for item in value: - if isinstance(item, dict): - more_results = get_recursively(item, field) - for another_result in more_results: - fields_found.append(another_result) - - return fields_found