From 4dfec23447bf039a23a63b52d27872b59b7dc3e8 Mon Sep 17 00:00:00 2001
From: romainsacchi <r_s@me.com>
Date: Sun, 8 Oct 2023 15:43:53 +0200
Subject: [PATCH] Add material for custom scenarios generation.

---
 .github/workflows/main.yml  |  65 -------------
 tests/test_config_file.py   | 189 ------------------------------------
 tests/test_datapackage.py   |  39 --------
 tests/test_inventories.py   |  40 --------
 tests/test_scenario_data.py | 141 ---------------------------
 5 files changed, 474 deletions(-)
 delete mode 100644 .github/workflows/main.yml
 delete mode 100644 tests/test_config_file.py
 delete mode 100644 tests/test_datapackage.py
 delete mode 100644 tests/test_inventories.py
 delete mode 100644 tests/test_scenario_data.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
deleted file mode 100644
index a097f0e..0000000
--- a/.github/workflows/main.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: Github Action
-on: [push, pull_request]
-
-jobs:
-
-  test:
-    runs-on: ${{matrix.os}}
-    strategy:
-      matrix:
-        os: [ubuntu-latest]
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Install python version
-        uses: gabrielfalcao/pyenv-action@v7
-        with:
-          default: 3.9.0
-          command: pip install -U pip  # upgrade pip after installing python
-
-      - name: create environment
-        run: pyenv local 3.9.0 && python -mvenv .venv390
-
-      - name: Install dependencies
-        run: |
-          pip install -r requirements.txt --upgrade pip
-          pip install pytest
-
-      - name: Run tests
-        run: pytest
-
-
-  release:
-    name: Create Release
-    runs-on: ubuntu-latest
-
-    needs: test
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - id: set_var
-        run: |
-          content=`cat ./datapackage.json`
-          # the following lines are only required for multi line json
-          content="${content//'%'/'%25'}"
-          content="${content//$'\n'/'%0A'}"
-          content="${content//$'\r'/'%0D'}"
-          # end of optional handling for multi line json
-          echo "::set-output name=packageJson::$content"
-      - run: |
-          echo "VERSION=${{fromJson(steps.set_var.outputs.packageJson).version}}" >> $GITHUB_ENV
-
-      - name: Create Release
-        id: create_release
-        uses: softprops/action-gh-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          tag_name: ${{ env.VERSION }}
-          name: ${{ env.VERSION }}
-          body: Scenario update
-          draft: false
-          prerelease: false
diff --git a/tests/test_config_file.py b/tests/test_config_file.py
deleted file mode 100644
index 56c4d46..0000000
--- a/tests/test_config_file.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import yaml
-from schema import And, Optional, Or, Schema, Use
-from datapackage import Package
-
-from premise.utils import load_constants
-
-config = load_constants()
-LIST_REMIND_REGIONS = config["LIST_REMIND_REGIONS"]
-LIST_IMAGE_REGIONS = config["LIST_IMAGE_REGIONS"]
-SUPPORTED_EI_VERSIONS = config["SUPPORTED_EI_VERSIONS"]
-
-FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json"
-
-def test_config_file():
-
-    dp = Package(FILEPATH_DATAPACKAGE_SCHEMA)
-    resource = dp.get_resource("config")
-    config_file = yaml.safe_load(resource.raw_read())
-
-    file_schema = Schema(
-        {
-            "production pathways": {
-                str: {
-                    "production volume": {
-                        "variable": str,
-                    },
-                    "ecoinvent alias": {
-                        "name": str,
-                        "reference product": str,
-                        Optional("exists in original database"): bool,
-                        Optional("new dataset"): bool,
-                        Optional("regionalize"): bool,
-                    },
-                    Optional("efficiency"): [
-                        {
-                            "variable": str,
-                            Optional("reference year"): And(
-                                Use(int), lambda n: 2005 <= n <= 2100
-                            ),
-                            Optional("includes"): {
-                                Optional("technosphere"): list,
-                                Optional("biosphere"): list,
-                            },
-                        }
-                    ],
-                    Optional("except regions"): And(
-                        list,
-                        Use(list),
-                        lambda s: all(
-                            i in LIST_REMIND_REGIONS + LIST_IMAGE_REGIONS for i in s
-                        ),
-                    ),
-                    Optional("replaces"): [{
-                        "name": str,
-                        "product": str,
-                        Optional("location"): str,
-                        Optional("operator"): str
-                    }
-                    ],
-                    Optional("replaces in"): [
-                        {
-                            Optional("name"): str,
-                            Optional("reference product"): str,
-                            Optional("location"): str,
-                            Optional("operator"): str,
-                        }
-                    ],
-                    Optional("replacement ratio"): float,
-                },
-            },
-            Optional("markets"): [
-                {
-                    "name": str,
-                    "reference product": str,
-                    "unit": str,
-                    "includes": And(
-                        list,
-                        Use(list),
-                        lambda s: all(
-                            i in config_file["production pathways"] for i in s
-                        ),
-                    ),
-                    Optional("add"): [
-                        {
-                            Optional("name"): str,
-                            Optional("reference product"): str,
-                            Optional("categories"): str,
-                            Optional("unit"): str,
-                            Optional("amount"): float,
-                        }
-                    ],
-                    Optional("except regions"): And(
-                        list,
-                        Use(list),
-                        lambda s: all(
-                            i in LIST_REMIND_REGIONS + LIST_IMAGE_REGIONS for i in s
-                        ),
-                    ),
-                    Optional("replaces"): [
-                        {
-                            "name": str,
-                            "product": str,
-                            Optional("location"): str,
-                            Optional("operator"): str
-                        }
-                    ],
-                    Optional("replaces in"): [
-                        {
-                            Optional("name"): str,
-                            Optional("reference product"): str,
-                            Optional("location"): str,
-                            Optional("operator"): str,
-                        }
-                    ],
-                    Optional("replacement ratio"): float,
-                    Optional("efficiency"): [
-                        {
-                            "variable": str,
-                            Optional("reference year"): And(
-                                Use(int), lambda n: 2005 <= n <= 2100
-                            ),
-                            Optional("includes"): {
-                                Optional("technosphere"): list,
-                                Optional("biosphere"): list,
-                            },
-                        }
-                    ],
-                }
-            ],
-        }
-    )
-
-    file_schema.validate(config_file)
-
-    if "markets" in config_file:
-        # check that providers composing the market
-        # are listed
-
-        if "markets" in config_file:
-            # check that providers composing the market
-            # are listed
-
-            for market in config_file["markets"]:
-
-                try:
-                    market_providers = [
-                        (
-                            config_file["production pathways"][a]["ecoinvent alias"][
-                                "name"
-                            ],
-                            config_file["production pathways"][a]["ecoinvent alias"][
-                                "reference product"
-                            ],
-                        )
-                        for a in market["includes"]
-                    ]
-                except KeyError:
-                    raise ValueError(
-                        "One of more providers listed under `markets/includes` is/are not listed "
-                        "under `production pathways`."
-                    )
-
-
-def get_recursively(search_dict, field):
-    """Takes a dict with nested lists and dicts,
-    and searches all dicts for a key of the field
-    provided.
-    """
-    fields_found = []
-
-    for key, value in search_dict.items():
-
-        if key == field:
-            fields_found.append(value)
-
-        elif isinstance(value, dict):
-            results = get_recursively(value, field)
-            for result in results:
-                fields_found.append(result)
-
-        elif isinstance(value, list):
-            for item in value:
-                if isinstance(item, dict):
-                    more_results = get_recursively(item, field)
-                    for another_result in more_results:
-                        fields_found.append(another_result)
-
-    return fields_found
-
diff --git a/tests/test_datapackage.py b/tests/test_datapackage.py
deleted file mode 100644
index c5bf866..0000000
--- a/tests/test_datapackage.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from datapackage import Package
-import pandas as pd
-import yaml
-
-FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json"
-
-def test_datapackage():
-    dp = Package(FILEPATH_DATAPACKAGE_SCHEMA)
-    assert dp.descriptor["profile"] == "data-package"
-    assert len(dp.resources) >= 2
-
-    mandatory_keys = [
-        'profile',
-        'name',
-        'title',
-        'description',
-        'version',
-        'contributors',
-        'dependencies',
-        'ecoinvent',
-        'scenarios',
-        'licenses',
-        'resources'
-    ]
-
-    assert all(i in dp.descriptor for i in mandatory_keys), f"One of the mandatory" \
-                                                            f"descriptor fields in the" \
-                                                            f"datapackage file is missing. " \
-                                                            f"It must contain the following fields {mandatory_keys}"
-
-    for resource in dp.resources:
-        if resource.name == "config":
-            resource = dp.get_resource("config")
-            config_file = yaml.safe_load(resource.raw_read())
-            assert isinstance(config_file, dict)
-
-        if resource.name == "scenario_data":
-            assert pd.read_csv(resource.descriptor["path"]).shape != (0, 0)
-
diff --git a/tests/test_inventories.py b/tests/test_inventories.py
deleted file mode 100644
index ed37730..0000000
--- a/tests/test_inventories.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import yaml
-import bw2io
-from datapackage import Package
-
-FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json"
-dp = Package(FILEPATH_DATAPACKAGE_SCHEMA)
-
-if dp.get_resource("inventories"):
-    i = bw2io.CSVImporter(dp.get_resource("inventories").source)
-
-    resource = dp.get_resource("config")
-    config_file = yaml.safe_load(resource.raw_read())
-
-def test_length():
-    if dp.get_resource("inventories"):
-        assert len(i.data) > 0
-
-
-def test_inventories():
-    if dp.get_resource("inventories"):
-        for k, v in config_file["production pathways"].items():
-
-            name = v["ecoinvent alias"]["name"]
-            ref = v["ecoinvent alias"]["reference product"]
-
-            if (
-                len(
-                    [
-                        a
-                        for a in i.data
-                        if (name, ref) == (a["name"], a["reference product"])
-                    ]
-                )
-                == 0
-            ) and not v["ecoinvent alias"].get("exists in original database")\
-                    and not v["ecoinvent alias"].get("new dataset"):
-                raise ValueError(
-                    f"The inventories provided do not contain the activity: {name, ref}"
-                )
-
diff --git a/tests/test_scenario_data.py b/tests/test_scenario_data.py
deleted file mode 100644
index 38b8f8a..0000000
--- a/tests/test_scenario_data.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import yaml
-import pandas as pd
-import numpy as np
-
-from premise.utils import load_constants
-
-config = load_constants()
-LIST_REMIND_REGIONS = config["LIST_REMIND_REGIONS"]
-LIST_IMAGE_REGIONS = config["LIST_IMAGE_REGIONS"]
-SUPPORTED_EI_VERSIONS = config["SUPPORTED_EI_VERSIONS"]
-
-from premise.geomap import Geomap
-
-from datapackage import Package
-
-FILEPATH_DATAPACKAGE_SCHEMA = "./datapackage.json"
-dp = Package(FILEPATH_DATAPACKAGE_SCHEMA)
-
-resource = dp.get_resource("config")
-config_file = yaml.safe_load(resource.raw_read())
-
-
-resource = dp.get_resource("scenario_data")
-scenario_data = resource.read()
-scenario_headers = resource.headers
-
-df = pd.DataFrame(scenario_data, columns=scenario_headers)
-
-
-
-def test_scenario_data_file():
-    mandatory_fields = ["model", "pathway", "region", "variables", "unit"]
-    if not all(v in df.columns for v in mandatory_fields):
-        raise ValueError(
-            f"One or several mandatory column are missing "
-            f"in the scenario data file. Mandatory columns: {mandatory_fields}."
-        )
-
-    years_cols = []
-    for h, header in enumerate(scenario_headers):
-        try:
-            years_cols.append(int(header))
-        except ValueError:
-            continue
-
-    if not all(2005 <= y <= 2100 for y in years_cols):
-        raise ValueError(
-            f"One or several of the years provided in the scenario data file are "
-            "out of boundaries (2005 - 2100)."
-        )
-
-
-    if len(pd.isnull(df).sum()[pd.isnull(df).sum() > 0]) > 0:
-        raise ValueError(
-            f"The following columns in the scenario data file"
-            f"contains empty cells.\n{pd.isnull(df).sum()[pd.isnull(df).sum() > 0]}."
-        )
-
-
-    d_regions = {"remind": LIST_REMIND_REGIONS, "image": LIST_IMAGE_REGIONS}
-
-    list_ei_locs = [
-        i if isinstance(i, str) else i[-1]
-        for i in list(Geomap(model="remind").geo.keys())
-    ]
-
-    for irow, r in df.iterrows():
-        if (
-                r["region"] not in d_regions[r["model"]]
-                and r["region"] not in list_ei_locs
-        ):
-            raise ValueError(
-                f"Region {r['region']} indicated "
-                f"in row {irow} is not a valid region for model {r['model'].upper()}"
-                f"and is not found within ecoinvent locations."
-            )
-
-    if not all(
-            v in df["variables"].unique()
-            for v in get_recursively(config_file, "variable")
-    ):
-        list_unfound_variables = [
-            p
-            for p in get_recursively(config_file, "variable")
-            if p not in df["variables"].unique()
-        ]
-
-        raise ValueError(
-            "The following variables from the configuration file "
-            f"cannot be found in the scenario file: {list_unfound_variables}"
-        )
-
-    if not all(
-            v in df["variables"].unique()
-            for v in get_recursively(config_file, "variable")
-    ):
-        missing_variables = [
-            v
-            for v in get_recursively(config_file, "variable")
-            if v not in df["variables"].unique()
-        ]
-        raise ValueError(
-            f"One or several variable names in the configuration file "
-            f"cannot be found in the scenario data file: {missing_variables}."
-        )
-
-    try:
-        np.array_equal(df.iloc[:, 6:], df.iloc[:, 6:].astype(float))
-    except ValueError as e:
-        raise TypeError(
-            f"All values provided in the time series must be numerical "
-            f"in the scenario data file."
-        ) from e
-
-
-
-def get_recursively(search_dict, field):
-    """Takes a dict with nested lists and dicts,
-    and searches all dicts for a key of the field
-    provided.
-    """
-    fields_found = []
-
-    for key, value in search_dict.items():
-
-        if key == field:
-            fields_found.append(value)
-
-        elif isinstance(value, dict):
-            results = get_recursively(value, field)
-            for result in results:
-                fields_found.append(result)
-
-        elif isinstance(value, list):
-            for item in value:
-                if isinstance(item, dict):
-                    more_results = get_recursively(item, field)
-                    for another_result in more_results:
-                        fields_found.append(another_result)
-
-    return fields_found