Commit

separate retrieve rules
trevorb1 committed Sep 9, 2024
1 parent 0a850ec commit 2cb6136
Showing 4 changed files with 70 additions and 42 deletions.
17 changes: 0 additions & 17 deletions workflow/rules/preprocess.smk
@@ -19,13 +19,6 @@ demand_figures = [
 
 # output script files
 
-external_files = [
-    'PLEXOS_World_2015_Gold_V1.1.xlsx',
-    'All_Demand_UTC_2015.csv',
-    'PLEXOS_World_MESSAGEix_GLOBIOM_Softlink.xlsx',
-    'ember_yearly_electricity_data.csv'
-]
-
 power_plant_files = [
     'CapitalCost.csv',
     'FixedCost.csv',
@@ -110,16 +103,6 @@ rule make_data_dir:
     output: directory('results/data')
     shell: 'mkdir -p {output}'
 
-rule download_external_files:
-    message:
-        'Downloading external files...'
-    log:
-        log = 'results/logs/external_files.log'
-    output:
-        csv_files = expand('resources/data/{output_file}', output_file=external_files),
-    shell:
-        'python workflow/scripts/osemosys_global/external_files.py 2> {log}'
-
 rule powerplant:
     message:
         'Generating powerplant data...'
36 changes: 36 additions & 0 deletions workflow/rules/retrieve.smk
@@ -0,0 +1,36 @@
+
+EXTERNAL_FILES = [
+    "PLEXOS_World_2015_Gold_V1.1.xlsx",
+    "All_Demand_UTC_2015.csv",
+    "PLEXOS_World_MESSAGEix_GLOBIOM_Softlink.xlsx",
+    "ember_yearly_electricity_data.csv"
+]
+
+def get_external_links() -> dict[str,str]:
+    """Gets links that can be downloaded via requests"""
+
+    return {
+        "PLEXOS_World_2015_Gold_V1.1.xlsx" :
+        "https://dataverse.harvard.edu/api/access/datafile/4008393?format=original&gbrecs=true",
+
+        "All_Demand_UTC_2015.csv" :
+        "https://dataverse.harvard.edu/api/access/datafile/3985039?format=original&gbrecs=true",
+
+        "PLEXOS_World_MESSAGEix_GLOBIOM_Softlink.xlsx" :
+        "https://dataverse.harvard.edu/api/access/datafile/6040815",
+
+        "ember_yearly_electricity_data.csv" :
+        "https://ember-climate.org/app/uploads/2022/07/yearly_full_release_long_format.csv"
+    }
+
+rule download_external_files:
+    message:
+        "Downloading external files..."
+    params:
+        files = get_external_links()
+    log:
+        log = "results/logs/external_files.log"
+    output:
+        csv_files = expand("resources/data/{output_file}", output_file=EXTERNAL_FILES),
+    script:
+        "../scripts/osemosys_global/external_files.py"
58 changes: 33 additions & 25 deletions workflow/scripts/osemosys_global/external_files.py
@@ -6,40 +6,48 @@
 """
 
 import logging
-logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
 
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 from pathlib import Path
 from configuration import ConfigPaths
 import os
 import requests
+import sys
+
+
+def download_file(file: str, url: Path | str) -> None:
+    """Downloads a file if the file does not already exist"""
+
+    if Path(path).exists():
+        logging.info(f"{file} already exists")
+        return
+
+    logging.info(f"Downloading {file}")
+
+    data = requests.get(url, path)
+
+    with open(path, "wb") as f:
+        f.write(data.content)
+
+
 # CONFIGURATION PARAMETERS
 config_paths = ConfigPaths()
 input_data_dir = config_paths.input_data_dir
 
-external_files = {
-
-    'PLEXOS_World_2015_Gold_V1.1.xlsx' :
-    'https://dataverse.harvard.edu/api/access/datafile/4008393?format=original&gbrecs=true',
-
-    'All_Demand_UTC_2015.csv' :
-    'https://dataverse.harvard.edu/api/access/datafile/3985039?format=original&gbrecs=true',
-
-    'PLEXOS_World_MESSAGEix_GLOBIOM_Softlink.xlsx' :
-    'https://dataverse.harvard.edu/api/access/datafile/6040815',
-
-    'ember_yearly_electricity_data.csv' :
-    'https://ember-climate.org/app/uploads/2022/07/yearly_full_release_long_format.csv'
-
-}
-
 if __name__ == "__main__":
-    for file, url in external_files.items():
-        path = os.path.join(input_data_dir, file)
-
-        if not Path(path).exists():
-            logging.info(f'Downloading {file}')
+    if "snakemake" in globals():
+        external_files = snakemake.params.files
+    else:
+        if len(sys.argv) != 3:
+            msg = "Usage: python {} <save_name> <url>"
+            print(msg.format(sys.argv[0]))
+            sys.exit(1)
+        else:
+            in_file = sys.argv[1]
+            in_url = sys.argv[2]
+            external_files = {in_file: in_url}
 
-            data = requests.get(url , path)
-
-            with open(path, 'wb') as f:
-                f.write(data.content)
+    for file, url in external_files.items():
+        path = os.path.join(input_data_dir, file)
+        download_file(file, url)
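
The reworked script still runs outside Snakemake: when no snakemake object is present, it expects two positional arguments, a save name and a URL, and writes the downloaded file into the configured input_data_dir. A one-off download might look like the following (URL taken from the mapping in retrieve.smk above):

    python workflow/scripts/osemosys_global/external_files.py ember_yearly_electricity_data.csv "https://ember-climate.org/app/uploads/2022/07/yearly_full_release_long_format.csv"
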
1 change: 1 addition & 0 deletions workflow/snakefile
@@ -11,6 +11,7 @@ configfile: 'config/config.yaml'
 include: 'rules/preprocess.smk'
 include: 'rules/model.smk'
 include: 'rules/postprocess.smk'
+include: 'rules/retrieve.smk'
 
 # constants
 
