Skip to content

Commit

Permalink
Merge pull request #176 from nismod/feature/refactor-folder-structure
Browse files Browse the repository at this point in the history
Reorganise scripts and rules
  • Loading branch information
tomalrussell authored Feb 6, 2024
2 parents e6a9d27 + ee465d8 commit a8a60b9
Show file tree
Hide file tree
Showing 118 changed files with 345 additions and 3,607 deletions.
6 changes: 2 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
__pycache__/*
venv/*
src/snkit
build

# Data
data/*
Expand All @@ -24,10 +25,7 @@ tester/*
# IDE
.idea
*.swp
.vscode

# Rendered documentation
docs/book/

validation/plots/*
validation/holland_done_ratio
validation/wind_model_comparison
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
101 changes: 52 additions & 49 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -95,54 +95,57 @@ wildcard_constraints:
# may be upper or lower, one 'f' or two
TIFF_FILE="[^\/\.\s]+\.[tT][iI][fF][fF]?",

# generate values for global variables used across rules
include: "rules/storm_workflow_global_variables.smk"
# number of samples each storm track dataset is divided into, keyed by dataset name
SAMPLES_PER_TRACKSET = dict(IBTrACS=1, STORM=10, IRIS=10)

##### load rules #####
include: "rules/download/coastlines.smk"
include: "rules/download/natural-earth.smk"
include: "rules/download/STORM.smk"
include: "rules/download/IRIS.smk"
include: "rules/download/IBTrACS.smk"
include: "rules/download/gadm.smk"
include: "rules/download/gridfinder.smk"
include: "rules/download/ghsl-pop.smk"
include: "rules/download/hazards.smk"
include: "rules/download/dryad-gdp.smk"
include: "rules/download/wri-powerplants.smk"
include: "rules/download/osm.smk"
include: "rules/download/land_cover.smk"

include: "rules/preprocess/gadm.smk"
include: "rules/preprocess/filter_osm_data.smk"
include: "rules/preprocess/trim_hazard_data.smk"
include: "rules/preprocess/create_bbox_extracts.smk"
include: "rules/preprocess/slice.smk"
include: "rules/preprocess/join_network.smk"
include: "rules/preprocess/targets.smk"
include: "rules/preprocess/create_network.smk"
include: "rules/preprocess/join_data.smk"
include: "rules/preprocess/osm_to_geoparquet.smk"
include: "rules/preprocess/create_overall_bbox.smk"
include: "rules/preprocess/powerplants.smk"
include: "rules/preprocess/IBTrACS.smk"
include: "rules/preprocess/STORM.smk"
include: "rules/preprocess/IRIS.smk"

include: "rules/exposure/join_data.smk"
include: "rules/exposure/network_raster_intersection.smk"
include: "rules/exposure/wind_fields.smk"
include: "rules/exposure/flood_damages.smk"
include: "rules/exposure/electricity_grid/intersection.smk"
include: "rules/exposure/electricity_grid/exposure.smk"
include: "rules/exposure/electricity_grid/disruption.smk"
include: "rules/exposure/aggregate_to_admin_area.smk"

include: "rules/analyse/network_components.smk"
include: "rules/analyse/map/storm_tracks.smk"
include: "rules/analyse/map/outages.smk"
include: "rules/analyse/map/wind_fields.smk"
include: "rules/analyse/plot/target_disruption.smk"
include: "rules/analyse/plot/customers_affected_by_storm.smk"

include: "rules/target/cyclone-grid.smk"
include: "context/coastlines.smk"
include: "context/gadm.smk"
include: "context/natural-earth.smk"

include: "nature-ecosystems/land-cover.smk"
include: "population-economy/dryad-gdp.smk"
include: "population-economy/ghsl-pop.smk"

include: "power/gridfinder.smk"
include: "power/wri-powerplants.smk"
include: "power/gridfinder-targets.smk"
include: "power/create_network.smk"

include: "transport/openstreetmap.smk"
include: "transport/create_bbox_extracts.smk"
include: "transport/slice.smk"
include: "transport/join_network.smk"
include: "transport/create_network.smk"
include: "transport/osm_to_geoparquet.smk"
include: "transport/create_overall_bbox.smk"
include: "transport/join_data.smk"

include: "flood/aqueduct.smk"
include: "flood/trim_hazard_data.smk"

include: "tropical-cyclone/IBTrACS.smk"
include: "tropical-cyclone/IRIS.smk"
include: "tropical-cyclone/STORM.smk"
include: "tropical-cyclone/join_tracks.smk"
include: "tropical-cyclone/wind_fields/wind_fields.smk"

include: "transport-flood/network_raster_intersection.smk"
include: "transport-flood/flood_damages.smk"
include: "transport-flood/aggregate_to_admin_area.smk"

include: "power-tc/network_raster_intersection.smk"
include: "power-tc/intersection.smk"
include: "power-tc/exposure.smk"
include: "power-tc/disruption.smk"
include: "power-tc/network_components.smk"
include: "power-tc/map/storm_tracks.smk"
include: "power-tc/map/outages.smk"
include: "power-tc/map/wind_fields.smk"
include: "power-tc/map/target_disruption.smk"
include: "power-tc/map/customers_affected_by_storm.smk"
include: "power-tc/cyclone-grid.smk"
File renamed without changes.
26 changes: 26 additions & 0 deletions workflow/rules/preprocess/gadm.smk → workflow/context/gadm.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
"""
Download GADM boundaries
Reference
---------
https://gadm.org/data.html
"""


rule download_gadm_levels:
"""
Download the GADM 3.6 "levels" GeoPackage archive and unpack it into the
admin-boundaries input directory.
"""
output:
gpkg = "{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg"
# Steps: fetch the zip into the admin-boundaries directory, extract it there
# (unzip -o overwrites existing files without prompting), then delete the zip.
# NOTE(review): presumably the archive holds gadm36_levels.gpkg at its top
# level, which is what satisfies `output.gpkg` -- confirm.
shell:
"""
wget https://geodata.ucdavis.edu/gadm/gadm3.6/gadm36_levels_gpkg.zip \
--output-document={wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip
unzip -o {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip \
-d {wildcards.OUTPUT_DIR}/input/admin-boundaries
rm {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip
"""

"""
Test with:
snakemake -c1 -- results/input/admin-boundaries/gadm36_levels.gpkg
"""

rule simplify_admin_bounds:
input:
all_admin_bounds = rules.download_gadm_levels.output.gpkg
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,20 @@ Concept & Methodology:

rule download_ghsl:
output:
"{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_R2022A_54009_{RESOLUTION}_V1_0.tif"
"{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_{RELEASE}_54009_{RESOLUTION}_V1_0.tif"
wildcard_constraints:
YEAR="1975|1980|1985|1990|1995|2000|2005|2010|2015|2020|2025|2030",
RESOLUTION="1000?"
YEAR=range(1975, 2031, 5),
RESOLUTION="100|1000"
shell:
"""
output_dir=$(dirname {output})
mkdir -p $output_dir
wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2022A/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}_V1_0.zip \
wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_{wildcards.RELEASE}/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \
--directory-prefix=$output_dir
unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}_V1_0.zip \
unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \
-d $output_dir
"""

Expand All @@ -47,8 +47,9 @@ rule download_ghsl_all:
"{{OUTPUT_DIR}}",
"input",
"ghsl",
"GHS_POP_E{year}_GLOBE_R2022A_54009_{resolution}_V1_0.tif",
"GHS_POP_E{year}_GLOBE_{release}_54009_{resolution}_V1_0.tif",
),
resolution=(100, 1000),
year=(2020, )
year=(2020, ),
release="R2022A" # TODO bump to R2023A
)
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ rule aggregate_disruption_within_sample:
by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_event.pq")),
by_target = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_target.pq")),
script:
"../../../scripts/exposure/aggregate_grid_disruption.py"
"./aggregate_grid_disruption.py"

"""
Test with:
Expand Down Expand Up @@ -423,7 +423,7 @@ rule disruption_by_admin_region:
output:
expected_annual_disruption = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/EAPA_{ADMIN_SLUG}.gpq",
script:
"../../../scripts/exposure/grid_disruption_by_admin_region.py"
"./grid_disruption_by_admin_region.py"

"""
Test with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ rule aggregate_exposure_within_sample:
by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_event.pq")),
by_edge = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_edge.pq")),
script:
"../../../scripts/exposure/aggregate_grid_exposure.py"
"./aggregate_grid_exposure.py"

"""
Test with:
Expand Down Expand Up @@ -103,7 +103,7 @@ rule plot_event_exposure_distributions_for_country:
output:
country_event_distributions = directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/length_m_event_dist/")
script:
"../../../scripts/exposure/plot_exposure_distributions.py"
"./plot_exposure_distributions.py"

"""
Test with:
Expand All @@ -124,7 +124,7 @@ rule exposure_by_admin_region:
output:
expected_annual_exposure = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/EAE_{ADMIN_SLUG}.gpq",
script:
"../../../scripts/exposure/grid_exposure_by_admin_region.py"
"./grid_exposure_by_admin_region.py"

"""
Test with:
Expand Down Expand Up @@ -233,4 +233,4 @@ rule merge_exposure_admin_levels:
merged = merge_gadm_admin_levels(merged, other)

merged.reset_index(drop=True).sort_index(axis=1).to_parquet(output.merged_admin_levels)
logging.info("Done")
logging.info("Done")
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ rule electricity_grid_damages:
exposure = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}/")),
disruption = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}/")),
script:
"../../../scripts/intersect/grid_disruption.py"
"./grid_disruption.py"

"""
Test with:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rule network_components:
component_map="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/network_map_by_component.png",
component_data="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/components.parquet"
script:
"../../scripts/network_components.py"
"./network_components.py"

"""
Test with:
Expand Down
23 changes: 23 additions & 0 deletions workflow/power-tc/network_raster_intersection.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Intersect a network representation with hazard rasters
"""

rule rasterise_electricity_grid:
"""
Split electricity network edges on raster grid
Assign raster indices to edges
"""
input:
# per-country power network edges
network="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/edges.geoparquet",
# list containing the single per-country wind grid raster
tif_paths=["{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/wind_grid.tiff"],
params:
# NOTE(review): presumably tells the script not to copy raster cell values
# onto the split edges -- confirm against scripts/intersection.py
copy_raster_values=False,
output:
geoparquet="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/edges_split.geoparquet",
script:
"../../scripts/intersection.py"

"""
Test with:
snakemake --cores 1 results/power/by_country/PRI/exposure/edges_split.geoparquet
"""
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ rule create_power_network:
nodes="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/nodes.geoparquet",
grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json",
script:
"../../scripts/preprocess/create_electricity_network.py"
"./create_electricity_network.py"

"""
Test with:
Expand Down Expand Up @@ -366,30 +366,3 @@ rule map_network_components:
Test with:
snakemake -c1 results/power/by_country/HTI/edges.png
"""


rule create_transport_network:
"""
Take .geoparquet OSM files and output files of cleaned network nodes and edges

The script that runs is chosen per network type: its path is templated from
`params.network_type`, which is derived from the FILTER_SLUG wildcard.
"""
input:
# raw per-slice nodes and edges for this dataset/filter, plus GADM admin boundaries
nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_nodes.geoparquet",
edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_edges.geoparquet",
admin="{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg",
output:
# processed per-slice nodes and edges, written alongside the raw inputs
nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_nodes.geoparquet",
edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_edges.geoparquet"
params:
# determine the network type from the filter, e.g. road, rail
network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''),
# pass in the slice number so we can label edges and nodes with their slice
# edge and node IDs should be unique across all slices
slice_number=lambda wildcards: int(wildcards.SLICE_SLUG.replace('slice-', ''))
script:
# template the path string with a value from params (can't execute .replace in `script` context)
"../../scripts/transport/create_{params.network_type}_network.py"

"""
Test with:
snakemake --cores all results/geoparquet/tanzania-mini_filter-road/processed/slice-0_edges.geoparquet
"""
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ rule annotate_targets:
output:
targets="{OUTPUT_DIR}/power/targets.geoparquet",
script:
"../../scripts/preprocess/annotate_targets.py"
"./annotate_targets.py"

"""
Test with:
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,3 +1,28 @@
"""
Download WRI powerplants database
Reference
---------
https://www.wri.org/research/global-database-power-plants
"""


rule download_powerplants:
    """
    Download the WRI Global Power Plant Database (v1.3) archive and extract it
    into the powerplants input directory.
    """
    output:
        csv = "{OUTPUT_DIR}/input/powerplants/global_power_plant_database.csv"
    # Pin the download filename with --output-document: without it, a re-run
    # with the zip already present makes wget save to `<name>.zip.1`, and unzip
    # would then silently re-extract the stale archive.
    shell:
        """
        mkdir -p {wildcards.OUTPUT_DIR}/input/powerplants
        cd {wildcards.OUTPUT_DIR}/input/powerplants
        wget https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip \
            --output-document=global_power_plant_database_v_1_3.zip
        unzip -o global_power_plant_database_v_1_3.zip
        """

"""
Test with:
snakemake -c1 -- results/input/powerplants/global_power_plant_database.csv
"""

rule parse_powerplants:
"""
Parse powerplant data for world and save in convenient format
Expand Down
Loading

0 comments on commit a8a60b9

Please sign in to comment.