Skip to content

Commit

Permalink
Reorganise scripts and rules
Browse files Browse the repository at this point in the history
- workflow rules grouped by high-level concept and workflow stage
- scripts sit next to the rules that use them

Aim to reduce cognitive overhead and the sense that different sub-workflows
were muddled together in the previous grouping by general stage.

Aim for a clear set of workflow/<stage> directories, where we should
be able to draw a simple high-level DAG for the workflow between
stages, and sets of rules within each stage are roughly linear.
  • Loading branch information
tomalrussell committed Feb 2, 2024
1 parent 62cbfe5 commit bfb5321
Show file tree
Hide file tree
Showing 111 changed files with 365 additions and 414 deletions.
File renamed without changes.
File renamed without changes.
95 changes: 47 additions & 48 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -96,53 +96,52 @@ wildcard_constraints:
TIFF_FILE="[^\/\.\s]+\.[tT][iI][fF][fF]?",

# generate values for global variables used across rules
include: "rules/storm_workflow_global_variables.smk"
include: "tropical-cyclone/tc_workflow_global_variables.smk"

##### load rules #####
include: "rules/download/coastlines.smk"
include: "rules/download/natural-earth.smk"
include: "rules/download/STORM.smk"
include: "rules/download/IRIS.smk"
include: "rules/download/IBTrACS.smk"
include: "rules/download/gadm.smk"
include: "rules/download/gridfinder.smk"
include: "rules/download/ghsl-pop.smk"
include: "rules/download/hazards.smk"
include: "rules/download/dryad-gdp.smk"
include: "rules/download/wri-powerplants.smk"
include: "rules/download/osm.smk"
include: "rules/download/land_cover.smk"

include: "rules/preprocess/gadm.smk"
include: "rules/preprocess/filter_osm_data.smk"
include: "rules/preprocess/trim_hazard_data.smk"
include: "rules/preprocess/create_bbox_extracts.smk"
include: "rules/preprocess/slice.smk"
include: "rules/preprocess/join_network.smk"
include: "rules/preprocess/targets.smk"
include: "rules/preprocess/create_network.smk"
include: "rules/preprocess/join_data.smk"
include: "rules/preprocess/osm_to_geoparquet.smk"
include: "rules/preprocess/create_overall_bbox.smk"
include: "rules/preprocess/powerplants.smk"
include: "rules/preprocess/IBTrACS.smk"
include: "rules/preprocess/STORM.smk"
include: "rules/preprocess/IRIS.smk"

include: "rules/exposure/join_data.smk"
include: "rules/exposure/network_raster_intersection.smk"
include: "rules/exposure/wind_fields.smk"
include: "rules/exposure/flood_damages.smk"
include: "rules/exposure/electricity_grid/intersection.smk"
include: "rules/exposure/electricity_grid/exposure.smk"
include: "rules/exposure/electricity_grid/disruption.smk"
include: "rules/exposure/aggregate_to_admin_area.smk"

include: "rules/analyse/network_components.smk"
include: "rules/analyse/map/storm_tracks.smk"
include: "rules/analyse/map/outages.smk"
include: "rules/analyse/map/wind_fields.smk"
include: "rules/analyse/plot/target_disruption.smk"
include: "rules/analyse/plot/customers_affected_by_storm.smk"

include: "rules/target/cyclone-grid.smk"
include: "context/coastlines.smk"
include: "context/gadm.smk"
include: "context/natural-earth.smk"

include: "nature-ecosystems/land-cover.smk"
include: "population-economy/dryad-gdp.smk"
include: "population-economy/ghsl-pop.smk"

include: "power/gridfinder.smk"
include: "power/wri-powerplants.smk"
include: "power/gridfinder-targets.smk"
include: "power/create_network.smk"

include: "transport/openstreetmap.smk"
include: "transport/create_bbox_extracts.smk"
include: "transport/slice.smk"
include: "transport/join_network.smk"
include: "transport/create_network.smk"
include: "transport/osm_to_geoparquet.smk"
include: "transport/create_overall_bbox.smk"
include: "transport/join_data.smk"

include: "flood/aqueduct.smk"
include: "flood/trim_hazard_data.smk"

include: "tropical-cyclone/IBTrACS.smk"
include: "tropical-cyclone/IRIS.smk"
include: "tropical-cyclone/STORM.smk"
include: "tropical-cyclone/join_tracks.smk"
include: "tropical-cyclone/wind_fields.smk"

include: "transport-flood/network_raster_intersection.smk"
include: "transport-flood/flood_damages.smk"
include: "transport-flood/aggregate_to_admin_area.smk"

include: "power-tc/network_raster_intersection.smk"
include: "power-tc/intersection.smk"
include: "power-tc/exposure.smk"
include: "power-tc/disruption.smk"
include: "power-tc/analyse/network_components.smk"
include: "power-tc/map/storm_tracks.smk"
include: "power-tc/map/outages.smk"
include: "power-tc/map/wind_fields.smk"
include: "power-tc/plot/target_disruption.smk"
include: "power-tc/plot/customers_affected_by_storm.smk"
include: "power-tc/cyclone-grid.smk"
File renamed without changes.
26 changes: 26 additions & 0 deletions workflow/rules/preprocess/gadm.smk → workflow/context/gadm.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
"""
Download GADM boundaries
Reference
---------
https://gadm.org/data.html
"""


# Fetch the GADM 3.6 "levels" GeoPackage.
# The zip archive is downloaded into {OUTPUT_DIR}/input/admin-boundaries,
# unpacked in that same directory (`unzip -o` overwrites existing files
# without prompting), and the archive is then removed so that only the
# extracted contents remain.
rule download_gadm_levels:
output:
gpkg = "{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg"
shell:
"""
wget https://geodata.ucdavis.edu/gadm/gadm3.6/gadm36_levels_gpkg.zip \
--output-document={wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip
unzip -o {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip \
-d {wildcards.OUTPUT_DIR}/input/admin-boundaries
rm {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip
"""

"""
Test with:
snakemake -c1 -- results/input/admin-boundaries/gadm36_levels.gpkg
"""

rule simplify_admin_bounds:
input:
all_admin_bounds = rules.download_gadm_levels.output.gpkg
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,20 @@ Concept & Methodology:

rule download_ghsl:
    """
    Download one GHSL population raster (GHS_POP) archive and unzip it into
    the directory of the requested output file.

    Wildcards:
        YEAR: epoch of the layer, every fifth year from 1975 to 2030
        RELEASE: release identifier used in the download URL, e.g. R2022A
        RESOLUTION: 100 or 1000, as it appears in the file name
    """
    output:
        "{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_{RELEASE}_54009_{RESOLUTION}_V1_0.tif"
    wildcard_constraints:
        # Constraints are interpolated into the output pattern as regular
        # expressions, so they must be strings: build the alternation
        # "1975|1980|...|2030" instead of passing the range object itself.
        YEAR="|".join(str(year) for year in range(1975, 2031, 5)),
        RESOLUTION="100|1000"
    shell:
        """
        output_dir=$(dirname {output})
        mkdir -p $output_dir
        wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_{wildcards.RELEASE}/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \
            --directory-prefix=$output_dir
        unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \
            -d $output_dir
        """

Expand All @@ -47,8 +47,9 @@ rule download_ghsl_all:
"{{OUTPUT_DIR}}",
"input",
"ghsl",
"GHS_POP_E{year}_GLOBE_R2022A_54009_{resolution}_V1_0.tif",
"GHS_POP_E{year}_GLOBE_{release}_54009_{resolution}_V1_0.tif",
),
resolution=(100, 1000),
year=(2020, )
year=(2020, ),
release="R2022A" # TODO bump to R2023A
)
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ rule analyse_aggregate_levels:
output:
AGGREGATE_LEVELS_OUT,
script:
os.path.join("..", "..", "scripts", "analyse", "storm_aggregate_levels.py")
"./storm_aggregate_levels.py"
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,4 @@ rule analyse_country_matrix:
output:
COUNTRY_MATRIX_OUTPUT,
script:
os.path.join(
"..",
"..",
"scripts",
"analyse",
"storm_distribution_empirical_country_matrix.py",
)
"./storm_distribution_empirical_country_matrix.py"
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ rule analyse_empirical_distribution:
output:
EMPIRICAL_DISTRIBUTION_OUT
script:
os.path.join('..', '..', 'scripts', 'analyse' , 'storm_distribution_empirical.py')
'./storm_distribution_empirical.py'
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ rule analyse_percentile:
output:
PERCENTILE_OUT,
script:
os.path.join("..", "..", "scripts", "analyse", "select_percentile.py")
"./select_percentile.py"
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rule network_components:
component_map="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/network_map_by_component.png",
component_data="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/components.parquet"
script:
"../../scripts/network_components.py"
"./network_components.py"

"""
Test with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,4 @@ rule analyse_targets:
f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent.gpkg",
),
script:
os.path.join(
"..", "..", "scripts", "analyse", "storm_distribution_empirical_geo.py"
)
"./storm_distribution_empirical_geo.py"
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ rule analyse_transmission:
output:
TRANSMISSION_OUT,
script:
os.path.join("..", "..", "scripts", "analyse", "transmission_aggregate.py")
"./transmission_aggregate.py"
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ rule aggregate_disruption_within_sample:
by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_event.pq")),
by_target = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_target.pq")),
script:
"../../../scripts/exposure/aggregate_grid_disruption.py"
"./aggregate_grid_disruption.py"

"""
Test with:
Expand Down Expand Up @@ -423,7 +423,7 @@ rule disruption_by_admin_region:
output:
expected_annual_disruption = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/EAPA_{ADMIN_SLUG}.gpq",
script:
"../../../scripts/exposure/grid_disruption_by_admin_region.py"
"./grid_disruption_by_admin_region.py"

"""
Test with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ rule aggregate_exposure_within_sample:
by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_event.pq")),
by_edge = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_edge.pq")),
script:
"../../../scripts/exposure/aggregate_grid_exposure.py"
"./aggregate_grid_exposure.py"

"""
Test with:
Expand Down Expand Up @@ -103,7 +103,7 @@ rule plot_event_exposure_distributions_for_country:
output:
country_event_distributions = directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/length_m_event_dist/")
script:
"../../../scripts/exposure/plot_exposure_distributions.py"
"./plot_exposure_distributions.py"

"""
Test with:
Expand All @@ -124,7 +124,7 @@ rule exposure_by_admin_region:
output:
expected_annual_exposure = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/EAE_{ADMIN_SLUG}.gpq",
script:
"../../../scripts/exposure/grid_exposure_by_admin_region.py"
"./grid_exposure_by_admin_region.py"

"""
Test with:
Expand Down Expand Up @@ -233,4 +233,4 @@ rule merge_exposure_admin_levels:
merged = merge_gadm_admin_levels(merged, other)

merged.reset_index(drop=True).sort_index(axis=1).to_parquet(output.merged_admin_levels)
logging.info("Done")
logging.info("Done")
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ rule electricity_grid_damages:
exposure = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}/")),
disruption = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}/")),
script:
"../../../scripts/intersect/grid_disruption.py"
"./grid_disruption.py"

"""
Test with:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
23 changes: 23 additions & 0 deletions workflow/power-tc/network_raster_intersection.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Intersect a network representation with hazard rasters
"""

rule rasterise_electricity_grid:
"""
Split electricity network edges on raster grid
Assign raster indices to edges

Reads one country's network edges and that country's wind grid raster,
and writes the split edges into the country's exposure directory.
"""
input:
network="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/edges.geoparquet",
# a list holding a single raster path -- presumably the script accepts
# several rasters; confirm against intersection.py
tif_paths=["{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/wind_grid.tiff"],
params:
# NOTE(review): presumably tells the script not to copy raster cell values
# onto the edges, only the grid indices -- confirm in intersection.py
copy_raster_values=False,
output:
geoparquet="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/edges_split.geoparquet",
script:
# resolved relative to this rule file, so this points at a scripts/
# directory one level up rather than a script alongside the rule
"../scripts/intersection.py"

"""
Test with:
snakemake --cores 1 results/power/by_country/PRI/exposure/edges_split.geoparquet
"""
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ rule fig_cdfs_EAD:
output:
out_cdf_EAD,
script:
os.path.join(
"..", "..", "..", "scripts", "analyse", "figures", "plot_together.py"
)
"./plot_together.py"


rule fig_cdfs_EACA:
Expand All @@ -64,6 +62,4 @@ rule fig_cdfs_EACA:
output:
out_cdf_EACA,
script:
os.path.join(
"..", "..", "..", "scripts", "analyse", "figures", "plot_together.py"
)
"./plot_together.py"
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ rule fig_aggregate_EACA:
output:
out_agg_EACA=out_agg_EACA_file, #
script:
os.path.join("..", "..", "..", "scripts", "analyse", "figures", "mean_agg.py")
"./mean_agg.py"


rule fig_diff_EACA:
Expand All @@ -72,7 +72,7 @@ rule fig_diff_EACA:
output:
out_diff_EACA=out_diff_EACA_file,
script:
os.path.join("..", "..", "..", "scripts", "analyse", "figures", "diff_agg.py")
"./diff_agg.py"


rule fig_plot_current_EACA:
Expand All @@ -90,7 +90,7 @@ rule fig_plot_current_EACA:
output:
out_agg_EACA_plot,
script:
os.path.join("..", "..", "..", "scripts", "analyse", "figures", "plotter.py")
"./plotter.py"


rule fig_plot_diff_EACA:
Expand All @@ -108,4 +108,4 @@ rule fig_plot_diff_EACA:
output:
out_agg_EACA_plot_perc,
script:
os.path.join("..", "..", "..", "scripts", "analyse", "figures", "plotter.py")
"./plotter.py"
Loading

0 comments on commit bfb5321

Please sign in to comment.