Skip to content

Commit

Permalink
Ensure filtered storm tracks are continuous in timestep
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas-fred committed Jan 29, 2024
1 parent 9b70565 commit af80640
Showing 1 changed file with 26 additions and 3 deletions.
29 changes: 26 additions & 3 deletions workflow/scripts/preprocess/slice_storm_tracks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
Subset IBTrACS (historic) or STORM (synthetic) storm tracks by a buffer polygon
Subset storm tracks by an AOI buffer polygon, taking all points in the track from
first arrival to last departure (including voyages outside the AOI).
"""

import os
Expand All @@ -8,7 +9,9 @@
import sys

import geopandas as gpd
import pandas as pd
import shapely
from tqdm import tqdm


logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO)
Expand Down Expand Up @@ -41,8 +44,28 @@
logging.info("Subsetting tracks to those which intersect with grid (+ buffer)")
# adding the buffer reduces the likelihood we construct wind fields where a
# storm suddenly appears well inside the domain
tracks = tracks[tracks.intersects(hull.buffer(track_slicing_buffer_deg))]
AOI_points = tracks[tracks.intersects(hull.buffer(track_slicing_buffer_deg))]
logging.info(f"Found {len(AOI_points.track_id.unique())} tracks passing within buffer")

# some storms impact a country, wander off outside our AOI, then return some days later
# want time continuity in our tracks (so we don't interpolate wildly, so our eye speed estimates are reasonable)
logging.info("Indexing tracks' first arrival to last departure (for time continuity)")
sliced_tracks_by_track: list[pd.DataFrame] = []
for track_id in tqdm(AOI_points.track_id.unique()):
df = AOI_points[AOI_points.track_id == track_id]

# we need at least two track points to calculate the eye velocity and advective winds
if len(df) > 1:
arrival, *_, departure = df.index
sliced = tracks[tracks.track_id == track_id].loc[arrival: departure]
assert sliced.timestep.is_monotonic_increasing
sliced_tracks_by_track.append(sliced)

sliced_tracks = pd.concat(sliced_tracks_by_track)
logging.info(
f"Filtered to {len(sliced_tracks.track_id.unique())} valid (n > 1) tracks, with {len(sliced_tracks)} points"
)

logging.info("Writing tracks subset to disk")
os.makedirs(os.path.dirname(sliced_tracks_path), exist_ok=True)
tracks.to_parquet(sliced_tracks_path)
sliced_tracks.to_parquet(sliced_tracks_path)

0 comments on commit af80640

Please sign in to comment.