From 8edfd7d80f78fff8c4591c0382fd2799ee783a85 Mon Sep 17 00:00:00 2001 From: Andrew Ridden-Harper Date: Thu, 5 Mar 2026 11:59:22 +1300 Subject: [PATCH] Ensure vs30 values are written to disk and verified before exit Add flush and fsync after writing vs30 values to prevent incomplete writes when MPI terminates. Add post-completion validation on master rank to check file size and detect zero vs30 headers. --- workflow/calculation/hf_sim.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/workflow/calculation/hf_sim.py b/workflow/calculation/hf_sim.py index 14dfb2c3..fc61b582 100644 --- a/workflow/calculation/hf_sim.py +++ b/workflow/calculation/hf_sim.py @@ -513,6 +513,9 @@ def run_hf( out.seek(HEAD_STAT - 2 * FLOAT_SIZE, 1) e_dist[i].tofile(out) vs.tofile(out) + # Ensure vs30 values are fully written to disk before MPI terminates the process + out.flush() + os.fsync(out.fileno()) # distribute work in a round-robin fashion across ranks for optimisation # if size=4, rank 0 takes [0,4,8...], rank 1 takes [1,5,9...], rank 2 takes [2,6,10...], @@ -553,5 +556,27 @@ def run_hf( logger.error(msg) comm.Abort(1) else: - logger.debug("Simulation completed and size verified.") - print("✅ HF completed successfully", flush=True) + # open r+b and fsync to invalidate any stale NFS/Lustre client cache + with open(args.out_file, "r+b") as hff: + os.fsync(hff.fileno()) + with open(args.out_file, "rb") as hff: + hff.seek(HEAD_SIZE) + vs_values = np.fromfile( + hff, + count=stations.size, + dtype={ + "names": ["vs"], + "formats": ["f4"], + "offsets": [HEAD_STAT - FLOAT_SIZE], + "itemsize": HEAD_STAT, + }, + )["vs"] + zero_vs = np.where(vs_values == 0)[0] + if zero_vs.size > 0: + msg = f"CRITICAL: {zero_vs.size} station(s) have vs=0 in header (indices: {zero_vs.tolist()})" + print(msg, flush=True) + logger.error(msg) + comm.Abort(1) + else: + logger.debug("Simulation completed, size and vs headers verified.") + print("✅ HF completed successfully", flush=True)