Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions tests/test_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,24 +87,17 @@ def test_build_hf_input_serialisation() -> None:
assert lines[7] == "{seed}" # seed placeholder
assert lines[9] == "100.0 0.005 20.0 0.045 0.6" # Domain and resolution parameters
assert lines[10] == "0.8 0.7 0.9 2.5 0.0" # rupture velocity + czero,alpha
assert lines[11] == "0.0 2.0 1.0 3.0" # shallow depth, deep depth
assert lines[12] == "-1 1.2" # mom (None -> -1) and rupv
assert lines[13] == str(stoch_ffp) # Stoch file path
assert lines[16] == "0 0.1 0.1 0.1 0.1 1" # Sigs and ic_flag (True -> 1)
assert lines[21] == "-1 -1 -1" # Optional stress parameters
assert lines[11] == "-1 1.2" # mom (None -> -1) and rupv
assert lines[12] == str(stoch_ffp) # Stoch file path
assert lines[15] == "0 0.1 0.1 0.1 0.1 1" # Sigs and ic_flag (True -> 1)
assert lines[20] == "-1 -1 -1" # Optional stress parameters


# Hypothesis strategy producing short ASCII strings (0 to 8 characters),
# used below as candidate station names for property-based tests.
STATION_STRATEGY = st.text(
min_size=0, max_size=8, alphabet=st.characters(codec="ascii")
)


@given(station=STATION_STRATEGY)
def test_stable_hash(station: str) -> None:
    """Station hashes must always fit in a signed 32-bit integer."""
    int32_min = -(1 << 31)
    int32_max = (1 << 31) - 1
    station_hash = hf_sim.stable_hash(station)
    assert int32_min <= station_hash <= int32_max


def test_station_seeds() -> None:
seed = hf_sim.station_seeds(0, ["station"])
assert seed.dtype == np.int32
Expand Down
40 changes: 40 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import geopandas as gpd
import pytest
import shapely
from hypothesis import assume, given
from hypothesis import strategies as st

from workflow import utils

Expand Down Expand Up @@ -142,3 +144,41 @@ def test_dict_zip_identical_keys_different_order() -> None:
result = utils.dict_zip(d1, d2, strict=True)
assert result["a"] == (1, 10)
assert result["b"] == (2, 20)


@given(
    value=st.text(min_size=0, max_size=64, alphabet=st.characters(codec="ascii")),
    size=st.sampled_from([16, 32, 64]),
)
def test_stable_hash_bounds(value: str, size: int) -> None:
    """Check that stable_hash output is always a valid signed ``size``-bit integer."""
    # ``size`` is a width in bits, whereas ``utils.stable_hash`` takes its
    # digest size in bytes, hence the division by 8 below.
    lower_bound = -(1 << (size - 1))
    upper_bound = (1 << (size - 1)) - 1
    assert lower_bound <= utils.stable_hash(value, size=size // 8) <= upper_bound


@given(
    value=st.text(min_size=0, max_size=64, alphabet=st.characters(codec="ascii")),
    size=st.sampled_from([16, 32, 64]),
)
def test_stable_hash_determinism(value: str, size: int) -> None:
    """Hashing the same string twice must produce the same integer."""
    # ``size`` is in bits; ``utils.stable_hash`` expects a byte count.
    digest_bytes = size // 8
    first = utils.stable_hash(value, size=digest_bytes)
    second = utils.stable_hash(value, size=digest_bytes)
    assert first == second


@given(
    value_a=st.text(min_size=0, max_size=64, alphabet=st.characters(codec="ascii")),
    value_b=st.text(min_size=1, max_size=64, alphabet=st.characters(codec="ascii")),
    # 16-bit digests are deliberately excluded: two distinct random strings
    # collide with probability ~2**-16 at that width, which is high enough
    # to make this property test fail spuriously.
    size=st.sampled_from([32, 64]),
)
def test_stable_hash_collision(value_a: str, value_b: str, size: int) -> None:
    """Check that distinct inputs give distinct hashes (with overwhelming probability).

    No fixed-width hash can be collision free, so this is a sanity check
    that ``stable_hash`` actually depends on its input rather than a
    strict guarantee. It is only run for digest widths where an
    accidental collision is vanishingly unlikely.
    """
    # Identical inputs trivially hash to the same value; discard them.
    assume(value_a != value_b)
    # ``size`` is in bits; ``utils.stable_hash`` expects a byte count.
    hash_a = utils.stable_hash(value_a, size=size // 8)
    hash_b = utils.stable_hash(value_b, size=size // 8)
    assert hash_a != hash_b
30 changes: 3 additions & 27 deletions workflow/scripts/hf_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,32 +247,6 @@ def hf_simulate_station(
return station_name, epicentre_distance, station_waveform


def stable_hash(station: str) -> int:
    """Compute stable hashes for station names.

    The HF binary expects seeds. We want the provided seed to be
    independent of the order of stations in the stations lists. This
    is so setting HF seed reproduces the same outputs, even for
    different orders or subsets of the original station file. To do
    that, we generate stable hashes based on the station name.

    Parameters
    ----------
    station : str
        The station name.

    Returns
    -------
    int
        A hash of the station name. This is guaranteed to be in the
        range of a signed 32-bit integer.
    """
    # A 4-byte BLAKE2b digest, unlike the built-in ``hash``, does not vary
    # between interpreter runs.
    digest = hashlib.blake2b(station.encode("utf-8"), digest_size=4).digest()
    # ``byteorder`` only gained a default ("big") in Python 3.11; spelling
    # it out keeps the same value and also works on older interpreters.
    return int.from_bytes(digest, byteorder="big", signed=True)


def station_seeds(seed: int, stations: Iterable[str]) -> npt.NDArray[np.int32]:
"""Create a list of per-station seeds in an order-invariant fashion with a root seed.

Expand All @@ -289,7 +263,9 @@ def station_seeds(seed: int, stations: Iterable[str]) -> npt.NDArray[np.int32]:
npt.NDArray[np.int32]
A list of station seeds.
"""
station_hashes = np.array([stable_hash(name) for name in stations], dtype=np.int32)
station_hashes = np.array(
[utils.stable_hash(name) for name in stations], dtype=np.int32
)
# Rather than add (which could overflow and cause annoying numpy
# warnings), we just xor the hf seed with the station hashes.
# Since this is invertible, we ensure that the same hf seed gives
Expand Down
5 changes: 4 additions & 1 deletion workflow/scripts/realisation_to_srf.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,10 @@ def generate_fault_srf(
gsf_file_path=gsf_file_path,
nx=nx,
ny=ny,
seed=environment.seeds.genslip_seed,
# NOTE: This stable hash trick is also used in hf_sim.station_seeds,
# and is designed to give order-invariant stable seeds for segments
# based on their names.
seed=environment.seeds.genslip_seed ^ utils.stable_hash(name),
velocity_model_path=environment.velocity_model_path,
shypo=genslip_hypocentre_coords[0],
dhypo=genslip_hypocentre_coords[1],
Expand Down
25 changes: 25 additions & 0 deletions workflow/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Miscellaneous workflow utilities that couldn't go anywhere else."""

import hashlib
import os
import tempfile
import urllib.request
Expand Down Expand Up @@ -174,3 +175,27 @@ def dict_zip(*dicts: Mapping[K, Any], strict: bool = True) -> dict[K, tuple[Any,

result = {key: tuple(d[key] for d in dicts) for key in list(keys)}
return result


def stable_hash(value: str, size: int = 4) -> int:
    """Compute stable hashes for strings.

    The hash is derived from a BLAKE2b digest of the UTF-8 encoded
    string, so it depends only on ``value`` and ``size``.

    Parameters
    ----------
    value : str
        String to hash.
    size : int, optional
        Digest size in bytes. This is passed as ``digest_size`` to
        `hashlib.blake2b` and must be within the valid range for
        BLAKE2b (1 to 64 bytes).

    Returns
    -------
    int
        A hash of the value derived from a ``size``-byte BLAKE2b digest,
        interpreted as a big-endian two's complement integer. The result
        is within the range of a signed integer representable with
        ``size`` bytes.
    """
    digest = hashlib.blake2b(value.encode("utf-8"), digest_size=size).digest()
    # ``byteorder`` only gained a default ("big") in Python 3.11; spelling
    # it out keeps the same value and also works on older interpreters.
    return int.from_bytes(digest, byteorder="big", signed=True)
Loading