RF tests, enable test_single_batch_entry globally #1699

Merged: 9 commits, merged on Mar 5, 2025
2 changes: 1 addition & 1 deletion tests/rf_utils.py
@@ -57,7 +57,7 @@ def run_model(
dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
test_tensorflow: bool = True,
allow_inf_nan_in_output: bool = False,
test_single_batch_entry: bool = False, # can later enable this globally
test_single_batch_entry: bool = True,
) -> TensorDict:
"""run"""
print(f"* run_model with dyn_dim_max_sizes={dyn_dim_max_sizes!r}")
7 changes: 4 additions & 3 deletions tests/test_rf_array.py
@@ -364,7 +364,8 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
# Note: The tested op here is a bit meaningless. It is also not consistent for different batch sizes...
run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, test_single_batch_entry=False)
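To make the note above concrete with a hypothetical op (not the one tested here): anything whose per-entry output depends on the other batch entries, for example subtracting a batch-wide mean, cannot pass the single-entry comparison.

import numpy as np

def batch_mean_subtract(x):
    # Per-entry result depends on the whole batch, so it is not batch-composition invariant.
    return x - x.mean(axis=0, keepdims=True)

x = np.random.randn(3, 5, 4)
full = batch_mean_subtract(x)
single = batch_mean_subtract(x[0:1])
print(np.allclose(full[0:1], single))  # False in general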


def test_expand_dim():
@@ -791,7 +792,7 @@ def _forward_step(*, extern_data: TensorDict, **_kwargs):
out = rf.reverse_sequence(extern_data["data"], axis=time_dim, handle_dynamic_dims=False)
out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))

run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step, test_single_batch_entry=False)
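Presumably why the single-entry check is disabled here: with handle_dynamic_dims=False the reversal runs over the padded time axis, so an entry's result shifts with the batch's maximum length. A rough NumPy analogue (the pool1d test further down with padding="same" is disabled for a similar padding-dependent reason):

import numpy as np

x = np.zeros((1, 12))
x[0, :5] = np.arange(1.0, 6.0)     # actual length 5, padded to the batch max of 12
rev_padded = x[:, ::-1]            # reverse over the padded axis (handle_dynamic_dims=False)
rev_alone = x[:, :5][:, ::-1]      # same entry reversed at its own length
print(np.array_equal(rev_padded[:, :5], rev_alone))  # False: padding ends up in front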


def test_where():
@@ -877,7 +878,7 @@ def _forward_step(*, model: rf.Conv1d, extern_data: TensorDict):
x, _ = rf.pool1d(x, mode="avg", pool_size=3, strides=1, padding="same", in_spatial_dim=time_dim)
x.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))

run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step, test_single_batch_entry=False)


def test_cast_sparse():
98 changes: 85 additions & 13 deletions tests/test_rf_cond.py
@@ -38,7 +38,7 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, test_single_batch_entry=False)


def test_cond_via_time_even():
@@ -69,8 +69,20 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 5},
test_single_batch_entry=False,
)
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 6},
test_single_batch_entry=False,
)
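Presumably why these cond tests disable the check: the branch depends on a property of the whole (padded) time axis, so cutting the batch down to one shorter entry can flip the branch. A hypothetical sketch in the spirit of test_cond_via_time_even:

import numpy as np

def cond_on_max_time(x):
    # Hypothetical: branch on whether the padded max time is even.
    return x * 2.0 if x.shape[1] % 2 == 0 else x + 1.0

x = np.random.randn(2, 6, 3)                     # max time 6 -> "even" branch
lens = [6, 5]
full = cond_on_max_time(x)
single = cond_on_max_time(x[1:2, :lens[1]])      # alone, the entry has max time 5 -> "odd" branch
print(np.allclose(full[1:2, :lens[1]], single))  # False: a different branch was taken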


def test_cond_shared_params():
@@ -100,8 +112,20 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 5},
test_single_batch_entry=False,
)
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 6},
test_single_batch_entry=False,
)


def test_cond_twice_shared_params():
@@ -140,8 +164,20 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 5},
test_single_batch_entry=False,
)
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 6},
test_single_batch_entry=False,
)


def test_cond_param_assign():
@@ -173,8 +209,20 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=())

out1 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
out2 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
out1 = run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 5},
test_single_batch_entry=False,
)
out2 = run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 6},
test_single_batch_entry=False,
)
assert out1["output"].raw_tensor == 2
assert out2["output"].raw_tensor == 5

@@ -208,8 +256,20 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=())

out1 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
out2 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
out1 = run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 5},
test_single_batch_entry=False,
)
out2 = run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 6},
test_single_batch_entry=False,
)
assert out1["output"].raw_tensor == 9
assert out2["output"].raw_tensor == 5

@@ -246,8 +306,20 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out.mark_as_default_output(shape=())
param.mark_as_output(shape=(), name="param")

out1 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
out2 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
out1 = run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 5},
test_single_batch_entry=False,
)
out2 = run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
dyn_dim_max_sizes={time_dim: 6},
test_single_batch_entry=False,
)
assert out1["output"].raw_tensor == 6 and out1["param"].raw_tensor == 2
assert out2["output"].raw_tensor == 42 and out2["param"].raw_tensor == 5

2 changes: 1 addition & 1 deletion tests/test_rf_conv.py
@@ -341,7 +341,7 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
# Note: Currently not the single batch test because there is another problem with RF PT pool,
# which does not correctly handle this case. We get:
# RuntimeError: max_pool1d() Invalid computed output size: -1
# test_single_batch_entry=True,
test_single_batch_entry=False,
)
out = out["output"]
(out_spatial_dim,) = out.get_dyn_size_tags()
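A guess at how the quoted max_pool1d error arises when only a single short entry is fed: the sliced time axis can be shorter than the pooling window, and PyTorch then computes a negative output size.

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 2)                      # batch 1, 3 channels, only 2 frames
try:
    F.max_pool1d(x, kernel_size=4, stride=1)  # window longer than the sequence
except RuntimeError as e:
    print(e)                                  # complains about an invalid computed output size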
31 changes: 21 additions & 10 deletions tests/test_rf_loop.py
@@ -66,20 +66,28 @@ def test_while_loop():

class _Net(rf.Module):
def __call__(self, x: Tensor) -> Tensor:
def _cond(s: Tuple[Tensor, Tensor]):
t, s_ = s
def _cond(s: Tuple[Tensor, Tensor, Tensor]) -> Tensor:
t, ended, s_ = s
if t.raw_tensor.__class__.__module__.startswith("torch"):
print("**", t.raw_tensor, rf.reduce_sum(s_, axis=s_.dims).raw_tensor)
return rf.logical_and(rf.reduce_sum(s_, axis=s_.dims) < 50, t < time_dim.get_dim_value_tensor())
print("**", t.raw_tensor, ended.raw_tensor, rf.reduce_sum(s_, axis=in_dim).raw_tensor)
return rf.logical_not(rf.reduce_all(ended, axis=[batch_dim]))

def _body(s):
t, s_ = s
return t + 1, s_ + rf.abs(rf.gather(x, indices=t, axis=time_dim))

_, final_s = rf.while_loop(
t, ended, s_ = s
cont = rf.logical_and(rf.reduce_sum(s_, axis=in_dim) < 50, t < time_dim.get_size_tensor())
ended = rf.logical_or(ended, rf.logical_not(cont))
s__ = s_ + rf.abs(rf.gather(x, indices=t, axis=time_dim, clip_to_valid=True))
s__ = rf.where(ended, s_, s__)
return t + 1, ended, s__

_, _, final_s = rf.while_loop(
_cond,
_body,
initial=(rf.zeros((), dtype=rf.get_default_array_index_dtype()), rf.zeros((batch_dim, in_dim))),
initial=(
rf.zeros((), dtype=rf.get_default_array_index_dtype()), # t
rf.zeros((batch_dim,), dtype="bool"), # ended
rf.zeros((batch_dim, in_dim)), # s
),
)
return final_s
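A sketch (not the diff itself) of the masked-update pattern the rewritten _cond/_body use: once an entry has ended, rf.where freezes its state, so running extra iterations for the sake of other batch entries no longer changes its result, and the loop output becomes independent of the batch composition.

import numpy as np

def run(x, lens):
    # x: [batch, time], lens: per-entry sequence lengths.
    batch = x.shape[0]
    lens = np.asarray(lens)
    t = 0
    ended = np.zeros(batch, dtype=bool)
    s = np.zeros(batch)
    while not ended.all():                   # cond: loop while any entry still runs
        cont = (s < 50) & (t < lens)
        ended = ended | ~cont
        idx = np.minimum(t, lens - 1)        # analogue of clip_to_valid=True
        s_new = s + np.abs(x[np.arange(batch), idx])
        s = np.where(ended, s, s_new)        # ended entries keep their frozen state
        t += 1
    return s

x = np.random.randn(3, 10)
full = run(x, [10, 7, 4])
single = run(x[1:2, :7], [7])
print(np.allclose(full[1], single[0]))       # True: per-entry result is batch independent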

@@ -209,4 +217,7 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out, beam_dim = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, beam_dim, in_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, test_tensorflow=False)
# TODO the way this is implemented, accessing y[-1], is not consistent w.r.t. different batch sizes...
run_model(
extern_data, lambda *, epoch, step: _Net(), _forward_step, test_tensorflow=False, test_single_batch_entry=False
)
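A rough illustration of the TODO above (hypothetical array, not the test's model): indexing the last position of a padded axis reads padding for the shorter entries, so the value depends on the batch's maximum length.

import numpy as np

y = np.zeros((2, 6))
y[0, :6] = np.arange(1.0, 7.0)   # length 6
y[1, :4] = np.arange(1.0, 5.0)   # length 4, the rest is padding
print(y[1, -1])                  # 0.0: -1 indexes the padded axis
print(y[1, :4][-1])              # 4.0: the real last frame when the entry stands alone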
11 changes: 10 additions & 1 deletion tests/test_rf_normalization.py
@@ -36,6 +36,8 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))

# Note: no test_single_batch_entry=False needed here because we currently don't check the running stats,
# and the output currently uses the initial running stats, i.e. should be the same for all batches.
run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
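A sketch of why the check can stay enabled here (going by the comment above): with fixed initial running statistics, the normalization is purely per-entry, so no output value can depend on the rest of the batch.

import numpy as np

def norm_with_running_stats(x, mean=0.0, var=1.0, eps=1e-5):
    # Normalization with fixed (running) statistics is an elementwise op.
    return (x - mean) / np.sqrt(var + eps)

x = np.random.randn(4, 9, 8)
full = norm_with_running_stats(x)
single = norm_with_running_stats(x[2:3])
print(np.allclose(full[2:3], single))  # True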


@@ -62,4 +64,11 @@ def _forward_step(*, model: _Net, extern_data: TensorDict):
out = model(extern_data["data"])
out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))

run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
run_model(
extern_data,
lambda *, epoch, step: _Net(),
_forward_step,
# BatchNorm by definition uses the batch dim.
# Needed here because track_running_stats=False and thus use_current_batch_stats=True.
test_single_batch_entry=False,
)
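And the converse for this test, a sketch of why the check must be off: with use_current_batch_stats, mean and variance are taken over the batch (and time) axes, so each entry's output changes as soon as its batch neighbours change.

import numpy as np

def norm_with_batch_stats(x, eps=1e-5):
    # Statistics over batch and time: every entry's output depends on the whole batch.
    mean = x.mean(axis=(0, 1), keepdims=True)
    var = x.var(axis=(0, 1), keepdims=True)
    return (x - mean) / np.sqrt(var + eps)

x = np.random.randn(4, 9, 8)
full = norm_with_batch_stats(x)
single = norm_with_batch_stats(x[2:3])
print(np.allclose(full[2:3], single))  # False in general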
28 changes: 12 additions & 16 deletions tests/test_rf_rec.py
@@ -16,6 +16,8 @@ def test_lstm():
extern_data = TensorDict(
{
"data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
"state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32"),
"state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32"),
"classes": Tensor("classes", [batch_dim, time_dim], dtype="int32", sparse_dim=out_dim),
}
)
@@ -32,10 +34,7 @@ def __call__(self, x: Tensor, *, spatial_dim: Dim, state: rf.LstmState) -> Tuple

# noinspection PyShadowingNames
def _forward_step(*, model: _Net, extern_data: TensorDict):
state = rf.LstmState(
h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
)
state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
out, new_state = model(extern_data["data"], state=state, spatial_dim=time_dim)
out.mark_as_output("out", shape=(batch_dim, time_dim, out_dim))
new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))
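Presumably the reason the random initial state moved from _forward_step into extern_data: a fresh rf.random_normal per call would give the full-batch run and the single-entry rerun different states, making the comparison meaningless, while state fed in as input data gets sliced along the batch dim together with everything else. A rough analogue:

import numpy as np

rng = np.random.default_rng()

def step_with_random_state(x):
    state = rng.standard_normal(x.shape)   # new random state every call: reruns differ
    return x + state

def step_with_given_state(x, state):       # state is part of the input: reruns match
    return x + state

x = np.random.randn(3, 4)
state = np.random.randn(3, 4)
full = step_with_given_state(x, state)
single = step_with_given_state(x[1:2], state[1:2])
print(np.allclose(full[1:2], single))      # True, so the comparison is meaningful
print(np.allclose(step_with_random_state(x)[1:2], step_with_random_state(x[1:2])))  # almost surely False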
@@ -49,6 +48,8 @@ def test_lstm_single_step():
extern_data = TensorDict(
{
"data": Tensor("data", [batch_dim, in_dim], dtype="float32"),
"state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32"),
"state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32"),
}
)

@@ -64,10 +65,7 @@ def __call__(self, x: Tensor, *, spatial_dim: Dim, state: rf.LstmState) -> Tuple

# noinspection PyShadowingNames
def _forward_step(*, model: _Net, extern_data: TensorDict):
state = rf.LstmState(
h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
)
state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
out, new_state = model(extern_data["data"], state=state, spatial_dim=single_step_dim)
out.mark_as_output("out", shape=(batch_dim, out_dim))
new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))
@@ -82,6 +80,8 @@ def test_zoneout_lstm():
extern_data = TensorDict(
{
"data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
"state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
"state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
"classes": Tensor("classes", [batch_dim, time_dim], dtype="int32", sparse_dim=out_dim),
}
)
@@ -103,10 +103,7 @@ def __call__(self, x: Tensor, *, spatial_dim: Dim, state: rf.LstmState) -> Tuple

# noinspection PyShadowingNames
def _forward_step(*, model: _Net, extern_data: TensorDict):
state = rf.LstmState(
h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
)
state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
out, new_state = model(extern_data["data"], state=state, spatial_dim=time_dim)
out.mark_as_output("out", shape=(batch_dim, time_dim, out_dim))
new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))
@@ -121,6 +118,8 @@ def test_zoneout_lstm_single_step():
extern_data = TensorDict(
{
"data": Tensor("data", [batch_dim, in_dim], dtype="float32"),
"state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32"),
"state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32"),
}
)

@@ -141,10 +140,7 @@ def __call__(self, x: Tensor, *, spatial_dim: Dim, state: rf.LstmState) -> Tuple

# noinspection PyShadowingNames
def _forward_step(*, model: _Net, extern_data: TensorDict):
state = rf.LstmState(
h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
)
state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
out, new_state = model(extern_data["data"], state=state, spatial_dim=single_step_dim)
out.mark_as_output("out", shape=(batch_dim, out_dim))
new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))