diff --git a/ibis/backends/tests/test_impure.py b/ibis/backends/tests/test_impure.py new file mode 100644 index 0000000000000..b547d51147cbe --- /dev/null +++ b/ibis/backends/tests/test_impure.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import random + +import pandas.testing as tm +import pytest + +import ibis +from ibis import _ + + +@ibis.udf.scalar.python(side_effects=True) +def get_id() -> int: + return random.randint(0, 1000) + + +@pytest.mark.parametrize( + "impure", + [ + pytest.param( + lambda: ibis.random(), + id="random", + ), + pytest.param( + lambda: ibis.uuid(), + id="uuid", + ), + pytest.param( + get_id, + id="udf", + ), + ], +) +def test_impure_uncorrelated(alltypes, impure): + df = alltypes.select(x=impure(), y=impure()).execute() + assert (df.x != df.y).mean() >= 0.999 + # Even if the two expressions have the exact same ID, they should still be + # uncorrelated + common = impure() + df = alltypes.select(x=common, y=common).execute() + assert (df.x != df.y).mean() >= 0.999 + + +@pytest.mark.parametrize( + "impure", + [ + pytest.param( + lambda: ibis.random(), + id="random", + ), + pytest.param( + lambda: ibis.uuid(), + id="uuid", + ), + pytest.param( + get_id, + id="udf", + # once this is fixed, can we unify these params with the above params? + marks=pytest.mark.xfail(reason="executed multiple times"), + ), + ], +) +def test_impure_correlated(alltypes, impure): + df = alltypes.select(common=impure()).select(x=_.common, y=_.common).execute() + tm.assert_series_equal(df.x, df.y, check_names=False)