Skip to content

Commit

Permalink
test: add test for impure function correlation behavior
Browse files Browse the repository at this point in the history
Need to fix the UDF test case.

Related to ibis-project#8921,
trying to write down exactly what
the expected behavior is.
  • Loading branch information
NickCrews committed Apr 19, 2024
1 parent 08a33e9 commit 1fea07f
Showing 1 changed file with 65 additions and 0 deletions.
65 changes: 65 additions & 0 deletions ibis/backends/tests/test_impure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from __future__ import annotations

import random

import pandas.testing as tm
import pytest

import ibis
from ibis import _


@ibis.udf.scalar.python(side_effects=True)
def get_id() -> int:
    """Return a pseudo-random non-negative integer.

    Registered as a Python scalar UDF with ``side_effects=True``
    (presumably this marks the UDF as impure -- confirm against the ibis
    UDF API).
    """
    # The range is deliberately large. With only 1001 possible values, two
    # independent draws coincide about 0.1% of the time, which sits right
    # at the 0.999 "mostly different" threshold asserted in
    # test_impure_uncorrelated and makes that test fail by chance.
    # 2**31 - 1 still fits in a signed 32-bit integer.
    return random.randint(0, 2**31 - 1)


@pytest.mark.parametrize(
    "impure",
    [
        pytest.param(lambda: ibis.random(), id="random"),
        pytest.param(lambda: ibis.uuid(), id="uuid"),
        pytest.param(get_id, id="udf"),
    ],
)
def test_impure_uncorrelated(alltypes, impure):
    """Check that impure expressions selected side by side are uncorrelated.

    ``impure`` is a zero-argument callable producing an impure expression;
    ``alltypes`` is a pytest fixture (presumably a backend table -- it is
    defined outside this file).

    Two cases are covered: two separately constructed expressions, and a
    single expression object reused for both columns. In each case the
    two resulting columns must differ in at least 99.9% of rows.
    """
    # Case 1: two independently built expressions.
    separate = alltypes.select(x=impure(), y=impure())
    separate_df = separate.execute()
    assert (separate_df.x != separate_df.y).mean() >= 0.999

    # Case 2: the very same expression object used twice. Even if the two
    # expressions have the exact same ID, they should still be uncorrelated.
    shared = impure()
    reused = alltypes.select(x=shared, y=shared)
    reused_df = reused.execute()
    assert (reused_df.x != reused_df.y).mean() >= 0.999


@pytest.mark.parametrize(
    "impure",
    [
        pytest.param(lambda: ibis.random(), id="random"),
        pytest.param(lambda: ibis.uuid(), id="uuid"),
        pytest.param(
            get_id,
            id="udf",
            # once this is fixed, can we unify these params with the above params?
            marks=pytest.mark.xfail(reason="executed multiple times"),
        ),
    ],
)
def test_impure_correlated(alltypes, impure):
    """Check that references to one materialized impure column agree.

    The impure expression is first selected into a column named
    ``common``; a second select then projects that column twice, as ``x``
    and ``y``. Both projections must be equal (series names are ignored).
    """
    # Step 1: evaluate the impure expression into a single named column.
    materialized = alltypes.select(common=impure())

    # Step 2: reference that one column twice via the deferred `_`.
    projected = materialized.select(x=_.common, y=_.common)

    result = projected.execute()
    tm.assert_series_equal(result.x, result.y, check_names=False)

0 comments on commit 1fea07f

Please sign in to comment.