Skip to content

Commit 1055c37

Browse files
IndexSeek authored and gforsyth committed
test(core): categorize_pandas
1 parent c8c2f2c commit 1055c37

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

tests/test_core.py

+39
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from textwrap import dedent
22

33
import ibis
4+
import pandas as pd
45
import pyarrow as pa
56

67
import ibisml as ml
@@ -50,3 +51,41 @@ def test_transform_result_repr():
5051
d int64
5152
}"""
5253
)
54+
55+
56+
def test_categorize_pandas():
    """Exercise ``TransformResult._categorize_pandas`` on integer-coded columns.

    A frame of integer codes is passed through ``_categorize_pandas`` for one
    ordered and one unordered ``Categories`` entry. Each resulting column must
    have a categorical dtype, carry the matching ``ordered`` flag, and contain
    exactly one missing value.
    """
    categories = {
        "num_col": ml.core.Categories(pa.array([1, 2, 3, 4, 5]), ordered=True),
        "str_col": ml.core.Categories(pa.array(["a", "b", "c"]), ordered=False),
    }

    # Each column holds exactly one -1 code; the assertions below expect
    # exactly one NA per column (presumably -1 marks "missing" -- confirm
    # against _categorize_pandas).
    coded = pd.DataFrame(
        {
            "num_col": [0, 1, 2, -1, 4],
            "str_col": [0, 1, 2, 0, -1],
        }
    )

    table = ibis.table({"num_col": "int64", "str_col": "string"})
    result = ml.TransformResult(table, categories=categories)

    categorized = result._categorize_pandas(coded)

    expected_ordered = {"num_col": True, "str_col": False}
    for name, ordered in expected_ordered.items():
        column = categorized[name]
        assert isinstance(column.dtype, pd.CategoricalDtype)
        assert column.cat.ordered is ordered
        assert column.isna().sum() == 1

0 commit comments

Comments
 (0)