Skip to content

Commit 741063a

Browse files
kszucs and cpcloud authored
fix(ir): make impure ibis.random() and ibis.uuid() functions return unique node instances (#8967)
Co-authored-by: Phillip Cloud <[email protected]>
1 parent 200e4ea commit 741063a

File tree

55 files changed

+502
-54
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+502
-54
lines changed

ibis/backends/bigquery/compiler.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,6 @@ class BigQueryCompiler(SQLGlotCompiler):
120120
ops.RPad: "rpad",
121121
ops.Levenshtein: "edit_distance",
122122
ops.Modulus: "mod",
123-
ops.RandomScalar: "rand",
124-
ops.RandomUUID: "generate_uuid",
125123
ops.RegexReplace: "regexp_replace",
126124
ops.RegexSearch: "regexp_contains",
127125
ops.Time: "time",
@@ -695,3 +693,6 @@ def visit_CountDistinct(self, op, *, arg, where):
695693
if where is not None:
696694
arg = self.if_(where, arg, NULL)
697695
return self.f.count(sge.Distinct(expressions=[arg]))
696+
697+
def visit_RandomUUID(self, op, **kwargs):
698+
return self.f.generate_uuid()

ibis/backends/clickhouse/compiler.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,6 @@ class ClickHouseCompiler(SQLGlotCompiler):
9898
ops.NotNull: "isNotNull",
9999
ops.NullIf: "nullIf",
100100
ops.RStrip: "trimRight",
101-
ops.RandomScalar: "randCanonical",
102-
ops.RandomUUID: "generateUUIDv4",
103101
ops.RegexReplace: "replaceRegexpAll",
104102
ops.RowNumber: "row_number",
105103
ops.StartsWith: "startsWith",
@@ -635,6 +633,12 @@ def visit_TimestampRange(self, op, *, start, stop, step):
635633
def visit_RegexSplit(self, op, *, arg, pattern):
636634
return self.f.splitByRegexp(pattern, self.cast(arg, dt.String(nullable=False)))
637635

636+
def visit_RandomScalar(self, op, **kwargs):
637+
return self.f.randCanonical()
638+
639+
def visit_RandomUUID(self, op, **kwargs):
640+
return self.f.generateUUIDv4()
641+
638642
@staticmethod
639643
def _generate_groups(groups):
640644
return groups

ibis/backends/datafusion/compiler.py

-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ class DataFusionCompiler(SQLGlotCompiler):
7575
ops.Last: "last_value",
7676
ops.Median: "median",
7777
ops.StringLength: "character_length",
78-
ops.RandomUUID: "uuid",
7978
ops.RegexSplit: "regex_split",
8079
ops.EndsWith: "ends_with",
8180
ops.ArrayIntersect: "array_intersect",

ibis/backends/druid/compiler.py

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class DruidCompiler(SQLGlotCompiler):
5959
ops.Median,
6060
ops.MultiQuantile,
6161
ops.Quantile,
62+
ops.RandomUUID,
6263
ops.RegexReplace,
6364
ops.RegexSplit,
6465
ops.RowID,

ibis/backends/duckdb/compiler.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ class DuckDBCompiler(SQLGlotCompiler):
4848
ops.MapMerge: "map_concat",
4949
ops.MapValues: "map_values",
5050
ops.Mode: "mode",
51-
ops.RandomUUID: "uuid",
5251
ops.TimeFromHMS: "make_time",
5352
ops.TypeOf: "typeof",
5453
ops.GeoPoint: "st_point",
@@ -450,3 +449,9 @@ def visit_StructField(self, op, *, arg, field):
450449
expression=sg.to_identifier(field, quoted=self.quoted),
451450
)
452451
return super().visit_StructField(op, arg=arg, field=field)
452+
453+
def visit_RandomScalar(self, op, **kwargs):
454+
return self.f.random()
455+
456+
def visit_RandomUUID(self, op, **kwargs):
457+
return self.f.uuid()

ibis/backends/exasol/compiler.py

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ class ExasolCompiler(SQLGlotCompiler):
6767
ops.Median,
6868
ops.MultiQuantile,
6969
ops.Quantile,
70+
ops.RandomUUID,
7071
ops.ReductionVectorizedUDF,
7172
ops.RegexExtract,
7273
ops.RegexReplace,

ibis/backends/flink/compiler.py

-2
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ class FlinkCompiler(SQLGlotCompiler):
7979
ops.MapKeys: "map_keys",
8080
ops.MapValues: "map_values",
8181
ops.Power: "power",
82-
ops.RandomScalar: "rand",
83-
ops.RandomUUID: "uuid",
8482
ops.RegexSearch: "regexp",
8583
ops.StrRight: "right",
8684
ops.StringLength: "char_length",

ibis/backends/impala/compiler.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ class ImpalaCompiler(SQLGlotCompiler):
7676
ops.Hash: "fnv_hash",
7777
ops.LStrip: "ltrim",
7878
ops.Ln: "ln",
79-
ops.RandomUUID: "uuid",
8079
ops.RStrip: "rtrim",
8180
ops.Strip: "trim",
8281
ops.TypeOf: "typeof",
@@ -146,7 +145,7 @@ def visit_CountDistinct(self, op, *, arg, where):
146145
def visit_Xor(self, op, *, left, right):
147146
return sg.and_(sg.or_(left, right), sg.not_(sg.and_(left, right)))
148147

149-
def visit_RandomScalar(self, op):
148+
def visit_RandomScalar(self, op, **_):
150149
return self.f.rand(self.f.utc_to_unix_micros(self.f.utc_timestamp()))
151150

152151
def visit_DayOfWeekIndex(self, op, *, arg):

ibis/backends/mssql/compiler.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,6 @@ class MSSQLCompiler(SQLGlotCompiler):
129129
ops.Ln: "log",
130130
ops.Log10: "log10",
131131
ops.Power: "power",
132-
ops.RandomScalar: "rand",
133-
ops.RandomUUID: "newid",
134132
ops.Repeat: "replicate",
135133
ops.Reverse: "reverse",
136134
ops.StringAscii: "ascii",
@@ -172,6 +170,9 @@ def _minimize_spec(start, end, spec):
172170
return None
173171
return spec
174172

173+
def visit_RandomUUID(self, op, **kwargs):
174+
return self.f.newid()
175+
175176
def visit_StringLength(self, op, *, arg):
176177
"""The MSSQL LEN function doesn't count trailing spaces.
177178

ibis/backends/oracle/compiler.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class OracleCompiler(SQLGlotCompiler):
8080
ops.ExtractWeekOfYear,
8181
ops.ExtractDayOfYear,
8282
ops.RowID,
83+
ops.RandomUUID,
8384
)
8485
)
8586

@@ -221,7 +222,7 @@ def visit_Log(self, op, *, arg, base):
221222
def visit_IsInf(self, op, *, arg):
222223
return arg.isin(self.POS_INF, self.NEG_INF)
223224

224-
def visit_RandomScalar(self, op):
225+
def visit_RandomScalar(self, op, **_):
225226
# Not using FuncGen here because of dotted function call
226227
return sg.func("dbms_random.value")
227228

ibis/backends/pandas/executor.py

-8
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,6 @@ def visit(cls, op: ops.Cast, arg, to):
8181
else:
8282
return PandasConverter.convert_scalar(arg, to)
8383

84-
@classmethod
85-
def visit(cls, op: ops.TypeOf, arg):
86-
raise OperationNotDefinedError("TypeOf is not implemented")
87-
88-
@classmethod
89-
def visit(cls, op: ops.RandomScalar):
90-
raise OperationNotDefinedError("RandomScalar is not implemented")
91-
9284
@classmethod
9385
def visit(cls, op: ops.Greatest, arg):
9486
return cls.columnwise(lambda df: df.max(axis=1), arg)

ibis/backends/postgres/compiler.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ class PostgresCompiler(SQLGlotCompiler):
100100
ops.MapContains: "exist",
101101
ops.MapKeys: "akeys",
102102
ops.MapValues: "avals",
103-
ops.RandomUUID: "gen_random_uuid",
104103
ops.RegexSearch: "regexp_like",
105104
ops.TimeFromHMS: "make_time",
106105
}
@@ -111,6 +110,9 @@ def _aggregate(self, funcname: str, *args, where):
111110
return sge.Filter(this=expr, expression=sge.Where(this=where))
112111
return expr
113112

113+
def visit_RandomUUID(self, op, **kwargs):
114+
return self.f.gen_random_uuid()
115+
114116
def visit_Mode(self, op, *, arg, where):
115117
expr = self.f.mode()
116118
expr = sge.WithinGroup(

ibis/backends/pyspark/compiler.py

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class PySparkCompiler(SQLGlotCompiler):
5757
(
5858
ops.RowID,
5959
ops.TimestampBucket,
60+
ops.RandomUUID,
6061
)
6162
)
6263

ibis/backends/snowflake/compiler.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ class SnowflakeCompiler(SQLGlotCompiler):
7878
ops.Hash: "hash",
7979
ops.Median: "median",
8080
ops.Mode: "mode",
81-
ops.RandomUUID: "uuid_string",
8281
ops.StringToTimestamp: "to_timestamp_tz",
8382
ops.TimeFromHMS: "time_from_parts",
8483
ops.TimestampFromYMDHMS: "timestamp_from_parts",
@@ -250,11 +249,14 @@ def visit_MapLength(self, op, *, arg):
250249
def visit_Log(self, op, *, arg, base):
251250
return self.f.log(base, arg, dialect=self.dialect)
252251

253-
def visit_RandomScalar(self, op):
252+
def visit_RandomScalar(self, op, **kwargs):
254253
return self.f.uniform(
255254
self.f.to_double(0.0), self.f.to_double(1.0), self.f.random()
256255
)
257256

257+
def visit_RandomUUID(self, op, **kwargs):
258+
return self.f.uuid_string()
259+
258260
def visit_ApproxMedian(self, op, *, arg, where):
259261
return self.agg.approx_percentile(arg, 0.5, where=where)
260262

ibis/backends/sql/compiler.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,6 @@ class SQLGlotCompiler(abc.ABC):
262262
ops.Power: "pow",
263263
ops.RPad: "rpad",
264264
ops.Radians: "radians",
265-
ops.RandomScalar: "random",
266265
ops.RegexSearch: "regexp_like",
267266
ops.RegexSplit: "regexp_split",
268267
ops.Repeat: "repeat",
@@ -688,6 +687,14 @@ def visit_Round(self, op, *, arg, digits):
688687
return sge.Round(this=arg, decimals=digits)
689688
return sge.Round(this=arg)
690689

690+
### Random Noise
691+
692+
def visit_RandomScalar(self, op, **kwargs):
693+
return self.f.rand()
694+
695+
def visit_RandomUUID(self, op, **kwargs):
696+
return self.f.uuid()
697+
691698
### Dtype Dysmorphia
692699

693700
def visit_TryCast(self, op, *, arg, to):

ibis/backends/sql/rewrites.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,13 @@ def merge_select_select(_, **kwargs):
152152
from the inner Select are inlined into the outer Select.
153153
"""
154154
# don't merge if either the outer or the inner select has window functions
155-
blocking = (ops.WindowFunction, ops.ExistsSubquery, ops.InSubquery, ops.Unnest)
155+
blocking = (
156+
ops.WindowFunction,
157+
ops.ExistsSubquery,
158+
ops.InSubquery,
159+
ops.Unnest,
160+
ops.Impure,
161+
)
156162
if _.find_below(blocking, filter=ops.Value):
157163
return _
158164
if _.parent.find_below(blocking, filter=ops.Value):

ibis/backends/sqlite/compiler.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ class SQLiteCompiler(SQLGlotCompiler):
103103
ops.Mode: "_ibis_mode",
104104
ops.Time: "time",
105105
ops.Date: "date",
106-
ops.RandomUUID: "uuid",
107106
}
108107

109108
def _aggregate(self, funcname: str, *args, where):
@@ -213,7 +212,7 @@ def visit_Clip(self, op, *, arg, lower, upper):
213212

214213
return arg
215214

216-
def visit_RandomScalar(self, op):
215+
def visit_RandomScalar(self, op, **kwargs):
217216
return 0.5 + self.f.random() / sge.Literal.number(float(-1 << 64))
218217

219218
def visit_Cot(self, op, *, arg):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
`t1`.`x`,
3+
`t1`.`y`,
4+
`t1`.`z`,
5+
IF(`t1`.`y` = `t1`.`z`, 'big', 'small') AS `size`
6+
FROM (
7+
SELECT
8+
`t0`.`x`,
9+
RAND() AS `y`,
10+
RAND() AS `z`
11+
FROM `t` AS `t0`
12+
) AS `t1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
`t1`.`x`,
3+
`t1`.`y`,
4+
`t1`.`z`,
5+
IF(`t1`.`y` = `t1`.`z`, 'big', 'small') AS `size`
6+
FROM (
7+
SELECT
8+
`t0`.`x`,
9+
generate_uuid() AS `y`,
10+
generate_uuid() AS `z`
11+
FROM `t` AS `t0`
12+
) AS `t1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
randCanonical() AS "y",
10+
randCanonical() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
generateUUIDv4() AS "y",
10+
generateUUIDv4() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
RANDOM() AS "y",
10+
RANDOM() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
UUID() AS "y",
10+
UUID() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
RANDOM() AS "y",
10+
RANDOM() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
RANDOM() AS "y",
10+
RANDOM() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
SELECT
2+
"t1"."x",
3+
"t1"."y",
4+
"t1"."z",
5+
CASE WHEN "t1"."y" = "t1"."z" THEN 'big' ELSE 'small' END AS "size"
6+
FROM (
7+
SELECT
8+
"t0"."x",
9+
UUID() AS "y",
10+
UUID() AS "z"
11+
FROM "t" AS "t0"
12+
) AS "t1"

0 commit comments

Comments
 (0)