Skip to content

Commit

Permalink
feat(datafusion): pivot_longer
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and jcrist committed Jul 31, 2024
1 parent a706f54 commit 2330b0c
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 5 deletions.
7 changes: 7 additions & 0 deletions ibis/backends/sql/compilers/datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,3 +478,10 @@ def visit_Aggregate(self, op, *, parent, groups, metrics):
sel = sel.group_by(*by_names_quoted)

return sel

def visit_StructColumn(self, op, *, names, values):
args = []
for name, value in zip(names, values):
args.append(sge.convert(name))
args.append(value)
return self.f.named_struct(*args)
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
WITH "t5" AS (
SELECT
"t4"."field_of_study",
FIRST_VALUE("t4"."diff") FILTER(WHERE
NOT "t4"."diff" IS NULL) AS "diff"
FROM (
SELECT
"t4"."years",
"t4"."degrees",
"t4"."earliest_degrees",
"t4"."latest_degrees",
"t4"."diff",
"t4"."field_of_study"
FROM (
SELECT
"t3"."field_of_study",
"t3"."years",
"t3"."degrees",
"t3"."earliest_degrees",
"t3"."latest_degrees",
"t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff"
FROM (
SELECT
"t2"."field_of_study",
"t2"."years",
"t2"."degrees",
FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees",
LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees"
FROM (
SELECT
"t1"."field_of_study",
"t1"."__pivoted__"."years" AS "years",
"t1"."__pivoted__"."degrees" AS "degrees"
FROM (
SELECT
"t0"."field_of_study",
UNNEST(
MAKE_ARRAY(
NAMED_STRUCT('years', '1970-71', 'degrees', "t0"."1970-71"),
NAMED_STRUCT('years', '1975-76', 'degrees', "t0"."1975-76"),
NAMED_STRUCT('years', '1980-81', 'degrees', "t0"."1980-81"),
NAMED_STRUCT('years', '1985-86', 'degrees', "t0"."1985-86"),
NAMED_STRUCT('years', '1990-91', 'degrees', "t0"."1990-91"),
NAMED_STRUCT('years', '1995-96', 'degrees', "t0"."1995-96"),
NAMED_STRUCT('years', '2000-01', 'degrees', "t0"."2000-01"),
NAMED_STRUCT('years', '2005-06', 'degrees', "t0"."2005-06"),
NAMED_STRUCT('years', '2010-11', 'degrees', "t0"."2010-11"),
NAMED_STRUCT('years', '2011-12', 'degrees', "t0"."2011-12"),
NAMED_STRUCT('years', '2012-13', 'degrees', "t0"."2012-13"),
NAMED_STRUCT('years', '2013-14', 'degrees', "t0"."2013-14"),
NAMED_STRUCT('years', '2014-15', 'degrees', "t0"."2014-15"),
NAMED_STRUCT('years', '2015-16', 'degrees', "t0"."2015-16"),
NAMED_STRUCT('years', '2016-17', 'degrees', "t0"."2016-17"),
NAMED_STRUCT('years', '2017-18', 'degrees', "t0"."2017-18"),
NAMED_STRUCT('years', '2018-19', 'degrees', "t0"."2018-19"),
NAMED_STRUCT('years', '2019-20', 'degrees', "t0"."2019-20")
)
) AS "__pivoted__"
FROM "humanities" AS "t0"
) AS "t1"
) AS "t2"
) AS "t3"
) AS "t4"
) AS t4
GROUP BY
"t4"."field_of_study"
)
SELECT
*
FROM (
SELECT
*
FROM "t5" AS "t6"
ORDER BY
"t6"."diff" DESC NULLS LAST
LIMIT 10
) AS "t9"
UNION ALL
SELECT
*
FROM (
SELECT
*
FROM "t5" AS "t6"
WHERE
"t6"."diff" < 0
ORDER BY
"t6"."diff" ASC
LIMIT 10
) AS "t10"
4 changes: 1 addition & 3 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,9 +901,7 @@ def test_zip_null(con, fn):
@builtin_array
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
@pytest.mark.notimpl(
["datafusion"], raises=Exception, reason="probably generating invalid SQL"
)
@pytest.mark.notimpl(["datafusion"], raises=Exception, reason="not yet supported")
@pytest.mark.notimpl(
["polars"],
raises=com.OperationNotDefinedError,
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,7 @@ def query(t, group_cols):
@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError)
@pytest.mark.notimpl(["druid"], raises=AssertionError)
@pytest.mark.notyet(
["datafusion", "impala", "mssql", "mysql", "sqlite"],
["impala", "mssql", "mysql", "sqlite"],
reason="backend doesn't support arrays and we don't implement pivot_longer with unions yet",
raises=com.OperationNotDefinedError,
)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_isin_bug(con, snapshot):
raises=NotImplementedError,
)
@pytest.mark.notyet(
["datafusion", "exasol", "oracle", "flink"],
["exasol", "oracle", "flink"],
reason="no unnest support",
raises=exc.OperationNotDefinedError,
)
Expand Down

0 comments on commit 2330b0c

Please sign in to comment.