Skip to content

Commit

Permalink
feat: prql namespace for polars.DataFrame and polars.LazyFrame (#373)
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi authored Apr 29, 2024
1 parent afb61cc commit 86a908e
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 11 deletions.
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
pyprql contains:

- pyprql.pandas_accessor — Pandas integration for PRQL
- pyprql.polars_namespace — Polars integration for PRQL
- pyprql.magic — IPython magic for connecting to databases using `%%prql`
- pyprql.compile — An export of `prqlc`'s `compile` function

Expand All @@ -24,6 +25,12 @@ For docs, check out the [pyprql docs](https://pyprql.readthedocs.io/), and the
pip install pyprql
```

Or, install with optional dependencies:

```sh
pip install pyprql[polars]
```

## Usage

### Pandas integration
Expand All @@ -36,6 +43,16 @@ df = (...)
results_df = df.prql.query("select {age, name, occupation} | filter age > 21")
```

### Polars integration

```python
import polars as pl
import pyprql.polars_namespace

df = (...)
results_df = df.prql.query("select {age, name, occupation} | filter age > 21")
```

### Jupyter Magic

```python
Expand Down Expand Up @@ -71,7 +88,7 @@ FROM
artists
```

For context, `prqlc` is the Python binding for `prql-compiler`, so only
For context, `prqlc` in Python is the Python binding for the `prqlc` Rust crate, so it
contains only functions for compilation; this library offers broader Python
integrations and tooling.

Expand Down
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 3 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jupysql = ">=0.10"
pandas = ">=1.5"
prqlc = "^0.11.0"
traitlets = ">=5"
polars = { version = ">= 0.20.23", extras = ["polars"] }

[tool.poetry.dev-dependencies]
Sphinx = "~7.1"
Expand Down Expand Up @@ -66,12 +67,7 @@ changelog_file = "CHANGELOG.md"
major_on_zero = false
upload_to_pypi = true
upload_to_release = true
version_toml = [
"pyproject.toml:tool.poetry.version",
]
version_variable = [
"pyprql/__init__.py:__version__",
]
version_toml = ["pyproject.toml:tool.poetry.version"]

[tool.pytest.ini_options]
addopts = """
Expand All @@ -97,7 +93,7 @@ requires = ["poetry-core>=1.3.0"]

[tool.ruff]
fix = true
ignore = [
lint.ignore = [
# Line length — black handles
"E5", #
# No lambdas — too strict
Expand Down
2 changes: 0 additions & 2 deletions pyprql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,3 @@

import prqlc # noqa: F401
from prqlc import compile # noqa: F401

__version__ = "0.8.0"
1 change: 1 addition & 0 deletions pyprql/polars_namespace/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .prql import PrqlNamespace # noqa: F401
25 changes: 25 additions & 0 deletions pyprql/polars_namespace/prql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from __future__ import annotations
import polars as pl
import prqlc

from typing import TypeVar, Generic


# Type variable constrained to the two Polars frame kinds. It parameterizes
# PrqlNamespace so that `query` is annotated as returning the same kind of
# frame (DataFrame or LazyFrame) that the namespace was created with.
T_DF = TypeVar("T_DF", pl.DataFrame, pl.LazyFrame)


@pl.api.register_dataframe_namespace("prql")
@pl.api.register_lazyframe_namespace("prql")
class PrqlNamespace(Generic[T_DF]):
    """PRQL query support exposed as the ``prql`` namespace on Polars frames.

    The class is registered for both ``pl.DataFrame`` and ``pl.LazyFrame``,
    so ``frame.prql.query(...)`` is available on either kind of frame once
    this module has been imported.
    """

    def __init__(self, df: T_DF):
        # The frame this namespace instance operates on.
        self._df = df

    def query(self, prql_query: str, *, table_name: str | None = None) -> T_DF:
        """Run a PRQL pipeline against the wrapped frame.

        A ``from`` clause naming the frame is prepended to ``prql_query``,
        the result is compiled to SQL with ``prqlc`` and then executed
        through the frame's own ``sql`` method.

        Args:
            prql_query: PRQL pipeline without a leading ``from`` clause,
                e.g. ``"select {a, b} | filter a > 1"``.
            table_name: Name under which the frame is made available to the
                SQL query. ``None`` (or an empty string) means ``"self"``.
                The name is inserted verbatim into the PRQL ``from`` clause,
                so it must be a valid PRQL identifier.

        Returns:
            Whatever the wrapped frame's ``sql`` method returns for the
            compiled query (annotated as the same frame kind as the input).
        """
        # The PRQL `from` clause must name the same relation that is handed
        # to `sql()` below; hard-coding "self" here would make the generated
        # SQL reference a table that is not registered when a custom
        # `table_name` is supplied.
        relation: str = table_name or "self"
        prepended_query: str = f"from {relation} \n {prql_query}"
        sql_query: str = prqlc.compile(
            prepended_query,
            options=prqlc.CompileOptions(
                target="sql.any", format=False, signature_comment=False
            ),
        )
        return self._df.sql(sql_query, table_name=relation)
29 changes: 29 additions & 0 deletions pyprql/tests/test_polars_namespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import polars as pl
import pytest


@pytest.fixture(autouse=True)
def import_accessor():
    """Import the Polars namespace package before each test.

    The import is performed only for its side effect: loading the package
    registers the ``prql`` namespace on Polars frames, which the tests in
    this module rely on.
    """
    import pyprql.polars_namespace  # noqa


def test_polars_df_namespace():
    """An eager DataFrame can be queried through its ``prql`` namespace."""
    pipeline = "select {latitude, longitude} | filter latitude > 1 | sort latitude"
    expected = {"latitude": [2, 3], "longitude": [2, 3]}

    frame = pl.DataFrame({"latitude": [1, 2, 3], "longitude": [1, 2, 3]})
    filtered = frame.prql.query(pipeline)

    assert filtered.to_dict(as_series=False) == expected


def test_polars_lf_namespace():
    """A LazyFrame can be queried through ``prql``; the result is collected
    before being compared with the expected rows."""
    pipeline = "select {latitude, longitude} | filter latitude > 1 | sort latitude"
    expected = {"latitude": [2, 3], "longitude": [2, 3]}

    lazy_frame = pl.LazyFrame({"latitude": [1, 2, 3], "longitude": [1, 2, 3]})
    lazy_result = lazy_frame.prql.query(pipeline)
    collected = lazy_result.collect()

    assert collected.to_dict(as_series=False) == expected

0 comments on commit 86a908e

Please sign in to comment.