From 9636b1a2996ed34fa40fe47daad2369a70c71a05 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Fri, 6 Dec 2024 18:56:18 +0800 Subject: [PATCH] doc: demonstrate retrieving query result in arrow format (#263) --- compatibility/pg-pytools/pyarrow_test.py | 13 +++++++++++++ docs/tutorial/pg-python-data-tools.md | 17 +++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/compatibility/pg-pytools/pyarrow_test.py b/compatibility/pg-pytools/pyarrow_test.py index 186f8232..1bbaba85 100644 --- a/compatibility/pg-pytools/pyarrow_test.py +++ b/compatibility/pg-pytools/pyarrow_test.py @@ -48,3 +48,16 @@ df_from_pg = df_from_pg.astype({'id': 'int64', 'num': 'int64'}) # Compare the original DataFrame with the DataFrame from PostgreSQL assert df.equals(df_from_pg), "DataFrames are not equal" + + # Copy query result to a pandas DataFrame + arrow_data = io.BytesIO() + with cur.copy("COPY (SELECT id, num * num AS num FROM test.tb1) TO STDOUT (FORMAT arrow)") as copy: + for block in copy: + arrow_data.write(block) + + with pa.ipc.open_stream(arrow_data.getvalue()) as reader: + df_from_pg = reader.read_pandas().astype({'id': 'int64', 'num': 'int64'}) + df['num'] = df['num'] ** 2 + df = df.drop('data', axis='columns') + # Compare the original DataFrame with the DataFrame from PostgreSQL + assert df.equals(df_from_pg), "DataFrames are not equal" diff --git a/docs/tutorial/pg-python-data-tools.md b/docs/tutorial/pg-python-data-tools.md index 870f5467..790ec925 100644 --- a/docs/tutorial/pg-python-data-tools.md +++ b/docs/tutorial/pg-python-data-tools.md @@ -119,3 +119,20 @@ polars_df = pl.from_arrow(arrow_df) ```python polars_df = pl.from_pandas(pandas_df) ``` + +## 4. Retrieving Query Results as DataFrames + +You can also retrieve query results from MyDuck Server as DataFrames using Arrow format. Here is an example: + +```python +# Copy query result to a Polars DataFrame +arrow_data = io.BytesIO() +with cur.copy("COPY (SELECT id, num * num AS num FROM test.tb1) TO STDOUT (FORMAT arrow)") as copy: + for block in copy: + arrow_data.write(block) + + with pa.ipc.open_stream(arrow_data.getvalue()) as reader: + arrow_table = reader.read_all() + polars_df = pl.from_arrow(arrow_table) + print(polars_df) +``` \ No newline at end of file