|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +from __future__ import absolute_import, print_function, division |
| 3 | + |
| 4 | +# internal dependencies (petl compat and pandas bridge) |
| 5 | +from petl.compat import PY2 |
| 6 | +from petl.io.pandas import fromdataframe, todataframe |
| 7 | +# internal dependencies |
| 8 | +from petl.util.base import Table |
| 9 | +from petl.io.sources import read_source_from_arg, write_source_from_arg |
| 10 | + |
| 11 | + |
| 12 | +# third-party dependencies |
| 13 | +import pandas as pd |
| 14 | + |
| 15 | + |
def fromparquet(source=None, **kwargs):
    """
    Load a Parquet file through pandas and return it as a PETL table.

    The source is resolved with ``read_source_from_arg`` and opened in
    binary mode, so it can be a local filesystem path or any URL that
    petl's source handling can open (e.g., fsspec-backed S3 or GCS URLs).

    The file is read when this function is called: the whole content is
    parsed by ``pandas.read_parquet`` into a DataFrame, which is then
    wrapped with ``fromdataframe``.

    Example:

        >>> import petl as etl
        >>> # read a Parquet file into a PETL table
        ... table = etl.fromparquet('data/example.parquet')
        >>> table
        +-------+------+
        | name  | age  |
        +=======+======+
        | 'Amy' | 22   |
        +-------+------+
        | 'Bob' | 34   |
        +-------+------+

    :param source: path or URL to Parquet file
    :param kwargs: passed through to pandas.read_parquet
    :returns: a PETL Table
    """
    with read_source_from_arg(source).open('rb') as parquet_file:
        frame = pd.read_parquet(parquet_file, **kwargs)
    # The frame is fully materialised at this point, so the wrapper table
    # does not depend on the (now closed) file handle.
    return fromdataframe(frame)
| 45 | + |
def toparquet(table, source=None, **kwargs):
    """
    Write a PETL table or pandas DataFrame out to a Parquet file via pandas.

    A pandas DataFrame is written as-is; any other input is first converted
    with ``todataframe``.

    :param table: PETL table or pandas DataFrame
    :param source: filesystem path or fsspec-supported URL for output
    :param kwargs: passed through to pandas.DataFrame.to_parquet
    :returns: the object that was passed in as `table`, unchanged
    """
    src = write_source_from_arg(source)
    # Build the DataFrame before the destination is opened, so a failure
    # during conversion does not happen with the output already opened
    # for writing.
    if isinstance(table, pd.DataFrame):
        # todataframe() iterates its argument as header + rows, which is not
        # what iterating a DataFrame yields, so pass DataFrames through.
        df = table
    else:
        df = todataframe(table)
    with src.open('wb') as f:
        df.to_parquet(f, **kwargs)
    return table
| 60 | + |
| 61 | + |
| 62 | + |
# Attach the Parquet reader and writer to the Table class so they are also
# reachable as attributes of Table.
# NOTE(review): when either is called as a bound method, the table instance
# is passed as the first positional argument (``source`` for fromparquet,
# ``table`` for toparquet) -- confirm that is intended for fromparquet.
setattr(Table, 'fromparquet', fromparquet)
setattr(Table, 'toparquet', toparquet)
0 commit comments