Skip to content

Commit

Permalink
refactor(duckdb): remove the `read_in_memory` method from the duckdb backend
Browse files Browse the repository at this point in the history

BREAKING CHANGE: The `read_in_memory` method is removed from the duckdb backend. Use `ibis.memtable` instead.
  • Loading branch information
cpcloud committed Dec 18, 2024
1 parent b258278 commit 2f7f8c5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 75 deletions.
37 changes: 0 additions & 37 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,43 +808,6 @@ def _read_parquet_pyarrow_dataset(
# by the time we execute against this so we register it
# explicitly.

@util.deprecated(
    instead="Pass in-memory data to `memtable` instead.",
    as_of="9.1",
    removed_in="10.0",
)
def read_in_memory(
    self,
    source: pd.DataFrame
    | pa.Table
    | pa.RecordBatchReader
    | pl.DataFrame
    | pl.LazyFrame,
    table_name: str | None = None,
) -> ir.Table:
    """Register an in-memory table object in the current database.

    Supported objects include pandas DataFrame, a Polars
    DataFrame/LazyFrame, or a PyArrow Table or RecordBatchReader.

    Parameters
    ----------
    source
        The data source.
    table_name
        An optional name to use for the created table. This defaults to
        a sequentially generated name.

    Returns
    -------
    ir.Table
        The just-registered table
    """
    # `or` (not an `is None` check) so an empty-string name also falls
    # back to a generated one.
    table_name = table_name or util.gen_name("read_in_memory")
    # Delegates registration to the module-level helper, then looks the
    # table back up by name to hand the caller an ibis expression.
    _read_in_memory(source, table_name, self)
    return self.table(table_name)

def read_delta(
self,
source_table: str,
Expand Down
48 changes: 10 additions & 38 deletions ibis/backends/duckdb/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,21 +275,6 @@ def test_attach_sqlite(data_dir, tmp_path):
assert dt.String(nullable=True) in set(types)


def test_re_read_in_memory_overwrite(con):
    """Registering new in-memory data under an existing name replaces the table.

    The second registration must fully overwrite the first: column count
    and schema reflect only the most recently registered frame.
    """
    first = pd.DataFrame({"a": ["a"], "b": [1], "d": ["hi"]})
    second = pd.DataFrame({"a": [1], "c": [1.4]})

    # Deprecated API: each call is expected to emit a FutureWarning
    # pointing users at `memtable`.
    with pytest.warns(FutureWarning, match="memtable"):
        table = con.read_in_memory(first, table_name="df")
    assert len(table.columns) == 3
    assert table.schema() == ibis.schema([("a", "str"), ("b", "int"), ("d", "str")])

    with pytest.warns(FutureWarning, match="memtable"):
        table = con.read_in_memory(second, table_name="df")
    assert len(table.columns) == 2
    assert table.schema() == ibis.schema([("a", "int"), ("c", "float")])


def test_memtable_with_nullable_dtypes(con):
data = pd.DataFrame(
{
Expand Down Expand Up @@ -381,37 +366,24 @@ def test_s3_403_fallback(con, httpserver, monkeypatch):

def test_register_numpy_str(con):
    """`ibis.memtable` accepts a pandas frame holding numpy str scalars and None.

    Round-trips the frame through the backend and checks the executed
    result is identical to the input.
    """
    # NOTE(review): the scraped diff interleaved the pre-commit
    # (`con.read_in_memory` + FutureWarning) and post-commit lines; this is
    # the reconstructed post-commit version using `ibis.memtable`.
    data = pd.DataFrame({"a": [np.str_("xyz"), None]})
    result = ibis.memtable(data)
    tm.assert_frame_equal(con.execute(result), data)


def test_memtable_recordbatchreader_raises(con):
    """`ibis.memtable` rejects a pyarrow RecordBatchReader with TypeError.

    A reader is single-use/streaming, so it cannot back a memtable;
    callers must materialize it first via ``reader.read_all()``.
    """
    # NOTE(review): the scraped diff interleaved the pre-commit
    # (`read_in_memory` + warning-based) test with the post-commit one;
    # this is the reconstructed post-commit version.
    table = pa.Table.from_batches(
        map(pa.RecordBatch.from_pydict, [{"x": [1, 2]}, {"x": [3, 4]}])
    )
    reader = table.to_reader()

    with pytest.raises(TypeError):
        ibis.memtable(reader)

    # Materializing the reader's contents is the supported path.
    t = ibis.memtable(reader.read_all())

    # First execute is fine
    res = con.execute(t)
    tm.assert_frame_equal(res, table.to_pandas())


def test_csv_with_slash_n_null(con, tmp_path):
Expand Down

0 comments on commit 2f7f8c5

Please sign in to comment.