-
Notifications
You must be signed in to change notification settings - Fork 121
Open
Labels
bug: Something isn't working
Description
It fails for any column, for example `file.size`.
import datachain as dc
def process(idx_list: list[int], files: list[dc.File]):
    """Count the JSON files in one partition.

    Used as the aggregation function of the ``.agg(...)`` call in this
    reproduction, which passes the ``idx`` and ``file`` columns and
    partitions by ``idx``.

    Args:
        idx_list: ``idx`` values of the rows in the partition. Only the
            first one is used as the batch id.
        files: File objects of the rows in the partition.

    Yields:
        One ``(idx, json_count, total_count)`` tuple for the partition.
    """
    idx = idx_list[0]
    jsons = [
        f for f in files
        if f.get_file_ext() == "json"
    ]
    # Progress output, one line per partition.
    print(f"Batch {idx}: {len(jsons)}/{len(files)}")
    yield idx, len(jsons), len(files)
# Reproduction: read the bucket listing, derive a partition key, persist,
# then aggregate per partition and save the result.
data = dc.read_storage("gs://mpii-human-pose/")
(
    data
    # idx = sys.id modulo 13; the column is obtained from `data` via data.c().
    .mutate(idx=data.c("sys.id") % 13)
    # The tracebacks in this report point at this persist() call.
    .persist()
    .agg(
        process,
        params=("idx", "file"),
        output={"idx": int, "processed": int, "total": int},
        partition_by="idx",
    )
    .save("batch-id")
)
Output CLI:
/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/data_storage/db_engine.py:56: SAWarning: SELECT statement has a cartesian product between FROM element(s) "aCMSImcSmdLLNIhj" and FROM element "anon_2". Apply join condition(s) between each element to resolve.
return statement.compile(dialect=cls.dialect, **kwargs)
Traceback (most recent call last):
File "/Users/dmitry/src/audio_examples/tmp.py", line 18, in <module>
.persist()
^^^^^^^^^
File "/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/lib/dc/datachain.py", line 534, in persist
query=self._query.save(project=project, feature_schema=schema)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/query/dataset.py", line 1802, in save
self.catalog.warehouse.copy_table(dr.get_table(), query.select())
File "/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/data_storage/sqlite.py", line 794, in copy_table
ids = self.db.execute(select_ids).fetchall()
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/data_storage/sqlite.py", line 101, in wrapper
raise exc
File "/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/data_storage/sqlite.py", line 97, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/dmitry/src/audio_examples/.audio/lib/python3.12/site-packages/datachain/data_storage/sqlite.py", line 241, in execute
result = self.db.execute(*self.compile_to_args(query))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sqlite3.OperationalError: no such table: aCMSImcSmdLLNIhj
Output Studio:
/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_sqlalchemy/drivers/compilers/sqlcompiler.py:312: SAWarning: SELECT statement has a cartesian product between FROM element(s) "anon_2" and FROM element "YyEJafkdNIGfqJtN". Apply join condition(s) between each element to resolve.
from_linter.warn()
Traceback (most recent call last):
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/dbapi/cursor.py", line 111, in execute
response = execute(
^^^^^^^^
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/client.py", line 382, in execute
rv = self.process_ordinary_query(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/client.py", line 580, in process_ordinary_query
return self.receive_result(with_column_types=with_column_types,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/client.py", line 212, in receive_result
return result.get_result()
^^^^^^^^^^^^^^^^^^^
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/result.py", line 50, in get_result
for packet in self.packet_generator:
^^^^^^^^^^^^^^^^^^^^^
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/client.py", line 228, in packet_generator
packet = self.receive_packet()
^^^^^^^^^^^^^^^^^^^^^
File "/tmp/local/datachain_venv/python3.12/default/lib/python3.12/site-packages/clickhouse_driver/client.py", line 245, in receive_packet
raise packet.exception
clickhouse_driver.errors.ServerException: Code: 60.
DB::Exception: Table studio_production_db.YyEJafkdNIGfqJtN does not exist. Stack trace:
0. ./ci/tmp/build/./src/Common/Exception.cpp:112: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000d529e08
1. DB::Exception::Exception(PreformattedMessage&&, int) @ 0x0000000009182e3c
2. DB::Exception::Exception<String, String>(int, FormatStringHelperImpl<std::type_identity<String>::type, std::type_identity<String>::type>, String&&, String&&) @ 0x000000000918295c
Version Info
Metadata
Metadata
Assignees
Labels
bug: Something isn't working