Skip to content

Commit 7fc1b2e

Browse files
committed
Encode additional parameters in Component.data and store in 1 table
1 parent 72cbed1 commit 7fc1b2e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+785
-727
lines changed

.github/workflows/ci_code.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ jobs:
103103
104104
- name: Unit Testing
105105
run: |
106+
sqlite3 test.db "create table t(f int); drop table t;"
106107
make unit_testing pytest_arguments="--cov=superduper --cov-report=xml" SUPERDUPER_CONFIG=test/configs/${{ matrix.config }}
107108
108109
- name: Usecase Testing

.github/workflows/ci_plugins.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ jobs:
9595
- name: Plugin Testing
9696
run: |
9797
export PYTHONPATH=./
98+
if [ "${{ matrix.plugin }}" = "sql" ]; then
99+
sqlite3 test.db "create table t(f int); drop table t;"
100+
fi
98101
if [ -d "plugins/${{ matrix.plugin }}/plugin_test" ]; then
99102
pytest --cov=superduper --cov-report=xml plugins/${{ matrix.plugin }}/plugin_test
100103
else
@@ -103,6 +106,9 @@ jobs:
103106
104107
- name: Optionally run the base testing
105108
run: |
109+
if [ "${{ matrix.plugin }}" = "sql" ]; then
110+
sqlite3 test.db "create table t(f int); drop table t;"
111+
fi
106112
SUPERDUPER_CONFIG="plugins/${{ matrix.plugin }}/plugin_test/config.yaml"
107113
if [ -f "$SUPERDUPER_CONFIG" ]; then
108114
echo "Running the base testing..."

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616
- Add assertion to verify directory copy in FileSystemArtifactStore
1717
- Batch the Qdrant requests and add a retry to the config of Qdrant
1818
- Add use_component_cache to config
19+
- Save data in the `Component` table instead of in individual tables
1920

2021
### Bug fixes
2122

plugins/mongodb/superduper_mongodb/data_backend.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from bson.objectid import ObjectId
77
from superduper import CFG, logging
88
from superduper.backends.base.data_backend import BaseDataBackend
9+
from superduper.base.base import REGISTRY
910
from superduper.base.query import Query
1011
from superduper.base.schema import Schema
1112

@@ -171,24 +172,29 @@ def missing_outputs(self, query, predict_id: str):
171172
def select(self, query: Query):
172173
"""Select data from the table."""
173174
if query.decomposition.outputs:
174-
return self._outputs(query)
175-
176-
collection = self._database[query.table]
177-
178-
logging.debug(str(query))
179-
180-
limit = self._get_limit(query)
181-
if limit:
182-
native_query = collection.find(
183-
self._mongo_filter(query), self._get_project(query)
184-
).limit(limit)
185-
if skip := self._get_skip(query):
186-
native_query = native_query.skip(skip)
187-
return list(native_query)
188-
189-
return list(
190-
collection.find(self._mongo_filter(query), self._get_project(query))
191-
)
175+
output = self._outputs(query)
176+
else:
177+
collection = self._database[query.table]
178+
179+
logging.debug(str(query))
180+
181+
limit = self._get_limit(query)
182+
if limit:
183+
native_query = collection.find(
184+
self._mongo_filter(query), self._get_project(query)
185+
).limit(limit)
186+
if skip := self._get_skip(query):
187+
native_query = native_query.skip(skip)
188+
output = list(native_query)
189+
else:
190+
output = list(
191+
collection.find(self._mongo_filter(query), self._get_project(query))
192+
)
193+
if query.table in REGISTRY and REGISTRY[query.table].primary_id != '_id':
194+
for o in output:
195+
if '_id' in o:
196+
del o['_id']
197+
return output
192198

193199
def to_id(self, id):
194200
"""Convert the ID to the correct format."""

plugins/qdrant/superduper_qdrant/qdrant.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ def _do_scroll(offset):
160160

161161
return ids
162162

163-
164163
def _create_collection(self):
165164
measure = (
166165
self.measure.name

plugins/sql/plugin_test/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
data_backend: sqlite://
1+
data_backend: sqlite://./test.db
22
auto_schema: false
33
force_apply: true
44
json_native: false

plugins/sql/plugin_test/test_query.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import typing as t
12
from test.utils.setup.fake_data import add_listeners, add_models, add_random_data
23

34
import numpy as np
@@ -44,7 +45,7 @@ def test_renamings(db):
4445
add_random_data(db, n=5)
4546
add_models(db)
4647
add_listeners(db)
47-
t = db["documents"]
48+
t = db["documentz"]
4849
listener_uuid = [db.load('Listener', k).outputs for k in db.show("Listener")][0]
4950
q = t.select("id", "x", "y").outputs(listener_uuid.split('__', 1)[-1])
5051
data = q.execute()
@@ -62,13 +63,13 @@ def test_serialize_query(db):
6263

6364
def test_get_data(db):
6465
add_random_data(db, n=5)
65-
db["documents"].limit(2)
66-
db.metadata.get_component("Table", "documents")
66+
db["documentz"].limit(2)
67+
db.metadata.get_component("Table", "documentz")
6768

6869

6970
def test_insert_select(db):
7071
add_random_data(db, n=5)
71-
q = db["documents"].select("id", "x", "y").limit(2)
72+
q = db["documentz"].select("id", "x", "y").limit(2)
7273
r = q.execute()
7374

7475
assert len(r) == 2
@@ -77,7 +78,7 @@ def test_insert_select(db):
7778

7879
def test_filter(db):
7980
add_random_data(db, n=5)
80-
t = db["documents"]
81+
t = db["documentz"]
8182
q = t.select("id", "y")
8283
r = q.execute()
8384
ys = [x["y"] for x in r]
@@ -88,17 +89,18 @@ def test_filter(db):
8889
assert len(r) == uq[1][0]
8990

9091

91-
class documents(Base):
92+
class documents_plugin(Base):
93+
primary_id: t.ClassVar[str] = 'id'
9294
this: 'str'
9395

9496

9597
def test_select_using_ids(db):
96-
db.create(documents)
98+
db.create(documents_plugin)
9799

98-
table = db["documents"]
100+
table = db["documents_plugin"]
99101
table.insert([{"this": f"is a test {i}", "id": str(i)} for i in range(4)])
100102

101-
basic_select = db['documents'].select()
103+
basic_select = db['documents_plugin'].select()
102104

103105
assert len(basic_select.execute()) == 4
104106
assert len(basic_select.subset(['1', '2'])) == 2
@@ -112,16 +114,16 @@ def my_func(this: str):
112114

113115
my_func = ObjectModel('my_func', object=my_func)
114116

115-
db.create(documents)
117+
db.create(documents_plugin)
116118

117-
table = db["documents"]
119+
table = db["documents_plugin"]
118120
table.insert([{"this": f"is a test {i}", "id": str(i)} for i in range(4)])
119121

120122
listener = Listener(
121123
'test',
122124
model=my_func,
123125
key='this',
124-
select=db['documents'].select(),
126+
select=db['documents_plugin'].select(),
125127
)
126128
db.apply(listener)
127129

plugins/sql/superduper_sql/data_backend.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,10 @@ class SQLDatabackend(IbisDataBackend):
359359

360360
def __init__(self, uri, plugin, flavour=None):
361361
super().__init__(uri, plugin, flavour)
362-
self._create_sqlalchemy_engine()
362+
if 'sqlite://./' in uri:
363+
self._create_sqlalchemy_engine(uri.replace('./', '//'))
364+
else:
365+
self._create_sqlalchemy_engine(uri)
363366
self.sm = sessionmaker(bind=self.alchemy_engine)
364367

365368
@property
@@ -374,6 +377,8 @@ def update(self, table, condition, key, value):
374377
with self.sm() as session:
375378
metadata = MetaData()
376379

380+
assert table in self.list_tables()
381+
377382
metadata.reflect(bind=session.bind)
378383
table = Table(table, metadata, autoload_with=session.bind)
379384

@@ -422,16 +427,16 @@ def delete(self, table, condition):
422427
except NoSuchTableError:
423428
raise exceptions.NotFound("Table", table)
424429

425-
def _create_sqlalchemy_engine(self):
430+
def _create_sqlalchemy_engine(self, uri):
426431
with self.connection_manager.get_connection() as conn:
427-
self.alchemy_engine = create_engine(self.uri, creator=lambda: conn.con)
432+
self.alchemy_engine = create_engine(uri, creator=lambda: conn.con)
428433
if not self._test_engine():
429434
logging.warn(
430435
"Unable to reuse the ibis connection "
431436
"to create the SQLAlchemy engine. "
432437
"Creating a new connection with the URI."
433438
)
434-
self.alchemy_engine = create_engine(self.uri)
439+
self.alchemy_engine = create_engine(uri)
435440

436441
def _test_engine(self):
437442
"""Test the engine."""

0 commit comments

Comments
 (0)