Skip to content

Commit 2babe3d

Browse files
authored
Fix: duplicates on migrations (#862)
1 parent 721e310 commit 2babe3d

File tree

5 files changed

+15
-23
lines changed

5 files changed

+15
-23
lines changed

sqlmesh/core/model/seed.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ def columns_to_types(self) -> t.Dict[str, exp.DataType]:
2929
def column_hashes(self) -> t.Dict[str, str]:
3030
df = self._get_df()
3131
return {
32-
column_name: str( # type: ignore
33-
zlib.crc32(df[column_name].to_string(index=False).encode("utf-8")) # type: ignore
34-
)
32+
column_name: str(zlib.crc32(df[column_name].to_json().encode("utf-8")))
3533
for column_name in df.columns
3634
}
3735

sqlmesh/core/state_sync/engine_adapter.py

+4-15
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,6 @@ def _migrate_rows(self) -> None:
449449
environments = self.get_environments()
450450

451451
snapshot_mapping = {}
452-
cache: t.Dict[SnapshotId, t.Dict] = {}
453452

454453
for snapshot in all_snapshots.values():
455454
seen = set()
@@ -473,18 +472,9 @@ def _migrate_rows(self) -> None:
473472
continue
474473

475474
queue.update(s.parents)
476-
cached_env = cache.get(snapshot_id)
477-
478-
if cached_env:
479-
models.update(cached_env["models"])
480-
audits.update(cached_env["audits"])
481-
else:
482-
models[s.name] = s.model
483-
484-
for audit in s.audits:
485-
audits[audit.name] = audit
486-
487-
cache[snapshot_id] = env
475+
models[s.name] = s.model
476+
for audit in s.audits:
477+
audits[audit.name] = audit
488478

489479
new_snapshot = deepcopy(snapshot)
490480

@@ -496,7 +486,6 @@ def _migrate_rows(self) -> None:
496486
models=models,
497487
audits=audits,
498488
)
499-
500489
new_snapshot.parents = tuple(
501490
SnapshotId(
502491
name=name,
@@ -560,7 +549,7 @@ def map_data_versions(
560549
}
561550

562551
self.delete_snapshots(snapshot_mapping)
563-
self._push_snapshots(snapshot_mapping.values())
552+
self._push_snapshots(set(snapshot_mapping.values()), overwrite=True)
564553

565554
updated_environments = []
566555

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Seed hashes moved from to_string to to_json for performance."""
2+
3+
4+
def migrate(state_sync): # type: ignore
5+
pass

tests/core/test_seed.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def test_column_hashes():
4343
"""
4444
seed = Seed(content=content)
4545
assert seed.column_hashes == {
46-
"key": "1460551213",
47-
"value": "3337736980",
48-
"ds": "1377719241",
46+
"key": "122302783",
47+
"value": "1969959181",
48+
"ds": "725407375",
4949
}
5050

5151
content_column_changed = """key,value,ds
@@ -57,5 +57,5 @@ def test_column_hashes():
5757
seed_column_changed = Seed(content=content_column_changed)
5858
assert seed_column_changed.column_hashes == {
5959
**seed.column_hashes,
60-
"ds": "646363718",
60+
"ds": "3396890652",
6161
}

tests/core/test_snapshot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ def test_fingerprint_seed_model():
399399
)
400400

401401
expected_fingerprint = SnapshotFingerprint(
402-
data_hash="4143164165",
402+
data_hash="4076055159",
403403
metadata_hash="1120323454",
404404
)
405405

0 commit comments

Comments
 (0)