Skip to content

Commit 2e97471

Browse files
authored
Merge pull request #1031 from koordinates/faster-diff-4
diff: Add --no-sort-keys option
2 parents 202407d + 1b9a7fe commit 2e97471

File tree

5 files changed

+22
-6
lines changed

5 files changed

+22
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
99

1010
- Much faster access to tabular/vector datasets (about 75% more features processed per second) by switching to [msgspec](https://jcristharif.com/msgspec/) - [#1025](https://github.com/koordinates/kart/pull/1025)
1111
- diff: Faster JSON-Lines output (also using msgspec)
12+
- diff: Added `--no-sort-keys` option to disable sorting of features by name/PK, which gives a minor speed improvement - [#1031](https://github.com/koordinates/kart/pull/1031)
1213
- Linux builds now require glibc 2.28+ [#1027](https://github.com/koordinates/kart/pull/1027) - This means minimum distro versions are:
1314
- Debian 10+
1415
- Ubuntu 18.10+

kart/base_diff_writer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def __init__(
9191
diff_estimate_accuracy=None,
9292
# used by html diff only
9393
html_template=None,
94+
sort_keys=True,
9495
):
9596
self.repo = repo
9697
self.commit_spec = commit_spec
@@ -134,6 +135,7 @@ def __init__(
134135
self.commit = None
135136
self.do_convert_to_dataset_format = None
136137
self.do_full_file_diffs = False
138+
self.sort_keys = sort_keys
137139

138140
def include_target_commit_as_header(self):
139141
"""
@@ -451,6 +453,11 @@ def get_file_diff(self):
451453
repo_key_filter=self.repo_key_filter,
452454
)
453455

456+
def iter_deltadiff_items(self, deltas):
457+
if self.sort_keys:
458+
return deltas.sorted_items()
459+
return deltas.items()
460+
454461
def filtered_dataset_deltas(self, ds_path, ds_diff):
455462
"""
456463
Yields the key, delta for only those deltas from the given dataset diff that match
@@ -462,7 +469,7 @@ def filtered_dataset_deltas(self, ds_path, ds_diff):
462469
if not item_type or item_type not in ds_diff:
463470
return
464471

465-
unfiltered_deltas = ds_diff[item_type].sorted_items()
472+
unfiltered_deltas = self.iter_deltadiff_items(ds_diff[item_type])
466473

467474
if self.spatial_filter.match_all:
468475
yield from unfiltered_deltas

kart/diff.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ def feature_count_diff(
154154
help="Provide a user defined/specific html template for diff representation",
155155
type=click.Path(exists=True),
156156
)
157+
@click.option(
158+
"--sort-keys/--no-sort-keys",
159+
is_flag=True,
160+
default=True,
161+
help="Sort keys in the output. This is the default behaviour, but it can be disabled for a slight speed improvement.",
162+
)
157163
@click.argument(
158164
"args",
159165
metavar="[REVISIONS] [--] [FILTERS]",
@@ -175,6 +181,7 @@ def diff(
175181
diff_format,
176182
delta_filter,
177183
html_template,
184+
sort_keys,
178185
args,
179186
):
180187
"""
@@ -235,6 +242,7 @@ def diff(
235242
target_crs=crs,
236243
diff_estimate_accuracy=add_feature_count_estimate,
237244
html_template=html_template,
245+
sort_keys=sort_keys,
238246
)
239247
diff_writer.convert_to_dataset_format(convert_to_dataset_format)
240248
diff_writer.full_file_diffs(diff_files)

kart/json_diff_writers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def default(self, obj):
101101
if ds_path == FILES_KEY:
102102
return {
103103
key: self._postprocess_attachment_delta(value)
104-
for key, value in ds_diff[FILES_KEY].sorted_items()
104+
for key, value in self.iter_deltadiff_items(ds_diff[FILES_KEY])
105105
}
106106

107107
result = {}
@@ -348,7 +348,7 @@ def write_meta_deltas(self, ds_path, ds_diff):
348348
return
349349

350350
obj = {"type": "meta", "dataset": ds_path, "key": None, "change": None}
351-
for key, delta in ds_diff["meta"].sorted_items():
351+
for key, delta in self.iter_deltadiff_items(ds_diff["meta"]):
352352
obj["key"] = key
353353
obj["change"] = delta.to_plus_minus_dict()
354354
self.dump(obj)
@@ -382,7 +382,7 @@ def write_file_diff(self, file_diff):
382382
if not self.do_full_file_diffs:
383383
obj.pop("binary")
384384

385-
for key, delta in file_diff.sorted_items():
385+
for key, delta in self.iter_deltadiff_items(file_diff):
386386
obj["path"] = key
387387
if self.do_full_file_diffs:
388388
delta = self._full_file_delta(delta)

kart/text_diff_writer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def write_header(self):
6161

6262
def write_ds_diff(self, ds_path, ds_diff, diff_format=DiffFormat.FULL):
6363
if "meta" in ds_diff:
64-
for key, delta in ds_diff["meta"].sorted_items():
64+
for key, delta in self.iter_deltadiff_items(ds_diff["meta"]):
6565
self.write_meta_delta(ds_path, key, delta)
6666
if diff_format != DiffFormat.NO_DATA_CHANGES:
6767
item_type = self._get_old_or_new_dataset(ds_path).ITEM_TYPE
@@ -313,7 +313,7 @@ def _pair_items(cls, old_list, new_list):
313313
old_index += 1
314314

315315
def write_file_diff(self, file_diff):
316-
for key, delta in file_diff.sorted_items():
316+
for key, delta in self.iter_deltadiff_items(file_diff):
317317
self.write_file_delta(key, delta)
318318
return bool(file_diff)
319319

0 commit comments

Comments
 (0)