Skip to content

Commit 514f13b

Browse files
authored
feat(cli): add JSON output format to 'renku dataset ls' and 'renku dataset ls-files' (#2084)
1 parent 50bb67b commit 514f13b

File tree

6 files changed

+96
-0
lines changed

6 files changed

+96
-0
lines changed

renku/cli/dataset.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080
8181
Displayed results are sorted based on the value of the first column.
8282
83+
You can select the output format by passing ``--format`` with a value of ``tabular``,
84+
``json-ld`` or ``json``.
85+
8386
To inspect the state of the dataset on a given commit we can use ``--revision``
8487
flag for it:
8588
@@ -352,6 +355,9 @@
352355
353356
Displayed results are sorted based on the value of the first column.
354357
358+
You can select the output format by passing ``--format`` with a value of ``tabular``,
359+
``json-ld`` or ``json``.
360+
355361
Sometimes you want to filter the files. For this we use ``--dataset``,
356362
``--include`` and ``--exclude`` flags:
357363

renku/core/commands/dataset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ def _list_files(client, datasets=None, creators=None, include=None, exclude=None
273273
for record in records:
274274
record.title = record.dataset.title
275275
record.dataset_name = record.dataset.name
276+
record.dataset_id = record.dataset._id
276277
record.creators_csv = record.dataset.creators_csv
277278
record.creators_full_csv = record.dataset.creators_full_csv
278279

renku/core/commands/format/dataset_files.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
from humanize import naturalsize
2323

24+
from renku.core.models.datasets import DatasetFileDetailsJson
25+
2426
from .tabulate import tabulate
2527

2628

@@ -108,9 +110,28 @@ def jsonld(client, records, **kwargs):
108110
return dumps(data, indent=2)
109111

110112

113+
def json(client, records, **kwargs):
    """Render dataset file records as JSON.

    Extra keyword arguments are accepted but not used here.

    :param client: LocalClient instance.
    :param records: Filtered collection.
    :return: Result of ``dumps`` applied to the list of serialized records,
        indented by two spaces.
    """
    from renku.core.models.json import dumps

    # NOTE(review): these helpers presumably fill in the size and LFS status
    # of each record before serialization - confirm against their definitions.
    _get_lfs_file_sizes(client, records)
    _get_lfs_tracking(client, records)

    # Copy the owning dataset's creators onto every record so that the schema
    # can read them from a ``creators`` attribute.
    for entry in records:
        entry.creators = entry.dataset.creators

    serialized = []
    for entry in records:
        serialized.append(DatasetFileDetailsJson().dump(entry))

    return dumps(serialized, indent=2)
129+
130+
111131
# Lookup table from a format name to the function in this module that renders
# dataset file records in that format.
DATASET_FILES_FORMATS = {
    "tabular": tabular,
    "json-ld": jsonld,
    "json": json,
}
"""Valid formatting options."""
116137

renku/core/commands/format/datasets.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"""Serializers for datasets."""
1919
import textwrap
2020

21+
from renku.core.models.datasets import DatasetDetailsJson
2122
from renku.core.models.json import dumps
2223

2324
from .tabulate import tabulate
@@ -45,9 +46,16 @@ def jsonld(client, datasets, **kwargs):
4546
return dumps(data, indent=2)
4647

4748

49+
def json(client, datasets, **kwargs):
    """Render datasets as JSON.

    :param client: Accepted for parity with the other formatters; not used here.
    :param datasets: Iterable of datasets to serialize.
    :return: Result of ``dumps`` applied to the list of serialized datasets,
        indented by two spaces.
    """
    serialized = []
    for item in datasets:
        serialized.append(DatasetDetailsJson().dump(item))

    return dumps(serialized, indent=2)
53+
54+
4855
# Lookup table from a format name to the function in this module that renders
# datasets in that format.
DATASETS_FORMATS = {
    "tabular": tabular,
    "json-ld": jsonld,
    "json": json,
}
"""Valid formatting options."""
5361

renku/core/models/datasets.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,3 +992,19 @@ class DatasetDetailsJson(marshmallow.Schema):
992992
description = marshmallow.fields.String()
993993
keywords = marshmallow.fields.List(marshmallow.fields.String())
994994
identifier = marshmallow.fields.String()
995+
996+
997+
class DatasetFileDetailsJson(marshmallow.Schema):
    """Serialize dataset files to a response object.

    Each declared field is read from the attribute of the same name on the
    object being dumped.
    """

    # File path and its timestamps.
    path = marshmallow.fields.String()
    created = marshmallow.fields.DateTime()
    added = marshmallow.fields.DateTime()

    # Size is emitted as a string rather than a number.
    size = marshmallow.fields.String()
    is_lfs = marshmallow.fields.Boolean()

    # Identification of the dataset the file belongs to.
    # NOTE(review): presumably set on each record by the file-listing command - confirm.
    dataset_id = marshmallow.fields.String()
    dataset_name = marshmallow.fields.String()

    # List of creators, each serialized through DatasetCreatorsJson.
    creators = marshmallow.fields.List(marshmallow.fields.Nested(DatasetCreatorsJson))

tests/cli/test_datasets.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,50 @@ def test_datasets_ls_files_lfs(tmpdir, large_file, runner, project):
746746
assert file2_entry.endswith("*")
747747

748748

749+
def test_datasets_ls_files_json(tmpdir, large_file, runner, project):
    """Test that ``dataset ls-files --format json`` lists every file as a JSON entry.

    Adds one small file and the ``large_file`` fixture to a fresh dataset, then
    checks that the JSON output contains both entries, that only the large file
    is flagged as LFS, and that the creators, size and dataset fields are set.
    """
    # NOTE: create a dataset
    result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
    assert 0 == result.exit_code
    assert "OK" in result.output

    # NOTE: create some data
    new_file = tmpdir.join("file_1")
    new_file.write(str(1))
    paths = [str(new_file), str(large_file)]

    # NOTE: add data to dataset
    result = runner.invoke(cli, ["dataset", "add", "my-dataset"] + paths, catch_exceptions=False)
    assert 0 == result.exit_code

    # NOTE: check files
    result = runner.invoke(cli, ["dataset", "ls-files", "--format", "json"])
    assert 0 == result.exit_code

    # Keep the parsed payload under its own name instead of rebinding ``result``.
    files = json.loads(result.output)

    assert len(files) == 2
    file1 = next(f for f in files if f["path"].endswith("file_1"))
    file2 = next(f for f in files if f["path"].endswith(large_file.name))

    assert not file1["is_lfs"]
    assert file2["is_lfs"]

    # Both entries must carry non-empty metadata.
    for entry in (file1, file2):
        assert entry["creators"]
        assert entry["size"]
        assert entry["dataset_name"]
        assert entry["dataset_id"]
791+
792+
749793
@pytest.mark.parametrize("column", DATASET_FILES_COLUMNS.keys())
750794
def test_datasets_ls_files_columns_correctly(runner, project, column, directory_tree):
751795
"""Test file listing only shows requested columns."""

0 commit comments

Comments
 (0)