feat(cli): add JSON output format to 'renku dataset ls' and 'renku dataset ls-files' (#2084)

Panaetius · web-flow · commit 514f13b1f34d · 2021-05-17T09:50:51.000+02:00
diff --git a/renku/cli/dataset.py b/renku/cli/dataset.py
@@ -80,6 +80,9 @@
 
 Displayed results are sorted based on the value of the first column.
 
+You can specify output formats by passing ``--format`` with a value of ``tabular``,
+``json-ld`` or ``json``.
+
 To inspect the state of the dataset on a given commit we can use ``--revision``
 flag for it:
 
@@ -352,6 +355,9 @@
 
 Displayed results are sorted based on the value of the first column.
 
+You can specify output formats by passing ``--format`` with a value of ``tabular``,
+``json-ld`` or ``json``.
+
 Sometimes you want to filter the files. For this we use ``--dataset``,
 ``--include`` and ``--exclude`` flags:
 
diff --git a/renku/core/commands/dataset.py b/renku/core/commands/dataset.py
@@ -273,6 +273,7 @@ def _list_files(client, datasets=None, creators=None, include=None, exclude=None
     for record in records:
         record.title = record.dataset.title
         record.dataset_name = record.dataset.name
+        record.dataset_id = record.dataset._id
         record.creators_csv = record.dataset.creators_csv
         record.creators_full_csv = record.dataset.creators_full_csv
 
diff --git a/renku/core/commands/format/dataset_files.py b/renku/core/commands/format/dataset_files.py
@@ -21,6 +21,8 @@
 
 from humanize import naturalsize
 
+from renku.core.models.datasets import DatasetFileDetailsJson
+
 from .tabulate import tabulate
 
 
@@ -108,9 +110,28 @@ def jsonld(client, records, **kwargs):
     return dumps(data, indent=2)
 
 
+def json(client, records, **kwargs):
+    """Format dataset files as JSON, one ``DatasetFileDetailsJson`` dump per record.
+
+    :param client: LocalClient instance.
+    :param records: Filtered collection; each record gets ``creators`` set in place.
+    """
+    from renku.core.models.json import dumps
+
+    _get_lfs_file_sizes(client, records)  # presumably fills ``record.size`` -- helper not visible here, confirm
+    _get_lfs_tracking(client, records)  # presumably fills ``record.is_lfs`` -- helper not visible here, confirm
+
+    for record in records:
+        record.creators = record.dataset.creators  # read by the schema's ``creators`` field
+
+    data = [DatasetFileDetailsJson().dump(record) for record in records]
+    return dumps(data, indent=2)
+
+
 DATASET_FILES_FORMATS = {
     "tabular": tabular,
     "json-ld": jsonld,
+    "json": json,
 }
 """Valid formatting options."""
 
diff --git a/renku/core/commands/format/datasets.py b/renku/core/commands/format/datasets.py
@@ -18,6 +18,7 @@
 """Serializers for datasets."""
 import textwrap
 
+from renku.core.models.datasets import DatasetDetailsJson
 from renku.core.models.json import dumps
 
 from .tabulate import tabulate
@@ -45,9 +46,16 @@ def jsonld(client, datasets, **kwargs):
     return dumps(data, indent=2)
 
 
+def json(client, datasets, **kwargs):
+    """Format datasets as JSON via ``DatasetDetailsJson``; ``client`` and ``kwargs`` are unused."""
+    data = [DatasetDetailsJson().dump(dataset) for dataset in datasets]
+    return dumps(data, indent=2)
+
+
 DATASETS_FORMATS = {
     "tabular": tabular,
     "json-ld": jsonld,
+    "json": json,
 }
 """Valid formatting options."""
 
diff --git a/renku/core/models/datasets.py b/renku/core/models/datasets.py
@@ -992,3 +992,19 @@ class DatasetDetailsJson(marshmallow.Schema):
     description = marshmallow.fields.String()
     keywords = marshmallow.fields.List(marshmallow.fields.String())
     identifier = marshmallow.fields.String()
+
+
+class DatasetFileDetailsJson(marshmallow.Schema):
+    """Serialize dataset file records to plain objects (used by the ``json`` output of ``ls-files``)."""
+
+    path = marshmallow.fields.String()
+    created = marshmallow.fields.DateTime()
+    added = marshmallow.fields.DateTime()
+
+    size = marshmallow.fields.String()  # a string, not a number -- presumably human-readable; TODO confirm
+    is_lfs = marshmallow.fields.Boolean()
+
+    dataset_id = marshmallow.fields.String()  # copied from ``record.dataset._id`` in ``_list_files``
+    dataset_name = marshmallow.fields.String()  # copied from ``record.dataset.name`` in ``_list_files``
+
+    creators = marshmallow.fields.List(marshmallow.fields.Nested(DatasetCreatorsJson))  # set by the ``json`` formatter
diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py
@@ -746,6 +746,50 @@ def test_datasets_ls_files_lfs(tmpdir, large_file, runner, project):
     assert file2_entry.endswith("*")
 
 
+def test_datasets_ls_files_json(tmpdir, large_file, runner, project):
+    """Test file listing with the JSON output format."""
+    # NOTE: create a dataset
+    result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
+    assert 0 == result.exit_code
+    assert "OK" in result.output
+
+    # NOTE: create some data
+    paths = []
+
+    new_file = tmpdir.join("file_1")
+    new_file.write(str(1))
+    paths.append(str(new_file))
+
+    paths.append(str(large_file))
+
+    # NOTE: add data to dataset
+    result = runner.invoke(cli, ["dataset", "add", "my-dataset"] + paths, catch_exceptions=False,)
+    assert 0 == result.exit_code
+
+    # NOTE: check files
+    result = runner.invoke(cli, ["dataset", "ls-files", "--format", "json"])
+    assert 0 == result.exit_code
+
+    result = json.loads(result.output)
+
+    assert len(result) == 2
+    file1 = next((f for f in result if f["path"].endswith("file_1")))
+    file2 = next((f for f in result if f["path"].endswith(large_file.name)))
+
+    assert not file1["is_lfs"]
+    assert file2["is_lfs"]
+
+    assert file1["creators"]
+    assert file1["size"]
+    assert file1["dataset_name"]
+    assert file1["dataset_id"]
+
+    assert file2["creators"]
+    assert file2["size"]
+    assert file2["dataset_name"]
+    assert file2["dataset_id"]
+
+
 @pytest.mark.parametrize("column", DATASET_FILES_COLUMNS.keys())
 def test_datasets_ls_files_columns_correctly(runner, project, column, directory_tree):
     """Test file listing only shows requested columns."""