From 8bccbf57471a665ea201b964a2c3502e3dcda96f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 26 Sep 2022 15:22:40 -0400 Subject: [PATCH] ls WiP: tried to add filtering and {}-formatting with alt names Example works quite neatly for a remote dandiset: (git)lena:~/proj/dandi/dandi-cli-master[master]git $> dandi --pdb ls -F 'path,size,ID=={metadata[id]}' -f pyout -r --metadata assets dandi://dandi/000029@draft/ PATH SIZE ID 000029 20.7 MB DANDI:000029/draft sub-anm369964/sub-anm369964_behavior+ecephys.nwb 7.7 MB dandiasset:689d7c0c-d980-462e-9a56-3df87efc9658 sub-RAT123/sub-RAT123.nwb 18.8 kB dandiasset:86e09d7e-4355-4887-9c5a-7a137c046953 sub-anm369962/sub-anm369962_behavior+ecephys.nwb 6.6 MB dandiasset:4f6a8f1d-ca04-491f-a530-9ed00a909c21 sub-anm369963/sub-anm369963_behavior+ecephys.nwb 6.4 MB dandiasset:38f2024d-62a9-4c79-8a22-7a0ff34b331d Summary: 41.4 MB but for a local one, with all the async fields, we do not get this output and it crashes, etc. --- dandi/cli/cmd_ls.py | 56 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index 62002044b..e65d9a96c 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -28,9 +28,10 @@ @click.option( "-F", "--fields", - help="Comma-separated list of fields to display. " - "An empty value to trigger a list of " - "available fields to be printed out", + help="Comma-separated list of fields to display. 'path' field is mandatory thus" + "will always be added. 
Field could provide an alternative name as " + "'ALTERNATIVE==ORIGINAL', and '{STRING}' will be considered to be a subject to" + "'str.format()' operation using all other fields.", ) @click.option( "-f", @@ -120,12 +121,12 @@ def ls( fields = fields.split(",") # Map possibly present short names back to full names fields = [PYOUT_SHORT_NAMES_rev.get(f.lower(), f) for f in fields] - unknown_fields = set(fields).difference(all_fields) - if unknown_fields: - display_known_fields(all_fields) - raise click.UsageError( - "Following fields are not known: %s" % ", ".join(unknown_fields) - ) + # unknown_fields = set(fields).difference(all_fields) + # if unknown_fields: + # display_known_fields(all_fields) + # raise click.UsageError( + # "Following fields are not known: %s" % ", ".join(unknown_fields) + # ) urls = map(is_url, paths) # Actually I do not see why and it could be useful to compare local-vs-remote @@ -159,11 +160,15 @@ def assets_gen(): if format == "auto": format = "yaml" if any(urls) or (len(paths) == 1 and not recursive) else "pyout" + field_names = {f.split("==", 1)[0]: f for f in fields} + if len(field_names) != len(fields): + raise ValueError("non unique names detected") + if format == "pyout": if fields and fields[0] != "path": # we must always have path - our "id" fields = ["path"] + fields - out = PYOUTFormatter(fields=fields, wait_for_top=3, max_workers=jobs) + out = PYOUTFormatter(fields=field_names, wait_for_top=3, max_workers=jobs) elif format == "json": out = JSONFormatter() elif format == "json_pp": @@ -177,7 +182,7 @@ def assets_gen(): async_keys = set(all_fields) if fields is not None: - async_keys = async_keys.intersection(fields) + async_keys = async_keys.intersection(field_names) async_keys = tuple(async_keys.difference(common_fields)) errors = defaultdict(list) # problem: [] paths @@ -233,7 +238,34 @@ def assets_gen(): errors["Empty record"].append(asset) lgr.debug("Skipping a record for %s since empty", asset) continue - out(rec) + if fields: 
+ # get it flattened out and only the ones requested + rec_display = {} + for f in fields: + f_name = f + # could be alt_name==field + if "==" in f: + f_name, f = f.split("==", 1) + if f in rec: + # as is, nothing fancy + f_value = rec[f] + elif f.startswith("{") and f.endswith("}"): + # # it is a str.format, strip {} for display + # if f_name.startswith('{'): + # f_name = f_name + try: + # TODO: this all doesn't work on those fields which are "async" + # i.e. loaded delayed in a thread within pyout upon a callback + f_value = f.format(**rec) + except Exception: + lgr.error("Cannot str.format %r using %r", f, rec) + f_value = "ERROR" + else: + f_value = "N/A" + rec_display[f_name] = f_value + else: + rec_display = rec + out(rec_display) if errors: lgr.warning( "Failed to operate on some paths (empty records were listed):\n %s",