Skip to content

Commit

Permalink
Count only the first hit for subject or tissue within a filename
Browse files Browse the repository at this point in the history
Also added an assertion so we do not count incorrectly. But maybe it should
be just a warning?

Closes #172
  • Loading branch information
yarikoptic committed Apr 6, 2023
1 parent b310e3e commit fecfb26
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions dandischema/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,17 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None:
stats = _get_samples(value, stats, hierarchy)
break

# which components already found, so we do not count more than
# once in some incorrectly named datasets
found = {}
for part in Path(assetmeta["path"]).name.split(".")[0].split("_"):
if part.startswith("sub-"):
subject = part.replace("sub-", "")
if found.get("subject") and part.startswith("sub-"):
found["subject"] = subject = part.split("sub-", 1)[1]
if subject not in stats["subjects"]:
stats["subjects"].append(subject)
if part.startswith("sample-"):
sample = part.replace("sample-", "")
found.add("subject")
if not found.get("sample") and part.startswith("sample-"):
found["sample"] = sample = part.replace("sample-", "")
if sample not in stats["tissuesample"]:
stats["tissuesample"].append(sample)

Expand All @@ -338,10 +342,12 @@ def aggregate_assets_summary(metadata: Iterable[Dict[str, Any]]) -> dict:
stats: _stats_type = {}
for meta in metadata:
_add_asset_to_stats(meta, stats)

stats["numberOfBytes"] = stats.get("numberOfBytes", 0)
stats["numberOfFiles"] = stats.get("numberOfFiles", 0)
stats["numberOfSubjects"] = len(stats.pop("subjects", [])) or None
if stats["numberOfFiles"]:
# Must not happen. If does -- a bug in software
assert stats["numberOfSubjects"] <= stats["numberOfFiles"]
stats["numberOfSamples"] = (
len(stats.pop("tissuesample", [])) + len(stats.pop("slice", []))
) or None
Expand Down

0 comments on commit fecfb26

Please sign in to comment.