From 36a121e47bafbba5bfa5d7a4cb71fd2cb4c51ce9 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 10 Jan 2025 22:38:08 +0100 Subject: [PATCH 01/31] use pynxtools.nomad.schema.Root --- src/pynxtools/nomad/entrypoints.py | 22 ++++++++++---- src/pynxtools/nomad/parser.py | 49 ++++++++++++++++-------------- src/pynxtools/nomad/schema.py | 2 +- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index dfd957a8f..66b3cb843 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -76,7 +76,7 @@ def load(self): SearchQuantities, ) -schema = "pynxtools.nomad.schema.NeXus" +schema = "pynxtools.nomad.schema.Root" nexus_app = AppEntryPoint( name="NexusApp", @@ -105,17 +105,17 @@ def load(self): Column(quantity=f"entry_type", selected=True), Column( title="definition", - quantity=f"data.*.ENTRY[*].definition__field#{schema}", + quantity=f"data.ENTRY[*].definition__field#{schema}", selected=True, ), Column( title="start_time", - quantity=f"data.*.ENTRY[*].start_time__field#{schema}", + quantity=f"data.ENTRY[*].start_time__field#{schema}", selected=True, ), Column( title="title", - quantity=f"data.*.ENTRY[*].title__field#{schema}", + quantity=f"data.ENTRY[*].title__field#{schema}", selected=True, ), ], @@ -161,8 +161,8 @@ def load(self): "autorange": True, "nbins": 30, "scale": "linear", - "quantity": f"data.Root.datetime#{schema}", - "title": "Procesing Time", + "quantity": f"data.ENTRY.start_time__field#{schema}", + "title": "Start Time", "layout": { "lg": {"minH": 3, "minW": 3, "h": 4, "w": 12, "y": 0, "x": 0} }, @@ -177,6 +177,16 @@ def load(self): "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 12} }, }, + { + "type": "terms", + "show_input": False, + "scale": "linear", + "quantity": f"data.ENTRY.definition__field#{schema}", + "title": "Definition", + "layout": { + "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 16} + }, + }, { "type": "periodic_table", "scale": "linear", diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 2fea9afde..00ec67b61 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -60,6 +60,7 @@ def _to_section( nx_def: str, nx_node: Optional[ET.Element], current: MSection, + nx_root, ) -> MSection: """ Args: @@ -105,7 +106,17 @@ def _to_section( new_section = section break - if new_section is None: + if new_section is not None: + return new_section + if current == nx_root: + cls = getattr(nexus_schema, nx_def, None) + sec = cls() + new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] + sec.m_create(new_def_spec.section_def.section_cls) + new_section = sec.m_get_sub_section(new_def_spec, -1) + current.ENTRY.append(new_section) + new_section.__dict__["nx_name"] = hdf_name + else: current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name @@ -194,7 +205,7 @@ def _populate_data( # so values of non-scalar attribute will not end up in metainfo! 
attr_name = attr_name + "__attribute" - current = _to_section(attr_name, nx_def, nx_attr, current) + current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) try: if nx_root or nx_parent.tag.endswith("group"): @@ -332,12 +343,13 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 if nx_path is None or nx_path == "/": return - current: MSection = _to_section(None, nx_def, None, self.nx_root) + # current: MSection = _to_section(None, nx_def, None, self.nx_root) + current = self.nx_root depth: int = 1 current_hdf_path = "" for name in hdf_path.split("/")[1:]: nx_node = nx_path[depth] if depth < len(nx_path) else name - current = _to_section(name, nx_def, nx_node, current) + current = _to_section(name, nx_def, nx_node, current, self.nx_root) self._collect_class(current) depth += 1 if depth < len(nx_path): @@ -468,7 +480,7 @@ def parse( child_archives: Dict[str, EntryArchive] = None, ) -> None: self.archive = archive - self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member + self.nx_root = nexus_schema.Root() # type: ignore # pylint: disable=no-member self.archive.data = self.nx_root self._logger = logger if logger else get_logger(__name__) @@ -483,25 +495,18 @@ def parse( archive.metadata = EntryMetadata() # Normalise experiment type - app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") - app_def_list = [] - for app_elem in app_defs: - app = app_elem.lstrip() - try: - app_sec = getattr(self.nx_root, app) + # app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") + app_def_list = set() + try: + app_entries = getattr(self.nx_root, "ENTRY") + for entry in app_entries: try: - app_entry = getattr(app_sec, "ENTRY") - if len(app_entry) < 1: - raise AttributeError() + app = entry.definition__field + app_def_list.add(rename_nx_for_nomad(app) if app else "Generic") except (AttributeError, TypeError): - app_entry = getattr(app_sec, "entry") - if len(app_entry) < 1: - raise AttributeError() - app_def_list.append( - app if app != rename_nx_for_nomad("NXroot") else "Generic" - ) - except (AttributeError, TypeError): - pass + pass + except (AttributeError, TypeError): + pass if len(app_def_list) == 0: app_def = "Experiment" else: diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index dc19f8f14..ed8c8272f 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -113,7 +113,7 @@ } -class NexusMeasurement(Measurement): +class NexusMeasurement(Measurement, Schema): def normalize(self, archive, logger): try: app_entry = getattr(self, "ENTRY") From c1bc7d57c11344281dcb5fb93f4d517209922e3a Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 17 Jan 2025 22:28:43 +0100 Subject: [PATCH 02/31] not using inner sections --- src/pynxtools/nomad/schema.py | 48 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index ed8c8272f..e18c12769 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -633,7 +633,9 @@ def __create_group(xml_node: ET.Element, root_section: Section): nx_type = __rename_nx_for_nomad(xml_attrs["type"]) nx_name = xml_attrs.get("name", nx_type.upper()) - section_name = __rename_nx_for_nomad(nx_name, is_group=True) + section_name = ( + root_section.name + "__" + __rename_nx_for_nomad(nx_name, is_group=True) + ) group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name) __attach_base_section(group_section, 
root_section, __to_section(nx_type)) @@ -651,8 +653,7 @@ def __create_group(xml_node: ET.Element, root_section: Section): variable=__if_template(nx_name), ) - root_section.inner_section_definitions.append(group_section) - + __section_definitions[section_name] = group_section root_section.sub_sections.append(group_subsection) __create_group(group, group_section) @@ -707,8 +708,13 @@ def __attach_base_section(section: Section, container: Section, default: Section a base-section with a suitable base. """ try: + newdefinitions = {} + for def_name, act_def in container.all_sub_sections.items(): + newdefinitions[def_name] = act_def.sub_section base_section = nexus_resolve_variadic_name( - container.all_inner_section_definitions, section.name, filter=default + newdefinitions, + section.name.split("__")[-1], + filter=default, ) except ValueError: base_section = None @@ -855,7 +861,7 @@ def __add_section_from_nxdl(xml_node: ET.Element) -> Optional[Section]: return None -def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: +def __create_package_from_nxdl_directories() -> Package: """ Creates a metainfo package from the given nexus directory. Will generate the respective metainfo definitions from all the nxdl files in that directory. @@ -875,16 +881,28 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: sections.append(section) sections.sort(key=lambda x: x.name) + nexus_sections = {} + for section_name in ["_Applications", "_BaseSections"]: # , '_InnerSections']: + nexus_sections[section_name] = Section(validate=VALIDATE, name=section_name) + package.section_definitions.append(nexus_sections[section_name]) for section in sections: package.section_definitions.append(section) - if section.nx_category == "application" or ( - section.nx_category == "base" and section.nx_name == "NXroot" - ): - nexus_section.sub_sections.append( + if section.nx_category == "application": + nexus_sections["_Applications"].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) + elif section.nx_category == "base" and section.nx_name == "NXroot": + nexus_sections["_Applications"].sub_sections.append( SubSection(section_def=section, name=section.name) ) + elif section.nx_category == "base": + nexus_sections["_BaseSections"].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) + for section_name in __section_definitions: + if "__" in section_name: + package.section_definitions.append(__section_definitions[section_name]) - package.section_definitions.append(nexus_section) return package @@ -916,14 +934,6 @@ def init_nexus_metainfo(): if nexus_metainfo_package is not None: return - # We take the application definitions and create a common parent section that allows - # to include nexus in an EntryArchive. - # To be able to register it into data section, it is expected that this section inherits from Schema. - nexus_section = Section( - validate=VALIDATE, name=__GROUPING_NAME, label=__GROUPING_NAME - ) - nexus_section.base_sections = [Schema.m_def] - # try: # load_nexus_schema('') # except Exception: @@ -932,7 +942,7 @@ def init_nexus_metainfo(): # save_nexus_schema('') # except Exception: # pass - nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) + nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) # We need to initialize the metainfo definitions. 
This is usually done automatically, From fc5b95b1976df75849ee97ceec283ad0886f75f4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:09:46 +0100 Subject: [PATCH 03/31] fix for doc links if name contains _ --- src/pynxtools/nomad/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index e18c12769..742933abf 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -364,7 +364,9 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return f"{doc_base}/{nx_package}/{anchor_segments[-1]}.html#{anchor}" + return ( + f"{doc_base}/{nx_package}/{anchor_segments[-1].replace("-", "_")}.html#{anchor}" + ) def __to_section(name: str, **kwargs) -> Section: From bfce048e016af1672848f837e53169d66ae8e9aa Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:14:55 +0100 Subject: [PATCH 04/31] fix format --- src/pynxtools/nomad/schema.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 742933abf..66b6a3688 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -364,9 +364,8 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return ( - f"{doc_base}/{nx_package}/{anchor_segments[-1].replace("-", "_")}.html#{anchor}" - ) + nx_file = anchor_segments[-1].replace("-", "_") + return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" def __to_section(name: str, **kwargs) -> Section: From 274e0d63718f2d9d9f01a26816c4ca7fc8cc9782 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:37:49 +0100 Subject: [PATCH 05/31] linting --- src/pynxtools/nomad/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 66b6a3688..8aaf5c659 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -900,9 +900,9 @@ def __create_package_from_nxdl_directories() -> Package: nexus_sections["_BaseSections"].sub_sections.append( SubSection(section_def=section, name=section.name) ) - for section_name in __section_definitions: + for section_name, section in __section_definitions.items(): if "__" in section_name: - package.section_definitions.append(__section_definitions[section_name]) + package.section_definitions.append(section) return package From 27fa71c04d436ff59e09352b9351f73e7ea5eec6 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 20:43:06 +0100 Subject: [PATCH 06/31] fixing tests --- src/pynxtools/nomad/parser.py | 22 ++++++++++------------ tests/nomad/test_parsing.py | 8 ++++---- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 00ec67b61..dd75c96a0 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -96,19 +96,12 @@ def _to_section( nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True) - # for groups, get the definition from the package - new_def = current.m_def.all_sub_sections[nomad_def_name] - - new_section: MSection = None # type:ignore - - for section in current.m_get_sub_sections(new_def): - if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: - new_section = section - break - 
- if new_section is not None: - return new_section if current == nx_root: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections["ENTRY"] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section cls = getattr(nexus_schema, nx_def, None) sec = cls() new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] @@ -117,6 +110,11 @@ def _to_section( current.ENTRY.append(new_section) new_section.__dict__["nx_name"] = hdf_name else: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections[nomad_def_name] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 8a71f9af3..b1dda4bb3 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -41,7 +41,7 @@ def test_nexus_example(): example_data = "src/pynxtools/data/201805_WSe2_arpes.nxs" NexusParser().parse(example_data, archive, get_logger(__name__)) - arpes_obj = getattr(archive.data, rename_nx_for_nomad("NXarpes")) + arpes_obj = archive.data assert arpes_obj.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( "3.27e-10*millibar" @@ -94,9 +94,9 @@ def test_nexus_example_with_renamed_groups(): os.path.dirname(__file__), "../data/nomad/NXlauetof.hdf5" ) NexusParser().parse(lauetof_data, archive, get_logger(__name__)) - lauetof_obj = getattr(archive.data, rename_nx_for_nomad("NXlauetof")) + lauetof_obj = archive.data - assert lauetof_obj.entry.name__group.time_of_flight__field == ureg.Quantity( + assert lauetof_obj.ENTRY[0].name__group.time_of_flight__field == ureg.Quantity( "1.0*second" ) - assert lauetof_obj.entry.sample.name__field == "SAMPLE-CHAR-DATA" + assert lauetof_obj.ENTRY[0].sample.name__field == "SAMPLE-CHAR-DATA" From 8fb49533810a179984acfd7686f2e4546536e648 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 24 Jan 2025 11:08:25 +0100 Subject: [PATCH 07/31] fixing mime-type for WSL --- src/pynxtools/nomad/entrypoints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index 66b3cb843..77ad67617 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -63,7 +63,7 @@ def load(self): name="pynxtools parser", description="A parser for nexus files.", mainfile_name_re=r".*\.nxs", - mainfile_mime_re="application/x-hdf5", + mainfile_mime_re="application/x-hdf*", ) from nomad.config.models.ui import ( From bd22578f7e69662caf7e6538bbe11f1f0c0c91b4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 24 Jan 2025 12:38:46 +0100 Subject: [PATCH 08/31] fix for handling raw files in subdirectories --- src/pynxtools/nomad/parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index dd75c96a0..6298a0167 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -484,7 +484,13 @@ def parse( self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() - *_, self.nxs_fname = mainfile.rsplit("/", 1) + mf = mainfile.split("/") + # if filename does not follow the pattern + # .volumes/fs/////[subdirs?]/ + if len(mf) < 7: + 
self.nxs_fname = mainfile + else: + self.nxs_fname = "/".join(mf[6:]) nexus_helper = HandleNexus(logger, mainfile) nexus_helper.process_nexus_master_file(self.__nexus_populate) From 8f1a0b4fbc4478a98ce2c4e7691cb167f9db0ce2 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Wed, 29 Jan 2025 18:39:38 +0100 Subject: [PATCH 09/31] use references in steps, and results in NexusMeasurement --- src/pynxtools/nomad/schema.py | 65 ++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 8aaf5c659..98dbb6c82 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -33,8 +33,9 @@ try: from nomad import utils from nomad.datamodel import EntryArchive, EntryMetadata - from nomad.datamodel.data import EntryData, Schema + from nomad.datamodel.data import ArchiveSection, EntryData, Schema from nomad.datamodel.metainfo import basesections + from nomad.datamodel.metainfo.annotations import ELNAnnotation from nomad.datamodel.metainfo.basesections import ( ActivityResult, ActivityStep, @@ -101,14 +102,37 @@ __logger = get_logger(__name__) + +class NexusActivityStep(ActivityStep): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Step.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + +class NexusActivityResult(ActivityResult): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Result.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + __BASESECTIONS_MAP: Dict[str, Any] = { "NXfabrication": [basesections.Instrument], "NXsample": [CompositeSystem], "NXsample_component": [Component], "NXidentifier": [EntityReference], - "NXentry": [ActivityStep], - "NXprocess": [ActivityStep], - "NXdata": [ActivityResult], + "NXentry": [NexusActivityStep], + "NXprocess": [NexusActivityStep], + "NXdata": [NexusActivityResult], # "object": BaseSection, } @@ -121,23 +145,21 @@ def normalize(self, archive, logger): raise AttributeError() self.steps = [] for entry in app_entry: - sec_c = entry.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=entry.name, reference=entry) + self.steps.append(ref) for sec in entry.m_all_contents(): if isinstance(sec, ActivityStep): - sec_c = sec.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=sec.name, reference=sec) + self.steps.append(ref) elif isinstance(sec, basesections.Instrument): - ref = InstrumentReference(name=sec.name) - ref.reference = sec + ref = InstrumentReference(name=sec.name, reference=sec) self.instruments.append(ref) elif isinstance(sec, CompositeSystem): - ref = CompositeSystemReference(name=sec.name) - ref.reference = sec + ref = CompositeSystemReference(name=sec.name, reference=sec) self.samples.append(ref) elif isinstance(sec, ActivityResult): - sec_c = sec.m_copy() - self.results.append(sec_c) + ref = NexusActivityResult(name=sec.name, reference=sec) + self.results.append(ref) if self.m_def.name == "Root": self.method = "Generic Experiment" else: @@ -158,7 +180,7 @@ def normalize(self, archive, logger): act_array = archive.workflow2.tasks existing_items = {(task.name, task.section) for task in act_array} new_items = [ - item.to_task() + item.reference.to_task() for item in self.steps if (item.name, item) not in existing_items ] @@ -177,9 +199,9 @@ def normalize(self, archive, logger): act_array = archive.workflow2.outputs 
existing_items = {(link.name, link.section) for link in act_array} new_items = [ - Link(name=item.name, section=item) + Link(name=item.name, section=item.reference) for item in self.results - if (item.name, item) not in existing_items + if (item.name, item.reference) not in existing_items ] act_array.extend(new_items) @@ -945,6 +967,8 @@ def init_nexus_metainfo(): # pass nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityStep.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityResult.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, # when the metainfo schema is defined though MSection Python classes. @@ -983,6 +1007,13 @@ def normalize_fabrication(self, archive, logger): current_cls = __section_definitions[ __rename_nx_for_nomad("NXfabrication") ].section_cls + self.name = ( + self.__dict__["nx_name"] + + " (" + + ((self.vendor__field + " / ") if self.vendor__field else "") + + (self.model__field if self.model__field else "") + + ")" + ) super(current_cls, self).normalize(archive, logger) From de2e94a90d228da91e923d4c537ee4ac68807b09 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 30 Jan 2025 20:48:42 +0100 Subject: [PATCH 10/31] make nexus attributes searchable by importing them to NOMAD as Quantities --- src/pynxtools/nomad/parser.py | 39 ++++++++++++++++++++++------------- src/pynxtools/nomad/schema.py | 24 +++++++++++++++------ src/pynxtools/nomad/utils.py | 3 +-- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 6298a0167..138db791e 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -202,12 +202,22 @@ def _populate_data( attr_value = attr_value[0] # so values of non-scalar attribute will not end up in metainfo! 
- attr_name = attr_name + "__attribute" current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) + attribute = attr_value + # TODO: get unit from attribute _units try: if nx_root or nx_parent.tag.endswith("group"): - current.m_set_section_attribute(attr_name, attr_value) + attribute_name = "___" + attr_name + metainfo_def = resolve_variadic_name( + current.m_def.all_properties, attribute_name + ) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap(attribute, attribute_name) + current.m_set(metainfo_def, attribute) + # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) else: parent_html_name = nx_path[-2].get("name") @@ -216,25 +226,26 @@ def _populate_data( metainfo_def = None try: + attribute_name = parent_html_name + "___" + attr_name metainfo_def = resolve_variadic_name( - current.m_def.all_properties, parent_field_name + current.m_def.all_properties, attribute_name + ) + data_instance_name = ( + hdf_node.name.split("/")[-1] + "___" + attr_name ) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap( + attribute, data_instance_name + ) except ValueError as exc: self._logger.warning( - f"{current.m_def} has no suitable property for {parent_field_name}", + f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", target_name=attr_name, exc_info=exc, ) - if parent_field_name in current.__dict__: - quantity = current.__dict__[parent_field_name] - if isinstance(quantity, dict): - quantity = quantity[parent_instance_name] - else: - quantity = None - raise Warning( - "setting attribute attempt before creating quantity" - ) - quantity.m_set_attribute(attr_name, attr_value) + current.m_set(metainfo_def, attribute) + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( "error while setting attribute", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 98dbb6c82..e2e3f1111 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -468,16 +468,19 @@ def __add_common_properties(xml_node: ET.Element, definition: Definition): definition.more["nx_optional"] = __if_base(xml_node) -def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantity]): +def __create_attributes( + xml_node: ET.Element, definition: Union[Section, Quantity], field: Quantity = None +): """ Add all attributes in the given nexus XML node to the given - Quantity or SubSection using the Attribute class (new mechanism). + Quantity or SubSection using a specially named Quantity class. 
todo: account for more attributes of attribute, e.g., default, minOccurs """ for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES): name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True) + shape: list = [] nx_enum = __get_enumeration(attribute) if nx_enum: nx_type = nx_enum @@ -496,8 +499,17 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit else: nx_shape = [] - m_attribute = Attribute( - name=name, variable=__if_template(name), shape=nx_shape, type=nx_type + a_name = (field.more["nx_name"] if field else "") + "___" + name + m_attribute = Quantity( + name=a_name, + variable=__if_template(name) + or (__if_template(field.more["nx_name"]) if field else False), + shape=shape, + type=nx_type, + flexible_unit=True, + ) + m_attribute.more.update( + dict(nx_kind="attribute") # , nx_type=nx_type, nx_shape=nx_shape) ) for name, value in attribute.items(): @@ -505,7 +517,7 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit __add_common_properties(attribute, m_attribute) - definition.attributes.append(m_attribute) + definition.quantities.append(m_attribute) def __add_additional_attributes(definition: Definition): @@ -637,7 +649,7 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity: container.quantities.append(value_quantity) - __create_attributes(xml_node, value_quantity) + __create_attributes(xml_node, container, value_quantity) return value_quantity diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 794a94e60..30916ec1d 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -79,6 +79,5 @@ def __rename_nx_for_nomad( elif is_field: name += "__field" elif is_attribute: - name += "__attribute" - + pass return name From 9c4a36126bb7cdb28c02f27dcf063416181eb59b Mon Sep 17 00:00:00 2001 From: GinzburgLev Date: Tue, 4 Feb 2025 10:42:17 +0100 Subject: [PATCH 11/31] temporary fix for boolean array as signals or axes - show first element as the field value --- src/pynxtools/nomad/parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 138db791e..dfc90a995 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -129,6 +129,9 @@ def _get_value(hdf_node): hdf_value = hdf_node[...] if str(hdf_value.dtype) == "bool": + if isinstance(hdf_value, (list, tuple, np.ndarray)): + # temporary solution for boolean arrays + return bool(hdf_value[0]) return bool(hdf_value) if hdf_value.dtype.kind in "iufc": return hdf_value From 2c4e3f66ca88329a3f66065bed379cf0cc97ba86 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 4 Feb 2025 18:34:22 +0100 Subject: [PATCH 12/31] handling arrays of strings --- src/pynxtools/nomad/parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index dfc90a995..0aa7b0c96 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -136,7 +136,7 @@ def _get_value(hdf_node): if hdf_value.dtype.kind in "iufc": return hdf_value if len(hdf_value.shape) > 0: - return hdf_value.astype(str) + return str([i for i in hdf_value.astype(str)]) return hdf_node[()].decode() @@ -203,6 +203,8 @@ def _populate_data( attr_value = attr_value.tolist() if len(attr_value) == 1: attr_value = attr_value[0] + else: + attr_value = str(attr_value) # so values of non-scalar attribute will not end up in metainfo! 
current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) From 4aabf3beb79ca68422805a36649a7791ba15e589 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 4 Feb 2025 18:47:18 +0100 Subject: [PATCH 13/31] handling attribute if it is an array of numbers --- src/pynxtools/nomad/parser.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 0aa7b0c96..b0aaed7f8 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -200,12 +200,11 @@ def _populate_data( attr_value = hdf_node.attrs[attr_name] if not isinstance(attr_value, str): if isinstance(attr_value, np.ndarray): - attr_value = attr_value.tolist() - if len(attr_value) == 1: - attr_value = attr_value[0] + attr_list = attr_value.tolist() + if len(attr_list) == 1 or attr_value.dtype.kind in "iufc": + attr_value = attr_list[0] else: - attr_value = str(attr_value) - # so values of non-scalar attribute will not end up in metainfo! + attr_value = str(attr_list) current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) From 2d7106b9d099ea490f9044556ca2e415283202e4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 4 Feb 2025 19:39:10 +0100 Subject: [PATCH 14/31] fix for handling bool (arrays) coming from hdf5 --- src/pynxtools/nomad/parser.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index b0aaed7f8..4b9bebe69 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -129,9 +129,8 @@ def _get_value(hdf_node): hdf_value = hdf_node[...] if str(hdf_value.dtype) == "bool": - if isinstance(hdf_value, (list, tuple, np.ndarray)): - # temporary solution for boolean arrays - return bool(hdf_value[0]) + if len(hdf_value.shape) > 0: + return bool(hdf_value.tolist()[0]) return bool(hdf_value) if hdf_value.dtype.kind in "iufc": return hdf_value From dfffd21c9fb9acf78eae7ed523bbe5e362e3c0a5 Mon Sep 17 00:00:00 2001 From: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> Date: Wed, 5 Feb 2025 17:44:00 +0100 Subject: [PATCH 15/31] add array size and ndim as attributes (#537) --- src/pynxtools/nomad/parser.py | 18 ++++++++++-------- src/pynxtools/nomad/schema.py | 19 ++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 4b9bebe69..14971899f 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -272,14 +272,14 @@ def _populate_data( if isinstance(field, np.ndarray) and field.size > 1: mask = np.isfinite(field) if np.any(mask): - field_stats = np.array( - [ - np.mean(field[mask]), - np.var(field[mask]), - np.min(field[mask]), - np.max(field[mask]), - ] - ) + field_stats = [ + np.mean(field[mask]), + np.var(field[mask]), + np.min(field[mask]), + np.max(field[mask]), + np.size(field), + np.ndim(field), + ] field = field_stats[0] if not np.isfinite(field): self._logger.warning( @@ -330,6 +330,8 @@ def _populate_data( field.m_set_attribute("nx_data_var", field_stats[1]) field.m_set_attribute("nx_data_min", field_stats[2]) field.m_set_attribute("nx_data_max", field_stats[3]) + field.m_set_attribute("nx_data_size", field_stats[4]) + field.m_set_attribute("nx_data_ndim", field_stats[5]) except Exception as e: self._logger.warning( "error while setting field", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index e2e3f1111..cbb9a0a5f 100644 --- 
a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -553,12 +553,17 @@ def __add_additional_attributes(definition: Definition): ): return - for nx_array_attr in [ - "nx_data_mean", - "nx_data_var", - "nx_data_min", - "nx_data_max", - ]: + for nx_array_attr, dtype in zip( + [ + "nx_data_mean", + "nx_data_var", + "nx_data_min", + "nx_data_max", + "nx_data_size", + "nx_data_ndim", + ], + [np.float64, np.float64, np.float64, np.float64, np.int32, np.int32], + ): if nx_array_attr in definition.all_attributes: continue definition.attributes.append( @@ -566,7 +571,7 @@ def __add_additional_attributes(definition: Definition): name=nx_array_attr, variable=False, shape=[], - type=np.float64, + type=dtype, description="This is a NeXus template property. " "This attribute holds specific statistics of the NeXus data array.", ) From e2371c44cda455cd3cb7c7d092e50144383cd246 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 6 Feb 2025 18:38:27 +0100 Subject: [PATCH 16/31] field statistics are now searchable quantities themselves --- src/pynxtools/nomad/parser.py | 29 ++++++++----- src/pynxtools/nomad/schema.py | 79 ++++++++++++++++++++--------------- src/pynxtools/nomad/utils.py | 8 ++++ 3 files changed, 72 insertions(+), 44 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 14971899f..1d3725fa0 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,7 +40,7 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, get_quantity_base_name from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad @@ -323,15 +323,24 @@ def _populate_data( field.m_set_attribute("m_nx_data_path", hdf_node.name) field.m_set_attribute("m_nx_data_file", self.nxs_fname) if field_stats is not None: - # TODO _add_additional_attributes function has created these nx_data_* - # attributes speculatively already so if the field_stats is None - # this will cause unpopulated attributes in the GUI - field.m_set_attribute("nx_data_mean", field_stats[0]) - field.m_set_attribute("nx_data_var", field_stats[1]) - field.m_set_attribute("nx_data_min", field_stats[2]) - field.m_set_attribute("nx_data_max", field_stats[3]) - field.m_set_attribute("nx_data_size", field_stats[4]) - field.m_set_attribute("nx_data_ndim", field_stats[5]) + concept_basename = get_quantity_base_name(field.name) + instancename = get_quantity_base_name(data_instance_name) + for suffix, stat in zip( + [ + "__mean", + "__var", + "__min", + "__max", + "__size", + "__dim", + ], + field_stats[1:], + ): + stat_metainfo_def = resolve_variadic_name( + current.m_def.all_properties, concept_basename + suffix + ) + stat = MQuantity.wrap(stat, instancename + suffix) + current.m_set(stat_metainfo_def, stat) except Exception as e: self._logger.warning( "error while setting field", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index cbb9a0a5f..b0333546b 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -82,7 +82,11 @@ from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_for_nomad +from pynxtools.nomad.utils import ( + __REPLACEMENT_FOR_NX, + __rename_nx_for_nomad, + get_quantity_base_name, +) # __URL_REGEXP from # 
https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url @@ -516,11 +520,47 @@ def __create_attributes( m_attribute.more[f"nx_{name}"] = value __add_common_properties(attribute, m_attribute) + # TODO: decide if stats should be made searchable for attributes, too + # __add_quantity_stats(definition,m_attribute) definition.quantities.append(m_attribute) -def __add_additional_attributes(definition: Definition): +def __add_quantity_stats(container: Section, quantity: Quantity): + # TODO We should also check the shape of the quantity and the datatype as + # the statistics are always mapping on float64 even if quantity values are ints + if not quantity.name.endswith("__field") or ( + quantity.type not in [np.float64, np.int64, np.uint64] + and not isinstance(quantity.type, Number) + ): + return + basename = get_quantity_base_name(quantity.name) + print(quantity.name, basename) + for suffix, dtype in zip( + [ + "__mean", + "__var", + "__min", + "__max", + "__size", + "__ndim", + ], + [np.float64, np.float64, None, None, np.int32, np.int32], + ): + print(basename + suffix) + container.quantities.append( + Quantity( + name=basename + suffix, + variable=quantity.variable, + shape=[], + type=dtype if dtype else quantity.type, + description="This is a NeXus template property. " + "This quantity holds specific statistics of the NeXus data array.", + ) + ) + + +def __add_additional_attributes(definition: Definition, container: Section): if "m_nx_data_path" not in definition.attributes: definition.attributes.append( Attribute( @@ -546,36 +586,7 @@ def __add_additional_attributes(definition: Definition): ) if isinstance(definition, Quantity): - # TODO We should also check the shape of the quantity and the datatype as - # the statistics are always mapping on float64 even if quantity values are ints - if definition.type not in [np.float64, np.int64, np.uint64] and not isinstance( - definition.type, Number - ): - return - - for nx_array_attr, dtype in zip( - [ - "nx_data_mean", - "nx_data_var", - "nx_data_min", - "nx_data_max", - "nx_data_size", - "nx_data_ndim", - ], - [np.float64, np.float64, np.float64, np.float64, np.int32, np.int32], - ): - if nx_array_attr in definition.all_attributes: - continue - definition.attributes.append( - Attribute( - name=nx_array_attr, - variable=False, - shape=[], - type=dtype, - description="This is a NeXus template property. 
" - "This attribute holds specific statistics of the NeXus data array.", - ) - ) + __add_quantity_stats(container, definition) def __create_field(xml_node: ET.Element, container: Section) -> Quantity: @@ -1005,9 +1016,9 @@ def init_nexus_metainfo(): for section in sections: if not (str(section).startswith("pynxtools.")): continue - __add_additional_attributes(section) + __add_additional_attributes(section, None) for quantity in section.quantities: - __add_additional_attributes(quantity) + __add_additional_attributes(quantity, section) # We skip the Python code generation for now and offer Python classes as variables # TO DO not necessary right now, could also be done case-by-case by the nexus parser diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 30916ec1d..8ea64ae3d 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -81,3 +81,11 @@ def __rename_nx_for_nomad( elif is_attribute: pass return name + + +def get_quantity_base_name(quantity_name): + return ( + quantity_name[:-7] + if quantity_name.endswith("__field") and quantity_name[-8] != "_" + else quantity_name + ) From 35080798825e0573426b015904206ee2d556ebf3 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 6 Feb 2025 18:59:14 +0100 Subject: [PATCH 17/31] NexusBaseSection for registering NeXUs Group instance names as searchable quantity --- src/pynxtools/nomad/schema.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index b0333546b..06e6aea0a 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -107,6 +107,13 @@ __logger = get_logger(__name__) +class NexusBaseSection(BaseSection): + def normalize(self, archive, logger): + if self.__dict__["nx_name"]: + self.name = self.__dict__["nx_name"] + super().normalize(archive, logger) + + class NexusActivityStep(ActivityStep): reference = Quantity( type=ArchiveSection, @@ -535,7 +542,6 @@ def __add_quantity_stats(container: Section, quantity: Quantity): ): return basename = get_quantity_base_name(quantity.name) - print(quantity.name, basename) for suffix, dtype in zip( [ "__mean", @@ -547,7 +553,6 @@ def __add_quantity_stats(container: Section, quantity: Quantity): ], [np.float64, np.float64, None, None, np.int32, np.int32], ): - print(basename + suffix) container.quantities.append( Quantity( name=basename + suffix, @@ -796,7 +801,7 @@ def __create_class_section(xml_node: ET.Element) -> Section: [NexusMeasurement] if xml_attrs["extends"] == "NXobject" else [] ) else: - nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection]) + nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [NexusBaseSection]) nx_name = __rename_nx_for_nomad(nx_name) class_section: Section = __to_section( @@ -997,6 +1002,7 @@ def init_nexus_metainfo(): nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) nexus_metainfo_package.section_definitions.append(NexusActivityStep.m_def) nexus_metainfo_package.section_definitions.append(NexusActivityResult.m_def) + nexus_metainfo_package.section_definitions.append(NexusBaseSection.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, # when the metainfo schema is defined though MSection Python classes. 
From ebafcf71c8708bca4350e5b6335a789f58f9aa08 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 7 Feb 2025 12:07:37 +0100 Subject: [PATCH 18/31] fix for registering stats --- src/pynxtools/nomad/parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 1d3725fa0..faab54ba7 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,8 +40,9 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, get_quantity_base_name +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad +from pynxtools.nomad.utils import get_quantity_base_name def _to_group_name(nx_node: ET.Element): @@ -332,9 +333,9 @@ def _populate_data( "__min", "__max", "__size", - "__dim", + "__ndim", ], - field_stats[1:], + field_stats, ): stat_metainfo_def = resolve_variadic_name( current.m_def.all_properties, concept_basename + suffix From 27421b6d976cf7481a27f202f63fb48721f402f5 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Wed, 12 Feb 2025 11:06:19 +0100 Subject: [PATCH 19/31] searcable __name for variadic quantities --- src/pynxtools/nomad/parser.py | 19 +++++++++++++++++-- src/pynxtools/nomad/schema.py | 31 ++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index faab54ba7..97cebe12c 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -266,6 +266,7 @@ def _populate_data( metainfo_def = resolve_variadic_name( current.m_def.all_properties, field_name ) + isvariadic = any(char.isupper() for char in metainfo_def.more["nx_name"]) # for data arrays only statistics if not all values NINF, Inf, or NaN field_stats = None @@ -309,6 +310,9 @@ def _populate_data( else: pint_unit = ureg.parse_units("1") field = ureg.Quantity(field, pint_unit) + if field_stats is not None: + for i in range(4): + field_stats[i] = ureg.Quantity(field_stats[i], pint_unit) except (ValueError, UndefinedUnitError): pass @@ -323,25 +327,36 @@ def _populate_data( current.m_set(metainfo_def, field) field.m_set_attribute("m_nx_data_path", hdf_node.name) field.m_set_attribute("m_nx_data_file", self.nxs_fname) + if isvariadic: + concept_basename = get_quantity_base_name(field.name) + instancename = get_quantity_base_name(data_instance_name) + name_metainfo_def = resolve_variadic_name( + current.m_def.all_properties, concept_basename + "__name" + ) + name_value = MQuantity.wrap(instancename, instancename + "__name") + current.m_set(name_metainfo_def, name_value) + name_value.m_set_attribute("m_nx_data_path", hdf_node.name) + name_value.m_set_attribute("m_nx_data_file", self.nxs_fname) if field_stats is not None: concept_basename = get_quantity_base_name(field.name) instancename = get_quantity_base_name(data_instance_name) for suffix, stat in zip( [ - "__mean", "__var", "__min", "__max", "__size", "__ndim", ], - field_stats, + field_stats[1:], ): stat_metainfo_def = resolve_variadic_name( current.m_def.all_properties, concept_basename + suffix ) stat = MQuantity.wrap(stat, instancename + suffix) current.m_set(stat_metainfo_def, stat) + stat.m_set_attribute("m_nx_data_path", hdf_node.name) + stat.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( "error while setting field", diff --git 
a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 06e6aea0a..5b77d4b38 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -527,7 +527,7 @@ def __create_attributes( m_attribute.more[f"nx_{name}"] = value __add_common_properties(attribute, m_attribute) - # TODO: decide if stats should be made searchable for attributes, too + # TODO: decide if stats/instancename should be made searchable for attributes, too # __add_quantity_stats(definition,m_attribute) definition.quantities.append(m_attribute) @@ -536,22 +536,39 @@ def __create_attributes( def __add_quantity_stats(container: Section, quantity: Quantity): # TODO We should also check the shape of the quantity and the datatype as # the statistics are always mapping on float64 even if quantity values are ints - if not quantity.name.endswith("__field") or ( - quantity.type not in [np.float64, np.int64, np.uint64] - and not isinstance(quantity.type, Number) - ): + if not quantity.name.endswith("__field"): + return + isvariadic = any(char.isupper() for char in quantity.more["nx_name"]) + notnumber = quantity.type not in [ + np.float64, + np.int64, + np.uint64, + ] and not isinstance(quantity.type, Number) + if notnumber or not isvariadic: return basename = get_quantity_base_name(quantity.name) + if isvariadic: + container.quantities.append( + Quantity( + name=basename + "__name", + variable=quantity.variable, + shape=[], + type=str, + description="This is a NeXus template property. " + "This quantity holds the instance name of a NeXus Field.", + ) + ) + if notnumber: + return for suffix, dtype in zip( [ - "__mean", "__var", "__min", "__max", "__size", "__ndim", ], - [np.float64, np.float64, None, None, np.int32, np.int32], + [np.float64, None, None, np.int32, np.int32], ): container.quantities.append( Quantity( From cb4fa29f805a7a46c085f63e8af830fcd5264b13 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 13 Feb 2025 12:46:13 +0100 Subject: [PATCH 20/31] bringing statistic definitions to a common place --- src/pynxtools/nomad/parser.py | 21 +++++++-------------- src/pynxtools/nomad/schema.py | 11 +++-------- src/pynxtools/nomad/utils.py | 8 ++++++++ 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 97cebe12c..83fc9c895 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,7 +40,7 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __FIELD_STATISTICS, __REPLACEMENT_FOR_NX from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad from pynxtools.nomad.utils import get_quantity_base_name @@ -275,12 +275,11 @@ def _populate_data( mask = np.isfinite(field) if np.any(mask): field_stats = [ - np.mean(field[mask]), - np.var(field[mask]), - np.min(field[mask]), - np.max(field[mask]), - np.size(field), - np.ndim(field), + func(field[mask] if ismask else field) + for func, ismask in zip( + __FIELD_STATISTICS["function"], + __FIELD_STATISTICS["mask"], + ) ] field = field_stats[0] if not np.isfinite(field): @@ -341,13 +340,7 @@ def _populate_data( concept_basename = get_quantity_base_name(field.name) instancename = get_quantity_base_name(data_instance_name) for suffix, stat in zip( - [ - "__var", - "__min", - "__max", - "__size", - "__ndim", - ], + __FIELD_STATISTICS["suffix"][1:], field_stats[1:], ): stat_metainfo_def = resolve_variadic_name( 
diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5b77d4b38..c990a37f0 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -83,6 +83,7 @@ from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path from pynxtools.nomad.utils import ( + __FIELD_STATISTICS, __REPLACEMENT_FOR_NX, __rename_nx_for_nomad, get_quantity_base_name, @@ -561,14 +562,8 @@ def __add_quantity_stats(container: Section, quantity: Quantity): if notnumber: return for suffix, dtype in zip( - [ - "__var", - "__min", - "__max", - "__size", - "__ndim", - ], - [np.float64, None, None, np.int32, np.int32], + __FIELD_STATISTICS["suffix"][1:], + __FIELD_STATISTICS["type"][1:], ): container.quantities.append( Quantity( diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 8ea64ae3d..79b1a3aae 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -89,3 +89,11 @@ def get_quantity_base_name(quantity_name): if quantity_name.endswith("__field") and quantity_name[-8] != "_" else quantity_name ) + + +__FIELD_STATISTICS = { + "suffix": ["__mean", "__var", "__min", "__max", "__size", "__ndim"], + "function": [np.mean, np.var, np.min, np.max, np.size, np.ndim], + "type": [np.float64, np.float64, None, None, np.int32, np.int32], + "mask": [True, True, True, True, False, False], +} From f6548e9f9a2ed4aff82f16e7460cc04117a2fddd Mon Sep 17 00:00:00 2001 From: sanbrock <45483558+sanbrock@users.noreply.github.com> Date: Thu, 13 Feb 2025 12:56:37 +0100 Subject: [PATCH 21/31] Update src/pynxtools/nomad/parser.py Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- src/pynxtools/nomad/parser.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 83fc9c895..eb4dbfb2c 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -518,13 +518,9 @@ def parse( self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() - mf = mainfile.split("/") # if filename does not follow the pattern # .volumes/fs/////[subdirs?]/ - if len(mf) < 7: - self.nxs_fname = mainfile - else: - self.nxs_fname = "/".join(mf[6:]) + self.nxs_fname = "/".join(mainfile.split("/")[6:]) or mainfile nexus_helper = HandleNexus(logger, mainfile) nexus_helper.process_nexus_master_file(self.__nexus_populate) From 1231d170e31b93b431754f0b6abf365ba1f5396f Mon Sep 17 00:00:00 2001 From: sanbrock <45483558+sanbrock@users.noreply.github.com> Date: Thu, 13 Feb 2025 14:05:49 +0100 Subject: [PATCH 22/31] Update src/pynxtools/nomad/schema.py Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- src/pynxtools/nomad/schema.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index c990a37f0..5b7f39adf 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -955,18 +955,15 @@ def __create_package_from_nxdl_directories() -> Package: package.section_definitions.append(nexus_sections[section_name]) for section in sections: package.section_definitions.append(section) - if section.nx_category == "application": - nexus_sections["_Applications"].sub_sections.append( - SubSection(section_def=section, name=section.name) - ) - elif section.nx_category == "base" and section.nx_name == "NXroot": - 
nexus_sections["_Applications"].sub_sections.append( - SubSection(section_def=section, name=section.name) - ) + if section.nx_category == "application" or section.nx_name == "NXroot": + key = "_Applications" elif section.nx_category == "base": - nexus_sections["_BaseSections"].sub_sections.append( - SubSection(section_def=section, name=section.name) - ) + key = "_BaseSections" + else: + key = None + + if key: + nexus_sections[key].sub_sections.append(SubSection(section_def=section, name=section.name)) for section_name, section in __section_definitions.items(): if "__" in section_name: package.section_definitions.append(section) From 5259fd28dc9ce835715c027c912899fa552f805e Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 13 Feb 2025 14:29:38 +0100 Subject: [PATCH 23/31] use mapping instead of if/elif-s --- src/pynxtools/nomad/schema.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5b7f39adf..6eb83c112 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -159,19 +159,17 @@ def normalize(self, archive, logger): for entry in app_entry: ref = NexusActivityStep(name=entry.name, reference=entry) self.steps.append(ref) + mapping = { + ActivityStep: (NexusActivityStep, self.steps), + basesections.Instrument: (InstrumentReference, self.instruments), + CompositeSystem: (CompositeSystemReference, self.samples), + ActivityResult: (NexusActivityResult, self.results), + } for sec in entry.m_all_contents(): - if isinstance(sec, ActivityStep): - ref = NexusActivityStep(name=sec.name, reference=sec) - self.steps.append(ref) - elif isinstance(sec, basesections.Instrument): - ref = InstrumentReference(name=sec.name, reference=sec) - self.instruments.append(ref) - elif isinstance(sec, CompositeSystem): - ref = CompositeSystemReference(name=sec.name, reference=sec) - self.samples.append(ref) - elif isinstance(sec, ActivityResult): - ref = NexusActivityResult(name=sec.name, reference=sec) - self.results.append(ref) + for cls, (ref_cls, collection) in mapping.items(): + if isinstance(sec, cls): + collection.append(ref_cls(name=sec.name, reference=sec)) + break if self.m_def.name == "Root": self.method = "Generic Experiment" else: @@ -963,7 +961,9 @@ def __create_package_from_nxdl_directories() -> Package: key = None if key: - nexus_sections[key].sub_sections.append(SubSection(section_def=section, name=section.name)) + nexus_sections[key].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) for section_name, section in __section_definitions.items(): if "__" in section_name: package.section_definitions.append(section) From 3f3d09d51ac69cc2961e1c51146c8bc5b89f7c32 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 13 Feb 2025 15:19:59 +0100 Subject: [PATCH 24/31] manage std instead of var --- src/pynxtools/nomad/parser.py | 16 ++++++++++------ src/pynxtools/nomad/utils.py | 6 ++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index eb4dbfb2c..8672d5a27 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,7 +40,8 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import __FIELD_STATISTICS, __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __FIELD_STATISTICS as FIELD_STATISTICS +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX from pynxtools.nomad.utils 
import __rename_nx_for_nomad as rename_nx_for_nomad from pynxtools.nomad.utils import get_quantity_base_name @@ -277,8 +278,8 @@ def _populate_data( field_stats = [ func(field[mask] if ismask else field) for func, ismask in zip( - __FIELD_STATISTICS["function"], - __FIELD_STATISTICS["mask"], + FIELD_STATISTICS["function"], + FIELD_STATISTICS["mask"], ) ] field = field_stats[0] @@ -310,8 +311,11 @@ def _populate_data( pint_unit = ureg.parse_units("1") field = ureg.Quantity(field, pint_unit) if field_stats is not None: - for i in range(4): - field_stats[i] = ureg.Quantity(field_stats[i], pint_unit) + for i in range(len(field_stats)): + if FIELD_STATISTICS["mask"][i]: + field_stats[i] = ureg.Quantity( + field_stats[i], pint_unit + ) except (ValueError, UndefinedUnitError): pass @@ -340,7 +344,7 @@ def _populate_data( concept_basename = get_quantity_base_name(field.name) instancename = get_quantity_base_name(data_instance_name) for suffix, stat in zip( - __FIELD_STATISTICS["suffix"][1:], + FIELD_STATISTICS["suffix"][1:], field_stats[1:], ): stat_metainfo_def = resolve_variadic_name( diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 79b1a3aae..3e25da7c5 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -18,6 +18,8 @@ from typing import Optional +import numpy as np + __REPLACEMENT_FOR_NX = "" # This is a list of NeXus group names that are not allowed because they are defined as quantities in the BaseSection class. @@ -92,8 +94,8 @@ def get_quantity_base_name(quantity_name): __FIELD_STATISTICS = { - "suffix": ["__mean", "__var", "__min", "__max", "__size", "__ndim"], - "function": [np.mean, np.var, np.min, np.max, np.size, np.ndim], + "suffix": ["__mean", "__std", "__min", "__max", "__size", "__ndim"], + "function": [np.mean, np.std, np.min, np.max, np.size, np.ndim], "type": [np.float64, np.float64, None, None, np.int32, np.int32], "mask": [True, True, True, True, False, False], } From 81218f894a3d64c52fd4fc45293c5b167a1b583b Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:07:23 +0100 Subject: [PATCH 25/31] fix for ganareting __name/stat quantities --- src/pynxtools/nomad/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 6eb83c112..cb39d9aa6 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -543,7 +543,7 @@ def __add_quantity_stats(container: Section, quantity: Quantity): np.int64, np.uint64, ] and not isinstance(quantity.type, Number) - if notnumber or not isvariadic: + if notnumber and not isvariadic: return basename = get_quantity_base_name(quantity.name) if isvariadic: From 1d6c2cc1d1603d55c3f60e37543538b309898ea2 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:08:01 +0100 Subject: [PATCH 26/31] fix for handling attributes --- src/pynxtools/nomad/parser.py | 85 ++++++++++++++++------------------- 1 file changed, 38 insertions(+), 47 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 8672d5a27..1718251cd 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -27,7 +27,7 @@ from nomad.datamodel import EntryArchive, EntryMetadata from nomad.datamodel.data import EntryData from nomad.datamodel.results import Material, Results - from nomad.metainfo import MSection + from nomad.metainfo import MEnum, MSection from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name from nomad.parsing 
import MatchingParser from nomad.units import ureg @@ -199,61 +199,52 @@ def _populate_data( attr_name = nx_attr.get("name") # could be 1D array, float or int attr_value = hdf_node.attrs[attr_name] - if not isinstance(attr_value, str): - if isinstance(attr_value, np.ndarray): - attr_list = attr_value.tolist() - if len(attr_list) == 1 or attr_value.dtype.kind in "iufc": - attr_value = attr_list[0] - else: - attr_value = str(attr_list) - current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) - - attribute = attr_value - # TODO: get unit from attribute _units try: if nx_root or nx_parent.tag.endswith("group"): - attribute_name = "___" + attr_name + parent_html_name = "" + parent_name = "" + parent_field_name = "" + else: + parent_html_name = nx_path[-2].get("name") + parent_name = hdf_node.name.split("/")[-1] + parent_field_name = parent_html_name + "__field" + attribute_name = parent_html_name + "___" + attr_name + data_instance_name = parent_name + "___" + attr_name + metainfo_def = None + try: metainfo_def = resolve_variadic_name( current.m_def.all_properties, attribute_name ) + attribute = attr_value + # TODO: get unit from attribute _units + if isinstance(metainfo_def.type, MEnum): + attribute = str(attr_value) + elif not isinstance(attr_value, str): + if isinstance(attr_value, np.ndarray): + attr_list = attr_value.tolist() + if ( + len(attr_list) == 1 + or attr_value.dtype.kind in "iufc" + ): + attribute = attr_list[0] + else: + attribute = str(attr_list) if metainfo_def.use_full_storage: - attribute = MQuantity.wrap(attribute, attribute_name) - current.m_set(metainfo_def, attribute) - # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name - attribute.m_set_attribute("m_nx_data_path", hdf_node.name) - attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) - else: - parent_html_name = nx_path[-2].get("name") - - parent_instance_name = hdf_node.name.split("/")[-1] + "__field" - parent_field_name = parent_html_name + "__field" - - metainfo_def = None - try: - attribute_name = parent_html_name + "___" + attr_name - metainfo_def = resolve_variadic_name( - current.m_def.all_properties, attribute_name - ) - data_instance_name = ( - hdf_node.name.split("/")[-1] + "___" + attr_name - ) - if metainfo_def.use_full_storage: - attribute = MQuantity.wrap( - attribute, data_instance_name - ) - except ValueError as exc: - self._logger.warning( - f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", - target_name=attr_name, - exc_info=exc, - ) - current.m_set(metainfo_def, attribute) - attribute.m_set_attribute("m_nx_data_path", hdf_node.name) - attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) + attribute = MQuantity.wrap(attribute, data_instance_name) + except ValueError as exc: + self._logger.warning( + f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", + target_name=attr_name, + exc_info=exc, + ) + current.m_set(metainfo_def, attribute) + # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( - "error while setting attribute", + f"error while setting attribute {data_instance_name} in {current.m_def} as {metainfo_def}", target_name=attr_name, exc_info=e, ) From 
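The reworked attribute handling in PATCH 26 above routes group- and field-level attributes through a single path built from parent___attribute names. The sketch below reproduces only the naming and value-coercion rules; NOMAD's resolve_variadic_name and MQuantity.wrap are deliberately left out, and the first-element collapse for numeric arrays mirrors the patch as written:

import numpy as np

def normalize_attr_value(attr_value, is_enum=False):
    """Value coercion as in the patched hunk: enums become strings,
    one-element or numeric arrays collapse to a scalar, other arrays
    are stringified."""
    if is_enum:
        return str(attr_value)
    if isinstance(attr_value, str):
        return attr_value
    if isinstance(attr_value, np.ndarray):
        attr_list = attr_value.tolist()
        if len(attr_list) == 1 or attr_value.dtype.kind in "iufc":
            return attr_list[0]
        return str(attr_list)
    return attr_value

def attribute_names(parent_html_name, parent_name, attr_name):
    """Schema-level and instance-level names used by the patch; for
    attributes directly on a group both parent parts are empty strings."""
    attribute_name = parent_html_name + "___" + attr_name
    data_instance_name = parent_name + "___" + attr_name
    return attribute_name, data_instance_name

assert normalize_attr_value(np.array([3.14])) == 3.14
assert normalize_attr_value(np.array(["a", "b"])) == "['a', 'b']"
assert attribute_names("", "", "axes") == ("___axes", "___axes")
assert attribute_names("AXISNAME", "energies", "units") == (
    "AXISNAME___units",
    "energies___units",
)

Because the parent parts are empty for group-level attributes, the resulting quantity names start with a triple underscore, which is why the later test_parsing.py change asserts on data.___axes.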
04bd30a02df085275884cc911ae95ceeaaf08eda Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:08:45 +0100 Subject: [PATCH 27/31] ruffing --- src/pynxtools/dataconverter/helpers.py | 8 +++----- .../dataconverter/readers/example/reader.py | 18 ++++++------------ .../dataconverter/readers/json_map/reader.py | 2 +- src/pynxtools/nexus/nexus.py | 4 ++-- src/pynxtools/testing/nexus_conversion.py | 6 +++--- src/pynxtools/testing/nomad_example.py | 6 +++--- tests/dataconverter/test_helpers.py | 4 ++-- tests/nexus/test_nexus.py | 12 ++++++------ 8 files changed, 26 insertions(+), 34 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 768fdbed0..eb588d675 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -80,13 +80,11 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if log_type == ValidationProblem.UnitWithoutDocumentation: logger.warning( - f"The unit, {path} = {value}, " - "is being written but has no documentation" + f"The unit, {path} = {value}, is being written but has no documentation" ) elif log_type == ValidationProblem.InvalidEnum: logger.warning( - f"The value at {path} should be on of the " - f"following strings: {value}" + f"The value at {path} should be on of the following strings: {value}" ) elif log_type == ValidationProblem.MissingRequiredGroup: logger.warning(f"The required group, {path}, hasn't been supplied.") @@ -344,7 +342,7 @@ def get_all_defined_required_children_for_elem(xml_element): list_of_children_to_add.add(f"{name_to_add}/@units") elif tag == "group": nxdlpath = ( - f'{xml_element.get("nxdlpath")}/{get_nxdl_name_from_elem(child)}' + f"{xml_element.get('nxdlpath')}/{get_nxdl_name_from_elem(child)}" ) nxdlbase = xml_element.get("nxdlbase") nx_name = nxdlbase[nxdlbase.rfind("/") + 1 : nxdlbase.rfind(".nxdl")] diff --git a/src/pynxtools/dataconverter/readers/example/reader.py b/src/pynxtools/dataconverter/readers/example/reader.py index 3e3fd09af..fefe37f5c 100644 --- a/src/pynxtools/dataconverter/readers/example/reader.py +++ b/src/pynxtools/dataconverter/readers/example/reader.py @@ -106,22 +106,16 @@ def read( # virtual datasets slicing my_path = str(f"{os.path.dirname(__file__)}/../../../data/") - template[("/ENTRY[entry]" "/test_virtual" "_dataset/sliced" "_dataset")] = { - "link": ( - f"{my_path}/xarray_saved_small_" "calibration.h5:/binned/BinnedData" - ), + template[("/ENTRY[entry]/test_virtual_dataset/sliced_dataset")] = { + "link": (f"{my_path}/xarray_saved_small_calibration.h5:/binned/BinnedData"), "shape": np.index_exp[:, 1, :, :], } - template[("/ENTRY[entry]" "/test_virtual" "_dataset/slic" "ed_dataset2")] = { - "link": ( - f"{my_path}/xarray_saved_small" "_calibration.h5:/binned/BinnedData" - ), + template[("/ENTRY[entry]/test_virtual_dataset/sliced_dataset2")] = { + "link": (f"{my_path}/xarray_saved_small_calibration.h5:/binned/BinnedData"), "shape": np.index_exp[:, :, :, 1], } - template[("/ENTRY[entry]" "/test_virtual" "_dataset/slic" "ed_dataset3")] = { - "link": ( - f"{my_path}/xarray_saved_small" "_calibration.h5:/binned/BinnedData" - ), + template[("/ENTRY[entry]/test_virtual_dataset/sliced_dataset3")] = { + "link": (f"{my_path}/xarray_saved_small_calibration.h5:/binned/BinnedData"), "shape": np.index_exp[:, :, :, 2:4], } diff --git a/src/pynxtools/dataconverter/readers/json_map/reader.py b/src/pynxtools/dataconverter/readers/json_map/reader.py index aa8664df3..7d0b2bd8d 100644 --- 
a/src/pynxtools/dataconverter/readers/json_map/reader.py +++ b/src/pynxtools/dataconverter/readers/json_map/reader.py @@ -56,7 +56,7 @@ def get_val_nested_keystring_from_dict(keystring, data): return data[current_key].values if isinstance(data[current_key], xarray.core.dataset.Dataset): raise NotImplementedError( - "Xarray datasets are not supported. " "You can only use xarray dataarrays." + "Xarray datasets are not supported. You can only use xarray dataarrays." ) return data[current_key] diff --git a/src/pynxtools/nexus/nexus.py b/src/pynxtools/nexus/nexus.py index fec2eda66..ee106c7d6 100644 --- a/src/pynxtools/nexus/nexus.py +++ b/src/pynxtools/nexus/nexus.py @@ -439,7 +439,7 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True): if len(hdf_node.shape) <= 1 else str(decode_if_string(hdf_node[0])).split("\n") ) - logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') + logger.debug(f"value: {val[0]} {'...' if len(val) > 1 else ''}") else: logger.debug( f"===== GROUP (/{hdf_path} " @@ -460,7 +460,7 @@ def process_node(hdf_node, hdf_path, parser, logger, doc=True): for key, value in hdf_node.attrs.items(): logger.debug(f"===== ATTRS (/{hdf_path}@{key})") val = str(decode_if_string(value)).split("\n") - logger.debug(f'value: {val[0]} {"..." if len(val) > 1 else ""}') + logger.debug(f"value: {val[0]} {'...' if len(val) > 1 else ''}") (req_str, nxdef, nxdl_path) = get_nxdl_doc(hdf_info, logger, doc, attr=key) if ( parser is not None diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index ea33ac946..ffe0e98f2 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -103,9 +103,9 @@ def convert_to_nexus( """ Test the example data for the reader plugin. 
""" - assert hasattr( - self.reader, "supported_nxdls" - ), f"Reader{self.reader} must have supported_nxdls attribute" + assert hasattr(self.reader, "supported_nxdls"), ( + f"Reader{self.reader} must have supported_nxdls attribute" + ) assert callable(self.reader.read), f"Reader{self.reader} must have read method" if isinstance(self.files_or_dir, (list, tuple)): diff --git a/src/pynxtools/testing/nomad_example.py b/src/pynxtools/testing/nomad_example.py index 59ae61998..9dd23f7e8 100644 --- a/src/pynxtools/testing/nomad_example.py +++ b/src/pynxtools/testing/nomad_example.py @@ -124,6 +124,6 @@ def example_upload_entry_point_valid( os.path.abspath(os.path.join(dirpath, filename)) ) - assert ( - sorted(real_upload_files) == sorted(expected_upload_files) - ), f"Uploaded files {real_upload_files} do not match the expected files: {expected_upload_files}" + assert sorted(real_upload_files) == sorted(expected_upload_files), ( + f"Uploaded files {real_upload_files} do not match the expected files: {expected_upload_files}" + ) diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index b8fed848d..5e1a99a83 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -172,13 +172,13 @@ def fixture_filled_test_data(template, tmp_path): template["/ENTRY[my_entry]/program_name"] = "Testing program" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/type"] = "2nd type" template["/ENTRY[my_entry]/NXODD_name[nxodd_name]/date_value"] = ( - "2022-01-22T12" ":14:12.05018+00:00" + "2022-01-22T12:14:12.05018+00:00" ) template["/ENTRY[my_entry]/required_group/description"] = "An example description" template["/ENTRY[my_entry]/required_group2/description"] = "An example description" template["/ENTRY[my_entry]/does/not/exist"] = "random" template["/ENTRY[my_entry]/links/ext_link"] = { - "link": f"{tmp_path}/" f"xarray_saved_small_cali" f"bration.h5:/axes/ax3" + "link": f"{tmp_path}/xarray_saved_small_calibration.h5:/axes/ax3" } return template diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 6656ee666..5965f5b73 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -124,12 +124,12 @@ def test_decode_if_string(string_obj, decode, expected): # Handle np.ndarray outputs if isinstance(expected, np.ndarray): - assert isinstance( - result, np.ndarray - ), f"Expected ndarray, but got {type(result)}" - assert ( - result == expected - ).all(), f"Failed for {string_obj} with decode={decode}" + assert isinstance(result, np.ndarray), ( + f"Expected ndarray, but got {type(result)}" + ) + assert (result == expected).all(), ( + f"Failed for {string_obj} with decode={decode}" + ) # Handle list outputs elif isinstance(expected, list): assert isinstance(result, list), f"Expected list, but got {type(result)}" From ed34b3df4a315b981ae383d8aa38b911eea7d2cf Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:28:55 +0100 Subject: [PATCH 28/31] ruffing --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index fcdb87d12..168170419 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -181,7 +181,7 @@ regex==2024.11.6 # via mkdocs-material requests==2.32.3 # via mkdocs-material -ruff==0.8.2 +ruff==0.9.6 # via pynxtools (pyproject.toml) scipy==1.14.1 # via ase From 6073efff0710c9cc20649ea50072a2fdcbbaad00 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 13:58:51 +0100 Subject: [PATCH 29/31] linting --- 
src/pynxtools/nomad/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 3e25da7c5..2174d7c47 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -16,7 +16,7 @@ # limitations under the License. # -from typing import Optional +from typing import Dict, Optional import numpy as np @@ -93,7 +93,7 @@ def get_quantity_base_name(quantity_name): ) -__FIELD_STATISTICS = { +__FIELD_STATISTICS: Dict[str, list] = { "suffix": ["__mean", "__std", "__min", "__max", "__size", "__ndim"], "function": [np.mean, np.std, np.min, np.max, np.size, np.ndim], "type": [np.float64, np.float64, None, None, np.int32, np.int32], From 8879fd9806d5d79921b1614a3d30a59f6422469f Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 16:07:45 +0100 Subject: [PATCH 30/31] fix for tests --- tests/nomad/test_parsing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index b1dda4bb3..e050448a7 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -72,12 +72,10 @@ def test_nexus_example(): assert data.energies__field.check("eV") # manual name resolution assert data.AXISNAME__field["angles__field"] is not None - assert ( - data.AXISNAME__field["angles__field"].attributes["nx_data_max"] - == 2.168025463513032 - ) + assert data.AXISNAME__max["angles__max"].value == 2.168025463513032 assert (1 * data.AXISNAME__field["angles__field"].unit).check("1/Å") assert (1 * data.AXISNAME__field["delays__field"].unit).check("fs") + assert data.___axes == "['angles', 'energies', 'delays']" def test_same_name_field_and_group(): From 0ff536fde76309c7d3626c6fa63537d77de9fe84 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 14 Feb 2025 16:31:12 +0100 Subject: [PATCH 31/31] adjust to nomad's python requirement --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index b69d94773..0bd4e1fce 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -34,7 +34,7 @@ jobs: curl -LsSf https://astral.sh/uv/install.sh | sh uv pip install coverage coveralls - name: Install nomad - if: "${{ matrix.python_version != '3.8'}}" + if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.9'}}" run: | uv pip install nomad-lab[infrastructure]@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git - name: Install pynx