From 7455a8965d7c3ea73cb0429817ea1b3d5c2450df Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 8 Apr 2025 15:25:51 +0200 Subject: [PATCH 001/118] unpin pint --- dev-requirements.txt | 44 ++++++++++++++++++++++++++------------------ pyproject.toml | 2 +- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 261e7d34c..661946cf0 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -30,7 +30,7 @@ colorama==0.4.6 # pytest contourpy==1.3.1 # via matplotlib -coverage==7.7.1 +coverage==7.8.0 # via pytest-cov cycler==0.12.1 # via matplotlib @@ -38,7 +38,11 @@ distlib==0.3.9 # via virtualenv filelock==3.18.0 # via virtualenv -fonttools==4.56.0 +flexcache==0.3 + # via pint +flexparser==0.4 + # via pint +fonttools==4.57.0 # via matplotlib ghp-import==2.1.0 # via mkdocs @@ -63,7 +67,7 @@ jinja2==3.1.6 # mkdocs-material kiwisolver==1.4.8 # via matplotlib -lxml==5.3.1 +lxml==5.3.2 # via pynxtools (pyproject.toml) markdown==3.7 # via @@ -90,13 +94,13 @@ mkdocs==1.6.1 # pynxtools (pyproject.toml) # mkdocs-macros-plugin # mkdocs-material -mkdocs-click==0.8.1 +mkdocs-click==0.9.0 # via pynxtools (pyproject.toml) mkdocs-get-deps==0.2.0 # via mkdocs mkdocs-macros-plugin==1.3.7 # via pynxtools (pyproject.toml) -mkdocs-material==9.6.9 +mkdocs-material==9.6.11 # via pynxtools (pyproject.toml) mkdocs-material-extensions==1.3.1 # via @@ -123,7 +127,6 @@ packaging==24.2 # matplotlib # mkdocs # mkdocs-macros-plugin - # pint # pytest # xarray paginate==0.5.7 @@ -138,11 +141,12 @@ pathspec==0.12.1 # mkdocs-macros-plugin pillow==11.1.0 # via matplotlib -pint==0.17 +pint==0.24.4 # via pynxtools (pyproject.toml) platformdirs==4.3.7 # via # mkdocs-get-deps + # pint # virtualenv pluggy==1.5.0 # via pytest @@ -159,7 +163,7 @@ pytest==8.3.5 # pynxtools (pyproject.toml) # pytest-cov # pytest-timeout -pytest-cov==6.0.0 +pytest-cov==6.1.1 # via pynxtools (pyproject.toml) pytest-timeout==2.3.1 # via pynxtools (pyproject.toml) @@ -184,7 +188,7 @@ pyyaml-env-tag==0.1 # via mkdocs requests==2.32.3 # via mkdocs-material -ruff==0.11.2 +ruff==0.11.4 # via pynxtools (pyproject.toml) scipy==1.15.2 # via ase @@ -196,33 +200,37 @@ structlog==25.2.0 # via pynxtools (pyproject.toml) super-collections==0.5.3 # via mkdocs-macros-plugin -termcolor==2.5.0 +termcolor==3.0.1 # via mkdocs-macros-plugin tomli==2.2.1 ; python_full_version <= '3.11' # via coverage toposort==1.10 # via pynxtools (pyproject.toml) -types-pytz==2025.1.0.20250318 +types-pytz==2025.2.0.20250326 # via pynxtools (pyproject.toml) -types-pyyaml==6.0.12.20241230 +types-pyyaml==6.0.12.20250402 # via pynxtools (pyproject.toml) -types-requests==2.32.0.20250306 +types-requests==2.32.0.20250328 # via pynxtools (pyproject.toml) -typing-extensions==4.12.2 - # via mypy +typing-extensions==4.13.1 + # via + # flexcache + # flexparser + # mypy + # pint tzdata==2025.2 # via pandas urllib3==2.3.0 # via # requests # types-requests -uv==0.6.9 +uv==0.6.13 # via pynxtools (pyproject.toml) -virtualenv==20.29.3 +virtualenv==20.30.0 # via pre-commit watchdog==6.0.0 # via mkdocs -xarray==2025.3.0 +xarray==2025.3.1 # via pynxtools (pyproject.toml) zipp==3.21.0 # via importlib-metadata diff --git a/pyproject.toml b/pyproject.toml index a988dda8a..77faf6617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "lxml>=4.9.1", "toposort>=1.10.0", "anytree", - "pint==0.17", + "pint", ] [project.urls] From e4c760555b4b1a0af9d198f112faf9bb32e86fcb Mon 
Sep 17 00:00:00 2001 From: sanbrock <45483558+sanbrock@users.noreply.github.com> Date: Tue, 8 Apr 2025 16:06:26 +0200 Subject: [PATCH 002/118] Export (#619) * export feature * adjusted example * fix eln * fixing the name * update CITATION --------- Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- CITATION.cff | 2 +- src/pynxtools/nomad/examples/IV_temp.schema.archive.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index b9aa10740..9d32d6876 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ message: If you use this software, please cite it using the metadata from this file. type: software -version: 0.10.3 +version: 0.10.4 authors: - given-names: Sherjeel family-names: Shabih diff --git a/src/pynxtools/nomad/examples/IV_temp.schema.archive.yaml b/src/pynxtools/nomad/examples/IV_temp.schema.archive.yaml index fa2ae2472..01b5f8b63 100644 --- a/src/pynxtools/nomad/examples/IV_temp.schema.archive.yaml +++ b/src/pynxtools/nomad/examples/IV_temp.schema.archive.yaml @@ -1,7 +1,7 @@ definitions: name: "IV Temp ELN Example" sections: - ELN_for_iv_temp: + ELN for iv_temp: base_sections: - "pynxtools.nomad.dataconverter.NexusDataConverter" - "nomad.datamodel.data.EntryData" From 89a0bbab7d3bd27af5d313b13624aae352bcc252 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 10 Apr 2025 08:47:09 +0200 Subject: [PATCH 003/118] update definitions with latest mpes fixes --- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 98b028242..297e81c81 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 98b02824248d9f3544b1e2623bba8abfb3de97cc +Subproject commit 297e81c8188434403750f7e1cb98fbbfec6c5d8f diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 8a3624b86..7f39775bc 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1923-g98b02824 \ No newline at end of file +v2024.02-1926-g297e81c8 \ No newline at end of file From ed0cd20735436e79adb769ab0dcf58bd2f575083 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 17 Apr 2025 16:13:16 +0200 Subject: [PATCH 004/118] update definitions --- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- src/pynxtools/nomad/schema.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 297e81c81..c530a6eb3 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 297e81c8188434403750f7e1cb98fbbfec6c5d8f +Subproject commit c530a6eb38da3c15c4696251a16600ef22f855cd diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 7f39775bc..34a169bc2 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1926-g297e81c8 \ No newline at end of file +v2024.02-1958-gc530a6eb \ No newline at end of file diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index bbe33a401..8503e78b2 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -914,7 +914,7 @@ def _create_class_section(xml_node: ET.Element) -> Section: if nx_category == "application" or (nx_category == 
"base" and nx_name == "NXroot"): nomad_base_sec_cls = ( - [NexusMeasurement] if xml_attrs["extends"] == "NXobject" else [] + [NexusMeasurement] if xml_attrs.get("extends") == "NXobject" else [] ) else: nomad_base_sec_cls = BASESECTIONS_MAP.get(nx_name, [NexusBaseSection]) From 8ca456e5cc434ce26afdc3497be6df7e89a0a4ed Mon Sep 17 00:00:00 2001 From: Laurenz Rettig Date: Thu, 17 Apr 2025 20:42:28 +0000 Subject: [PATCH 005/118] add base sections also for NXroot --- src/pynxtools/nomad/schema.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 8503e78b2..a748fe812 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -914,7 +914,9 @@ def _create_class_section(xml_node: ET.Element) -> Section: if nx_category == "application" or (nx_category == "base" and nx_name == "NXroot"): nomad_base_sec_cls = ( - [NexusMeasurement] if xml_attrs.get("extends") == "NXobject" else [] + [NexusMeasurement] + if xml_attrs.get("extends") == "NXobject" or nx_name == "NXroot" + else [] ) else: nomad_base_sec_cls = BASESECTIONS_MAP.get(nx_name, [NexusBaseSection]) @@ -929,7 +931,10 @@ def _create_class_section(xml_node: ET.Element) -> Section: class_section.base_sections = [nx_base_sec] + [ cls.m_def for cls in nomad_base_sec_cls ] - elif _rename_nx_for_nomad(nx_name) == "Object": + elif ( + _rename_nx_for_nomad(nx_name) == "Object" + or _rename_nx_for_nomad(nx_name) == "Root" + ): class_section.base_sections = [cls.m_def for cls in nomad_base_sec_cls] _add_common_properties(xml_node, class_section) From 1741b82b98f6976bc0ba288975ca31f1ae559ce1 Mon Sep 17 00:00:00 2001 From: Laurenz Rettig Date: Thu, 17 Apr 2025 20:45:51 +0000 Subject: [PATCH 006/118] clean up variable names --- src/pynxtools/nomad/schema.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index a748fe812..d4e1d3ce7 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -921,20 +921,15 @@ def _create_class_section(xml_node: ET.Element) -> Section: else: nomad_base_sec_cls = BASESECTIONS_MAP.get(nx_name, [NexusBaseSection]) - nx_name = _rename_nx_for_nomad(nx_name) - class_section: Section = to_section( - nx_name, nx_kind=nx_type, nx_category=nx_category - ) + name = _rename_nx_for_nomad(nx_name) + class_section: Section = to_section(name, nx_kind=nx_type, nx_category=nx_category) if "extends" in xml_attrs: nx_base_sec = to_section(_rename_nx_for_nomad(xml_attrs["extends"])) class_section.base_sections = [nx_base_sec] + [ cls.m_def for cls in nomad_base_sec_cls ] - elif ( - _rename_nx_for_nomad(nx_name) == "Object" - or _rename_nx_for_nomad(nx_name) == "Root" - ): + elif name == "Object" or name == "Root": class_section.base_sections = [cls.m_def for cls in nomad_base_sec_cls] _add_common_properties(xml_node, class_section) From 743a938b128d83acf30e583463bb38c9e97532c6 Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 17 Apr 2025 23:14:39 +0200 Subject: [PATCH 007/118] update test branch --- .github/workflows/plugin_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 5a8654912..d2ebb3348 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -33,7 +33,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-mpes - branch: main + branch: update-defs tests_to_run: tests/. 
- plugin: pynxtools-raman branch: main From c3c90fbe07296e1fcb1ad7d5bd0c298172645f7a Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Apr 2025 10:58:21 +0200 Subject: [PATCH 008/118] use special pynx-xps branch --- .github/workflows/plugin_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index d2ebb3348..9e4e8bd11 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -42,7 +42,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-xps - branch: main + branch: config-fixes tests_to_run: tests/. - plugin: pynxtools-xrd branch: main From c7ca631f78411ee9ff379c4fa308f7c132bffb74 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Apr 2025 13:16:04 +0200 Subject: [PATCH 009/118] solve issue with attributes that are lists in writer --- src/pynxtools/dataconverter/writer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py index 2125a9390..f2a289503 100644 --- a/src/pynxtools/dataconverter/writer.py +++ b/src/pynxtools/dataconverter/writer.py @@ -230,10 +230,12 @@ def __nxdl_to_attrs(self, path: str = "/") -> dict: del elem.attrib["name"] # Fetch values for required attributes requested by the NXDL - for attr_name in elem.findall(f"{self.nxs_namespace}attribute"): - key = f"{path}/@{attr_name.get('name')}" + for attr in elem.findall(f"{self.nxs_namespace}attribute"): + name = attr.get("name") + key = f"{path}/@{name}" if key in self.data: - elem.attrib[attr_name.get("name")] = self.data[key] + value = self.data[key] + elem.attrib[name] = str(value) if isinstance(value, list) else value return elem.attrib From a9fe6e96d18de5d2148069b7068d9c73c428ed74 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Apr 2025 13:16:45 +0200 Subject: [PATCH 010/118] use specific definitions branch for now --- .gitmodules | 3 ++- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 71907ead7..e476a3705 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,4 @@ [submodule "src/pynxtools/definitions"] path = src/pynxtools/definitions - url = https://github.com/FAIRmat-NFDI/nexus_definitions.git \ No newline at end of file + url = https://github.com/FAIRmat-NFDI/nexus_definitions.git + branch = reference-in-xps \ No newline at end of file diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index c530a6eb3..b456e48a2 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit c530a6eb38da3c15c4696251a16600ef22f855cd +Subproject commit b456e48a24b94b137c1afc1ca9c3e3a48bb7cda4 diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 34a169bc2..eae0ff576 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1958-gc530a6eb \ No newline at end of file +v2024.02-1959-gb456e48a \ No newline at end of file From a630aec7bb9761741eec565bf0e8745143cc4226 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Apr 2025 15:22:39 +0200 Subject: [PATCH 011/118] reset definition to fairmat branch --- .gitmodules | 3 +-- 
src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index e476a3705..71907ead7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,3 @@ [submodule "src/pynxtools/definitions"] path = src/pynxtools/definitions - url = https://github.com/FAIRmat-NFDI/nexus_definitions.git - branch = reference-in-xps \ No newline at end of file + url = https://github.com/FAIRmat-NFDI/nexus_definitions.git \ No newline at end of file diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index b456e48a2..526d415a5 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit b456e48a24b94b137c1afc1ca9c3e3a48bb7cda4 +Subproject commit 526d415a5a802dae537eeb71a8d3c479a67649ea diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index eae0ff576..9f767249f 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1959-gb456e48a \ No newline at end of file +v2024.02-1960-g526d415a \ No newline at end of file From 32f5a85688aa3b550ba5310be13b1ef1aeffc0cc Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Apr 2025 15:25:23 +0200 Subject: [PATCH 012/118] reset plugin branches --- .github/workflows/plugin_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 9e4e8bd11..5a8654912 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -33,7 +33,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-mpes - branch: update-defs + branch: main tests_to_run: tests/. - plugin: pynxtools-raman branch: main @@ -42,7 +42,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-xps - branch: config-fixes + branch: main tests_to_run: tests/. 
- plugin: pynxtools-xrd branch: main From 749e635e7a196a1071d1b1dea8a344a0e26fccb1 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 23 Apr 2025 13:29:05 +0200 Subject: [PATCH 013/118] update definitions --- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 526d415a5..06c677c6e 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 526d415a5a802dae537eeb71a8d3c479a67649ea +Subproject commit 06c677c6e6d3496668a2a0905faadae88a7c59d0 diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 9f767249f..db45126b5 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1960-g526d415a \ No newline at end of file +v2024.02-1962-g06c677c6 \ No newline at end of file From d87a715b2fd81949150c920171053a3072469316 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 23 Apr 2025 13:45:23 +0200 Subject: [PATCH 014/118] hardcode that target/reference attr are always allowed --- src/pynxtools/dataconverter/validation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 28a45d875..098f0a50f 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -828,6 +828,11 @@ def startswith_with_variations( keys_to_remove = check_attributes_of_nonexisting_field(tree) for not_visited_key in not_visited: + # TODO: remove again if "@target"/"@reference" is sorted out by NIAC + always_allowed_attributes = ("@target", "@reference") + if not_visited_key.endswith(always_allowed_attributes): + # If we want to support this in the future, we could check that the targeted field exists. 
+ continue if not_visited_key.endswith("/@units"): # check that parent exists if not_visited_key.rsplit("/", 1)[0] not in mapping.keys(): From f6726f393246f8498bdd529ada76af5e821047f9 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 9 Apr 2025 18:00:19 +0200 Subject: [PATCH 015/118] build first idea for proper inheritance of fields and attributes --- src/pynxtools/dataconverter/nexus_tree.py | 128 +++++++++++++++++++++- src/pynxtools/dataconverter/validation.py | 1 + tests/dataconverter/test_validation.py | 31 +++--- 3 files changed, 143 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 79ae5cf68..f432eba78 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -41,6 +41,7 @@ is_variadic, is_appdef, remove_namespace_from_tag, + NEXUS_TO_PYTHON_DATA_TYPES ) from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( get_nx_namefit, @@ -214,6 +215,121 @@ def __init__( self.is_a = [] self.parent_of = [] + def _check_compatibility_with(self, xml_elem: ET._Element) -> bool: + """Check compatibility of this node with an XML element from the (possible) inheritance""" + + def _check_name_fit(xml_elem: ET._Element) -> bool: + elem_name = xml_elem.attrib.get("name") + name_any = is_name_type(xml_elem, "any") + name_partial = is_name_type(xml_elem, "partial") + + if get_nx_namefit(self.name, elem_name, name_any, name_partial) < 0: + return False + return True + + def _check_type_fit(xml_elem: ET._Element) -> bool: + elem_type = xml_elem.attrib.get("type") + if elem_type: + if not set(NEXUS_TO_PYTHON_DATA_TYPES[self.dtype]).issubset(NEXUS_TO_PYTHON_DATA_TYPES[elem_type]): + return False + return True + + def _check_units_fit(xml_elem: ET._Element) -> bool: + elem_units = xml_elem.attrib.get("units") + if elem_units and elem_units != "NX_ANY": + if elem_units != self.unit: + if not elem_units == "NX_TRANSFORMATION" and self.unit in ["NX_LENGTH", "NX_ANGLE", "NX_UNITLESS"]: + return False + return True + + def _check_enum_fit(xml_elem: ET._Element) -> bool: + if self.items is None: + return True + elem_enum = xml_elem.find(f"nx:enumeration", namespaces=namespaces) + if elem_enum is not None: + elem_enum_open = elem_enum.attrib.get("open", "false") + + if elem_enum_open == "true": + return True + + elem_enum_items = [] + for items in elem_enum.findall(f"nx:item", namespaces=namespaces): + value = items.attrib["value"] + if value[0] == "[" and value[-1] == "]": + import ast + + try: + elem_enum_items.append(ast.literal_eval(value)) + except (ValueError, SyntaxError): + raise Exception( + f"Error parsing enumeration item in the provided NXDL: {value}" + ) + else: + elem_enum_items.append(value) + + def convert_to_hashable(item): + """Convert lists to tuples for hashable types, leave non-list items as they are.""" + if isinstance(item, list): + return tuple(item) # Convert sublists to tuples + return item # Non-list items remain as they are + + set_items = {convert_to_hashable(sublist) for sublist in self.items} + set_elem_enum_items = {convert_to_hashable(sublist) for sublist in elem_enum_items} + + if not set(set_items).issubset(set_elem_enum_items): + # Should we really be this strict here? Or can appdefs define additional terms? 
+ return False + return True + + def _check_dimensions_fit(xml_elem: ET._Element) -> bool: + if not self.shape: + return True + elem_dimensions = xml_elem.find(f"nx:dimensions", namespaces=namespaces) + if elem_dimensions is not None: + rank = elem_dimensions.attrib.get("rank") + if rank is not None and not isinstance(rank, int): + try: + int(rank) + except ValueError: + # TODO: Handling of symbols + return True + elem_dim = elem_dimensions.findall("nx:dim", namespaces=namespaces) + elem_dimension_rank = rank if rank is not None else len(rank) + dims: List[Optional[int]] = [None] * int(rank) + for dim in elem_dim: + idx = int(dim.attrib["index"]) + if "value" not in dim.attrib: + # This is probably an old dim element with ref + pass + try: + value = int(dim.attrib["value"]) + dims[idx - 1] = value + except ValueError: + # TODO: Handling of symbols + pass + elem_shape = tuple(dims) + + if elem_shape: + if elem_shape != self.shape: + return False + + return True + + check_functions = [ + _check_name_fit, + _check_type_fit, + _check_units_fit, + _check_enum_fit, + _check_dimensions_fit + + ] + + for func in check_functions: + if not func(xml_elem): + return False + return True + + def _construct_inheritance_chain_from_parent(self): """ Builds the inheritance chain of the current node based on the parent node. @@ -222,10 +338,11 @@ def _construct_inheritance_chain_from_parent(self): return for xml_elem in self.parent.inheritance: elem = xml_elem.find( - f"nx:{self.type}/[@name='{self.name}']", namespaces=namespaces + f"nx:{self.type}", namespaces=namespaces ) if elem is not None: - self.inheritance.append(elem) + if self._check_compatibility_with(elem): + self.inheritance.append(elem) def get_path(self) -> str: """ @@ -601,6 +718,7 @@ def add_node_from(self, xml_elem: ET._Element) -> Optional["NexusNode"]: type=tag, optionality=default_optionality, nxdl_base=xml_elem.base, + inheritance=[xml_elem] ) elif tag == "group": name = xml_elem.attrib.get("name") @@ -950,7 +1068,13 @@ def _set_shape(self): def __init__(self, **data) -> None: super().__init__(**data) + self._set_unit() + self._set_type() + self._set_items_and_enum_type() + self._set_optionality() + self._set_shape() self._construct_inheritance_chain_from_parent() + # Set all parameters again based on the acquired inheritance self._set_unit() self._set_type() self._set_items_and_enum_type() diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 098f0a50f..1d88bbfa1 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -548,6 +548,7 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str pass def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: + # PRINT = True if "identifier_1/@type" in key else False for name in key[1:].replace("@", "").split("/"): children_to_check = [ node.search_add_child_for(child) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 80cbbb9b1..b247dbd96 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -729,7 +729,6 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/optional_parent/AXISNAME[required_child]", 1, ), - # ToDo: should not raise a warning if sibling inheritance works [ "The data entry corresponding to /ENTRY[my_entry]/optional_parent/" "required_child is required and hasn't been supplied by the reader." 
@@ -1053,22 +1052,24 @@ def listify_template(data_dict: Template): "123", ), [], - id="specified-identifier-with-type", + id="specified-identifier-without-type", ), # ToDo: reactivate if sibling inheritance works properly - # pytest.param( - # alter_dict( - # alter_dict( - # TEMPLATE, - # "/ENTRY[my_entry]/identified_calibration/identifier_1", - # "123", - # ), - # "/ENTRY[my_entry]/identified_calibration/identifier_1/@type", - # "ORCID", - # ), - # [], - # id="specified-identifier-with-type", - # ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/identified_calibration/identifier_1", + "123", + ), + "/ENTRY[my_entry]/identified_calibration/identifier_1/@type", + "ORCID", + ), + [ + "Attribute /ENTRY[my_entry]/identified_calibration/identifier_1/@type written without documentation." + ], + id="specified-identifier-with-type", + ), pytest.param( alter_dict( alter_dict( From f457a5c0932b6e85e5b7746ced3acfeb0afc6047 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 20 Mar 2025 10:47:48 +0100 Subject: [PATCH 016/118] warning for variadic notation for non-variadic names --- src/pynxtools/dataconverter/helpers.py | 7 ++++ src/pynxtools/dataconverter/validation.py | 7 ++++ tests/dataconverter/test_validation.py | 41 ++++++++++++++++++++--- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index f47b171f4..baa366a62 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -67,6 +67,7 @@ class ValidationProblem(Enum): NXdataMissingAxisData = 19 NXdataAxisMismatch = 20 KeyToBeRemoved = 21 + InvalidConceptForNonVariadic = 22 class Collector: @@ -151,6 +152,12 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar elif log_type == ValidationProblem.KeyToBeRemoved: logger.warning(f"The attribute {path} will not be written.") + elif log_type == ValidationProblem.InvalidConceptForNonVariadic: + log_text = f"Given {value.type} name '{path}' conflicts with the non-variadic name '{value}'" + if value.type == "group": + log_text += f", which should be of type {value.nx_class}." 
+ logger.warning(log_text) + def collect_and_log( self, path: str, diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 1d88bbfa1..4e2951ad5 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -155,6 +155,13 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode for node in nodes: if not node.variadic: if instance_name == node.name: + if concept_name and concept_name != node.name: + collector.collect_and_log( + concept_name, + ValidationProblem.InvalidConceptForNonVariadic, + node, + ) + return None return node else: if concept_name and concept_name == node.name: diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index b247dbd96..3e89ae0d2 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -719,6 +719,32 @@ def listify_template(data_dict: Template): [], id="no-child-provided-optional-parent", ), + pytest.param( + alter_dict( + remove_from_dict( + remove_from_dict( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/specified_group/specified_field", + "required", + ), + "/ENTRY[my_entry]/specified_group/specified_field/@specified_attr_in_field", + "required", + ), + "/ENTRY[my_entry]/specified_group/@specified_attr", + "required", + ), + "/ENTRY[my_entry]/SAMPLE[specified_group]/specified_field", + 1.0, + ), + [ + "The required group, /ENTRY[my_entry]/specified_group, hasn't been supplied.", + "Given group name 'SAMPLE' conflicts with the non-variadic name 'specified_group (req)', " + "which should be of type NXdata.", + "Field /ENTRY[my_entry]/SAMPLE[specified_group]/specified_field written without documentation.", + ], + id="illegal-concept-name-for-nonvariadic-group", + ), pytest.param( alter_dict( remove_from_dict( @@ -729,9 +755,13 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/optional_parent/AXISNAME[required_child]", 1, ), + # TODO: should not raise a warning if sibling inheritance works [ "The data entry corresponding to /ENTRY[my_entry]/optional_parent/" - "required_child is required and hasn't been supplied by the reader." + "required_child is required and hasn't been supplied by the reader.", + "Given field name 'AXISNAME' conflicts with the non-variadic name " + "'required_child (req)'", + "Field /ENTRY[my_entry]/optional_parent/AXISNAME[required_child] written without documentation.", ], id="concept-name-given-for-nonvariadic-field", ), @@ -746,9 +776,12 @@ def listify_template(data_dict: Template): "test value", ), [ - "The value at /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] should be " - "one of the following Python types: (, ), as " - "defined in the NXDL as NX_INT." + "Given field name 'AXISNAME' conflicts with the non-variadic name 'optional_child (opt)'", + "Field /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] written without documentation.", + # TODO: reactivate if sibling inheritance works + # # "The value at /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] should be " + # "one of the following Python types: (, ), as " + # "defined in the NXDL as NX_INT." 
], id="concept-name-given-for-nonvariadic-field-wrong-type", ), From f33aac57d169f0f01818fb356535152a499f121d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 9 Apr 2025 12:55:32 +0200 Subject: [PATCH 017/118] allow concept names for non-variadic groups if the concept name matches with the nx_class --- src/pynxtools/data/NXtest.nxdl.xml | 1 + src/pynxtools/dataconverter/validation.py | 21 +++++++++++++------ tests/dataconverter/test_validation.py | 25 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 329f42ef2..46035acfd 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -123,6 +123,7 @@ + diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4e2951ad5..756b4f1d6 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -156,12 +156,21 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode if not node.variadic: if instance_name == node.name: if concept_name and concept_name != node.name: - collector.collect_and_log( - concept_name, - ValidationProblem.InvalidConceptForNonVariadic, - node, - ) - return None + if node.type == "group": + if concept_name != node.nx_class[2:].upper(): + collector.collect_and_log( + concept_name, + ValidationProblem.InvalidConceptForNonVariadic, + node, + ) + return None + else: + collector.collect_and_log( + concept_name, + ValidationProblem.InvalidConceptForNonVariadic, + node, + ) + return None return node else: if concept_name and concept_name == node.name: diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 3e89ae0d2..8101ef09c 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1129,6 +1129,31 @@ def listify_template(data_dict: Template): [], id="name-fitted-identifier-with-type", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/CALIBRATION[identified_calibration]/identifier_1", + "123", + ), + [], + id="group-with-correct-concept", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/CALIBRATION[identified_calibration]/identifier_1", + "123", + ), + "/ENTRY[my_entry]/identified_calibration/identifier_2", + "456", + ), + [ + "The data entry corresponding to /ENTRY[my_entry]/identified_calibration/identifier_1 is required " + "and hasn't been supplied by the reader." 
+ ], + id="group-with-correct-concept-and-non-concept-sibling", + ), # This can be re-used later when we have proper unit checking pytest.param( alter_dict( From ed3ccd9ba3ce9750b66ff16e55625ebde17a91fd Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 23 Apr 2025 13:28:21 +0200 Subject: [PATCH 018/118] simplify dim checking --- src/pynxtools/dataconverter/nexus_tree.py | 60 ++++++++++++----------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index f432eba78..3c55a2a28 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -41,7 +41,7 @@ is_variadic, is_appdef, remove_namespace_from_tag, - NEXUS_TO_PYTHON_DATA_TYPES + NEXUS_TO_PYTHON_DATA_TYPES, ) from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( get_nx_namefit, @@ -226,22 +226,28 @@ def _check_name_fit(xml_elem: ET._Element) -> bool: if get_nx_namefit(self.name, elem_name, name_any, name_partial) < 0: return False return True - + def _check_type_fit(xml_elem: ET._Element) -> bool: elem_type = xml_elem.attrib.get("type") if elem_type: - if not set(NEXUS_TO_PYTHON_DATA_TYPES[self.dtype]).issubset(NEXUS_TO_PYTHON_DATA_TYPES[elem_type]): + if not set(NEXUS_TO_PYTHON_DATA_TYPES[self.dtype]).issubset( + NEXUS_TO_PYTHON_DATA_TYPES[elem_type] + ): return False return True - + def _check_units_fit(xml_elem: ET._Element) -> bool: elem_units = xml_elem.attrib.get("units") if elem_units and elem_units != "NX_ANY": if elem_units != self.unit: - if not elem_units == "NX_TRANSFORMATION" and self.unit in ["NX_LENGTH", "NX_ANGLE", "NX_UNITLESS"]: + if not elem_units == "NX_TRANSFORMATION" and self.unit in [ + "NX_LENGTH", + "NX_ANGLE", + "NX_UNITLESS", + ]: return False return True - + def _check_enum_fit(xml_elem: ET._Element) -> bool: if self.items is None: return True @@ -251,7 +257,7 @@ def _check_enum_fit(xml_elem: ET._Element) -> bool: if elem_enum_open == "true": return True - + elem_enum_items = [] for items in elem_enum.findall(f"nx:item", namespaces=namespaces): value = items.attrib["value"] @@ -266,7 +272,7 @@ def _check_enum_fit(xml_elem: ET._Element) -> bool: ) else: elem_enum_items.append(value) - + def convert_to_hashable(item): """Convert lists to tuples for hashable types, leave non-list items as they are.""" if isinstance(item, list): @@ -274,13 +280,15 @@ def convert_to_hashable(item): return item # Non-list items remain as they are set_items = {convert_to_hashable(sublist) for sublist in self.items} - set_elem_enum_items = {convert_to_hashable(sublist) for sublist in elem_enum_items} - + set_elem_enum_items = { + convert_to_hashable(sublist) for sublist in elem_enum_items + } + if not set(set_items).issubset(set_elem_enum_items): # Should we really be this strict here? Or can appdefs define additional terms? 
return False return True - + def _check_dimensions_fit(xml_elem: ET._Element) -> bool: if not self.shape: return True elem_dimensions = xml_elem.find(f"nx:dimensions", namespaces=namespaces) if elem_dimensions is not None: rank = elem_dimensions.attrib.get("rank") if rank is not None and not isinstance(rank, int): try: int(rank) except ValueError: # TODO: Handling of symbols return True elem_dim = elem_dimensions.findall("nx:dim", namespaces=namespaces) elem_dimension_rank = rank if rank is not None else len(rank) dims: List[Optional[int]] = [None] * int(rank) + for dim in elem_dim: idx = int(dim.attrib["index"]) - if "value" not in dim.attrib: - # This is probably an old dim element with ref - pass - try: - value = int(dim.attrib["value"]) - dims[idx - 1] = value - except ValueError: - # TODO: Handling of symbols - pass + if value := dim.attrib.get("value", None): + # If not, this is probably an old dim element with ref. + try: + value = int(value) + dims[idx] = value + except ValueError: + # TODO: Handling of symbols + pass elem_shape = tuple(dims) if elem_shape: @@ -314,14 +322,13 @@ def _check_dimensions_fit(xml_elem: ET._Element) -> bool: check_functions = [ _check_name_fit, _check_type_fit, _check_units_fit, _check_enum_fit, - _check_dimensions_fit - + _check_dimensions_fit, ] for func in check_functions: @@ -329,7 +336,6 @@ def _check_dimensions_fit(xml_elem: ET._Element) -> bool: return False return True - def _construct_inheritance_chain_from_parent(self): """ Builds the inheritance chain of the current node based on the parent node. @@ -337,9 +343,7 @@ def _construct_inheritance_chain_from_parent(self): if self.parent is None: return for xml_elem in self.parent.inheritance: - elem = xml_elem.find( - f"nx:{self.type}", namespaces=namespaces - ) + elem = xml_elem.find(f"nx:{self.type}", namespaces=namespaces) if elem is not None: if self._check_compatibility_with(elem): self.inheritance.append(elem) From 9322a46599f26d710955b44675909899ecc3477c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 23 Apr 2025 14:03:17 +0200 Subject: [PATCH 019/118] move sibling inheritance check to NexusEntity --- src/pynxtools/dataconverter/nexus_tree.py | 281 ++++++++++++---------- 1 file changed, 148 insertions(+), 133 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 3c55a2a28..c1a722d53 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -215,139 +215,6 @@ self.is_a = [] self.parent_of = [] - def _check_compatibility_with(self, xml_elem: ET._Element) -> bool: - """Check compatibility of this node with an XML element from the (possible) inheritance""" - - def _check_name_fit(xml_elem: ET._Element) -> bool: - elem_name = xml_elem.attrib.get("name") - name_any = is_name_type(xml_elem, "any") - name_partial = is_name_type(xml_elem, "partial") - - if get_nx_namefit(self.name, elem_name, name_any, name_partial) < 0: - return False - return True - - def _check_type_fit(xml_elem: ET._Element) -> bool: - elem_type = xml_elem.attrib.get("type") - if elem_type: - if not set(NEXUS_TO_PYTHON_DATA_TYPES[self.dtype]).issubset( - NEXUS_TO_PYTHON_DATA_TYPES[elem_type] - ): - return False - return True - - def _check_units_fit(xml_elem: ET._Element) -> bool: - elem_units = xml_elem.attrib.get("units") - if elem_units and 
elem_units != "NX_ANY": - if elem_units != self.unit: - if not elem_units == "NX_TRANSFORMATION" and self.unit in [ - "NX_LENGTH", - "NX_ANGLE", - "NX_UNITLESS", - ]: - return False - return True - - def _check_enum_fit(xml_elem: ET._Element) -> bool: - if self.items is None: - return True - elem_enum = xml_elem.find(f"nx:enumeration", namespaces=namespaces) - if elem_enum is not None: - elem_enum_open = elem_enum.attrib.get("open", "false") - - if elem_enum_open == "true": - return True - - elem_enum_items = [] - for items in elem_enum.findall(f"nx:item", namespaces=namespaces): - value = items.attrib["value"] - if value[0] == "[" and value[-1] == "]": - import ast - - try: - elem_enum_items.append(ast.literal_eval(value)) - except (ValueError, SyntaxError): - raise Exception( - f"Error parsing enumeration item in the provided NXDL: {value}" - ) - else: - elem_enum_items.append(value) - - def convert_to_hashable(item): - """Convert lists to tuples for hashable types, leave non-list items as they are.""" - if isinstance(item, list): - return tuple(item) # Convert sublists to tuples - return item # Non-list items remain as they are - - set_items = {convert_to_hashable(sublist) for sublist in self.items} - set_elem_enum_items = { - convert_to_hashable(sublist) for sublist in elem_enum_items - } - - if not set(set_items).issubset(set_elem_enum_items): - # Should we really be this strict here? Or can appdefs define additional terms? - return False - return True - - def _check_dimensions_fit(xml_elem: ET._Element) -> bool: - if not self.shape: - return True - elem_dimensions = xml_elem.find(f"nx:dimensions", namespaces=namespaces) - if elem_dimensions is not None: - rank = elem_dimensions.attrib.get("rank") - if rank is not None and not isinstance(rank, int): - try: - int(rank) - except ValueError: - # TODO: Handling of symbols - return True - elem_dim = elem_dimensions.findall("nx:dim", namespaces=namespaces) - elem_dimension_rank = rank if rank is not None else len(rank) - dims: List[Optional[int]] = [None] * int(rank) - - for dim in elem_dim: - idx = int(dim.attrib["index"]) - if value := dim.attrib.get("value", None): - # If not, this is probably an old dim element with ref. - try: - value = int(value) - dims[idx] = value - except ValueError: - # TODO: Handling of symbols - pass - elem_shape = tuple(dims) - - if elem_shape: - if elem_shape != self.shape: - return False - - return True - - check_functions = [ - _check_name_fit, - _check_type_fit, - _check_units_fit, - _check_enum_fit, - _check_dimensions_fit, - ] - - for func in check_functions: - if not func(xml_elem): - return False - return True - - def _construct_inheritance_chain_from_parent(self): - """ - Builds the inheritance chain of the current node based on the parent node. - """ - if self.parent is None: - return - for xml_elem in self.parent.inheritance: - elem = xml_elem.find(f"nx:{self.type}", namespaces=namespaces) - if elem is not None: - if self._check_compatibility_with(elem): - self.inheritance.append(elem) - def get_path(self) -> str: """ Gets the path of the current node based on the node name. @@ -806,6 +673,19 @@ def __init__(self, **data) -> None: self._construct_inheritance_chain_from_parent() self._set_optionality() + def _construct_inheritance_chain_from_parent(self): + """ + Builds the inheritance chain of the current node based on the parent node. 
+ """ + if self.parent is None: + return + for xml_elem in self.parent.inheritance: + elem = xml_elem.find( + f"nx:{self.type}/[@name='{self.name}']", namespaces=namespaces + ) + if elem is not None: + self.inheritance.append(elem) + class NexusGroup(NexusNode): """ @@ -986,6 +866,141 @@ class NexusEntity(NexusNode): open_enum: bool = False shape: Optional[Tuple[Optional[int], ...]] = None + def _check_compatibility_with(self, xml_elem: ET._Element) -> bool: + """Check compatibility of this node with an XML element from the (possible) inheritance""" + + def _check_name_fit(xml_elem: ET._Element) -> bool: + elem_name = xml_elem.attrib.get("name") + name_any = is_name_type(xml_elem, "any") + name_partial = is_name_type(xml_elem, "partial") + + if get_nx_namefit(self.name, elem_name, name_any, name_partial) < 0: + return False + return True + + def _check_type_fit(xml_elem: ET._Element) -> bool: + elem_type = xml_elem.attrib.get("type") + if elem_type: + if not set(NEXUS_TO_PYTHON_DATA_TYPES[self.dtype]).issubset( + NEXUS_TO_PYTHON_DATA_TYPES[elem_type] + ): + return False + return True + + def _check_units_fit(xml_elem: ET._Element) -> bool: + elem_units = xml_elem.attrib.get("units") + if elem_units and elem_units != "NX_ANY": + if elem_units != self.unit: + if not elem_units == "NX_TRANSFORMATION" and self.unit in [ + "NX_LENGTH", + "NX_ANGLE", + "NX_UNITLESS", + ]: + return False + return True + + def _check_enum_fit(xml_elem: ET._Element) -> bool: + elem_enum = xml_elem.find(f"nx:enumeration", namespaces=namespaces) + if elem_enum is not None: + if self.items is None: + # Case where inherited entity is enumerated, but current node isn't + return False + elem_enum_open = elem_enum.attrib.get("open", "false") + + if elem_enum_open == "true": + return True + + elem_enum_items = [] + for items in elem_enum.findall(f"nx:item", namespaces=namespaces): + value = items.attrib["value"] + if value[0] == "[" and value[-1] == "]": + import ast + + try: + elem_enum_items.append(ast.literal_eval(value)) + except (ValueError, SyntaxError): + raise Exception( + f"Error parsing enumeration item in the provided NXDL: {value}" + ) + else: + elem_enum_items.append(value) + + def convert_to_hashable(item): + """Convert lists to tuples for hashable types, leave non-list items as they are.""" + if isinstance(item, list): + return tuple(item) # Convert sublists to tuples + return item # Non-list items remain as they are + + set_items = {convert_to_hashable(sublist) for sublist in self.items} + set_elem_enum_items = { + convert_to_hashable(sublist) for sublist in elem_enum_items + } + + if not set(set_items).issubset(set_elem_enum_items): + # Should we really be this strict here? Or can appdefs define additional terms? + return False + return True + + def _check_dimensions_fit(xml_elem: ET._Element) -> bool: + if not self.shape: + return True + elem_dimensions = xml_elem.find(f"nx:dimensions", namespaces=namespaces) + if elem_dimensions is not None: + rank = elem_dimensions.attrib.get("rank") + if rank is not None and not isinstance(rank, int): + try: + int(rank) + except ValueError: + # TODO: Handling of symbols + return True + elem_dim = elem_dimensions.findall("nx:dim", namespaces=namespaces) + elem_dimension_rank = rank if rank is not None else len(rank) + dims: List[Optional[int]] = [None] * int(rank) + + for dim in elem_dim: + idx = int(dim.attrib["index"]) + if value := dim.attrib.get("value", None): + # If not, this is probably an old dim element with ref. 
+ try: + value = int(value) + dims[idx] = value + except ValueError: + # TODO: Handling of symbols + pass + elem_shape = tuple(dims) + + if elem_shape: + if elem_shape != self.shape: + return False + + return True + + check_functions = [ + _check_name_fit, + _check_type_fit, + _check_units_fit, + # TODO: check if any inheritance is wrongfully assigned without enum and dim checks + # _check_enum_fit, + # _check_dimensions_fit, + ] + + for func in check_functions: + if not func(xml_elem): + return False + return True + + def _construct_inheritance_chain_from_parent(self): + """ + Builds the inheritance chain of the current node based on the parent node. + """ + if self.parent is None: + return + for xml_elem in self.parent.inheritance: + elem = xml_elem.find(f"nx:{self.type}", namespaces=namespaces) + if elem is not None: + if self._check_compatibility_with(elem): + self.inheritance.append(elem) + def _set_type(self): """ Sets the dtype of the current entity based on the values in the inheritance chain. From e5f331b6d5e5987efc3ae494a52de622c0717642 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 23 Apr 2025 17:34:09 +0200 Subject: [PATCH 020/118] inherit all fields/attributes from parent --- src/pynxtools/dataconverter/nexus_tree.py | 10 ++++++---- src/pynxtools/dataconverter/validation.py | 1 - tests/dataconverter/test_validation.py | 7 ++----- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index c1a722d53..a64a71814 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -344,6 +344,7 @@ def get_all_direct_children_names( Returns: Set[str]: A set of children names. 
""" + if depth is not None and (not isinstance(depth, int) or depth < 0): raise ValueError("Depth must be a positive integer or None") @@ -996,10 +997,11 @@ def _construct_inheritance_chain_from_parent(self): if self.parent is None: return for xml_elem in self.parent.inheritance: - elem = xml_elem.find(f"nx:{self.type}", namespaces=namespaces) - if elem is not None: - if self._check_compatibility_with(elem): - self.inheritance.append(elem) + subelems = xml_elem.findall(f"nx:{self.type}", namespaces=namespaces) + if subelems is not None: + for elem in subelems: + if self._check_compatibility_with(elem): + self.inheritance.append(elem) def _set_type(self): """ diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 756b4f1d6..899a78b3d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -564,7 +564,6 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str pass def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: - # PRINT = True if "identifier_1/@type" in key else False for name in key[1:].replace("@", "").split("/"): children_to_check = [ node.search_add_child_for(child) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 8101ef09c..27f1db60b 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -779,7 +779,7 @@ def listify_template(data_dict: Template): "Given field name 'AXISNAME' conflicts with the non-variadic name 'optional_child (opt)'", "Field /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] written without documentation.", # TODO: reactivate if sibling inheritance works - # # "The value at /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] should be " + # "The value at /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] should be " # "one of the following Python types: (, ), as " # "defined in the NXDL as NX_INT." ], @@ -1087,7 +1087,6 @@ def listify_template(data_dict: Template): [], id="specified-identifier-without-type", ), - # ToDo: reactivate if sibling inheritance works properly pytest.param( alter_dict( alter_dict( @@ -1098,9 +1097,7 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/identified_calibration/identifier_1/@type", "ORCID", ), - [ - "Attribute /ENTRY[my_entry]/identified_calibration/identifier_1/@type written without documentation." 
- ], + [], id="specified-identifier-with-type", ), pytest.param( From f246383598f048bb6e6e93bf940b172ea1788420 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 24 Apr 2025 09:34:24 +0200 Subject: [PATCH 021/118] allow for keys like AXISNAME[energy] --- src/pynxtools/dataconverter/validation.py | 45 ++++++++++++++++------- tests/dataconverter/test_validation.py | 32 +++++++--------- 2 files changed, 45 insertions(+), 32 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 899a78b3d..a88c4f888 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -156,20 +156,24 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode if not node.variadic: if instance_name == node.name: if concept_name and concept_name != node.name: - if node.type == "group": - if concept_name != node.nx_class[2:].upper(): + inherited_names = [ + elem.attrib.get("name", elem.attrib["type"][2:].upper()) + for elem in node.inheritance + ] + if concept_name not in inherited_names: + if node.type == "group": + if concept_name != node.nx_class[2:].upper(): + collector.collect_and_log( + concept_name, + ValidationProblem.InvalidConceptForNonVariadic, + node, + ) + else: collector.collect_and_log( concept_name, ValidationProblem.InvalidConceptForNonVariadic, node, ) - return None - else: - collector.collect_and_log( - concept_name, - ValidationProblem.InvalidConceptForNonVariadic, - node, - ) return None return node else: @@ -210,16 +214,31 @@ def validate_dict_against( """ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: + variations = [] + if not node.variadic: if f"{'@' if node.type == 'attribute' else ''}{node.name}" in keys: - return [node.name] + variations += [node.name] elif ( hasattr(node, "nx_class") and f"{convert_nexus_to_caps(node.nx_class)}[{node.name}]" in keys ): - return [f"{convert_nexus_to_caps(node.nx_class)}[{node.name}]"] - - variations = [] + variations += [f"{convert_nexus_to_caps(node.nx_class)}[{node.name}]"] + + # Also add all variations like CONCEPT[node.name] for inherited concepts + inherited_names = [] + for elem in node.inheritance: + inherited_name = elem.attrib.get("name") + if not inherited_name: + inherited_name = elem.attrib.get("type")[2:].upper() + if inherited_name.startswith("NX"): + inherited_name = inherited_name[2:].upper() + inherited_names += [inherited_name] + for name in set(inherited_names): + if f"{name}[{node.name}]" in keys: + variations += [f"{name}[{node.name}]"] + + return variations for key in keys: concept_name, instance_name = split_class_and_name_of(key) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 27f1db60b..9bf2e7569 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -747,22 +747,19 @@ def listify_template(data_dict: Template): ), pytest.param( alter_dict( - remove_from_dict( - TEMPLATE, - "/ENTRY[my_entry]/optional_parent/required_child", - "required", + alter_dict( + remove_from_dict( + TEMPLATE, + "/ENTRY[my_entry]/optional_parent/required_child", + "required", + ), + "/ENTRY[my_entry]/optional_parent/AXISNAME[required_child]", + 1, ), - "/ENTRY[my_entry]/optional_parent/AXISNAME[required_child]", + "/ENTRY[my_entry]/optional_parent/AXISNAME[optional_child]", 1, ), - # TODO: should not raise a warning if sibling inheritance works - [ - "The 
data entry corresponding to /ENTRY[my_entry]/optional_parent/" - "required_child is required and hasn't been supplied by the reader.", - "Given field name 'AXISNAME' conflicts with the non-variadic name " - "'required_child (req)'", - "Field /ENTRY[my_entry]/optional_parent/AXISNAME[required_child] written without documentation.", - ], + [], id="concept-name-given-for-nonvariadic-field", ), pytest.param( @@ -776,12 +773,9 @@ def listify_template(data_dict: Template): "test value", ), [ - "Given field name 'AXISNAME' conflicts with the non-variadic name 'optional_child (opt)'", - "Field /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] written without documentation.", - # TODO: reactivate if sibling inheritance works - # "The value at /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] should be " - # "one of the following Python types: (, ), as " - # "defined in the NXDL as NX_INT." + "The value at /ENTRY[my_entry]/optional_parent/AXISNAME[optional_child] should be " + "one of the following Python types: (, ), as " + "defined in the NXDL as NX_INT." ], id="concept-name-given-for-nonvariadic-field-wrong-type", ), From f37d67a06d40426271a6fafa5240212a149d07ce Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 24 Apr 2025 09:44:15 +0200 Subject: [PATCH 022/118] mypy fixes --- src/pynxtools/dataconverter/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index baa366a62..b028b7e83 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -24,7 +24,7 @@ from datetime import datetime, timezone from enum import Enum from functools import lru_cache -from typing import Any, Callable, List, Optional, Tuple, Union, Sequence +from typing import Any, Callable, List, Optional, Tuple, Union, Sequence, cast import h5py import lxml.etree as ET @@ -153,6 +153,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar logger.warning(f"The attribute {path} will not be written.") elif log_type == ValidationProblem.InvalidConceptForNonVariadic: + value = cast(Any, value) log_text = f"Given {value.type} name '{path}' conflicts with the non-variadic name '{value}'" if value.type == "group": log_text += f", which should be of type {value.nx_class}." 
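[Editorial sketch between patches: the key notation that patches 016 through 023 teach the validator to accept places the (possibly inherited) concept name in front of the concrete instance name. A minimal sketch using keys taken from the test suite above; the plain dict merely stands in for a pynxtools dataconverter template, and the regex is only a rough stand-in for the split_class_and_name_of() helper used in validation.py, not the actual implementation.]

import re

# "CONCEPT[instance]" keys: AXISNAME names the inherited concept, while the
# bracketed part is the instance name actually written to the NeXus file.
template = {
    "/ENTRY[my_entry]/optional_parent/AXISNAME[required_child]": 1,
    "/ENTRY[my_entry]/optional_parent/@AXISNAME_indices[@required_child_indices]": 0,
    # a plain instance name (no concept prefix) remains valid as well
    "/ENTRY[my_entry]/optional_parent/optional_child": 2,
}

for key in template:
    last_part = key.rsplit("/", 1)[-1]
    match = re.fullmatch(r"(.+?)\[(.+?)\]", last_part)
    concept, instance = match.groups() if match else (None, last_part)
    print(concept, instance)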
From e38f1148b941f1df5a75fb78984c7747b68b86f3 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 24 Apr 2025 09:47:52 +0200 Subject: [PATCH 023/118] fix for inherited concepts --- src/pynxtools/dataconverter/validation.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index a88c4f888..91a85621b 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -157,8 +157,13 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode if instance_name == node.name: if concept_name and concept_name != node.name: inherited_names = [ - elem.attrib.get("name", elem.attrib["type"][2:].upper()) + name + if (name := elem.attrib.get("name")) is not None + else type_attr[2:].upper() for elem in node.inheritance + if (name := elem.attrib.get("name")) is not None + or (type_attr := elem.attrib.get("type")) + and len(type_attr) > 2 ] if concept_name not in inherited_names: if node.type == "group": From e0a70cdfb3dbcb242e0487a9d4082757e8bfc9d3 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 24 Apr 2025 10:06:55 +0200 Subject: [PATCH 024/118] use special branches for mpes and raman --- .github/workflows/plugin_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 5a8654912..a43682dd4 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -33,10 +33,10 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-mpes - branch: main + branch: entry-identifier tests_to_run: tests/. - plugin: pynxtools-raman - branch: main + branch: sibling-inheritance tests_to_run: tests/. - plugin: pynxtools-spm branch: main From 6f2d1f826318271eed756918e7b41eed3f55bab2 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 24 Apr 2025 11:18:43 +0200 Subject: [PATCH 025/118] use special branches for spm --- .github/workflows/plugin_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index a43682dd4..f29556adf 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -39,7 +39,7 @@ jobs: branch: sibling-inheritance tests_to_run: tests/. - plugin: pynxtools-spm - branch: main + branch: field-inheritance tests_to_run: tests/. - plugin: pynxtools-xps branch: main From 4b2f9000b2b713f087d8e4162f036df8aa8fad8e Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 24 Apr 2025 11:55:17 +0200 Subject: [PATCH 026/118] enable enum checking, but without needing a subset for inherited enum --- src/pynxtools/dataconverter/nexus_tree.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index a64a71814..432ee1371 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -938,8 +938,12 @@ def convert_to_hashable(item): } if not set(set_items).issubset(set_elem_enum_items): - # Should we really be this strict here? Or can appdefs define additional terms? 
- return False + if self.name == "definition": + pass + else: + # TODO: should we be this strict here? Or can appdefs define additional terms? + print(self, set_items, set_elem_enum_items) + pass return True def _check_dimensions_fit(xml_elem: ET._Element) -> bool: @@ -980,8 +984,8 @@ def _check_dimensions_fit(xml_elem: ET._Element) -> bool: _check_name_fit, _check_type_fit, _check_units_fit, - # TODO: check if any inheritance is wrongfully assigned without enum and dim checks - # _check_enum_fit, + _check_enum_fit, + # TODO: check if any inheritance is wrongfully assigned without dim checks # _check_dimensions_fit, ] From 5763124bcc3b5b07b624fcb7b7a9775b8bc49aea Mon Sep 17 00:00:00 2001 From: rettigl Date: Thu, 24 Apr 2025 23:35:08 +0200 Subject: [PATCH 027/118] fix attribute checking --- src/pynxtools/dataconverter/validation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 91a85621b..d0bd645e2 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -221,8 +221,9 @@ def validate_dict_against( def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: variations = [] + prefix = f"{'@' if node.type == 'attribute' else ''}" if not node.variadic: - if f"{'@' if node.type == 'attribute' else ''}{node.name}" in keys: + if f"{prefix}{node.name}" in keys: variations += [node.name] elif ( hasattr(node, "nx_class") @@ -240,8 +241,8 @@ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: inherited_name = inherited_name[2:].upper() inherited_names += [inherited_name] for name in set(inherited_names): - if f"{name}[{node.name}]" in keys: - variations += [f"{name}[{node.name}]"] + if f"{prefix}{name}[{prefix}{node.name}]" in keys: + variations += [f"{prefix}{name}[{prefix}{node.name}]"] return variations From e2c0677b005914b854d83e8ded58cc25ba30aef0 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 25 Apr 2025 09:15:08 +0200 Subject: [PATCH 028/118] add a test for optional attribute with nameType=partial and concept name in template --- .github/workflows/plugin_test.yaml | 2 +- src/pynxtools/dataconverter/helpers.py | 1 - src/pynxtools/dataconverter/nexus_tree.py | 1 - tests/dataconverter/test_validation.py | 9 +++++++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index f29556adf..07c5ed3c6 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -33,7 +33,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-mpes - branch: entry-identifier + branch: main tests_to_run: tests/. 
- plugin: pynxtools-raman branch: sibling-inheritance diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index b028b7e83..d9664a967 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -151,7 +151,6 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar ) elif log_type == ValidationProblem.KeyToBeRemoved: logger.warning(f"The attribute {path} will not be written.") - elif log_type == ValidationProblem.InvalidConceptForNonVariadic: value = cast(Any, value) log_text = f"Given {value.type} name '{path}' conflicts with the non-variadic name '{value}'" diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 432ee1371..5d2a2a9d6 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -942,7 +942,6 @@ def convert_to_hashable(item): pass else: # TODO: should we be this strict here? Or can appdefs define additional terms? - print(self, set_items, set_elem_enum_items) pass return True diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 9bf2e7569..67eaa4ef2 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -762,6 +762,15 @@ def listify_template(data_dict: Template): [], id="concept-name-given-for-nonvariadic-field", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/optional_parent/@AXISNAME_indices[@required_child_indices]", + 0, + ), + [], + id="concept-name-given-for-optional-attribute", + ), pytest.param( alter_dict( remove_from_dict( From 842e7780eacad6b5dcb71d9cb8820aae8b2143c6 Mon Sep 17 00:00:00 2001 From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com> Date: Fri, 25 Apr 2025 10:59:52 +0200 Subject: [PATCH 029/118] Group tests that use the same resources. (#626) * Group tests that use the same resources. * Remove test sharing across workers; grouped pytest jobs now run on the same worker. * Lint * Documentation for parallel tests. * Docs... * lint. * Update docs/how-tos/run-tests-in-parallel.md Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> * Make group for test_param_file and test_nexus tests.
--------- Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- .github/workflows/pytest.yml | 2 +- dev-requirements.txt | 5 +++++ docs/how-tos/run-tests-in-parallel.md | 27 +++++++++++++++++++++++++++ docs/index.md | 1 + pyproject.toml | 1 + tests/dataconverter/test_convert.py | 10 ++++++++-- tests/nexus/test_nexus.py | 2 +- tests/nomad/test_parsing.py | 1 + 8 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 docs/how-tos/run-tests-in-parallel.md diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index befad7abd..f726f4812 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -39,7 +39,7 @@ jobs: uv pip install -e ".[dev]" - name: Test with pytest run: | - coverage run -m pytest -sv --show-capture=no tests + coverage run -m pytest -sv --show-capture=no tests -n auto --dist loadgroup - name: Submit to coveralls continue-on-error: true if: "${{ matrix.python-version == '3.12'}}" diff --git a/dev-requirements.txt b/dev-requirements.txt index 661946cf0..682758aa3 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -36,6 +36,8 @@ cycler==0.12.1 # via matplotlib distlib==0.3.9 # via virtualenv +execnet==2.1.1 + # via pytest-xdist filelock==3.18.0 # via virtualenv @@ -163,10 +165,13 @@ pytest==8.3.5 # via # pynxtools (pyproject.toml) # pytest-cov # pytest-timeout + # pytest-xdist pytest-cov==6.1.1 # via pynxtools (pyproject.toml) pytest-timeout==2.3.1 # via pynxtools (pyproject.toml) +pytest-xdist==3.6.1 + # via pynxtools (pyproject.toml) python-dateutil==2.9.0.post0 # via # ghp-import diff --git a/docs/how-tos/run-tests-in-parallel.md b/docs/how-tos/run-tests-in-parallel.md new file mode 100644 index 000000000..3bc118f5e --- /dev/null +++ b/docs/how-tos/run-tests-in-parallel.md @@ -0,0 +1,27 @@ +# Running `pynxtools` Tests in Parallel + +The `pytest` framework allows tests to run sequentially or in parallel using third-party plugins such as [`pytest-xdist`](https://pytest-xdist.readthedocs.io/en/stable/). In our `pytest` setup for `pynxtools`, we use `pytest-xdist` to execute tests in parallel. To handle shared resources among multiple tests, tests are grouped using the `@pytest.mark.xdist_group` marker. This prevents race conditions by ensuring that tests sharing the same resources are executed sequentially. + +## Running Tests Sequentially + +In a local setup, tests can be run sequentially using the following command: + +```console +$ python -m pytest tests +``` + +This executes all tests sequentially. For more details, refer to the official documentation: [How to invoke pytest](https://docs.pytest.org/en/stable/how-to/usage.html). + +## Running Tests in Parallel + +The `pytest-xdist` plugin can be used to speed up test execution by distributing tests among available workers. To prevent race conditions, tests that share the same resources are grouped using the `@pytest.mark.xdist_group(name="group_name")` marker. These grouped tests must be run with the `--dist loadgroup` flag. For example: + +```console +$ python -m pytest tests -n auto --dist loadgroup +``` + +Here: +- The `-n auto` flag tells `pytest` to automatically distribute tests among all available workers. +- The `--dist loadgroup` flag ensures that tests marked with the same `@pytest.mark.xdist_group(name="...")` are executed sequentially on the same worker. + +This setup allows for efficient parallel test execution while maintaining the integrity of tests that depend on shared resources.
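The grouping described in the how-to is the same mechanism this patch applies in `tests/dataconverter/test_convert.py`, where the tests sharing `xarray_saved_small_calibration.h5` are marked with `@pytest.mark.xdist_group(name="shared_resource")`. A condensed, self-contained sketch of the pattern (hypothetical test names and file path, not pynxtools tests):

```python
import pytest

SHARED = "/tmp/pynxtools_shared_resource.txt"  # hypothetical shared resource

@pytest.mark.xdist_group(name="shared_resource")
def test_write_shared_file():
    # First test in the group creates the resource ...
    with open(SHARED, "w") as f:
        f.write("data")

@pytest.mark.xdist_group(name="shared_resource")
def test_read_shared_file():
    # ... and the second one consumes it. With --dist loadgroup, both
    # tests are sent to the same xdist worker and run one after the
    # other, never concurrently.
    with open(SHARED) as f:
        assert f.read() == "data"
```

Without the marker, or under the default `--dist load` distribution, the two tests could land on different workers and race on the file.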
\ No newline at end of file diff --git a/docs/index.md b/docs/index.md index f99c6ce51..7339cd4f1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -41,6 +41,7 @@ How-to guides provide step-by-step instructions for a wide range of tasks. - [Validation of NeXus files](how-tos/validate-nexus-file.md) - [Creation of NeXus files in python via hard-coding](how-tos/create-nexus-files-by-python.md) - [Using pynxtools test framework for plugins](how-tos/using-pynxtools-test-framework.md) +- [Using pynxtools tests in parallel](how-tos/run-tests-in-parallel.md) __The following How-to guides are still under development:__ diff --git a/pyproject.toml b/pyproject.toml index 77faf6617..9a446fb5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ dev = [ "pytest", "pytest-timeout", "pytest-cov", + "pytest-xdist", "structlog", "types-pyyaml", "types-pytz", diff --git a/tests/dataconverter/test_convert.py b/tests/dataconverter/test_convert.py index 94506c405..a78ba80b9 100644 --- a/tests/dataconverter/test_convert.py +++ b/tests/dataconverter/test_convert.py @@ -22,9 +22,10 @@ from pathlib import Path import h5py -import pynxtools.dataconverter.convert as dataconverter import pytest from click.testing import CliRunner + +import pynxtools.dataconverter.convert as dataconverter from pynxtools.dataconverter.readers.base.reader import BaseReader @@ -132,6 +133,8 @@ def test_cli(caplog, cli_inputs): assert "Error: Missing option '--nxdl'" in result.output +# Shared resources: xarray_saved_small_calibration.h5 and testdata.json +@pytest.mark.xdist_group(name="shared_resource") def test_links_and_virtual_datasets(tmp_path): """A test for the convert CLI to check whether a Dataset object is created, @@ -198,9 +201,10 @@ def test_links_and_virtual_datasets(tmp_path): restore_xarray_file_from_tmp(tmp_path) +# Shared resources: xarray_saved_small_calibration.h5 and testdata.json +@pytest.mark.xdist_group(name="shared_resource") def test_compression(tmp_path): """A test for the convert CLI to check whether a Dataset object is compressed.""" - dirpath = os.path.join( os.path.dirname(__file__), "../data/dataconverter/readers/example" ) @@ -224,6 +228,8 @@ def test_compression(tmp_path): restore_xarray_file_from_tmp(tmp_path) +# Shared resources: xarray_saved_small_calibration.h5 and testdata.json +@pytest.mark.xdist_group(name="shared_resource") def test_params_file(): """Check if the parameters file is read correctly.""" dirpath = Path(__file__).parent.parent / "data" / "dataconverter" diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 4b14b9753..f353a3968 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -19,7 +19,6 @@ import logging import os -import difflib import lxml.etree as ET import numpy as np @@ -160,6 +159,7 @@ def test_get_nexus_classes_units_attributes(): assert "NX_FLOAT" in nexus_attribute_list +@pytest.mark.xdist_group(name="shared_file_201805_WSe2_arpes") def test_nexus(tmp_path): """ The nexus test function diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 7f437742e..0cdbcdfb7 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -36,6 +36,7 @@ from pynxtools.nomad.utils import _rename_nx_for_nomad as rename_nx_for_nomad +@pytest.mark.xdist_group(name="shared_file_201805_WSe2_arpes") def test_nexus_example(): archive = EntryArchive() From 50d1ae8af0c5cf8d29f64df85445312d6476de3f Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 25 Apr 
2025 13:02:57 +0200 Subject: [PATCH 030/118] reset plugin branches for test --- .github/workflows/plugin_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 07c5ed3c6..5a8654912 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -36,10 +36,10 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-raman - branch: sibling-inheritance + branch: main tests_to_run: tests/. - plugin: pynxtools-spm - branch: field-inheritance + branch: main tests_to_run: tests/. - plugin: pynxtools-xps branch: main From e6e29d545b70d9688f9f5f24761b992889c7ec25 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 28 Apr 2025 10:58:43 +0200 Subject: [PATCH 031/118] fix coveralls report --- .github/workflows/pytest.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f726f4812..d550962f9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -40,9 +40,13 @@ jobs: - name: Test with pytest run: | coverage run -m pytest -sv --show-capture=no tests -n auto --dist loadgroup + - name: Combine coverage data + run: | + coverage combine + coverage report - name: Submit to coveralls continue-on-error: true - if: "${{ matrix.python-version == '3.12'}}" + if: ${{ matrix.python-version == '3.12' }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | From 136e0d3c2587d4cabc26913cae1f07f65d19e050 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 28 Apr 2025 11:29:31 +0200 Subject: [PATCH 032/118] use pytest --cov --- .github/workflows/pytest.yml | 17 ++++++----------- tests/nexus/test_nexus.py | 1 + 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d550962f9..d7bfb3b9f 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,22 +32,17 @@ jobs: - name: Install nomad if: "${{ matrix.python-version != '3.8' && matrix.python-version != '3.9'}}" run: | - uv pip install nomad-lab[infrastructure] - # @git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git + uv pip install nomad-lab[infrastructure]@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git - name: Install pynx run: | uv pip install -e ".[dev]" - name: Test with pytest run: | - coverage run -m pytest -sv --show-capture=no tests -n auto --dist loadgroup - - name: Combine coverage data - run: | - coverage combine - coverage report + pytest --cov --cov-report xml:cobertura.xml --full-trace --show-capture=no -sv -n auto --dist loadgroup tests/ - name: Submit to coveralls continue-on-error: true if: ${{ matrix.python-version == '3.12' }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - coveralls --service=github \ No newline at end of file + uses: coverallsapp/github-action@v2 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + file: ./cobertura.xml \ No newline at end of file diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index f353a3968..78abe97e2 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -23,6 +23,7 @@ import lxml.etree as ET import numpy as np import pytest +import difflib from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( get_inherited_nodes, From 58d285605ccb8828e8fedd724c33cb359d28f86e Mon Sep 17 00:00:00 2001 From: 
Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 28 Apr 2025 14:59:39 +0200 Subject: [PATCH 033/118] use default np printoptions --- scripts/generate_ref_nexus_log.py | 57 +++++++++++++++++++++++++++++++ tests/nexus/test_nexus.py | 14 +++++--- 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 scripts/generate_ref_nexus_log.py diff --git a/scripts/generate_ref_nexus_log.py b/scripts/generate_ref_nexus_log.py new file mode 100644 index 000000000..615b538b7 --- /dev/null +++ b/scripts/generate_ref_nexus_log.py @@ -0,0 +1,57 @@ +import os +import logging +import numpy as np + +from pynxtools.nexus.nexus import HandleNexus + +# Set up the logger for the test output +logger = logging.getLogger("pynxtools") + + +def generate_ref_log(): + """ + Function to run the nexus test and generate the Ref_nexus_test.log file. + """ + dirpath = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "../tests/data/nexus" + ) + dirpath = os.path.abspath(dirpath) + example_data = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "../src/pynxtools/data/201805_WSe2_arpes.nxs", + ) + example_data = os.path.abspath(example_data) + + # Ensure the directory exists for the log file + if not os.path.exists(dirpath): + os.makedirs(dirpath) + + # Set up logger to write directly to the reference log + logger.handlers.clear() + logger.setLevel(logging.DEBUG) + ref_log_path = os.path.join(dirpath, "Ref_nexus_test.log") + handler = logging.FileHandler(ref_log_path, "w", encoding="utf-8") + formatter = logging.Formatter("%(levelname)s - %(message)s") + handler.setLevel(logging.DEBUG) + handler.setFormatter(formatter) + logger.addHandler(handler) + + # Set default print options for numpy (if needed) + default_print_options = { + "edgeitems": 3, + "threshold": 1000, + "precision": 8, + "linewidth": 75, + } + + np.set_printoptions(**default_print_options) + + # Run the actual processing with the nexus_helper + nexus_helper = HandleNexus(logger, example_data, None, None) + nexus_helper.process_nexus_master_file(None) + + print(f"Reference log generated at {ref_log_path}") + + +if __name__ == "__main__": + generate_ref_log() diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 78abe97e2..e8408d060 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -178,6 +178,15 @@ def test_nexus(tmp_path): handler.setFormatter(formatter) logger.addHandler(handler) nexus_helper = HandleNexus(logger, example_data, None, None) + + default_print_options = { + "edgeitems": 3, + "threshold": 1000, + "precision": 8, + "linewidth": 75, + } + + np.set_printoptions(**default_print_options) nexus_helper.process_nexus_master_file(None) with open( @@ -204,11 +213,6 @@ def test_nexus(tmp_path): f"Log output does not match reference even though each individual line matches." 
) - # import filecmp - # # didn't work with filecmp library - # log = os.path.join(local_dir, '../data/nexus_test_data/nexus_test.log') - # ref = os.path.join(local_dir, '../data/nexus_test_data/Ref_nexus_test.log') - def test_get_node_at_nxdl_path(): """Test to verify if we receive the right XML element for a given NXDL path""" From b872a96716ba2251e6be9e01171d2b281a38fbfe Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 28 Apr 2025 16:13:22 +0200 Subject: [PATCH 034/118] remove loadgroup for test_nexus --- tests/nexus/test_nexus.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index e8408d060..98bfb9eb4 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -160,7 +160,6 @@ def test_get_nexus_classes_units_attributes(): assert "NX_FLOAT" in nexus_attribute_list -@pytest.mark.xdist_group(name="shared_file_201805_WSe2_arpes") def test_nexus(tmp_path): """ The nexus test function From 984a90bc50792198f080cea01886b194ee420830 Mon Sep 17 00:00:00 2001 From: rettigl Date: Mon, 28 Apr 2025 16:32:38 +0200 Subject: [PATCH 035/118] remove loadgroup also from nomad tests --- tests/nomad/test_parsing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 0cdbcdfb7..7f437742e 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -36,7 +36,6 @@ from pynxtools.nomad.utils import _rename_nx_for_nomad as rename_nx_for_nomad -@pytest.mark.xdist_group(name="shared_file_201805_WSe2_arpes") def test_nexus_example(): archive = EntryArchive() From f0da96eb81749b04e8fc9c16d462ea067af63793 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 29 Apr 2025 14:27:47 +0200 Subject: [PATCH 036/118] allow inheritance for fields that are not enumerated from enumerated fields elsewhere --- src/pynxtools/dataconverter/nexus_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 5d2a2a9d6..e60246a88 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -905,7 +905,7 @@ def _check_enum_fit(xml_elem: ET._Element) -> bool: if elem_enum is not None: if self.items is None: # Case where inherited entity is enumerated, but current node isn't - return False + return True elem_enum_open = elem_enum.attrib.get("open", "false") if elem_enum_open == "true": From 54271c77f7885663bdf9cdf70e0fc76bc099f63f Mon Sep 17 00:00:00 2001 From: GinzburgLev Date: Tue, 29 Apr 2025 17:10:38 +0200 Subject: [PATCH 037/118] add datetime, modify nexus app start_time -> datetime --- src/pynxtools/nomad/entrypoints.py | 4 ++-- src/pynxtools/nomad/schema.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index b11c82b74..183e4f091 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -111,7 +111,7 @@ def load(self): ), Column( title="Start Time", - search_quantity=f"data.ENTRY[*].start_time#{schema}", + search_quantity=f"data.datetime#{schema}", selected=True, ), Column( @@ -261,7 +261,7 @@ def load(self): ), MenuItemHistogram( title="Start Time", - x=f"data.ENTRY.start_time__field#{schema}", + x=f"data.datetime#{schema}", autorange=True, ), MenuItemHistogram( diff --git a/src/pynxtools/nomad/schema.py 
b/src/pynxtools/nomad/schema.py index d4e1d3ce7..58a4f1d44 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -238,6 +238,8 @@ def normalize(self, archive, logger): self.steps = [] for entry in app_entry: ref = NexusActivityStep(name=entry.name, reference=entry) + if (self.datetime is None) or (self.datetime > entry.start_time): + self.datetime = entry.start_time self.steps.append(ref) mapping = { ActivityStep: (NexusActivityStep, self.steps), From 386d82fdf401439ba753bc9aa128c241103618f8 Mon Sep 17 00:00:00 2001 From: GinzburgLev Date: Tue, 29 Apr 2025 17:45:56 +0200 Subject: [PATCH 038/118] use start_time__field instead of start_time. Add extra histogram and hidden column for start time by entry --- src/pynxtools/nomad/entrypoints.py | 10 ++++++++++ src/pynxtools/nomad/schema.py | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index 183e4f091..a31db985e 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -114,6 +114,11 @@ def load(self): search_quantity=f"data.datetime#{schema}", selected=True, ), + Column( + title="Start Times by Entry", + search_quantity=f"data.ENTRY[*].start_time__field#{schema}", + selected=False, + ), Column( title="Description", search_quantity=f"data.ENTRY[*].experiment_description__field#{schema}", @@ -264,6 +269,11 @@ def load(self): x=f"data.datetime#{schema}", autorange=True, ), + MenuItemHistogram( + title="Start Time by Entry", + x=f"data.ENTRY.start_time__field#{schema}", + autorange=True, + ), MenuItemHistogram( title="Upload Creation Time", x=f"upload_create_time", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 58a4f1d44..67b0c2f49 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -238,8 +238,9 @@ def normalize(self, archive, logger): self.steps = [] for entry in app_entry: ref = NexusActivityStep(name=entry.name, reference=entry) - if (self.datetime is None) or (self.datetime > entry.start_time): - self.datetime = entry.start_time + if entry.start_time__field is not None: + if (self.datetime is None) or (self.datetime > entry.start_time__field): + self.datetime = entry.start_time__field self.steps.append(ref) mapping = { ActivityStep: (NexusActivityStep, self.steps), From 6ed797659ffbc4b231bdd21f7c129465e1957298 Mon Sep 17 00:00:00 2001 From: GinzburgLev Date: Wed, 30 Apr 2025 09:06:26 +0200 Subject: [PATCH 039/118] fix linting --- src/pynxtools/nomad/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 67b0c2f49..9c2bdb754 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -239,7 +239,9 @@ def normalize(self, archive, logger): for entry in app_entry: ref = NexusActivityStep(name=entry.name, reference=entry) if entry.start_time__field is not None: - if (self.datetime is None) or (self.datetime > entry.start_time__field): + if (self.datetime is None) or ( + self.datetime > entry.start_time__field + ): self.datetime = entry.start_time__field self.steps.append(ref) mapping = { From fcabc91bbe4b5cbf32a6f6a1a2fbbd78efcde92a Mon Sep 17 00:00:00 2001 From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com> Date: Mon, 5 May 2025 11:22:50 +0200 Subject: [PATCH 040/118] Update nexus version and add new version of pynxtools in citation.cff (#634) * Update nexus version and add new version of pynxtools 
in citation.cff * pynxtools 0.10.6. --- CITATION.cff | 2 +- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 9d32d6876..cc9ca7cca 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ message: If you use this software, please cite it using the metadata from this file. type: software -version: 0.10.4 +version: 0.10.6 authors: - given-names: Sherjeel family-names: Shabih diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 06c677c6e..c7ee2df46 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 06c677c6e6d3496668a2a0905faadae88a7c59d0 +Subproject commit c7ee2df461f3862e2bebe40a1ff3ff988d4b9dca diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index db45126b5..a062d6283 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1962-g06c677c6 \ No newline at end of file +v2024.02-1963-gc7ee2df4 \ No newline at end of file From de4a9e9ea59058d6def581ea52e4c090181f1a4b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 7 May 2025 10:09:37 +0200 Subject: [PATCH 041/118] remove uv from dev dependencies --- dev-requirements.txt | 2 -- pyproject.toml | 1 - src/pynxtools/definitions | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 682758aa3..8756972b9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -229,8 +229,6 @@ urllib3==2.3.0 # via # requests # types-requests -uv==0.6.13 - # via pynxtools (pyproject.toml) virtualenv==20.30.0 # via pre-commit watchdog==6.0.0 diff --git a/pyproject.toml b/pyproject.toml index 9a446fb5d..9e1389c8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,6 @@ dev = [ "types-pyyaml", "types-pytz", "types-requests", - "uv", "pre-commit", ] convert = [ diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index c7ee2df46..06c677c6e 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit c7ee2df461f3862e2bebe40a1ff3ff988d4b9dca +Subproject commit 06c677c6e6d3496668a2a0905faadae88a7c59d0 From 772d1b60380a6dcf9e45ff6c0e679d44cee3488d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 7 May 2025 10:11:55 +0200 Subject: [PATCH 042/118] update definitions --- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 06c677c6e..60eb3449d 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 06c677c6e6d3496668a2a0905faadae88a7c59d0 +Subproject commit 60eb3449d8aa1025a42ee652c8b85b0b684931ec diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index a062d6283..995f26d93 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1963-gc7ee2df4 \ No newline at end of file +v2024.02-1974-g60eb3449 \ No newline at end of file From 41cc8c0251cad28a914017b1b3e34497f7f05f8d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 7 May 2025 14:11:14 +0200 Subject: [PATCH 043/118] temporarily use different plugin branches --- .github/workflows/plugin_test.yaml | 4 ++-- 1
file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 5a8654912..729a85d38 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -24,7 +24,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-ellips - branch: main + branch: update-example-and-ci tests_to_run: tests/. - plugin: pynxtools-em branch: main @@ -36,7 +36,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-raman - branch: main + branch: update-example-and-ci tests_to_run: tests/. - plugin: pynxtools-spm branch: main From e044af12375c9cdf73a958da0afcbc5e812d723a Mon Sep 17 00:00:00 2001 From: rettigl Date: Fri, 9 May 2025 21:58:10 +0200 Subject: [PATCH 044/118] convert int-like values silently into float-like for NX_FLOAT --- src/pynxtools/dataconverter/helpers.py | 26 ++++++++++++++++++++++++++ tests/dataconverter/test_validation.py | 26 ++++++++++++++++++++------ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index d9664a967..6df90a88e 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -658,6 +658,26 @@ def convert_str_to_bool_safe(value: str) -> Optional[bool]: raise ValueError(f"Could not interpret string '{value}' as boolean.") +def convert_int_to_float(value): + """ + Converts int-like values to float, including values in arrays, and lists + + Args: + value: The input value, which can be a single value, list, or numpy array. + + Returns: + The input value with all int-like values converted to float. + """ + if isinstance(value, int): + return float(value) + elif isinstance(value, list): + return [convert_int_to_float(v) for v in value] + elif isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.integer): + return value.astype(float) + else: + return value + + def is_valid_data_field( value: Any, nxdl_type: str, nxdl_enum: list, nxdl_enum_open: bool, path: str ) -> Any: @@ -683,6 +703,12 @@ def is_valid_data_field( collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type ) + elif accepted_types[0] is float: + value = convert_int_to_float(value) + if not is_valid_data_type(value, accepted_types): + collector.collect_and_log( + path, ValidationProblem.InvalidType, accepted_types, nxdl_type + ) else: collector.collect_and_log( path, ValidationProblem.InvalidType, accepted_types, nxdl_type ) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 67eaa4ef2..d0a8dce3d 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -362,10 +362,19 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", 0, ), + [], + id="int-instead-of-float", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + np.complex128(0), + ), [ "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be one of the following Python types: (<class 'float'>, <class 'numpy.floating'>), as defined in the NXDL as NX_FLOAT."
], - id="int-instead-of-float", + id="complex-instead-of-float", ), pytest.param( alter_dict( @@ -534,13 +543,18 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", [2], # pylint: disable=E1126 ), - [ - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value should be " - "one of the following Python types: (<class 'float'>, <class 'numpy.floating'>), as defined in the NXDL " - "as NX_FLOAT." - ], + [], id="list-of-int-instead-of-float", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + np.array([2]), # pylint: disable=E1126 + ), + [], + id="array-of-int-instead-of-float", + ), pytest.param( set_to_none_in_dict( TEMPLATE, From 215779d426b3caffc46c69a7ce8a910822fa714b Mon Sep 17 00:00:00 2001 From: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> Date: Tue, 13 May 2025 08:59:21 +0200 Subject: [PATCH 045/118] Review suggestion Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- src/pynxtools/dataconverter/helpers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 6df90a88e..334c6166e 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -672,6 +672,10 @@ def convert_int_to_float(value): return float(value) elif isinstance(value, list): return [convert_int_to_float(v) for v in value] + elif isinstance(value, tuple): + return tuple(convert_int_to_float(v) for v in value) + elif isinstance(value, set): + return {convert_int_to_float(v) for v in value} elif isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.integer): return value.astype(float) else: return value From 84e7437d9e56b5546334f48fe51b65d2a5d842bf Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 15:00:06 +0200 Subject: [PATCH 046/118] update definitions --- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 60eb3449d..a14acdde2 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 60eb3449d8aa1025a42ee652c8b85b0b684931ec +Subproject commit a14acdde2b9880acfc3f967e21ab83f37beadf13 diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 995f26d93..02c850407 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1974-g60eb3449 \ No newline at end of file +v2024.02-1983-ga14acdde \ No newline at end of file From 4815b2c3b4ced92350c6516836202113c6f584e7 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 15:02:49 +0200 Subject: [PATCH 047/118] raise lower bounds of pynxtools plugins --- pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9e1389c8e..08bc0ed10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ apm = [ "pynxtools-apm>=0.2.3", ] ellips = [ - "pynxtools-ellips>=0.0.8", + "pynxtools-ellips>=0.0.9", ] em = [ "pynxtools-em>=0.3.2", ] igor = [ "pynxtools-igor>=0.1.1", ] mpes = [ - "pynxtools-mpes>=0.2.2", + "pynxtools-mpes>=0.2.3", ] raman = [ - "pynxtools-raman>=0.0.9", + "pynxtools-raman>=0.0.10", ] spm = [ - "pynxtools-spm>=0.0.8", + "pynxtools-spm>=0.1.1", ] xps = [
"pynxtools-xps>=0.5.0", + "pynxtools-xps>=0.5.1", ] xrd = [ "pynxtools-xrd>=0.0.3", From 209e87cdffe63d557ade1eaf8e967a237aa5fa91 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 16:36:34 +0200 Subject: [PATCH 048/118] reuse main branches from plugins --- .github/workflows/plugin_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 729a85d38..5a8654912 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -24,7 +24,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-ellips - branch: update-example-and-ci + branch: main tests_to_run: tests/. - plugin: pynxtools-em branch: main @@ -36,7 +36,7 @@ jobs: branch: main tests_to_run: tests/. - plugin: pynxtools-raman - branch: update-example-and-ci + branch: main tests_to_run: tests/. - plugin: pynxtools-spm branch: main From 8afb8e60de86912bf910abac8ce065487116872c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 18:03:55 +0200 Subject: [PATCH 049/118] validation check for reserved prefixes and suffixes --- src/pynxtools/data/NXtest.nxdl.xml | 9 ++ src/pynxtools/dataconverter/helpers.py | 11 ++ src/pynxtools/dataconverter/validation.py | 118 +++++++++++++++++++++- tests/dataconverter/test_validation.py | 64 +++++++++++- 4 files changed, 197 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 46035acfd..03e6c9b95 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -23,6 +23,15 @@ A dummy entry to test optional parent check for a required child. + + A dummy entry to test reserved suffixes. + + + + A dummy entry to test reserved suffixes where the actual field is not given. + Note that this is not allowed by NeXus, but we do this here to test the validation. + + A dummy entry to test optional parent check for an optional child. diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 334c6166e..e4e5495e2 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -68,6 +68,8 @@ class ValidationProblem(Enum): NXdataAxisMismatch = 20 KeyToBeRemoved = 21 InvalidConceptForNonVariadic = 22 + ReservedSuffixWithoutField = 23 + ReservedPrefixInWrongApplication = 24 class Collector: @@ -157,6 +159,15 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if value.type == "group": log_text += f", which should be of type {value.nx_class}." logger.warning(log_text) + elif log_type == ValidationProblem.ReservedSuffixWithoutField: + logger.warning( + f"Reserved suffix {path} was used, but there is no associated field {value}." + ) + elif log_type == ValidationProblem.ReservedPrefixInWrongApplication: + log_text = f"Reserved prefix {path} was used in key {args[0] if args else ''}, but is not valid here." + if value != "": + log_text += f" It is only valid in the context of {value}." 
+ logger.error(log_text) def collect_and_log( self, diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index d0bd645e2..a5a792d41 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -20,7 +20,18 @@ from collections import defaultdict from functools import reduce from operator import getitem -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import ( + Any, + Iterable, + List, + Mapping, + MutableMapping, + Optional, + Tuple, + Union, + Dict, + Literal, +) import h5py import lxml.etree as ET @@ -515,6 +526,9 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): f"{prev_path}/{variant}", ) + _ = check_reserved_suffix(f"{prev_path}/{variant}", mapping) + _ = check_reserved_prefix(f"{prev_path}/{variant}", mapping, "field") + # Check unit category if node.unit is not None: remove_from_not_visited(f"{prev_path}/{variant}/@units") @@ -532,9 +546,6 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): prev_path=f"{prev_path}/{variant}", ) - # TODO: Build variadic map for fields and attributes - # Introduce variadic siblings in NexusNode? - def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): full_path = remove_from_not_visited(f"{prev_path}/@{node.name}") variants = get_variations_of(node, keys) @@ -559,6 +570,7 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): node.open_enum, f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}", ) + _ = check_reserved_prefix(f"{prev_path}/{variant}", mapping, "attribute") def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str): global collector @@ -847,6 +859,92 @@ def startswith_with_variations( # default return (False, 0) + def check_reserved_suffix(key: str, mapping: Dict[str, Any]) -> bool: + """Check if an associated field exists for a key with a reserved suffix.""" + reserved_suffixes = ( + "_end", + "_increment_set", + "_errors", + "_indices", + "_mask", + "_set", + "_weights", + "_scaling_factor", + "_offset", + ) + + if not key.endswith((reserved_suffixes)): + # Ignore this test + return False + + for suffix in reserved_suffixes: + if key.endswith(suffix) and key.rsplit(suffix, 1)[0] not in mapping: + collector.collect_and_log( + key, + ValidationProblem.ReservedSuffixWithoutField, + key.rsplit(suffix, 1)[0], + ) + return False + return True + + def check_reserved_prefix( + key: str, + mapping: Dict[str, Any], + nx_type: Literal["group", "field", "attribute"], + ) -> bool: + """Check if a reserved prefix was used in the correct context.""" + reserved_prefixes = { + "attribute": { + "@BLUESKY_": None, # do not use anywhere + "@DECTRIS_": "NXmx", + "@IDF_": None, # do not use anywhere + "@NDAttr": None, + "@NX_": "all", + "@PDBX_": None, # do not use anywhere + "@SAS_": "NXcanSAS", + "@SILX_": None, # do not use anywhere + }, + "field": { + "DECTRIS_": "NXmx", + }, + } + + prefixes = reserved_prefixes[nx_type] + + if not key.rsplit("/", 1)[-1].startswith(tuple(prefixes.keys())): + return True + + for prefix, context in prefixes.items(): + if not key.rsplit("/", 1)[-1].startswith(prefix): + continue + if not context: + # This prefix should not be used by pynxtools. + collector.collect_and_log( + prefix, + ValidationProblem.ReservedPrefixInWrongApplication, + None, + key, + ) + return False + elif context == "all": + # We can freely use this prefix everywhere. 
+ continue + else: + # Check that the prefix is used in the correct application definition. + definition_key = ( + f"{re.match(r'(/ENTRY\[[^]]+])', key).group(1)}/definition" + ) + if mapping.get(definition_key) != context: + collector.collect_and_log( + prefix, + ValidationProblem.ReservedPrefixInWrongApplication, + context, + key, + ) + return False + + return True + missing_type_err = { "field": ValidationProblem.MissingRequiredField, "group": ValidationProblem.MissingRequiredGroup, @@ -937,6 +1035,18 @@ def startswith_with_variations( keys_to_remove.append(not_visited_key) continue + if "@" not in not_visited_key.rsplit("/", 1)[-1]: + if check_reserved_suffix(not_visited_key, mapping) or check_reserved_prefix( + not_visited_key, mapping, "field" + ): + continue + else: + associated_field = not_visited_key.rsplit("/", 1)[-2] + if check_reserved_prefix( + not_visited_key, mapping, "attribute" + ) and check_reserved_prefix(associated_field, mapping, "field"): + continue + if is_documented(not_visited_key, tree): continue diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index d0a8dce3d..f359835a3 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1216,13 +1216,75 @@ def listify_template(data_dict: Template): [], id="nonvariadic-nxcollection", ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field_set", + 1, + ), + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field_set", + 1, + ), + [ + "Reserved suffix /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field_set was used, " + "but there is no associated field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field." + ], + id="reserved-suffix-from-appdef", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field_weights", + 0.1, + ), + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights", + 0.1, + ), + [ + "Reserved suffix /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights was used, but there is no associated field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field.", + "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights written without documentation.", + ], + id="reserved-suffix-from-base-class", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr", + "some text", + ), + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr", + "some text", + ), + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field", + "some text", + ), + [ + "Reserved prefix @BLUESKY_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr, but is not valid here.", + "Attribute /ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr written without documentation.", + "Reserved prefix @DECTRIS_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr, but is not valid here. " + "It is only valid in the context of NXmx.", + "Attribute /ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr written without documentation.", + "Reserved prefix DECTRIS_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field, but is not valid here. 
" + "It is only valid in the context of NXmx.", + "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field written without documentation.", + ], + id="reserved-prefix", + ), ], ) def test_validate_data_dict(caplog, data_dict, error_messages, request): """Unit test for the data validation routine.""" + # validate_dict_against("NXtest", data_dict)[0] def format_error_message(msg: str) -> str: - return msg[msg.rfind("G: ") + 3 :].rstrip("\n") + for prefix in ("ERROR:", "WARNING:"): + if msg.startswith(prefix): + return msg[len(prefix) :].lstrip() + return msg if not error_messages: with caplog.at_level(logging.WARNING): From e17bc2ebc1ed0eaece93aa7363e67b3a5536bc71 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 18:16:37 +0200 Subject: [PATCH 050/118] mypy fixes --- src/pynxtools/dataconverter/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index a5a792d41..478e2a559 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -859,7 +859,7 @@ def startswith_with_variations( # default return (False, 0) - def check_reserved_suffix(key: str, mapping: Dict[str, Any]) -> bool: + def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]) -> bool: """Check if an associated field exists for a key with a reserved suffix.""" reserved_suffixes = ( "_end", @@ -889,7 +889,7 @@ def check_reserved_suffix(key: str, mapping: Dict[str, Any]) -> bool: def check_reserved_prefix( key: str, - mapping: Dict[str, Any], + mapping: MutableMapping[str, Any], nx_type: Literal["group", "field", "attribute"], ) -> bool: """Check if a reserved prefix was used in the correct context.""" From 2d38dbb59615b259cdd5d3b1d56b90b93ac606bc Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 18:22:29 +0200 Subject: [PATCH 051/118] safer matching of regular expression --- src/pynxtools/dataconverter/validation.py | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 478e2a559..a324b76ec 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -912,7 +912,8 @@ def check_reserved_prefix( prefixes = reserved_prefixes[nx_type] if not key.rsplit("/", 1)[-1].startswith(tuple(prefixes.keys())): - return True + # Ignore this test + return False for prefix, context in prefixes.items(): if not key.rsplit("/", 1)[-1].startswith(prefix): @@ -931,17 +932,17 @@ def check_reserved_prefix( continue else: # Check that the prefix is used in the correct application definition. 
- definition_key = ( - f"{re.match(r'(/ENTRY\[[^]]+])', key).group(1)}/definition" - ) - if mapping.get(definition_key) != context: - collector.collect_and_log( - prefix, - ValidationProblem.ReservedPrefixInWrongApplication, - context, - key, - ) - return False + match = re.match(r"(/ENTRY\[[^]]+])", key) + if match: + definition_key = f"{match.group(1)}/definition" + if mapping.get(definition_key) != context: + collector.collect_and_log( + prefix, + ValidationProblem.ReservedPrefixInWrongApplication, + context, + key, + ) + return False return True From b5cbdc59bffbe62387b1e71a166314bddf3e641c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 19 May 2025 07:46:15 +0200 Subject: [PATCH 052/118] clean up, code review --- src/pynxtools/dataconverter/helpers.py | 4 +- src/pynxtools/dataconverter/validation.py | 129 ++++++++++++++-------- tests/dataconverter/test_validation.py | 15 ++- 3 files changed, 96 insertions(+), 52 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index e4e5495e2..1f5220c42 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -69,7 +69,7 @@ class ValidationProblem(Enum): KeyToBeRemoved = 21 InvalidConceptForNonVariadic = 22 ReservedSuffixWithoutField = 23 - ReservedPrefixInWrongApplication = 24 + ReservedPrefixInWrongContext = 24 class Collector: @@ -163,7 +163,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar logger.warning( f"Reserved suffix {path} was used, but there is no associated field {value}." ) - elif log_type == ValidationProblem.ReservedPrefixInWrongApplication: + elif log_type == ValidationProblem.ReservedPrefixInWrongContext: log_text = f"Reserved prefix {path} was used in key {args[0] if args else ''}, but is not valid here." if value != "": log_text += f" It is only valid in the context of {value}." diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index a324b76ec..8e779f2fd 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -860,7 +860,24 @@ def startswith_with_variations( return (False, 0) def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]) -> bool: - """Check if an associated field exists for a key with a reserved suffix.""" + """ + Check if an associated field exists for a key with a reserved suffix. + + Reserved suffixes imply the presence of an associated base field (e.g., + "temperature_errors" implies "temperature" must exist in the mapping). + + Args: + key (str): + The full key path (e.g., "/ENTRY[entry1]/sample/temperature_errors"). + mapping (MutableMapping[str, Any]): + The mapping containing the data to validate. + This should be a dict of `/` separated paths. + + Returns: + bool: + True if the suffix usage is valid or not applicable. + False if the suffix is used without the expected associated base field. 
+ """ reserved_suffixes = ( "_end", "_increment_set", @@ -873,18 +890,18 @@ def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]) -> bool: "_offset", ) - if not key.endswith((reserved_suffixes)): - # Ignore this test - return False - for suffix in reserved_suffixes: - if key.endswith(suffix) and key.rsplit(suffix, 1)[0] not in mapping: - collector.collect_and_log( - key, - ValidationProblem.ReservedSuffixWithoutField, - key.rsplit(suffix, 1)[0], - ) - return False + if key.endswith(suffix): + name = key.rsplit(suffix, 1)[0] + if name not in mapping: + collector.collect_and_log( + key, + ValidationProblem.ReservedSuffixWithoutField, + name, + ) + return False + break # We found the suffix and it passed + return True def check_reserved_prefix( @@ -892,7 +909,24 @@ def check_reserved_prefix( mapping: MutableMapping[str, Any], nx_type: Literal["group", "field", "attribute"], ) -> bool: - """Check if a reserved prefix was used in the correct context.""" + """ + Check if a reserved prefix was used in the correct context. + + Args: + key (str): The full key path (e.g., "/ENTRY[entry1]/instrument/detector/@DECTRIS_config"). + mapping (MutableMapping[str, Any]): + The mapping containing the data to validate. + This should be a dict of `/` separated paths. + Attributes are denoted with `@` in front of the last element. + nx_type (Literal["group", "field", "attribute"]): + The NeXus type the key represents. Determines which reserved prefixes are relevant. + + + Returns: + bool: + True if the prefix usage is valid or not applicable. + False if an invalid or misapplied reserved prefix is detected. + """ reserved_prefixes = { "attribute": { "@BLUESKY_": None, # do not use anywhere @@ -909,40 +943,47 @@ def check_reserved_prefix( }, } - prefixes = reserved_prefixes[nx_type] + prefixes = reserved_prefixes.get(nx_type) + if not prefixes: + return True - if not key.rsplit("/", 1)[-1].startswith(tuple(prefixes.keys())): - # Ignore this test - return False + name = key.rsplit("/", 1)[-1] - for prefix, context in prefixes.items(): - if not key.rsplit("/", 1)[-1].startswith(prefix): + if not name.startswith(tuple(prefixes)): + return False # Irrelevant prefix, no check needed + + for prefix, allowed_context in prefixes.items(): + if not name.startswith(prefix): continue - if not context: - # This prefix should not be used by pynxtools. + + if allowed_context is None: + # This prefix is disallowed entirely collector.collect_and_log( prefix, - ValidationProblem.ReservedPrefixInWrongApplication, + ValidationProblem.ReservedPrefixInWrongContext, None, key, ) return False - elif context == "all": + if allowed_context == "all": # We can freely use this prefix everywhere. - continue - else: - # Check that the prefix is used in the correct application definition. - match = re.match(r"(/ENTRY\[[^]]+])", key) - if match: - definition_key = f"{match.group(1)}/definition" - if mapping.get(definition_key) != context: - collector.collect_and_log( - prefix, - ValidationProblem.ReservedPrefixInWrongApplication, - context, - key, - ) - return False + return True + + # Check that the prefix is used in the correct context. 
+ match = re.match(r"(/ENTRY\[[^]]+])", key) + definition_value = None + if match: + definition_key = f"{match.group(1)}/definition" + definition_value = mapping.get(definition_key) + + if definition_value != allowed_context: + collector.collect_and_log( + prefix, + ValidationProblem.ReservedPrefixInWrongContext, + allowed_context, + key, + ) + return False return True @@ -1037,16 +1078,14 @@ def check_reserved_prefix( continue if "@" not in not_visited_key.rsplit("/", 1)[-1]: - if check_reserved_suffix(not_visited_key, mapping) or check_reserved_prefix( - not_visited_key, mapping, "field" - ): - continue + check_reserved_suffix(not_visited_key, mapping) + check_reserved_prefix(not_visited_key, mapping, "field") + else: associated_field = not_visited_key.rsplit("/", 1)[-2] - if check_reserved_prefix( - not_visited_key, mapping, "attribute" - ) and check_reserved_prefix(associated_field, mapping, "field"): - continue + # Check the prefix both for this attribute and the field it belongs to + check_reserved_prefix(not_visited_key, mapping, "attribute") + check_reserved_prefix(associated_field, mapping, "field") if is_documented(not_visited_key, tree): continue diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index f359835a3..200c76cad 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1243,6 +1243,7 @@ def listify_template(data_dict: Template): 0.1, ), [ + "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field_weights written without documentation.", "Reserved suffix /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights was used, but there is no associated field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field.", "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights written without documentation.", ], @@ -1252,14 +1253,18 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr", + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@BLUESKY_attr", + "some text", + ), + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr", "some text", ), - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@DECTRIS_attr", + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field", "some text", ), - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field", + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/@NX_attr", "some text", ), [ @@ -1271,6 +1276,7 @@ def listify_template(data_dict: Template): "Reserved prefix DECTRIS_ was used in key /ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field, but is not valid here. 
" "It is only valid in the context of NXmx.", "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/DECTRIS_field written without documentation.", + "Attribute /ENTRY[my_entry]/OPTIONAL_group[my_group]/@NX_attr written without documentation.", ], id="reserved-prefix", ), @@ -1278,7 +1284,6 @@ def listify_template(data_dict: Template): ) def test_validate_data_dict(caplog, data_dict, error_messages, request): """Unit test for the data validation routine.""" - # validate_dict_against("NXtest", data_dict)[0] def format_error_message(msg: str) -> str: for prefix in ("ERROR:", "WARNING:"): From 1d17dfd4a3d5e52c3c71bbbe8b4bd204a120882e Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 19 May 2025 08:40:17 +0200 Subject: [PATCH 053/118] allow suffixes also for fields with concept names --- src/pynxtools/dataconverter/helpers.py | 2 +- src/pynxtools/dataconverter/validation.py | 37 ++++++++++++++++++++--- tests/dataconverter/test_validation.py | 12 +++----- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 1f5220c42..b4cfc2cdc 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -161,7 +161,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar logger.warning(log_text) elif log_type == ValidationProblem.ReservedSuffixWithoutField: logger.warning( - f"Reserved suffix {path} was used, but there is no associated field {value}." + f"Reserved suffix '{args[0]}' was used in {path}, but there is no associated field {value}." ) elif log_type == ValidationProblem.ReservedPrefixInWrongContext: log_text = f"Reserved prefix {path} was used in key {args[0] if args else ''}, but is not valid here." 
diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 8e779f2fd..c12faf738 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -193,6 +193,9 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode return None return node else: + PRINT_HERE = True if "weights" in name else False + # if PRINT_HERE: + # print(node) if concept_name and concept_name == node.name: if instance_name == node.name: return node @@ -601,12 +604,18 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str pass def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: + PRINT = False # True if "weights" in key else False for name in key[1:].replace("@", "").split("/"): children_to_check = [ node.search_add_child_for(child) for child in node.get_all_direct_children_names() ] + if PRINT: + print("<<<<<<<", name, node) node = best_namefit_of(name, children_to_check) + if PRINT: + print("\t", children_to_check) + print(">>>>>>", node) if node is None: return None @@ -890,14 +899,34 @@ def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]) -> bool: "_offset", ) + parent_path, name = key.rsplit("/", 1) + concept_name, instance_name = split_class_and_name_of(name) + for suffix in reserved_suffixes: - if key.endswith(suffix): - name = key.rsplit(suffix, 1)[0] - if name not in mapping: + if instance_name.endswith(suffix): + associated_field_name = instance_name.rsplit(suffix, 1)[0] + + # TODO: This strictly limits FIELDNAME_weights[my_field_weights] to match with + # either my_field_weights or FIELDNAME[my_field], but AXISNAME[my_field] will + # not match. + + possible_fields = [associated_field_name] + + if concept_name: + possible_fields += ( + f"{concept_name.rsplit(suffix, 1)[0]}[{associated_field_name}]" + ) + + possible_field_keys = [ + f"{parent_path}/{field}" for field in possible_fields + ] + + if not any(k in mapping for k in possible_field_keys): collector.collect_and_log( key, ValidationProblem.ReservedSuffixWithoutField, - name, + associated_field_name, + suffix, ) return False break # We found the suffix and it passed diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 200c76cad..e7f2faa53 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1227,8 +1227,8 @@ def listify_template(data_dict: Template): 1, ), [ - "Reserved suffix /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field_set was used, " - "but there is no associated field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field." + "Reserved suffix '_set' was used in /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_field_set, " + "but there is no associated field some_field." 
], id="reserved-suffix-from-appdef", ), @@ -1236,16 +1236,14 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( TEMPLATE, - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field_weights", + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[required_field_weights]", 0.1, ), - "/ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights", + "/ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[some_random_field_weights]", 0.1, ), [ - "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/required_field_weights written without documentation.", - "Reserved suffix /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights was used, but there is no associated field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field.", - "Field /ENTRY[my_entry]/OPTIONAL_group[my_group]/some_random_field_weights written without documentation.", + "Reserved suffix '_weights' was used in /ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[some_random_field_weights], but there is no associated field some_random_field.", ], id="reserved-suffix-from-base-class", ), From 676f37a20beec4157a26fce3762b5083dbf04258 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 19 May 2025 08:53:01 +0200 Subject: [PATCH 054/118] generalize suffix testing --- src/pynxtools/dataconverter/validation.py | 26 ++++++++--------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index c12faf738..1b6b56d0e 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -904,28 +904,20 @@ def check_reserved_suffix(key: str, mapping: MutableMapping[str, Any]) -> bool: for suffix in reserved_suffixes: if instance_name.endswith(suffix): - associated_field_name = instance_name.rsplit(suffix, 1)[0] + associated_field = instance_name.rsplit(suffix, 1)[0] - # TODO: This strictly limits FIELDNAME_weights[my_field_weights] to match with - # either my_field_weights or FIELDNAME[my_field], but AXISNAME[my_field] will - # not match. 
-
-            possible_fields = [associated_field_name]
-
-            if concept_name:
-                possible_fields += (
-                    f"{concept_name.rsplit(suffix, 1)[0]}[{associated_field_name}]"
+            if not any(
+                k.startswith(parent_path + "/")
+                and (
+                    k.endswith(associated_field)
+                    or k.endswith(f"[{associated_field}]")
                 )
-
-            possible_field_keys = [
-                f"{parent_path}/{field}" for field in possible_fields
-            ]
-
-            if not any(k in mapping for k in possible_field_keys):
+                for k in mapping
+            ):
                 collector.collect_and_log(
                     key,
                     ValidationProblem.ReservedSuffixWithoutField,
-                    associated_field_name,
+                    associated_field,
                     suffix,
                 )
                 return False

From 1e964af13bd5cfb37e7960d1e00100f334683ff5 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Mon, 19 May 2025 13:41:15 +0200
Subject: [PATCH 055/118] remove debug code

---
 src/pynxtools/dataconverter/validation.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py
index 1b6b56d0e..1fdefd3c0 100644
--- a/src/pynxtools/dataconverter/validation.py
+++ b/src/pynxtools/dataconverter/validation.py
@@ -193,9 +193,6 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode
         return None
         return node
     else:
-        PRINT_HERE = True if "weights" in name else False
-        # if PRINT_HERE:
-        #     print(node)
         if concept_name and concept_name == node.name:
             if instance_name == node.name:
                 return node
@@ -604,18 +601,12 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str
         pass

     def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]:
-        PRINT = False  # True if "weights" in key else False
         for name in key[1:].replace("@", "").split("/"):
             children_to_check = [
                 node.search_add_child_for(child)
                 for child in node.get_all_direct_children_names()
             ]
-            if PRINT:
-                print("<<<<<<<", name, node)
             node = best_namefit_of(name, children_to_check)
-            if PRINT:
-                print("\t", children_to_check)
-                print(">>>>>>", node)

             if node is None:
                 return None

From 4d14410fddc9b4df78a5b5621d5312e5e5359f78 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Mon, 19 May 2025 13:43:44 +0200
Subject: [PATCH 056/118] add a comment about value conversion

---
 src/pynxtools/dataconverter/helpers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py
index b4cfc2cdc..605847cd0 100644
--- a/src/pynxtools/dataconverter/helpers.py
+++ b/src/pynxtools/dataconverter/helpers.py
@@ -165,6 +165,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
         )
     elif log_type == ValidationProblem.ReservedPrefixInWrongContext:
         log_text = f"Reserved prefix {path} was used in key {args[0] if args else ''}, but is not valid here."
+        # Note that value=None gets converted to ""
         if value != "":
             log_text += f" It is only valid in the context of {value}."
logger.error(log_text) From 00878f5d78100b428297d32469bef27491e69229 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 14 May 2025 17:38:43 +0200 Subject: [PATCH 057/118] add a separate error for names with wrong type and remove such keys --- src/pynxtools/data/NXtest.nxdl.xml | 4 +- src/pynxtools/dataconverter/helpers.py | 7 +++ src/pynxtools/dataconverter/validation.py | 60 ++++++++++++++++++++--- tests/dataconverter/test_validation.py | 28 ++++++++++- 4 files changed, 88 insertions(+), 11 deletions(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 03e6c9b95..880a54e66 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -45,8 +45,8 @@ A group with a name and nameType="specified". - - + + diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 605847cd0..32dfd7e67 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -70,6 +70,7 @@ class ValidationProblem(Enum): InvalidConceptForNonVariadic = 22 ReservedSuffixWithoutField = 23 ReservedPrefixInWrongContext = 24 + InvalidNexusTypeForNamedConcept = 25 class Collector: @@ -169,6 +170,12 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if value != "": log_text += f" It is only valid in the context of {value}." logger.error(log_text) + elif log_type == ValidationProblem.InvalidNexusTypeForNamedConcept: + value = cast(Any, value) + logger.error( + f"The type ('{args[0] if args else ''}') of the given concept '{path}' " + f"conflicts with another existing concept of the same name, which is of type '{value.type}')." + ) def collect_and_log( self, diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 1fdefd3c0..7759badb8 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -145,7 +145,12 @@ def split_class_and_name_of(name: str) -> Tuple[Optional[str], str]: ), f"{name_match.group(2)}{'' if prefix is None else prefix}" -def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode]: +def best_namefit_of( + name: str, + nodes: Iterable[NexusNode], + expected_types: List[str], + check_types: bool = False, +) -> Optional[NexusNode]: """ Get the best namefit of `name` in `keys`. @@ -166,6 +171,19 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode for node in nodes: if not node.variadic: if instance_name == node.name: + if node.type not in expected_types and check_types: + expected_types_str = " or ".join(expected_types) + collector.collect_and_log( + name, + ValidationProblem.InvalidNexusTypeForNamedConcept, + node, + expected_types_str, + ) + raise TypeError( + f"The type ('{expected_types_str if expected_types else ''}') " + f"of the given concept {name} conflicts with another existing concept {node.name} (which is of " + f"type '{node.type}')." + ) if concept_name and concept_name != node.name: inherited_names = [ name @@ -176,6 +194,7 @@ def best_namefit_of(name: str, nodes: Iterable[NexusNode]) -> Optional[NexusNode or (type_attr := elem.attrib.get("type")) and len(type_attr) > 2 ] + if concept_name not in inherited_names: if node.type == "group": if concept_name != node.nx_class[2:].upper(): @@ -600,25 +619,52 @@ def handle_unknown_type(node: NexusNode, keys: Mapping[str, Any], prev_path: str # TODO: Raise error or log the issue? 
pass - def add_best_matches_for(key: str, node: NexusNode) -> Optional[NexusNode]: - for name in key[1:].replace("@", "").split("/"): + def add_best_matches_for( + key: str, node: NexusNode, check_types: bool = False + ) -> Optional[NexusNode]: + key_components = key[1:].split("/") + is_last_attr = key_components[-1].startswith("@") + if is_last_attr: + key_components[-1] = key_components[-1].replace("@", "") + + key_len = len(key_components) + + expected_types = None + for ind, name in enumerate(key_components): + index = ind + 1 children_to_check = [ node.search_add_child_for(child) for child in node.get_all_direct_children_names() ] - node = best_namefit_of(name, children_to_check) + if index < key_len - 1: + expected_types = ["group"] + elif index == key_len - 1: + expected_types = ["group"] if not is_last_attr else ["group", "field"] + elif index == key_len: + expected_types = ["attribute"] if is_last_attr else ["field"] + node = best_namefit_of(name, children_to_check, expected_types, check_types) if node is None: return None return node - def is_documented(key: str, tree: NexusNode) -> bool: + def is_documented(key: str, tree: NexusNode) -> Tuple[bool, bool]: if mapping.get(key) is None: # This value is not really set. Skip checking its documentation. return True - node = add_best_matches_for(key, tree) + try: + node = add_best_matches_for(key, tree, check_types=True) + except TypeError: + node = None + keys_to_remove.append(key) + + collector.collect_and_log( + key, + ValidationProblem.KeyToBeRemoved, + None, + ) if node is None: key_path = key.replace("@", "") @@ -1102,7 +1148,7 @@ def check_reserved_prefix( if is_documented(not_visited_key, tree): continue - if not ignore_undocumented: + if not ignore_undocumented and not_visited_key not in keys_to_remove: collector.collect_and_log( not_visited_key, ValidationProblem.MissingDocumentation, None ) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index e7f2faa53..1bdffc141 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -785,6 +785,32 @@ def listify_template(data_dict: Template): [], id="concept-name-given-for-optional-attribute", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/identified_calibration/identifier_1/some_field", + "123", + ), + [ + "The type ('group') of the given concept 'identifier_1' conflicts with another " + "existing concept of the same name, which is of type 'field').", + "The attribute /ENTRY[my_entry]/identified_calibration/identifier_1/some_field will not be written.", + ], + id="group-instead-of-named-field", + ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/identified_calibration", + "123", + ), + [ + "The type ('field') of the given concept 'identified_calibration' conflicts with another " + "existing concept of the same name, which is of type 'group').", + "The attribute /ENTRY[my_entry]/identified_calibration will not be written.", + ], + id="field-instead-of-named-group", + ), pytest.param( alter_dict( remove_from_dict( @@ -1299,8 +1325,6 @@ def format_error_message(msg: str) -> str: "baseclass-field-with-illegal-unit", "open-enum-with-new-item", "baseclass-open-enum-with-new-item", - "variadic-nxcollection", - "nonvariadic-nxcollection", ): with caplog.at_level(logging.INFO): assert validate_dict_against("NXtest", data_dict)[0] From 06a97284d79d7ccbe75cc2cc01292e16cfd1cff7 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: 
Wed, 14 May 2025 17:55:09 +0200 Subject: [PATCH 058/118] mypy fixes --- src/pynxtools/dataconverter/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 7759badb8..a3f4f615d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -629,7 +629,7 @@ def add_best_matches_for( key_len = len(key_components) - expected_types = None + expected_types = [] for ind, name in enumerate(key_components): index = ind + 1 children_to_check = [ @@ -649,7 +649,7 @@ def add_best_matches_for( return node - def is_documented(key: str, tree: NexusNode) -> Tuple[bool, bool]: + def is_documented(key: str, tree: NexusNode) -> bool: if mapping.get(key) is None: # This value is not really set. Skip checking its documentation. return True From 1a8853277192efa38e2be9681358c0b1da3e88d2 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 15 May 2025 14:55:47 +0200 Subject: [PATCH 059/118] add a workaround for links in template --- src/pynxtools/data/NXtest.nxdl.xml | 2 +- src/pynxtools/dataconverter/validation.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/data/NXtest.nxdl.xml b/src/pynxtools/data/NXtest.nxdl.xml index 880a54e66..c58cc0f8c 100644 --- a/src/pynxtools/data/NXtest.nxdl.xml +++ b/src/pynxtools/data/NXtest.nxdl.xml @@ -45,7 +45,7 @@ A group with a name and nameType="specified". - + diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index a3f4f615d..abed5f403 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -636,12 +636,16 @@ def add_best_matches_for( node.search_add_child_for(child) for child in node.get_all_direct_children_names() ] + if index < key_len - 1: expected_types = ["group"] elif index == key_len - 1: expected_types = ["group"] if not is_last_attr else ["group", "field"] elif index == key_len: expected_types = ["attribute"] if is_last_attr else ["field"] + if "link" in str(mapping.get(key, "")): + expected_types += ["group"] + node = best_namefit_of(name, children_to_check, expected_types, check_types) if node is None: From 4a991fd1393b9a8a24c377eb9cb0740586503d55 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 20 May 2025 11:29:07 +0200 Subject: [PATCH 060/118] handle variable-name attributes in writer --- src/pynxtools/dataconverter/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 605847cd0..c3bd192cf 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -590,8 +590,10 @@ def get_regex(): results = get_regex().search(entry) if results is None: return entry + if entry[0] == "@": - return "@" + results.group(1) + name = results.group(1) + return name if name.startswith("@") else "@" + name return results.group(1) From 8f59666d3d6fbe0ba5b75bda808a305ccec0cd2b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 21 May 2025 10:44:20 +0200 Subject: [PATCH 061/118] Apply suggestions from code review Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- tests/dataconverter/test_validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
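The expected-type bookkeeping that PATCH 057 introduced (and that the mypy and link fixes above adjust) follows a purely positional rule over the template path. A self-contained sketch with an invented helper name; the real code threads these lists through add_best_matches_for and best_namefit_of, and PATCH 059 additionally allows "group" whenever the template value is a link:

from typing import List


def expected_nx_types(key: str) -> List[List[str]]:
    """Sketch: every component before the last two must be a group; the
    second-to-last may be a group or a field when the leaf is an attribute
    (attributes sit on both); the leaf is an attribute iff it starts with
    '@', otherwise a field."""
    parts = key.lstrip("/").split("/")
    is_attr = parts[-1].startswith("@")
    out = []
    for index in range(1, len(parts) + 1):
        if index < len(parts) - 1:
            out.append(["group"])
        elif index == len(parts) - 1:
            out.append(["group", "field"] if is_attr else ["group"])
        else:
            out.append(["attribute"] if is_attr else ["field"])
    return out


print(expected_nx_types("/ENTRY[entry]/instrument/detector/data"))
# [['group'], ['group'], ['group'], ['field']]
print(expected_nx_types("/ENTRY[entry]/instrument/name/@short_name"))
# [['group'], ['group'], ['group', 'field'], ['attribute']]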
diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 1bdffc141..6d713b26d 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -806,8 +806,8 @@ def listify_template(data_dict: Template): ), [ "The type ('field') of the given concept 'identified_calibration' conflicts with another " - "existing concept of the same name, which is of type 'group').", - "The attribute /ENTRY[my_entry]/identified_calibration will not be written.", + "existing concept of the same name, which is of type 'group'.", + "The field /ENTRY[my_entry]/identified_calibration will not be written.", ], id="field-instead-of-named-group", ), From 0a240d9a0a82ef0880fd83b9d0e29c5bd0a9090f Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 21 May 2025 10:58:52 +0200 Subject: [PATCH 062/118] Apply suggestions from code review Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- src/pynxtools/dataconverter/helpers.py | 2 +- tests/dataconverter/test_validation.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 32dfd7e67..42fb3c38f 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -174,7 +174,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar value = cast(Any, value) logger.error( f"The type ('{args[0] if args else ''}') of the given concept '{path}' " - f"conflicts with another existing concept of the same name, which is of type '{value.type}')." + f"conflicts with another existing concept of the same name, which is of type '{value.type}'." 
) def collect_and_log( diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 6d713b26d..bb54ce586 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -793,8 +793,8 @@ def listify_template(data_dict: Template): ), [ "The type ('group') of the given concept 'identifier_1' conflicts with another " - "existing concept of the same name, which is of type 'field').", - "The attribute /ENTRY[my_entry]/identified_calibration/identifier_1/some_field will not be written.", + "existing concept of the same name, which is of type 'field'.", + "The field /ENTRY[my_entry]/identified_calibration/identifier_1/some_field will not be written.", ], id="group-instead-of-named-field", ), From c8b6a7230b3a7894de6e63205493a8f404d0b00c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 21 May 2025 11:09:35 +0200 Subject: [PATCH 063/118] log type when key is removed --- src/pynxtools/dataconverter/helpers.py | 2 +- src/pynxtools/dataconverter/validation.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 42fb3c38f..2d1e8b198 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -153,7 +153,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar f"Length of axis {path} does not match to {value} in dimension {args[0]}" ) elif log_type == ValidationProblem.KeyToBeRemoved: - logger.warning(f"The attribute {path} will not be written.") + logger.warning(f"The {value} {path} will not be written.") elif log_type == ValidationProblem.InvalidConceptForNonVariadic: value = cast(Any, value) log_text = f"Given {value.type} name '{path}' conflicts with the non-variadic name '{value}'" diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index abed5f403..54c28e906 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -194,7 +194,6 @@ def best_namefit_of( or (type_attr := elem.attrib.get("type")) and len(type_attr) > 2 ] - if concept_name not in inherited_names: if node.type == "group": if concept_name != node.nx_class[2:].upper(): @@ -662,12 +661,13 @@ def is_documented(key: str, tree: NexusNode) -> bool: node = add_best_matches_for(key, tree, check_types=True) except TypeError: node = None + nx_type = "attribute" if key.split("/")[-1].startswith("@") else "field" keys_to_remove.append(key) collector.collect_and_log( key, ValidationProblem.KeyToBeRemoved, - None, + nx_type, ) if node is None: @@ -800,7 +800,7 @@ def check_attributes_of_nonexisting_field( collector.collect_and_log( key, ValidationProblem.KeyToBeRemoved, - None, + "attribute", ) return keys_to_remove @@ -1087,7 +1087,7 @@ def check_reserved_prefix( collector.collect_and_log( not_visited_key, ValidationProblem.KeyToBeRemoved, - None, + "attribute", ) keys_to_remove.append(not_visited_key) else: @@ -1134,7 +1134,7 @@ def check_reserved_prefix( collector.collect_and_log( not_visited_key, ValidationProblem.KeyToBeRemoved, - None, + "attribute", ) keys_to_remove.append(not_visited_key) continue From e4fa279b9979e9de01e81157ad061850d7ab413e Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 21 May 2025 16:19:49 +0200 Subject: [PATCH 064/118] properly follow links --- 
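Before the diff itself, the behaviour this patch targets can be summarized in a short sketch: template values of the form {"link": "/path"} are resolved against the nested template for validation purposes only, while the link itself is still what ends up in the HDF5 file. Everything below is illustrative; the real _follow_link also copes with CLASS[name] concept notation and reports broken links through the collector:

from typing import Any, Optional


def resolve_link(value: dict, nested: dict) -> Optional[Any]:
    """Follow {"link": "/a/b/c"} through a nested dict; None means broken."""
    current: Any = nested
    for part in value["link"].lstrip("/").split("/"):
        if not isinstance(current, dict) or part not in current:
            return None  # broken link
        current = current[part]
    return current


nested = {"entry": {"data": {"signal": [1, 2, 3], "@units": "counts"}}}
assert resolve_link({"link": "/entry/data/signal"}, nested) == [1, 2, 3]
assert resolve_link({"link": "/entry/data/missing"}, nested) is None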
src/pynxtools/dataconverter/convert.py | 8 +- src/pynxtools/dataconverter/helpers.py | 65 ++++---- src/pynxtools/dataconverter/validation.py | 171 +++++++++++++++------- tests/dataconverter/test_readers.py | 2 +- tests/dataconverter/test_validation.py | 45 +++++- 5 files changed, 194 insertions(+), 97 deletions(-) diff --git a/src/pynxtools/dataconverter/convert.py b/src/pynxtools/dataconverter/convert.py index afe273422..508071906 100644 --- a/src/pynxtools/dataconverter/convert.py +++ b/src/pynxtools/dataconverter/convert.py @@ -176,18 +176,12 @@ def transfer_data_into_template( for entry_name in entry_names: helpers.write_nexus_def_to_entry(data, entry_name, nxdl_name) if not skip_verify: - valid, keys_to_remove = validate_dict_against( + valid = validate_dict_against( nxdl_name, data, ignore_undocumented=ignore_undocumented, ) - # remove attributes that belong to non-existing fields - - for key in keys_to_remove: - # data.__delitem__(key) - del data[key] - if fail and not valid: raise ValidationFailed( "The data does not match the given NXDL. " diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 2d1e8b198..b3f66578e 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -22,7 +22,7 @@ import os import re from datetime import datetime, timezone -from enum import Enum +from enum import Enum, auto from functools import lru_cache from typing import Any, Callable, List, Optional, Tuple, Union, Sequence, cast @@ -46,31 +46,32 @@ class ValidationProblem(Enum): - UnitWithoutDocumentation = 1 - InvalidEnum = 2 - OpenEnumWithNewItem = 3 - MissingRequiredGroup = 4 - MissingRequiredField = 5 - MissingRequiredAttribute = 6 - InvalidType = 7 - InvalidDatetime = 8 - IsNotPosInt = 9 - ExpectedGroup = 10 - MissingDocumentation = 11 - MissingUnit = 12 - ChoiceValidationError = 13 - UnitWithoutField = 14 - AttributeForNonExistingField = 15 - BrokenLink = 16 - FailedNamefitting = 17 - NXdataMissingSignalData = 18 - NXdataMissingAxisData = 19 - NXdataAxisMismatch = 20 - KeyToBeRemoved = 21 - InvalidConceptForNonVariadic = 22 - ReservedSuffixWithoutField = 23 - ReservedPrefixInWrongContext = 24 - InvalidNexusTypeForNamedConcept = 25 + UnitWithoutDocumentation = auto() + InvalidEnum = auto() + OpenEnumWithNewItem = auto() + MissingRequiredGroup = auto() + MissingRequiredField = auto() + MissingRequiredAttribute = auto() + InvalidType = auto() + InvalidDatetime = auto() + IsNotPosInt = auto() + ExpectedGroup = auto() + ExpectedField = auto() + MissingDocumentation = auto() + MissingUnit = auto() + ChoiceValidationError = auto() + UnitWithoutField = auto() + AttributeForNonExistingField = auto() + BrokenLink = auto() + FailedNamefitting = auto() + NXdataMissingSignalData = auto() + NXdataMissingAxisData = auto() + NXdataAxisMismatch = auto() + KeyToBeRemoved = auto() + InvalidConceptForNonVariadic = auto() + ReservedSuffixWithoutField = auto() + ReservedPrefixInWrongContext = auto() + InvalidNexusTypeForNamedConcept = auto() class Collector: @@ -97,9 +98,9 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar f"The value at {path} does not match with the enumerated items from the open enumeration: {value}." 
) elif log_type == ValidationProblem.MissingRequiredGroup: - logger.warning(f"The required group, {path}, hasn't been supplied.") + logger.error(f"The required group, {path}, hasn't been supplied.") elif log_type == ValidationProblem.MissingRequiredField: - logger.warning( + logger.error( f"The data entry corresponding to {path} is required " "and hasn't been supplied by the reader.", ) @@ -119,9 +120,9 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar f"The value at {path} should be a positive int, but is {value}." ) elif log_type == ValidationProblem.ExpectedGroup: - logger.warning( - f"Expected a group at {path} but found a field or attribute." - ) + logger.error(f"Expected a group at {path} but found a field or attribute.") + elif log_type == ValidationProblem.ExpectedField: + logger.error(f"Expected a field at {path} but found a group.") elif log_type == ValidationProblem.MissingDocumentation: if "@" in path.rsplit("/")[-1]: logger.warning(f"Attribute {path} written without documentation.") @@ -141,7 +142,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar "but the field does not exist." ) elif log_type == ValidationProblem.BrokenLink: - logger.warning(f"Broken link at {path} to {value}") + logger.warning(f"Broken link at {path} to {value}.") elif log_type == ValidationProblem.FailedNamefitting: logger.warning(f"Found no namefit of {path} in {value}.") elif log_type == ValidationProblem.NXdataMissingSignalData: diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 54c28e906..110962158 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -227,7 +227,7 @@ def best_namefit_of( def validate_dict_against( appdef: str, mapping: MutableMapping[str, Any], ignore_undocumented: bool = False -) -> Tuple[bool, List]: +) -> bool: """ Validates a mapping against the NeXus tree for application definition `appdef`. @@ -244,7 +244,6 @@ def validate_dict_against( Returns: bool: True if the mapping is valid according to `appdef`, False otherwise. 
- List: list of keys in mapping that correspond to attributes of non-existing fields """ def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: @@ -416,6 +415,7 @@ def check_nxdata(): for x in keys if x not in [signal, *axes, *indices, *errors, *aux_signals] } + remaining_keys = _follow_link(remaining_keys, prev_path) recurse_tree( node, remaining_keys, @@ -455,6 +455,7 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str): return for variant in variants: + variant_path = f"{prev_path}/{variant}" if variant in [node.name for node in node.parent_of]: # Don't process if this is actually a sub-variant of this group continue @@ -462,17 +463,18 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str): if not isinstance(keys[variant], Mapping): if nx_class is not None: collector.collect_and_log( - f"{prev_path}/{variant}", + variant_path, ValidationProblem.ExpectedGroup, None, ) continue if node.nx_class == "NXdata": - handle_nxdata(node, keys[variant], prev_path=f"{prev_path}/{variant}") + handle_nxdata(node, keys[variant], prev_path=variant_path) if node.nx_class == "NXcollection": return else: - recurse_tree(node, keys[variant], prev_path=f"{prev_path}/{variant}") + variant_keys = _follow_link(keys[variant], variant_path) + recurse_tree(node, variant_keys, prev_path=variant_path) def remove_from_not_visited(path: str) -> str: if path in not_visited: @@ -480,28 +482,42 @@ def remove_from_not_visited(path: str) -> str: return path def _follow_link( - keys: Optional[Mapping[str, Any]], prev_path: str + keys: Optional[Mapping[str, Any]], prev_path: str, p=False ) -> Optional[Any]: if keys is None: return None - if len(keys) == 1 and "link" in keys: - current_keys = nested_keys - link_key = None - for path_elem in keys["link"][1:].split("/"): + + if not isinstance(keys, dict): + return keys + + resolved_keys = keys + for key, value in keys.copy().items(): + if isinstance(value, dict) and len(value) == 1 and "link" in value: + key_path = f"{prev_path}/{key}" if prev_path else key + current_keys = nested_keys link_key = None - for dict_path_elem in current_keys: - _, hdf_name = split_class_and_name_of(dict_path_elem) - if hdf_name == path_elem: - link_key = hdf_name - break + for path_elem in value["link"][1:].split("/"): + link_key = None + for dict_path_elem in current_keys: + _, hdf_name = split_class_and_name_of(dict_path_elem) + if hdf_name == path_elem: + link_key = hdf_name + current_keys = current_keys[dict_path_elem] + break + if link_key is None: collector.collect_and_log( - prev_path, ValidationProblem.BrokenLink, keys["link"] + key_path, ValidationProblem.BrokenLink, value["link"] ) - return None - current_keys = current_keys[dict_path_elem] - return current_keys - return keys + collector.collect_and_log( + key_path, + ValidationProblem.KeyToBeRemoved, + "key", + ) + del resolved_keys[key] + else: + resolved_keys[key] = current_keys + return resolved_keys def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): full_path = remove_from_not_visited(f"{prev_path}/{node.name}") @@ -515,6 +531,17 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): return for variant in variants: + variant_path = f"{prev_path}/{variant}" + + if isinstance(keys[variant], Mapping) and not all( + k.startswith("@") for k in keys[variant] + ): + collector.collect_and_log( + variant_path, + ValidationProblem.ExpectedField, + None, + ) + continue if node.optionality == "required" and 
isinstance(keys[variant], Mapping): # Check if all fields in the dict are actual attributes (startswith @) all_attrs = True @@ -524,44 +551,47 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): break if all_attrs: collector.collect_and_log( - f"{prev_path}/{variant}", missing_type_err.get(node.type), None + variant_path, missing_type_err.get(node.type), None ) collector.collect_and_log( - f"{prev_path}/{variant}", + variant_path, ValidationProblem.AttributeForNonExistingField, None, ) return - if variant not in keys or mapping.get(f"{prev_path}/{variant}") is None: + if variant not in keys or mapping.get(variant_path) is None: continue # Check general validity - mapping[f"{prev_path}/{variant}"] = is_valid_data_field( - mapping[f"{prev_path}/{variant}"], + mapping[variant_path] = is_valid_data_field( + mapping[variant_path], node.dtype, node.items, node.open_enum, - f"{prev_path}/{variant}", + variant_path, ) - _ = check_reserved_suffix(f"{prev_path}/{variant}", mapping) - _ = check_reserved_prefix(f"{prev_path}/{variant}", mapping, "field") + _ = check_reserved_suffix(variant_path, mapping) + _ = check_reserved_prefix(variant_path, mapping, "field") # Check unit category if node.unit is not None: remove_from_not_visited(f"{prev_path}/{variant}/@units") if f"{variant}@units" not in keys: collector.collect_and_log( - f"{prev_path}/{variant}", + variant_path, ValidationProblem.MissingUnit, node.unit, ) # TODO: Check unit with pint + field_attributes = get_field_attributes(variant, keys) + field_attributes = _follow_link(field_attributes, variant_path) + recurse_tree( node, - get_field_attributes(variant, keys), - prev_path=f"{prev_path}/{variant}", + field_attributes, + prev_path=variant_path, ) def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): @@ -577,18 +607,19 @@ def handle_attribute(node: NexusNode, keys: Mapping[str, Any], prev_path: str): return for variant in variants: - mapping[ + variant_path = ( f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" - ] = is_valid_data_field( + ) + mapping[variant_path] = is_valid_data_field( mapping[ f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}" ], node.dtype, node.items, node.open_enum, - f"{prev_path}/{variant if variant.startswith('@') else f'@{variant}'}", + variant_path, ) - _ = check_reserved_prefix(f"{prev_path}/{variant}", mapping, "attribute") + _ = check_reserved_prefix(variant_path, mapping, "attribute") def handle_choice(node: NexusNode, keys: Mapping[str, Any], prev_path: str): global collector @@ -690,7 +721,46 @@ def is_documented(key: str, tree: NexusNode) -> bool: return True if isinstance(mapping[key], dict) and "link" in mapping[key]: - # TODO: Follow link and check consistency with current field + resolved_link = _follow_link({key: mapping[key]}, "") + + is_mapping = isinstance(resolved_link[key], Mapping) + + if node.type == "group" and not is_mapping: + # Groups must have subelements + collector.collect_and_log( + key, + ValidationProblem.ExpectedGroup, + None, + ) + # collector.collect_and_log( + # key, + # ValidationProblem.KeyToBeRemoved, + # "group", + # ) + keys_to_remove.append(key) + return False + + elif node.type == "field" and not all( + k.startswith("@") for k in resolved_link[key] + ): + # Field should only have values. 
+ if is_mapping: + collector.collect_and_log( + key, + ValidationProblem.ExpectedField, + None, + ) + # collector.collect_and_log( + # key, + # ValidationProblem.KeyToBeRemoved, + # "field", + # ) + keys_to_remove.append(key) + return False + resolved_link[key] = is_valid_data_field( + resolved_link[key], node.dtype, node.items, node.open_enum, key + ) + return True if "@" not in key and node.type != "field": @@ -727,7 +797,6 @@ def recurse_tree( for child in node.children: if ignore_names is not None and child.name in ignore_names: continue - keys = _follow_link(keys, prev_path) if keys is None: return @@ -735,24 +804,19 @@ def recurse_tree( def check_attributes_of_nonexisting_field( node: NexusNode, - ) -> list: + ): """ This method runs through the mapping dictionary and checks if there are any attributes assigned to the fields (not groups!) which are not explicitly present in the mapping. If there are any found, a warning is logged and the corresponding items are - added to the list returned by the method. + added to the list that stores all keys that shall be removed. Args: node (NexusNode): the tree generated from application definition. - Returns: - list: list of keys in mapping that correspond to attributes of - non-existing fields """ - keys_to_remove = [] - for key in mapping: last_index = key.rfind("/") if key[last_index + 1] == "@" and key[last_index + 1 :] != "@units": @@ -795,14 +859,8 @@ def check_attributes_of_nonexisting_field( collector.collect_and_log( key[0:last_index], ValidationProblem.AttributeForNonExistingField, - None, - ) - collector.collect_and_log( - key, - ValidationProblem.KeyToBeRemoved, "attribute", ) - return keys_to_remove def check_type_with_tree( node: NexusNode, @@ -1062,13 +1120,16 @@ def check_reserved_prefix( "choice": handle_choice, } + keys_to_remove = [] + tree = generate_tree_from(appdef) collector.clear() nested_keys = build_nested_dict_from(mapping) not_visited = list(mapping) + keys = _follow_link(nested_keys, "") recurse_tree(tree, nested_keys) - keys_to_remove = check_attributes_of_nonexisting_field(tree) + check_attributes_of_nonexisting_field(tree) for not_visited_key in not_visited: # TODO: remove again if "@target"/"@reference" is sorted out by NIAC @@ -1160,7 +1221,11 @@ def check_reserved_prefix( # clear lru_cache NexusNode.search_add_child_for.cache_clear() - return (not collector.has_validation_problems(), keys_to_remove) + # remove keys that are incorrect + for key in set(keys_to_remove): + del mapping[key] + + return not collector.has_validation_problems() def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int = 0): @@ -1194,4 +1259,4 @@ def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int def validate_data_dict( _: MutableMapping[str, Any], read_data: MutableMapping[str, Any], root: ET._Element ) -> bool: - return validate_dict_against(root.attrib["name"], read_data)[0] + return validate_dict_against(root.attrib["name"], read_data) diff --git a/tests/dataconverter/test_readers.py b/tests/dataconverter/test_readers.py index 11ee48d06..91dc9344c 100644 --- a/tests/dataconverter/test_readers.py +++ b/tests/dataconverter/test_readers.py @@ -117,6 +117,6 @@ def test_has_correct_read_func(reader, caplog): with caplog.at_level(logging.WARNING): validate_dict_against( supported_nxdl, read_data, ignore_undocumented=True - )[0] + ) print(caplog.text) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index bb54ce586..2fbf56f5e 100644 --- 
a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -415,7 +415,11 @@ def listify_template(data_dict: Template): "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", {"link": "/a-link"}, ), - [], + [ + "Broken link at /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value to /a-link.", + "The key /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value will not be written.", + "The data entry corresponding to /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value is required and hasn't been supplied by the reader.", + ], id="link-dict-instead-of-int", ), pytest.param( @@ -792,6 +796,7 @@ def listify_template(data_dict: Template): "123", ), [ + "Expected a field at /ENTRY[my_entry]/identified_calibration/identifier_1 but found a group.", "The type ('group') of the given concept 'identifier_1' conflicts with another " "existing concept of the same name, which is of type 'field'.", "The field /ENTRY[my_entry]/identified_calibration/identifier_1/some_field will not be written.", @@ -811,6 +816,38 @@ def listify_template(data_dict: Template): ], id="field-instead-of-named-group", ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/USER[my_user]", + {"link": "/my_entry/my_group/required_field"}, + ), + "/ENTRY[my_entry]/OPTIONAL_group[some_group]/required_field", + {"link": "/my_entry/specified_group"}, + ), + [ + "Expected a field at /ENTRY[my_entry]/OPTIONAL_group[some_group]/required_field but found a group.", + "Expected a group at /ENTRY[my_entry]/USER[my_user] but found a field or attribute.", + ], + id="appdef-links-with-wrong-nexus-types", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[my_sample]", + {"link": "/my_entry/my_group/required_field"}, + ), + "/ENTRY[my_entry]/SAMPLE[my_sample]/name", + {"link": "/my_entry/my_group"}, + ), + [ + "Expected a group at /ENTRY[my_entry]/SAMPLE[my_sample] but found a field or attribute.", + "Expected a field at /ENTRY[my_entry]/SAMPLE[my_sample]/name but found a group.", + ], + id="base-class-links-with-wrong-nexus-types", + ), pytest.param( alter_dict( remove_from_dict( @@ -1317,7 +1354,7 @@ def format_error_message(msg: str) -> str: if not error_messages: with caplog.at_level(logging.WARNING): - assert validate_dict_against("NXtest", data_dict)[0] + assert validate_dict_against("NXtest", data_dict) assert caplog.text == "" else: if request.node.callspec.id in ( @@ -1327,11 +1364,11 @@ def format_error_message(msg: str) -> str: "baseclass-open-enum-with-new-item", ): with caplog.at_level(logging.INFO): - assert validate_dict_against("NXtest", data_dict)[0] + assert validate_dict_against("NXtest", data_dict) assert error_messages[0] in caplog.text else: with caplog.at_level(logging.WARNING): - assert not validate_dict_against("NXtest", data_dict)[0] + assert not validate_dict_against("NXtest", data_dict) assert len(caplog.records) == len(error_messages) for expected_message, rec in zip(error_messages, caplog.records): assert expected_message == format_error_message(rec.message) From cb076f2c9f2c2a0127eef945e6f9fd2d586254f9 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 21 May 2025 17:00:05 +0200 Subject: [PATCH 065/118] add docstring --- src/pynxtools/dataconverter/validation.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 110962158..20f188d38 100644 --- 
a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -484,6 +484,26 @@ def remove_from_not_visited(path: str) -> str: def _follow_link( keys: Optional[Mapping[str, Any]], prev_path: str, p=False ) -> Optional[Any]: + """ + Resolves internal dictionary "links" by replacing any keys containing a + {"link": "/path/to/target"} structure with the actual referenced content. + + This function traverses the mapping and recursively resolves any keys that + contain a "link" to another path (relative to the global template). + If the link cannot be resolved, the issue is logged. + + Args: + keys (Optional[Mapping[str, Any]]): The dictionary structure to process. + May be None or a non-dict value, in which case it's returned as-is. + prev_path (str): The path leading up to the current `keys` context, used + for logging and error reporting. + p (bool, optional): Unused parameter (possibly for debugging); included + for interface compatibility. Defaults to False. + + Returns: + Optional[Any]: A dictionary with resolved links, the original value if + `keys` is not a dict, or None if `keys` is None. + """ if keys is None: return None From 1976f300711feee596b3390f30a65148a6d1b381 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 22 May 2025 12:03:20 +0200 Subject: [PATCH 066/118] slightly adjust logic for links for base class concepts --- src/pynxtools/dataconverter/validation.py | 8 ++++---- tests/dataconverter/test_validation.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 20f188d38..22a3eda18 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -760,11 +760,11 @@ def is_documented(key: str, tree: NexusNode) -> bool: keys_to_remove.append(key) return False - elif node.type == "field" and not all( - k.startswith("@") for k in resolved_link[key] - ): + elif node.type == "field": # Field should only have values. 
-            if is_mapping:
+            if is_mapping and not all(
+                k.startswith("@") for k in resolved_link[key]
+            ):
                 collector.collect_and_log(
                     key,
                     ValidationProblem.ExpectedField,
diff --git a/tests/dataconverter/test_validation.py
index 2fbf56f5e..bb5ee2fe4 100644
--- a/tests/dataconverter/test_validation.py
+++ b/tests/dataconverter/test_validation.py
@@ -832,6 +832,19 @@ def listify_template(data_dict: Template):
         ],
         id="appdef-links-with-wrong-nexus-types",
     ),
+    pytest.param(
+        alter_dict(
+            alter_dict(
+                TEMPLATE,
+                "/ENTRY[my_entry]/SAMPLE[my_sample]",
+                {"link": "/my_entry/my_group"},
+            ),
+            "/ENTRY[my_entry]/SAMPLE[my_sample]/name",
+            {"link": "/my_entry/nxodd_name/char_value"},
+        ),
+        [],
+        id="base-class-links-with-matching-nexus-types",
+    ),
     pytest.param(
         alter_dict(
             alter_dict(

From 710506c4be687ec73ad23b2f652c402d578af744 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 22 May 2025 12:13:34 +0200
Subject: [PATCH 067/118] add a positive test for links in appdef validation

---
 tests/dataconverter/test_validation.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py
index bb5ee2fe4..cb525bc5f 100644
--- a/tests/dataconverter/test_validation.py
+++ b/tests/dataconverter/test_validation.py
@@ -816,6 +816,23 @@ def listify_template(data_dict: Template):
         ],
         id="field-instead-of-named-group",
     ),
+    pytest.param(
+        alter_dict(
+            alter_dict(
+                remove_from_dict(
+                    TEMPLATE,
+                    "/ENTRY[my_entry]/required_group/description",
+                    "optional",
+                ),
+                "/ENTRY[my_entry]/required_group",
+                {"link": "/my_entry/required_group2"},
+            ),
+            "/ENTRY[my_entry]/OPTIONAL_group[some_group]/required_field",
+            {"link": "/my_entry/specified_group/specified_field"},
+        ),
+        [],
+        id="appdef-links-with-matching-nexus-types",
+    ),
     pytest.param(
         alter_dict(
             alter_dict(

From 27ec688d7504197db767a8910114dfc04d402bef Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 22 May 2025 17:10:30 +0200
Subject: [PATCH 068/118] avoid uninformative error in writer

---
 src/pynxtools/dataconverter/writer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py
index f2a289503..a20183612 100644
--- a/src/pynxtools/dataconverter/writer.py
+++ b/src/pynxtools/dataconverter/writer.py
@@ -250,7 +250,12 @@ def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Grou
         attrs = self.__nxdl_to_attrs(parent_path)

         if attrs is not None:
-            grp.attrs["NX_class"] = attrs["type"]
+            if nx_class := attrs.get("type"):
+                grp.attrs["NX_class"] = nx_class
+            else:
+                logger.error(
+                    f"No attribute 'NX_class' could be written for {parent_path}."
+ ) return grp return self.output_nexus[parent_path_hdf5] From 5941b1f1416109d13a4972d910202c124c512341 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 22 May 2025 23:33:48 +0200 Subject: [PATCH 069/118] do not remove keys if linked types do not match --- src/pynxtools/dataconverter/validation.py | 30 +++++++++++++++++------ tests/dataconverter/test_validation.py | 2 ++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 22a3eda18..5d65f34ec 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -467,6 +467,13 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str): ValidationProblem.ExpectedGroup, None, ) + # TODO: decide if we want to remove such keys + # collector.collect_and_log( + # variant_path, + # ValidationProblem.KeyToBeRemoved, + # node.type, + # ) + # keys_to_remove.append(not_visited_key) continue if node.nx_class == "NXdata": handle_nxdata(node, keys[variant], prev_path=variant_path) @@ -497,8 +504,6 @@ def _follow_link( May be None or a non-dict value, in which case it's returned as-is. prev_path (str): The path leading up to the current `keys` context, used for logging and error reporting. - p (bool, optional): Unused parameter (possibly for debugging); included - for interface compatibility. Defaults to False. Returns: Optional[Any]: A dictionary with resolved links, the original value if @@ -510,7 +515,9 @@ def _follow_link( if not isinstance(keys, dict): return keys - resolved_keys = keys + import copy + + resolved_keys = copy.deepcopy(keys) for key, value in keys.copy().items(): if isinstance(value, dict) and len(value) == 1 and "link" in value: key_path = f"{prev_path}/{key}" if prev_path else key @@ -561,6 +568,13 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): ValidationProblem.ExpectedField, None, ) + # TODO: decide if we want to remove such keys + # collector.collect_and_log( + # variant_path, + # ValidationProblem.KeyToBeRemoved, + # node.type, + # ) + # keys_to_remove.append(variant_path) continue if node.optionality == "required" and isinstance(keys[variant], Mapping): # Check if all fields in the dict are actual attributes (startswith @) @@ -713,13 +727,13 @@ def is_documented(key: str, tree: NexusNode) -> bool: except TypeError: node = None nx_type = "attribute" if key.split("/")[-1].startswith("@") else "field" - keys_to_remove.append(key) collector.collect_and_log( key, ValidationProblem.KeyToBeRemoved, nx_type, ) + keys_to_remove.append(key) if node is None: key_path = key.replace("@", "") @@ -752,12 +766,13 @@ def is_documented(key: str, tree: NexusNode) -> bool: ValidationProblem.ExpectedGroup, None, ) + # TODO: decide if we want to remove such keys (and below) # collector.collect_and_log( # key, # ValidationProblem.KeyToBeRemoved, # "group", # ) - keys_to_remove.append(key) + # keys_to_remove.append(key) return False elif node.type == "field": @@ -770,13 +785,14 @@ def is_documented(key: str, tree: NexusNode) -> bool: ValidationProblem.ExpectedField, None, ) + # TODO: decide if we want to remove such keys (and below) # collector.collect_and_log( # key, # ValidationProblem.KeyToBeRemoved, # "field", # ) - keys_to_remove.append(key) - return False + # keys_to_remove.append(key) + # return False resolved_link[key] = is_valid_data_field( resolved_link[key], node.dtype, node.items, node.open_enum, 
key
             )

             return True
diff --git a/tests/dataconverter/test_validation.py
index cb525bc5f..b5e3ad6cc 100644
--- a/tests/dataconverter/test_validation.py
+++ b/tests/dataconverter/test_validation.py
@@ -846,6 +846,7 @@ def listify_template(data_dict: Template):
         [
             "Expected a field at /ENTRY[my_entry]/OPTIONAL_group[some_group]/required_field but found a group.",
             "Expected a group at /ENTRY[my_entry]/USER[my_user] but found a field or attribute.",
+            "Field /ENTRY[my_entry]/USER[my_user] written without documentation.",
         ],
         id="appdef-links-with-wrong-nexus-types",
     ),
@@ -874,6 +875,7 @@ def listify_template(data_dict: Template):
         ),
         [
             "Expected a group at /ENTRY[my_entry]/SAMPLE[my_sample] but found a field or attribute.",
+            "Field /ENTRY[my_entry]/SAMPLE[my_sample] written without documentation.",
             "Expected a field at /ENTRY[my_entry]/SAMPLE[my_sample]/name but found a group.",
         ],
         id="base-class-links-with-wrong-nexus-types",
     ),

From ec880acb017344084c39dfb33e78d470e3d70334 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 22 May 2025 23:50:18 +0200
Subject: [PATCH 070/118] add some explanatory comments in new code

---
 src/pynxtools/dataconverter/validation.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py
index 5d65f34ec..4bbdfeb50 100644
--- a/src/pynxtools/dataconverter/validation.py
+++ b/src/pynxtools/dataconverter/validation.py
@@ -17,6 +17,7 @@
 # limitations under the License.
 #
 import re
+import copy
 from collections import defaultdict
 from functools import reduce
 from operator import getitem
@@ -461,6 +462,7 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str):
             continue
         nx_class, _ = split_class_and_name_of(variant)
         if not isinstance(keys[variant], Mapping):
+            # Groups should have subelements
             if nx_class is not None:
                 collector.collect_and_log(
                     variant_path,
@@ -495,6 +497,10 @@ def _follow_link(
         Resolves internal dictionary "links" by replacing any keys containing a
         {"link": "/path/to/target"} structure with the actual referenced content.

+        Note that the keys are only replaced in copies of the incoming keys, NOT in
+        the global mapping. That is, links are resolved for checking, but we still write
+        links into the HDF5 file.
+
         This function traverses the mapping and recursively resolves any keys that
         contain a "link" to another path (relative to the global template).
         If the link cannot be resolved, the issue is logged.
@@ -563,6 +567,8 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
         if isinstance(keys[variant], Mapping) and not all(
             k.startswith("@") for k in keys[variant]
         ):
+            # A field should not have a dict of keys that are _not_ all attributes,
+            # i.e. no sub-fields or sub-groups.
collector.collect_and_log( variant_path, ValidationProblem.ExpectedField, @@ -766,7 +772,7 @@ def is_documented(key: str, tree: NexusNode) -> bool: ValidationProblem.ExpectedGroup, None, ) - # TODO: decide if we want to remove such keys (and below) + # TODO: decide if we want to remove such keys # collector.collect_and_log( # key, # ValidationProblem.KeyToBeRemoved, @@ -776,7 +782,8 @@ def is_documented(key: str, tree: NexusNode) -> bool: return False elif node.type == "field": - # Field should only have values. + # A field should not have a dict of keys that are _not_ all attributes, + # i.e. no sub-fields or sub-groups. if is_mapping and not all( k.startswith("@") for k in resolved_link[key] ): @@ -785,7 +792,7 @@ def is_documented(key: str, tree: NexusNode) -> bool: ValidationProblem.ExpectedField, None, ) - # TODO: decide if we want to remove such keys (and below) + # TODO: decide if we want to remove such keys # collector.collect_and_log( # key, # ValidationProblem.KeyToBeRemoved, From 16138b2bdb2ce97b83a52ce937a02fdf1cdc36cc Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 23 May 2025 00:10:46 +0200 Subject: [PATCH 071/118] allow for links together with shape --- src/pynxtools/dataconverter/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4bbdfeb50..b04e95db4 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -523,7 +523,7 @@ def _follow_link( resolved_keys = copy.deepcopy(keys) for key, value in keys.copy().items(): - if isinstance(value, dict) and len(value) == 1 and "link" in value: + if isinstance(value, dict) and "link" in value: key_path = f"{prev_path}/{key}" if prev_path else key current_keys = nested_keys link_key = None From afe242ae263847ca5c3253da39c26133166bae32 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 23 May 2025 14:07:47 +0200 Subject: [PATCH 072/118] remove attributes with no fields from the non-visited list after removal --- src/pynxtools/dataconverter/validation.py | 8 +++++++- tests/dataconverter/test_validation.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index b04e95db4..bfaf37e1e 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -898,12 +898,18 @@ def check_attributes_of_nonexisting_field( type_of_parent_from_tree == "group" or type_of_parent_from_tree is None ): - keys_to_remove.append(key) collector.collect_and_log( key[0:last_index], ValidationProblem.AttributeForNonExistingField, "attribute", ) + collector.collect_and_log( + key, + ValidationProblem.KeyToBeRemoved, + "attribute", + ) + keys_to_remove.append(key) + remove_from_not_visited(key) def check_type_with_tree( node: NexusNode, diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index b5e3ad6cc..4769e817b 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -861,7 +861,7 @@ def listify_template(data_dict: Template): {"link": "/my_entry/nxodd_name/char_value"}, ), [], - id="base-class-links-with-matching-nexus-types", + id="baseclass-links-with-matching-nexus-types", ), pytest.param( alter_dict( @@ -878,7 +878,7 @@ def listify_template(data_dict: 
Template): "Field /ENTRY[my_entry]/SAMPLE[my_sample] written without documentation.", "Expected a field at /ENTRY[my_entry]/SAMPLE[my_sample]/name but found a group.", ], - id="base-class-links-with-wrong-nexus-types", + id="baseclass-links-with-wrong-nexus-types", ), pytest.param( alter_dict( @@ -1340,7 +1340,7 @@ def listify_template(data_dict: Template): [ "Reserved suffix '_weights' was used in /ENTRY[my_entry]/OPTIONAL_group[my_group]/FIELDNAME_weights[some_random_field_weights], but there is no associated field some_random_field.", ], - id="reserved-suffix-from-base-class", + id="reserved-suffix-from-baseclass", ), pytest.param( alter_dict( From ae46cc3ce547110211fdc733b343090c28644a18 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 23 May 2025 18:17:28 +0200 Subject: [PATCH 073/118] update CITATION for release --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index cc9ca7cca..88276769b 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ message: If you use this software, please cite it using the metadata from this file. type: software -version: 0.10.6 +version: 0.10.7 authors: - given-names: Sherjeel family-names: Shabih From f77415b5a5f7a21b43ef1a067a30ad4a387788ed Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Sun, 25 May 2025 18:03:20 +0200 Subject: [PATCH 074/118] handle broken links, add test --- src/pynxtools/dataconverter/validation.py | 5 ++++ tests/dataconverter/test_validation.py | 36 +++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index bfaf37e1e..b5457dea7 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -545,6 +545,7 @@ def _follow_link( ValidationProblem.KeyToBeRemoved, "key", ) + keys_to_remove.append(key_path) del resolved_keys[key] else: resolved_keys[key] = current_keys @@ -763,6 +764,10 @@ def is_documented(key: str, tree: NexusNode) -> bool: if isinstance(mapping[key], dict) and "link" in mapping[key]: resolved_link = _follow_link({key: mapping[key]}, "") + if key not in resolved_link: + # Link is broken and key will be removed; no need to check further + return False + is_mapping = isinstance(resolved_link[key], Mapping) if node.type == "group" and not is_mapping: diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 4769e817b..3a40c39e1 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -816,6 +816,24 @@ def listify_template(data_dict: Template): ], id="field-instead-of-named-group", ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/identified_calibration", + {"link": "/my_entry/some_group"}, + ), + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", + {"link": "/my_entry/specified_group/some_field"}, + ), + [ + "Broken link at /ENTRY[my_entry]/identified_calibration to /my_entry/some_group.", + "The key /ENTRY[my_entry]/identified_calibration will not be written.", + "Broken link at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value to /my_entry/specified_group/some_field.", + "The key /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value will not be written.", + ], + id="appdef-broken-links", + ), pytest.param( alter_dict( alter_dict( @@ -850,6 +868,24 @@ def listify_template(data_dict: Template): 
], id="appdef-links-with-wrong-nexus-types", ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[my_sample]", + {"link": "/my_entry/some_group"}, + ), + "/ENTRY[my_entry]/SAMPLE[my_sample2]/name", + {"link": "/my_entry/specified_group/some_field223"}, + ), + [ + "Broken link at /ENTRY[my_entry]/SAMPLE[my_sample] to /my_entry/some_group.", + "The key /ENTRY[my_entry]/SAMPLE[my_sample] will not be written.", + "Broken link at /ENTRY[my_entry]/SAMPLE[my_sample2]/name to /my_entry/specified_group/some_field223.", + "The key /ENTRY[my_entry]/SAMPLE[my_sample2]/name will not be written.", + ], + id="baseclass-broken-links", + ), pytest.param( alter_dict( alter_dict( From 87ed6fdf9c0bf83f2b62ad67a3456b9ae86b9b7b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 28 May 2025 14:11:21 +0200 Subject: [PATCH 075/118] do not log error for missing concepts --- src/pynxtools/dataconverter/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 0ae2f851a..61b774ca8 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -98,9 +98,9 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar f"The value at {path} does not match with the enumerated items from the open enumeration: {value}." ) elif log_type == ValidationProblem.MissingRequiredGroup: - logger.error(f"The required group, {path}, hasn't been supplied.") + logger.warning(f"The required group, {path}, hasn't been supplied.") elif log_type == ValidationProblem.MissingRequiredField: - logger.error( + logger.warning( f"The data entry corresponding to {path} is required " "and hasn't been supplied by the reader.", ) From 2e27912b30cc69b62f225656f3e0ac379e22a4eb Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 26 May 2025 21:26:47 +0200 Subject: [PATCH 076/118] log error if there are multiple different variadic concepts with the same name --- src/pynxtools/dataconverter/helpers.py | 8 ++++ src/pynxtools/dataconverter/validation.py | 56 ++++++++++++++++++++++- tests/dataconverter/test_validation.py | 37 +++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 61b774ca8..fc668ee64 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -46,6 +46,7 @@ class ValidationProblem(Enum): + DifferentVariadicNodesWithTheSameName = auto() UnitWithoutDocumentation = auto() InvalidEnum = auto() OpenEnumWithNewItem = auto() @@ -85,6 +86,13 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if value is None: value = "" + if log_type == ValidationProblem.DifferentVariadicNodesWithTheSameName: + value = cast(Any, value) + logger.error( + f"Instance name '{path}' used for multiple different concepts: " + f"{', '.join(sorted(set(c for c, _ in value)))}. " + f"The following keys are affected: {', '.join(sorted(set(k for _, k in value)))}." + ) if log_type == ValidationProblem.UnitWithoutDocumentation: logger.info( f"The unit, {path} = {value}, is being written but has no documentation."
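For orientation, the new log branch above assembles its message from the (concept_name, key) tuples that the validator collects. A minimal standalone sketch of the resulting output, mirroring the f-strings in the hunk (the entries and instance name below are invented examples, not values from the patch):

# Illustration only: how the DifferentVariadicNodesWithTheSameName
# message is built from collected (concept_name, key) tuples.
entries = [
    ("SAMPLE", "/ENTRY[my_entry]/SAMPLE[some_name]/name"),
    ("USER", "/ENTRY[my_entry]/USER[some_name]/name"),
]
instance_name = "some_name"  # passed as `path` into the collector
print(
    f"Instance name '{instance_name}' used for multiple different concepts: "
    f"{', '.join(sorted(set(c for c, _ in entries)))}. "
    f"The following keys are affected: {', '.join(sorted(set(k for _, k in entries)))}."
)
# -> Instance name 'some_name' used for multiple different concepts: SAMPLE, USER.
#    The following keys are affected: /ENTRY[my_entry]/SAMPLE[some_name]/name,
#    /ENTRY[my_entry]/USER[some_name]/name.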
diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index b5457dea7..de9f65c02 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -850,6 +850,59 @@ def recurse_tree( handling_map.get(child.type, handle_unknown_type)(child, keys, prev_path) + def find_instance_name_conflicts( + mapping: MutableMapping[str, str], keys_to_remove: List[str] + ) -> None: + """ + Detect and log conflicts where the same variadic instance name is reused across + different concept names. + + This function ensures that a given instance name (e.g., 'my_name') is only used + for a single concept (e.g., SAMPLE or USER, but not both). Reusing the same instance + name for different concept names (e.g., SAMPLE[my_name] and USER[my_name]) is + considered a conflict. + + When such conflicts are found, an error is logged indicating the instance name + and the conflicting concept names. Additionally, all keys involved in the conflict + are logged and added to the `keys_to_remove` list. + + Parameters: + mapping (MutableMapping[str, str]): + The mapping containing the data to validate. + This should be a dict of `/` separated paths, such as + "/ENTRY[entry1]/SAMPLE[sample1]/name". + keys_to_remove (List[str]): + List of keys that will be removed from the template. This is extended here + in the case of conflicts. + + """ + pattern = re.compile(r"(?P<concept_name>[^\[\]/]+)\[(?P<instance>[^\]]+)\]") + + # Map from instance name to list of (concept_name, full_key) where it's used + instance_usage: Dict[str, List[Tuple[str, str]]] = defaultdict(list) + + for key in mapping: + for match in pattern.finditer(key): + concept_name, instance_name = match.groups() + instance_usage[instance_name].append((concept_name, key)) + + for instance_name, entries in sorted(instance_usage.items()): + concept_names = {c for c, _ in entries} + if len(concept_names) > 1: + keys = sorted(k for _, k in entries) + collector.collect_and_log( + instance_name, + ValidationProblem.DifferentVariadicNodesWithTheSameName, + entries, + ) + for key in keys: + collector.collect_and_log( + key, + ValidationProblem.KeyToBeRemoved, + "key", + ) + keys_to_remove += keys + def check_attributes_of_nonexisting_field( node: NexusNode, ): @@ -1174,10 +1227,11 @@ def check_reserved_prefix( "choice": handle_choice, } - keys_to_remove = [] + keys_to_remove: List[str] = [] tree = generate_tree_from(appdef) collector.clear() + find_instance_name_conflicts(mapping, keys_to_remove) nested_keys = build_nested_dict_from(mapping) not_visited = list(mapping) keys = _follow_link(nested_keys, "") diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 3a40c39e1..19f7fc945 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -234,6 +234,43 @@ def listify_template(data_dict: Template): @pytest.mark.parametrize( "data_dict,error_messages", [ + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[some_name]/name", + "A sample name", + ), + "/ENTRY[my_entry]/USER[some_name]/name", + "A user name", + ), + "/ENTRY[my_entry]/MONITOR[some_name]/name", + "A monitor name", + ), + "/ENTRY[my_entry]/MONITOR[another_name]/name", + "Another monitor name", + ), + "/ENTRY[my_entry]/SAMPLE[another_name]/name", + "Another sample name", + ), + [ + "Instance name 'another_name' used for multiple different concepts: MONITOR, SAMPLE. 
" + "The following keys are affected: /ENTRY[my_entry]/MONITOR[another_name]/name, " + "/ENTRY[my_entry]/SAMPLE[another_name]/name.", + "The key /ENTRY[my_entry]/MONITOR[another_name]/name will not be written.", + "The key /ENTRY[my_entry]/SAMPLE[another_name]/name will not be written.", + "Instance name 'some_name' used for multiple different concepts: MONITOR, SAMPLE, USER. " + "The following keys are affected: /ENTRY[my_entry]/MONITOR[some_name]/name, " + "/ENTRY[my_entry]/SAMPLE[some_name]/name, /ENTRY[my_entry]/USER[some_name]/name.", + "The key /ENTRY[my_entry]/MONITOR[some_name]/name will not be written.", + "The key /ENTRY[my_entry]/SAMPLE[some_name]/name will not be written.", + "The key /ENTRY[my_entry]/USER[some_name]/name will not be written.", + ], + id="variadic-groups-of-the-same-name", + ), pytest.param( alter_dict( alter_dict( From eaba076c611663ae55ad6bc0e3e46f8883cb13c7 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 26 May 2025 21:42:29 +0200 Subject: [PATCH 077/118] fix for groups that are not on the same level --- src/pynxtools/dataconverter/validation.py | 25 ++++++++++---- tests/dataconverter/test_validation.py | 42 +++++++++++++---------- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index de9f65c02..f0d724517 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -862,6 +862,14 @@ def find_instance_name_conflicts( name for different concept names (e.g., SAMPLE[my_name] and USER[my_name]) is considered a conflict. + For example, this is a conflict: + /ENTRY[entry1]/SAMPLE[my_name]/... + /ENTRY[entry1]/USER[my_name]/... + + But this is NOT a conflict: + /ENTRY[entry1]/INSTRUMENT[instrument]/FIELD[my_name]/... + /ENTRY[entry1]/INSTRUMENT[instrument]/DETECTOR[detector]/FIELD[my_name]/... + When such conflicts are found, an error is logged indicating the instance name and the conflicting concept names. Additionally, all keys involved in the conflict are logged and added to the `keys_to_remove` list. 
@@ -878,15 +886,20 @@ def find_instance_name_conflicts( """ pattern = re.compile(r"(?P<concept_name>[^\[\]/]+)\[(?P<instance>[^\]]+)\]") - # Map from instance name to list of (concept_name, full_key) where it's used - instance_usage: Dict[str, List[Tuple[str, str]]] = defaultdict(list) + # Tracks instance usage with respect to their parent group + instance_usage: Dict[Tuple[str, str], List[Tuple[str, str]]] = defaultdict(list) for key in mapping: - for match in pattern.finditer(key): - concept_name, instance_name = match.groups() - instance_usage[instance_name].append((concept_name, key)) + matches = list(pattern.finditer(key)) + for i, match in enumerate(matches): + concept_name = match.group("concept_name") + instance_name = match.group("instance") + + # Determine the parent path up to just before this match + parent_path = key[: match.start()] + instance_usage[(instance_name, parent_path)].append((concept_name, key)) - for instance_name, entries in sorted(instance_usage.items()): + for (instance_name, parent_path), entries in sorted(instance_usage.items()): concept_names = {c for c, _ in entries} if len(concept_names) > 1: keys = sorted(k for _, k in entries) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 19f7fc945..e1eeec42a 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -240,32 +240,36 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( alter_dict( alter_dict( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/SAMPLE[some_name]/name", - "A sample name", + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[some_name]/name", + "A sample name", + ), + "/ENTRY[my_entry]/USER[some_name]/name", + "A user name", ), - "/ENTRY[my_entry]/USER[some_name]/name", - "A user name", + "/ENTRY[my_entry]/APERTURE[some_name]/name", + "An monitor name", ), - "/ENTRY[my_entry]/MONITOR[some_name]/name", - "A monitor name", + "/ENTRY[my_entry]/APERTURE[another_name]/name", + "Another monitor name", ), - "/ENTRY[my_entry]/MONITOR[another_name]/name", - "Another monitor name", + "/ENTRY[my_entry]/SAMPLE[another_name]/name", + "Another sample name", ), - "/ENTRY[my_entry]/SAMPLE[another_name]/name", - "Another sample name", + "/ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name", + "Another c name within an instrument.", ), [ - "Instance name 'another_name' used for multiple different concepts: MONITOR, SAMPLE. " - "The following keys are affected: /ENTRY[my_entry]/MONITOR[another_name]/name, " + "Instance name 'another_name' used for multiple different concepts: APERTURE, SAMPLE. 
" + "The following keys are affected: /ENTRY[my_entry]/APERTURE[some_name]/name, " "/ENTRY[my_entry]/SAMPLE[some_name]/name, /ENTRY[my_entry]/USER[some_name]/name.", - "The key /ENTRY[my_entry]/MONITOR[some_name]/name will not be written.", + "The key /ENTRY[my_entry]/APERTURE[some_name]/name will not be written.", "The key /ENTRY[my_entry]/SAMPLE[some_name]/name will not be written.", "The key /ENTRY[my_entry]/USER[some_name]/name will not be written.", ], @@ -1116,11 +1120,11 @@ def listify_template(data_dict: Template): pytest.param( alter_dict( TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source]/type", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source2]/type", 1, ), [ - "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source]/type written without documentation." + "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/ILLEGAL[my_source2]/type written without documentation." ], id="bad-namefitting", ), From 29f290e1adf18372bdf83a9a83ed14e16405c210 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 27 May 2025 21:59:07 +0200 Subject: [PATCH 078/118] remove unneeded enumeration --- src/pynxtools/dataconverter/validation.py | 2 +- tests/dataconverter/test_validation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index f0d724517..19e11cfd4 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -891,7 +891,7 @@ def find_instance_name_conflicts( for key in mapping: matches = list(pattern.finditer(key)) - for i, match in enumerate(matches): + for match in matches: concept_name = match.group("concept_name") instance_name = match.group("instance") diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index e1eeec42a..34b9cac06 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -258,7 +258,7 @@ def listify_template(data_dict: Template): "Another sample name", ), "/ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name", - "Another c name within an instrument.", + "Another aperture name within an instrument.", ), [ "Instance name 'another_name' used for multiple different concepts: APERTURE, SAMPLE. 
" From d499cc6ea4e84f116ba9c859ce47b042013c6318 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 27 May 2025 22:06:18 +0200 Subject: [PATCH 079/118] continue if keys has no variable concepts --- src/pynxtools/dataconverter/validation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 19e11cfd4..a1c6459aa 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -891,6 +891,9 @@ def find_instance_name_conflicts( for key in mapping: matches = list(pattern.finditer(key)) + if not matches: + # The keys contains no concepts with variable name, no need to further check + continue for match in matches: concept_name = match.group("concept_name") instance_name = match.group("instance") From 0225333cd1a36e29fdf880e7e1ed975c3cd268ed Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 28 May 2025 10:59:18 +0200 Subject: [PATCH 080/118] use existing docs style --- src/pynxtools/dataconverter/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index a1c6459aa..4e85a2213 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -874,7 +874,7 @@ def find_instance_name_conflicts( and the conflicting concept names. Additionally, all keys involved in the conflict are logged and added to the `keys_to_remove` list. - Parameters: + Args: mapping (MutableMapping[str, str]): The mapping containing the data to validate. This should be a dict of `/` separated paths, such as From 40295fa3d8374108c96c2ec32a374770048a9f17 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 28 May 2025 18:31:54 +0200 Subject: [PATCH 081/118] only remove conflicting keys if each of them is valid --- src/pynxtools/dataconverter/helpers.py | 2 +- src/pynxtools/dataconverter/validation.py | 27 ++++++++++-- tests/dataconverter/test_validation.py | 54 ++++++++++++++--------- 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index fc668ee64..3074a6469 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -88,7 +88,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar if log_type == ValidationProblem.DifferentVariadicNodesWithTheSameName: value = cast(Any, value) - logger.error( + logger.warning( f"Instance name '{path}' used for multiple different concepts: " f"{', '.join(sorted(set(c for c, _ in value)))}. " f"The following keys are affected: {', '.join(sorted(set(k for _, k in value)))}." diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4e85a2213..41526caea 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -911,13 +911,32 @@ def find_instance_name_conflicts( ValidationProblem.DifferentVariadicNodesWithTheSameName, entries, ) + # Now that we have name conflicts, we still need to check that there are + # at least two valid keys in that conclit. Only then we remove these. 
+ # This takes care of the example with keys like + # /ENTRY[my_entry]/USER[some_name]/name and /ENTRY[my_entry]/USERS[some_name]/name, + # where we only want to keep the first one. + valid_keys_with_name_conflicts = [] + for key in keys: + try: + node = add_best_matches_for(key, tree) + if node is not None: + valid_keys_with_name_conflicts.append(key) + continue + except TypeError: + pass collector.collect_and_log( - key, - ValidationProblem.KeyToBeRemoved, - "key", + key, ValidationProblem.KeyToBeRemoved, "key" ) - keys_to_remove += keys + keys_to_remove.append(key) + + if len(valid_keys_with_name_conflicts) > 1: + for valid_key in valid_keys_with_name_conflicts: + collector.collect_and_log( + valid_key, ValidationProblem.KeyToBeRemoved, "key" + ) + keys_to_remove.append(valid_key) def check_attributes_of_nonexisting_field( node: NexusNode, diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 34b9cac06..b5eb50abc 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -241,35 +241,47 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/SAMPLE[some_name]/name", - "A sample name", + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[some_name]/name", + "A sample name", + ), + "/ENTRY[my_entry]/USER[some_name]/name", + "A user name", + ), + "/ENTRY[my_entry]/MONITOR[some_name]/name", + "An monitor name", ), - "/ENTRY[my_entry]/USER[some_name]/name", - "A user name", + "/ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name", + "An aperture within an instrument", ), - "/ENTRY[my_entry]/APERTURE[some_name]/name", - "An monitor name", + "/ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name", + "A detector within an instrument", ), - "/ENTRY[my_entry]/APERTURE[another_name]/name", - "Another monitor name", + "/ENTRY[my_entry]/INSTRUMENT[instrument]/SOURCE[my_source]/APERTURE[another_name]/name", + "An aperture within a source inside an instrument", ), - "/ENTRY[my_entry]/SAMPLE[another_name]/name", - "Another sample name", + "/ENTRY[my_entry]/USER[a_third_name]/name", + "A tird user name", ), - "/ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name", - "Another aperture name within an instrument.", + "/ENTRY[my_entry]/USERS[a_third_name]/name", + "An invalid group of the same name", ), [ - "Instance name 'another_name' used for multiple different concepts: APERTURE, SAMPLE. " - "The following keys are affected: /ENTRY[my_entry]/APERTURE[another_name]/name, " - "/ENTRY[my_entry]/SAMPLE[another_name]/name.", - "The key /ENTRY[my_entry]/APERTURE[another_name]/name will not be written.", - "The key /ENTRY[my_entry]/SAMPLE[another_name]/name will not be written.", - "Instance name 'some_name' used for multiple different concepts: APERTURE, SAMPLE, USER. " - "The following keys are affected: /ENTRY[my_entry]/APERTURE[some_name]/name, " + "Instance name 'a_third_name' used for multiple different concepts: USER, USERS. " + "The following keys are affected: /ENTRY[my_entry]/USERS[a_third_name]/name, " + "/ENTRY[my_entry]/USER[a_third_name]/name.", + "The key /ENTRY[my_entry]/USERS[a_third_name]/name will not be written.", + "Instance name 'another_name' used for multiple different concepts: APERTURE, DETECTOR. 
" + "The following keys are affected: /ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name, " + "/ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name.", + "The key /ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name will not be written.", + "The key /ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name will not be written.", + "Instance name 'some_name' used for multiple different concepts: MONITOR, SAMPLE, USER. " + "The following keys are affected: /ENTRY[my_entry]/MONITOR[some_name]/name, " "/ENTRY[my_entry]/SAMPLE[some_name]/name, /ENTRY[my_entry]/USER[some_name]/name.", - "The key /ENTRY[my_entry]/APERTURE[some_name]/name will not be written.", + "The key /ENTRY[my_entry]/MONITOR[some_name]/name will not be written.", "The key /ENTRY[my_entry]/SAMPLE[some_name]/name will not be written.", "The key /ENTRY[my_entry]/USER[some_name]/name will not be written.", ], From 02e4d655014f0cb11a5e159de7887d9b0df98dff Mon Sep 17 00:00:00 2001 From: RubelMozumder <32923026+RubelMozumder@users.noreply.github.com> Date: Fri, 30 May 2025 14:30:10 +0200 Subject: [PATCH 082/118] Update pynxtools and other plugins' versions. (#649) --- CITATION.cff | 2 +- pyproject.toml | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 88276769b..45c9ff763 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ message: If you use this software, please cite it using the metadata from this file. type: software -version: 0.10.7 +version: 0.10.8 authors: - given-names: Sherjeel family-names: Shabih diff --git a/pyproject.toml b/pyproject.toml index 08bc0ed10..b65da0b71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,28 +73,28 @@ apm = [ "pynxtools-apm>=0.2.3", ] ellips = [ - "pynxtools-ellips>=0.0.9", + "pynxtools-ellips>=0.0.10", ] em = [ "pynxtools-em>=0.3.2", ] igor = [ - "pynxtools-igor>=0.1.1", + "pynxtools-igor>=0.1.2", ] mpes = [ "pynxtools-mpes>=0.2.3", ] raman = [ - "pynxtools-raman>=0.0.10", + "pynxtools-raman>=0.0.11", ] spm = [ - "pynxtools-spm>=0.1.1", + "pynxtools-spm>=0.1.2", ] xps = [ - "pynxtools-xps>=0.5.1", + "pynxtools-xps>=0.5.2", ] xrd = [ - "pynxtools-xrd>=0.0.3", + "pynxtools-xrd>=0.0.5", ] [project.entry-points.'nomad.plugin'] From 8d0e62b5aabe3edcbbf1aaf35f922962e590e3c4 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 10:46:20 +0200 Subject: [PATCH 083/118] Apply suggestions from code review Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- src/pynxtools/dataconverter/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 41526caea..4d3b95905 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -912,7 +912,7 @@ def find_instance_name_conflicts( entries, ) # Now that we have name conflicts, we still need to check that there are - # at least two valid keys in that conclit. Only then we remove these. + # at least two valid keys in that conflict. Only then we remove these. # This takes care of the example with keys like # /ENTRY[my_entry]/USER[some_name]/name and /ENTRY[my_entry]/USERS[some_name]/name, # where we only want to keep the first one. 
From aacf70e8e5d5e415d5905c9207691710c1adfa15 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 10:49:52 +0200 Subject: [PATCH 084/118] use keys_to_remove from global context --- src/pynxtools/dataconverter/validation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4d3b95905..4dc86c2e7 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -850,9 +850,7 @@ def recurse_tree( handling_map.get(child.type, handle_unknown_type)(child, keys, prev_path) - def find_instance_name_conflicts( - mapping: MutableMapping[str, str], keys_to_remove: List[str] - ) -> None: + def find_instance_name_conflicts(mapping: MutableMapping[str, str]) -> None: """ Detect and log conflicts where the same variadic instance name is reused across different concept names. @@ -1266,7 +1264,7 @@ def check_reserved_prefix( tree = generate_tree_from(appdef) collector.clear() - find_instance_name_conflicts(mapping, keys_to_remove) + find_instance_name_conflicts(mapping) nested_keys = build_nested_dict_from(mapping) not_visited = list(mapping) keys = _follow_link(nested_keys, "") From 6daffdc2f4c12f40c0a01a49f519ed54b7086016 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 13:26:25 +0200 Subject: [PATCH 085/118] catch case where there are multiple fields in the conflicting groups --- src/pynxtools/dataconverter/validation.py | 23 +++- tests/dataconverter/test_validation.py | 132 ++++++++++++++++------ 2 files changed, 116 insertions(+), 39 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 4dc86c2e7..6f30e1e99 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -929,12 +929,23 @@ def find_instance_name_conflicts(mapping: MutableMapping[str, str]) -> None: ) keys_to_remove.append(key) - if len(valid_keys_with_name_conflicts) > 1: - for valid_key in valid_keys_with_name_conflicts: - collector.collect_and_log( - valid_key, ValidationProblem.KeyToBeRemoved, "key" - ) - keys_to_remove.append(valid_key) + if len(valid_keys_with_name_conflicts) >= 1: + # At this point, all invalid keys have been removed. + # If more than one valid concept still uses the same instance name under the same parent path, + # this indicates a semantic ambiguity (e.g., USER[alex] and SAMPLE[alex]). + # We remove these keys as well to avoid conflicts in the writer. 
+ remaining_concepts = { + pattern.findall(k)[-1][0] + for k in valid_keys_with_name_conflicts + if pattern.findall(k) + } + # If multiple valid concept names reuse the same instance name, remove them too + if len(remaining_concepts) > 1: + for valid_key in valid_keys_with_name_conflicts: + collector.collect_and_log( + valid_key, ValidationProblem.KeyToBeRemoved, "key" + ) + keys_to_remove.append(valid_key) def check_attributes_of_nonexisting_field( node: NexusNode, diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index b5eb50abc..4f20c76a0 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -240,53 +240,119 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - alter_dict( - alter_dict( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/SAMPLE[some_name]/name", - "A sample name", - ), - "/ENTRY[my_entry]/USER[some_name]/name", - "A user name", - ), - "/ENTRY[my_entry]/MONITOR[some_name]/name", - "An monitor name", - ), - "/ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name", - "An aperture within an instrument", + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[some_name]/name", + "A sample name", ), - "/ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name", - "A detector within an instrument", + "/ENTRY[my_entry]/SAMPLE[some_name]/description", + "A sample description", ), - "/ENTRY[my_entry]/INSTRUMENT[instrument]/SOURCE[my_source]/APERTURE[another_name]/name", - "An aperture within a source inside an instrument", + "/ENTRY[my_entry]/USER[some_name]/name", + "A user name", ), - "/ENTRY[my_entry]/USER[a_third_name]/name", - "A tird user name", + "/ENTRY[my_entry]/MONITOR[some_name]/name", + "A monitor name", ), - "/ENTRY[my_entry]/USERS[a_third_name]/name", - "An invalid group of the same name", + "/ENTRY[my_entry]/MONITOR[some_name]/description", + "A monitor description", ), [ - "Instance name 'a_third_name' used for multiple different concepts: USER, USERS. " - "The following keys are affected: /ENTRY[my_entry]/USERS[a_third_name]/name, " - "/ENTRY[my_entry]/USER[a_third_name]/name.", - "The key /ENTRY[my_entry]/USERS[a_third_name]/name will not be written.", - "Instance name 'another_name' used for multiple different concepts: APERTURE, DETECTOR. " - "The following keys are affected: /ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name, " - "/ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name.", - "The key /ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name will not be written.", - "The key /ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name will not be written.", "Instance name 'some_name' used for multiple different concepts: MONITOR, SAMPLE, USER. 
" - "The following keys are affected: /ENTRY[my_entry]/MONITOR[some_name]/name, " + "The following keys are affected: /ENTRY[my_entry]/MONITOR[some_name]/description, " + "/ENTRY[my_entry]/MONITOR[some_name]/name, /ENTRY[my_entry]/SAMPLE[some_name]/description, " "/ENTRY[my_entry]/SAMPLE[some_name]/name, /ENTRY[my_entry]/USER[some_name]/name.", + "The key /ENTRY[my_entry]/MONITOR[some_name]/description will not be written.", "The key /ENTRY[my_entry]/MONITOR[some_name]/name will not be written.", + "The key /ENTRY[my_entry]/SAMPLE[some_name]/description will not be written.", "The key /ENTRY[my_entry]/SAMPLE[some_name]/name will not be written.", "The key /ENTRY[my_entry]/USER[some_name]/name will not be written.", ], id="variadic-groups-of-the-same-name", ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name", + "An aperture within an instrument", + ), + "/ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name", + "A detector within an instrument", + ), + "/ENTRY[my_entry]/INSTRUMENT[instrument]/SOURCE[my_source]/APERTURE[another_name]/name", + "An aperture within a source inside an instrument", + ), + [ + "Instance name 'another_name' used for multiple different concepts: APERTURE, DETECTOR. " + "The following keys are affected: /ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name, " + "/ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name.", + "The key /ENTRY[my_entry]/INSTRUMENT[instrument]/APERTURE[another_name]/name will not be written.", + "The key /ENTRY[my_entry]/INSTRUMENT[instrument]/DETECTOR[another_name]/name will not be written.", + ], + id="variadic-groups-of-the-same-name-but-at-different-levels", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/USER[user]/name", + "A user name", + ), + "/ENTRY[my_entry]/USER[user]/role", + "A user role", + ), + "/ENTRY[my_entry]/USER[user]/affiliation", + "A user affiliation", + ), + "/ENTRY[my_entry]/ILLEGAL[user]/name", + "An illegal user name", + ), + "/ENTRY[my_entry]/ILLEGAL[user]/role", + "An illegal user role", + ), + "/ENTRY[my_entry]/ILLEGAL[user]/affiliation", + "An illegal user affiliation", + ), + [ + "Instance name 'user' used for multiple different concepts: ILLEGAL, USER. " + "The following keys are affected: /ENTRY[my_entry]/ILLEGAL[user]/affiliation, /ENTRY[my_entry]/ILLEGAL[user]/name, " + "/ENTRY[my_entry]/ILLEGAL[user]/role, /ENTRY[my_entry]/USER[user]/affiliation, /ENTRY[my_entry]/USER[user]/name, " + "/ENTRY[my_entry]/USER[user]/role.", + "The key /ENTRY[my_entry]/ILLEGAL[user]/affiliation will not be written.", + "The key /ENTRY[my_entry]/ILLEGAL[user]/name will not be written.", + "The key /ENTRY[my_entry]/ILLEGAL[user]/role will not be written.", + ], + id="variadic-groups-of-the-same-name-illegal-concept-multiple-fields", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/USER[user]/name", + "A user name", + ), + "/ENTRY[my_entry]/USERS[user]/name", + "An invalid group of the same name", + ), + "/ENTRY[my_entry]/SAMPLE[user]/name", + "A sample group called user with a name", + ), + [ + "Instance name 'user' used for multiple different concepts: SAMPLE, USER, USERS. 
" + "The following keys are affected: /ENTRY[my_entry]/SAMPLE[user]/name, " + "/ENTRY[my_entry]/USERS[user]/name, /ENTRY[my_entry]/USER[user]/name.", + "The key /ENTRY[my_entry]/USERS[user]/name will not be written.", + "The key /ENTRY[my_entry]/SAMPLE[user]/name will not be written.", + "The key /ENTRY[my_entry]/USER[user]/name will not be written.", + ], + id="variadic-groups-of-the-same-name-mix-of-valid-and-illegal-concepts", + ), pytest.param( alter_dict( alter_dict( From 14f956211b30739a7a0955801647d3f83dbbdef6 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 14:04:24 +0200 Subject: [PATCH 086/118] catch case where a key with a concept name has an equivalent non-variadic key in the template already --- src/pynxtools/dataconverter/helpers.py | 6 ++++++ src/pynxtools/dataconverter/validation.py | 24 +++++++++++++++++++++++ tests/dataconverter/test_validation.py | 14 +++++++++++++ 3 files changed, 44 insertions(+) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 3074a6469..04061ef1c 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -73,6 +73,7 @@ class ValidationProblem(Enum): ReservedSuffixWithoutField = auto() ReservedPrefixInWrongContext = auto() InvalidNexusTypeForNamedConcept = auto() + KeysWithAndWithoutConcept = auto() class Collector: @@ -185,6 +186,11 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar f"The type ('{args[0] if args else ''}') of the given concept '{path}' " f"conflicts with another existing concept of the same name, which is of type '{value.type}'." ) + elif log_type == ValidationProblem.KeysWithAndWithoutConcept: + value = cast(Any, value) + logger.warning( + f"The key '{path}' uses the valid concept name '{args[0]}', but there is another valid key {value} that uses the non-variadic name of the node.'" + ) def collect_and_log( self, diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 6f30e1e99..af743506d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -898,6 +898,30 @@ def find_instance_name_conflicts(mapping: MutableMapping[str, str]) -> None: # Determine the parent path up to just before this match parent_path = key[: match.start()] + child_path = key[match.start() :].split("/", 1)[-1] + + # Here we check if for this key with a concept name, another valid key + # with a non-concept name exists. 
+ non_concept_key = f"{parent_path}{instance_name}/{child_path}" + + if non_concept_key in mapping: + try: + node = add_best_matches_for(non_concept_key, tree) + if node is not None: + collector.collect_and_log( + key, + ValidationProblem.KeysWithAndWithoutConcept, + non_concept_key, + concept_name, + ) + collector.collect_and_log( + key, ValidationProblem.KeyToBeRemoved, "key" + ) + keys_to_remove.append(key) + continue + except TypeError: + pass + instance_usage[(instance_name, parent_path)].append((concept_name, key)) for (instance_name, parent_path), entries in sorted(instance_usage.items()): diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 4f20c76a0..f6217c7df 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -234,6 +234,20 @@ def listify_template(data_dict: Template): @pytest.mark.parametrize( "data_dict,error_messages", [ + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NOTE[required_group2]/description", + "an additional description", + ), + [ + "The key '/ENTRY[my_entry]/NOTE[required_group2]/description' uses the valid concept name 'NOTE', " + "but there is another valid key /ENTRY[my_entry]/required_group2/description that uses the non-variadic " + "name of the node.", + "The key /ENTRY[my_entry]/NOTE[required_group2]/description will not be written.", + ], + id="same-concept-with-and-without-concept-name", + ), From a3bc1de8be002d5cd04bac098b3ceb37fcb1c283 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 15:55:31 +0200 Subject: [PATCH 087/118] update definitions --- src/pynxtools/definitions | 2 +- src/pynxtools/nexus-version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index a14acdde2..1e8c4331e 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit a14acdde2b9880acfc3f967e21ab83f37beadf13 +Subproject commit 1e8c4331ee50905d9272df74b8f311d49f1c7a09 diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 02c850407..0236d1750 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2024.02-1983-ga14acdde \ No newline at end of file +v2024.02-1986-g1e8c4331 \ No newline at end of file From 99ea7ca87c25d87bb87a104d3d3ab057af491c4b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 16:50:43 +0200 Subject: [PATCH 088/118] adjust ref nexus log file --- tests/data/nexus/Ref_nexus_test.log | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/nexus/Ref_nexus_test.log b/tests/data/nexus/Ref_nexus_test.log index 7f7279708..35d3940c3 100644 --- a/tests/data/nexus/Ref_nexus_test.log +++ b/tests/data/nexus/Ref_nexus_test.log @@ -990,7 +990,7 @@ DEBUG - Note that ``incident_wavelength``, ``incident_energy``, and related fields can be a scalar values or arrays, depending on the use case. To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred - by the presence of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. + by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. 
DEBUG - documentation (NXobject.nxdl.xml:): DEBUG - @@ -1110,7 +1110,7 @@ DEBUG - Note that ``incident_wavelength``, ``incident_energy``, and related fields can be a scalar values or arrays, depending on the use case. To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred - by the presence of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. + by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. DEBUG - documentation (NXobject.nxdl.xml:): DEBUG - From 3bd4b1a4a42aeabc13568b32c710b12e33b28ec9 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 18 Jun 2025 09:25:31 +0200 Subject: [PATCH 089/118] update ruff (#657) --- .pre-commit-config.yaml | 2 +- pyproject.toml | 17 ++++++++++++----- src/pynxtools/__init__.py | 2 +- src/pynxtools/dataconverter/convert.py | 10 ++++------ src/pynxtools/dataconverter/file_hashing.py | 2 +- src/pynxtools/dataconverter/hdfdict.py | 3 +-- src/pynxtools/dataconverter/helpers.py | 2 +- src/pynxtools/dataconverter/nexus_tree.py | 9 ++++----- .../dataconverter/readers/example/reader.py | 4 ++-- .../dataconverter/readers/json_map/reader.py | 8 ++++---- .../dataconverter/readers/json_yml/reader.py | 2 +- src/pynxtools/dataconverter/readers/utils.py | 2 +- src/pynxtools/dataconverter/validation.py | 6 +++--- src/pynxtools/dataconverter/writer.py | 4 ++-- src/pynxtools/eln_mapper/eln.py | 4 ++-- src/pynxtools/eln_mapper/eln_mapper.py | 2 +- src/pynxtools/eln_mapper/reader_eln.py | 8 ++------ src/pynxtools/eln_mapper/schema_eln.py | 8 ++------ src/pynxtools/nomad/dataconverter.py | 8 +++++--- src/pynxtools/nomad/parser.py | 7 +++++-- src/pynxtools/nomad/schema.py | 4 ++-- src/pynxtools/testing/nexus_conversion.py | 10 +++++----- src/pynxtools/testing/nomad_example.py | 7 +++---- tests/dataconverter/test_helpers.py | 3 +-- tests/dataconverter/test_nexus_tree.py | 1 + tests/dataconverter/test_validation.py | 7 ++----- tests/eln_mapper/test_eln_mapper.py | 8 ++++---- tests/nexus/test_nexus.py | 17 +++++++---------- tests/nomad/test_parsing.py | 3 --- 29 files changed, 80 insertions(+), 90 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 382dc3fc5..3e6eac9ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.9.3 + rev: v0.12.0 hooks: # Run the linter. 
- id: ruff diff --git a/pyproject.toml b/pyproject.toml index b65da0b71..e5d741c2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ docs = [ ] dev = [ "mypy", - "ruff>=0.9.3", + "ruff>=0.12.0", "pytest", "pytest-timeout", "pytest-cov", @@ -135,24 +135,31 @@ select = [ "E", # pycodestyle "W", # pycodestyle "PL", # pylint + "UP", # pyupgrade + # "F401", # remove unused import + "I001", # sort imports # "NPY201", # reactivate when np>2.0 is used ] ignore = [ + "E402", # Module level import not at top of file "E501", # Line too long ({width} > {limit} characters) "E701", # Multiple statements on one line (colon) "E731", # Do not assign a lambda expression, use a def - "E402", # Module level import not at top of file + "PLC0415", # `import` should be at the top-level of a file + "PLR0904", # too-many-public-methods "PLR0911", # Too many return statements "PLR0912", # Too many branches "PLR0913", # Too many arguments in function definition "PLR0915", # Too many statements - "PLR2004", # Magic value used instead of constant - "PLW0603", # Using the global statement - "PLW2901", # redefined-loop-name + "PLR0917", # too-many-positional-arguments "PLR1714", # consider-using-in + "PLR2004", # Magic value used instead of constant "PLR5501", # else-if-used + "PLW0603", # Using the global statement + "PLW2901", # redefined-loop-name, ] fixable = ["ALL"] +isort.split-on-trailing-comma = false [tool.ruff.format] quote-style = "double" diff --git a/src/pynxtools/__init__.py b/src/pynxtools/__init__.py index 7d9df3b8b..7694be63b 100644 --- a/src/pynxtools/__init__.py +++ b/src/pynxtools/__init__.py @@ -16,11 +16,11 @@ # limitations under the License. # -from typing import Dict import logging import os import re from datetime import datetime +from typing import Dict from pynxtools._build_wrapper import get_vcs_version from pynxtools.definitions.dev_tools.globals.nxdl import get_nxdl_version diff --git a/src/pynxtools/dataconverter/convert.py b/src/pynxtools/dataconverter/convert.py index 508071906..242b5868f 100644 --- a/src/pynxtools/dataconverter/convert.py +++ b/src/pynxtools/dataconverter/convert.py @@ -373,12 +373,10 @@ def convert_cli( except TypeError as exc: sys.tracebacklimit = 0 raise click.UsageError( - ( - "Please make sure you have the following entries in your " - "parameter file:\n\n# NeXusParser Parameter File - v0.0.1" - "\n\ndataconverter:\n\treader: value\n\tnxdl: value\n\tin" - "put-file: value" - ) + "Please make sure you have the following entries in your " + "parameter file:\n\n# NeXusParser Parameter File - v0.0.1" + "\n\ndataconverter:\n\treader: value\n\tnxdl: value\n\tin" + "put-file: value" ) from exc if nxdl is None: raise click.UsageError("Missing option '--nxdl'") diff --git a/src/pynxtools/dataconverter/file_hashing.py b/src/pynxtools/dataconverter/file_hashing.py index 75d967e78..ce34f728f 100644 --- a/src/pynxtools/dataconverter/file_hashing.py +++ b/src/pynxtools/dataconverter/file_hashing.py @@ -28,7 +28,7 @@ def get_file_hashvalue(file_name: str) -> str: # Read and update hash string value in blocks of 4K for byte_block in iter(lambda: file_handle.read(4096), b""): sha256_hash.update(byte_block) - except IOError: + except OSError: print(f"File {file_name} is not accessible !") return sha256_hash.hexdigest() diff --git a/src/pynxtools/dataconverter/hdfdict.py b/src/pynxtools/dataconverter/hdfdict.py index c5a3d3f79..1f4b8692e 100644 --- a/src/pynxtools/dataconverter/hdfdict.py +++ b/src/pynxtools/dataconverter/hdfdict.py @@ -1,4 +1,3 @@ -# -*- coding: 
utf-8 -*- """Taken from: https://github.com/SiggiGue/hdfdict/blob/master/hdfdict/hdfdict.py""" from collections import UserDict @@ -196,7 +195,7 @@ def dump(data, hdf, *args, packer=pack_dataset, **kwargs): def _recurse(datadict, hdfobject): for key, value in datadict.items(): if isinstance(key, tuple): - key = "_".join((str(i) for i in key)) + key = "_".join(str(i) for i in key) if isinstance(value, (dict, LazyHdfDict)): hdfgroup = hdfobject.create_group(key) _recurse(value, hdfgroup) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 04061ef1c..22982ab41 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -24,7 +24,7 @@ from datetime import datetime, timezone from enum import Enum, auto from functools import lru_cache -from typing import Any, Callable, List, Optional, Tuple, Union, Sequence, cast +from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, cast import h5py import lxml.etree as ET diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index e60246a88..950afe5bb 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -29,25 +29,24 @@ """ from functools import lru_cache, reduce -from typing import Any, List, Dict, Literal, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union import lxml.etree as ET from anytree.node.nodemixin import NodeMixin -from pynxtools import get_definitions_url +from pynxtools import NX_DOC_BASES, get_definitions_url from pynxtools.dataconverter.helpers import ( + NEXUS_TO_PYTHON_DATA_TYPES, get_all_parents_for, get_nxdl_root_and_path, - is_variadic, is_appdef, + is_variadic, remove_namespace_from_tag, - NEXUS_TO_PYTHON_DATA_TYPES, ) from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( get_nx_namefit, is_name_type, ) -from pynxtools import NX_DOC_BASES NexusType = Literal[ "NX_BINARY", diff --git a/src/pynxtools/dataconverter/readers/example/reader.py b/src/pynxtools/dataconverter/readers/example/reader.py index 0d60b335e..71bd91ac7 100644 --- a/src/pynxtools/dataconverter/readers/example/reader.py +++ b/src/pynxtools/dataconverter/readers/example/reader.py @@ -45,11 +45,11 @@ def read( data: dict = {} if not file_paths: - raise IOError("No input files were given to Example Reader.") + raise OSError("No input files were given to Example Reader.") for file_path in file_paths: file_extension = file_path[file_path.rindex(".") :] - with open(file_path, "r", encoding="utf-8") as input_file: + with open(file_path, encoding="utf-8") as input_file: if file_extension == ".json": data = json.loads(input_file.read()) diff --git a/src/pynxtools/dataconverter/readers/json_map/reader.py b/src/pynxtools/dataconverter/readers/json_map/reader.py index 029b5c52c..6fc5a1397 100644 --- a/src/pynxtools/dataconverter/readers/json_map/reader.py +++ b/src/pynxtools/dataconverter/readers/json_map/reader.py @@ -22,8 +22,8 @@ from typing import Any, Tuple import numpy as np -import yaml import xarray +import yaml from mergedeep import merge from pynxtools.dataconverter import hdfdict @@ -186,7 +186,7 @@ def read( for file_path in file_paths: file_extension = file_path[file_path.rindex(".") :] if file_extension == ".json": - with open(file_path, "r", encoding="utf-8") as input_file: + with open(file_path, encoding="utf-8") as input_file: if ".mapping" in file_path: mapping = json.loads(input_file.read()) else: @@ -195,7 
+195,7 @@ def read( with open(file_path, "rb") as input_file: # type: ignore[assignment] data = pickle.load(input_file) # type: ignore[arg-type] elif file_extension == ".yaml": - with open(file_path, "r") as input_file: + with open(file_path) as input_file: merge(data, yaml.safe_load(input_file)) else: is_hdf5 = False @@ -216,7 +216,7 @@ def read( template = Template( {x: "/hierarchical/path/in/your/datafile" for x in template} ) - raise IOError( + raise OSError( "Please supply a JSON mapping file: " " my_nxdl_map.mapping.json\n\n You can use this " "template for the required fields: \n" + str(template) diff --git a/src/pynxtools/dataconverter/readers/json_yml/reader.py b/src/pynxtools/dataconverter/readers/json_yml/reader.py index 1c7c8ac92..a77d9601d 100644 --- a/src/pynxtools/dataconverter/readers/json_yml/reader.py +++ b/src/pynxtools/dataconverter/readers/json_yml/reader.py @@ -17,8 +17,8 @@ # """An example reader implementation for the DataConverter.""" -from typing import Tuple, Any, Callable, Dict, List import os +from typing import Any, Callable, Dict, List, Tuple from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.template import Template diff --git a/src/pynxtools/dataconverter/readers/utils.py b/src/pynxtools/dataconverter/readers/utils.py index 7b255b71f..e87e63b68 100644 --- a/src/pynxtools/dataconverter/readers/utils.py +++ b/src/pynxtools/dataconverter/readers/utils.py @@ -297,7 +297,7 @@ def parse_json(file_path: Union[str, Path]) -> Dict[str, Any]: Returns: Dict[str, Any]: The dictionary containing the data readout from the json. """ - with open(file_path, "r", encoding="utf-8") as file: + with open(file_path, encoding="utf-8") as file: return json.load(file) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index af743506d..3e02c97c7 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -16,22 +16,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import re import copy +import re from collections import defaultdict from functools import reduce from operator import getitem from typing import ( Any, + Dict, Iterable, List, + Literal, Mapping, MutableMapping, Optional, Tuple, Union, - Dict, - Literal, ) import h5py diff --git a/src/pynxtools/dataconverter/writer.py b/src/pynxtools/dataconverter/writer.py index a20183612..fac017613 100644 --- a/src/pynxtools/dataconverter/writer.py +++ b/src/pynxtools/dataconverter/writer.py @@ -300,7 +300,7 @@ def add_units_key(dataset, path): except InvalidDictProvided as exc: print(str(exc)) except Exception as exc: - raise IOError( + raise OSError( f"Unknown error occured writing the path: {path} " f"with the following message: {str(exc)}" ) from exc @@ -335,7 +335,7 @@ def add_units_key(dataset, path): ) dataset.attrs[entry_name[1:]] = data except Exception as exc: - raise IOError( + raise OSError( f"Unknown error occured writing the path: {path}" f", while writing the value: {value} " f"with the following message: {str(exc)}" diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index d6c3867e7..c548dfd22 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -17,10 +17,10 @@ # limitations under the License. 
# -import re import logging +import re from abc import ABC, abstractmethod -from typing import Any, List, Dict, Optional +from typing import Any, Dict, List, Optional import yaml diff --git a/src/pynxtools/eln_mapper/eln_mapper.py b/src/pynxtools/eln_mapper/eln_mapper.py index 310d80077..c7f2eb5ab 100644 --- a/src/pynxtools/eln_mapper/eln_mapper.py +++ b/src/pynxtools/eln_mapper/eln_mapper.py @@ -16,8 +16,8 @@ # limitations under the License. # -from typing import Union, Optional from pathlib import Path +from typing import Optional, Union import click diff --git a/src/pynxtools/eln_mapper/reader_eln.py b/src/pynxtools/eln_mapper/reader_eln.py index cd0ac8357..36206c051 100644 --- a/src/pynxtools/eln_mapper/reader_eln.py +++ b/src/pynxtools/eln_mapper/reader_eln.py @@ -19,13 +19,9 @@ # import re -from typing import List, Dict +from typing import Dict, List -from pynxtools.dataconverter.nexus_tree import ( - NexusEntity, - NexusGroup, - NexusNode, -) +from pynxtools.dataconverter.nexus_tree import NexusEntity, NexusGroup, NexusNode from pynxtools.eln_mapper.eln import ElnGenerator diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index e33600f46..d34218ba5 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -18,13 +18,9 @@ # import re -from typing import List, Dict, Union, Tuple +from typing import Dict, List, Tuple, Union -from pynxtools.dataconverter.nexus_tree import ( - NexusEntity, - NexusGroup, - NexusNode, -) +from pynxtools.dataconverter.nexus_tree import NexusEntity, NexusGroup, NexusNode from pynxtools.eln_mapper.eln import ElnGenerator NEXUS_TO_NOMAD_QUANTITY: Dict[str, Tuple[str, str]] = { diff --git a/src/pynxtools/nomad/dataconverter.py b/src/pynxtools/nomad/dataconverter.py index 5eddb1919..6f66bcc97 100644 --- a/src/pynxtools/nomad/dataconverter.py +++ b/src/pynxtools/nomad/dataconverter.py @@ -18,7 +18,9 @@ from pynxtools.dataconverter import convert as pynxtools_converter from pynxtools.dataconverter import writer as pynxtools_writer from pynxtools.dataconverter.template import Template -from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_app_defs_names # pylint: disable=import-error +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + get_app_defs_names, # pylint: disable=import-error +) m_package = Package(name="nexus_data_converter") @@ -176,7 +178,7 @@ class ElnYamlConverter(EntryData): ) def normalize(self, archive, logger): - super(ElnYamlConverter, self).normalize(archive, logger) + super().normalize(archive, logger) eln_dict = create_eln_dict(archive) write_yaml(archive, archive.data.output, eln_dict) @@ -231,7 +233,7 @@ class NexusDataConverter(EntryData): ) def normalize(self, archive, logger): - super(NexusDataConverter, self).normalize(archive, logger) + super().normalize(archive, logger) raw_path = archive.m_context.raw_path() eln_dict = create_eln_dict(archive) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 2735af3a4..7badeecfb 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -43,8 +43,11 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus -from pynxtools.nomad.utils import FIELD_STATISTICS -from pynxtools.nomad.utils import REPLACEMENT_FOR_NX, get_quantity_base_name +from pynxtools.nomad.utils import ( + FIELD_STATISTICS, + REPLACEMENT_FOR_NX, + get_quantity_base_name, +) from pynxtools.nomad.utils import _rename_nx_for_nomad as 
rename_nx_for_nomad diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 9c2bdb754..81220544d 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -75,14 +75,14 @@ from nomad.normalizing.common import nomad_atoms_from_ase_atoms from nomad.normalizing.topology import add_system, add_system_info from nomad.units import ureg - from nomad.utils import get_logger, strip, hash + from nomad.utils import get_logger, hash, strip except ImportError as exc: raise ImportError( "Could not import nomad package. Please install the package 'nomad-lab'." ) from exc -from pynxtools import get_definitions_url, NX_DOC_BASES +from pynxtools import NX_DOC_BASES, get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path from pynxtools.nomad.utils import ( FIELD_STATISTICS, diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index 2f8e99241..d1509f975 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -20,7 +20,7 @@ import logging import os from glob import glob -from typing import Dict, List, Literal, Tuple, Optional +from typing import Dict, List, Literal, Optional, Tuple try: from nomad.client import parse @@ -30,7 +30,7 @@ NOMAD_AVAILABLE = False -from pynxtools.dataconverter.convert import get_reader, convert +from pynxtools.dataconverter.convert import convert, get_reader from pynxtools.dataconverter.helpers import ( add_default_root_attributes, get_nxdl_root_and_path, @@ -150,7 +150,7 @@ def convert_to_nexus( if files_with_expected_output: output_file = files_with_expected_output[0] - with open(output_file, "r") as file: + with open(output_file) as file: expected_messages = [line.strip() for line in file.readlines()] for message in expected_messages: @@ -206,8 +206,8 @@ def load_logs( gen_log_path: str, ref_log_path: str ) -> Tuple[List[str], List[str]]: """Load log files and return their contents as lists of lines.""" - with open(gen_log_path, "r", encoding="utf-8") as gen, open( - ref_log_path, "r", encoding="utf-8" + with open(gen_log_path, encoding="utf-8") as gen, open( + ref_log_path, encoding="utf-8" ) as ref: return gen.readlines(), ref.readlines() diff --git a/src/pynxtools/testing/nomad_example.py b/src/pynxtools/testing/nomad_example.py index 9dd23f7e8..bdc01e0d5 100644 --- a/src/pynxtools/testing/nomad_example.py +++ b/src/pynxtools/testing/nomad_example.py @@ -18,14 +18,13 @@ """Test for NOMAD examples in reader plugins.""" import os -from typing import Any, Dict, List import tempfile +from typing import Any, Dict, List + import pytest try: - from nomad.config.models.plugins import ( - ExampleUploadEntryPoint, - ) + from nomad.config.models.plugins import ExampleUploadEntryPoint from nomad.datamodel import Context, EntryArchive from nomad.parsing.parser import ArchiveParser except ImportError: diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 883684012..140f69fbc 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -21,13 +21,12 @@ import os import shutil import xml.etree.ElementTree as ET -from typing import Optional import numpy as np import pytest + from pynxtools.dataconverter import helpers from pynxtools.dataconverter.template import Template -from pynxtools.dataconverter.validation import validate_dict_against def alter_dict(data_dict: Template, key: str, value: object): diff --git 
a/tests/dataconverter/test_nexus_tree.py b/tests/dataconverter/test_nexus_tree.py index 2b4e0c067..6af67b7ad 100644 --- a/tests/dataconverter/test_nexus_tree.py +++ b/tests/dataconverter/test_nexus_tree.py @@ -1,6 +1,7 @@ from typing import Any, List, Tuple, get_args from anytree import Resolver + from pynxtools.dataconverter.nexus_tree import ( NexusNode, NexusType, diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index f6217c7df..da835cb9b 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -21,14 +21,11 @@ import numpy as np import pytest + from pynxtools.dataconverter.template import Template from pynxtools.dataconverter.validation import validate_dict_against -from .test_helpers import ( # pylint: disable=unused-import - alter_dict, - fixture_filled_test_data, - fixture_template, -) +from .test_helpers import alter_dict # pylint: disable=unused-import def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index 6f092def1..d23a661df 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -95,10 +95,10 @@ def test_reader_eln(tmp_path): ], ) - with open(ref_file, encoding="utf-8", mode="r") as ref_f: + with open(ref_file, encoding="utf-8") as ref_f: ref_dict = yaml.safe_load(ref_f) - with open(test_file, encoding="utf-8", mode="r") as test_f: + with open(test_file, encoding="utf-8") as test_f: test_dict = yaml.safe_load(test_f) check_keys_from_two_dict(ref_dict, test_dict) @@ -122,10 +122,10 @@ def test_scheme_eln(tmp_path): eln_mapper.get_eln, ["--nxdl", "NXscan", "--output-file", test_file, "--eln-type", "schema"], ) - with open(ref_file, encoding="utf-8", mode="r") as ref_f: + with open(ref_file, encoding="utf-8") as ref_f: ref_dict = yaml.safe_load(ref_f) - with open(test_file, encoding="utf-8", mode="r") as test_f: + with open(test_file, encoding="utf-8") as test_f: test_dict = yaml.safe_load(test_f) check_keys_from_two_dict(ref_dict, test_dict) diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 98bfb9eb4..b8b34f4dd 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -17,13 +17,13 @@ # limitations under the License. 
# +import difflib import logging import os import lxml.etree as ET import numpy as np import pytest -import difflib from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( get_inherited_nodes, @@ -188,13 +188,10 @@ def test_nexus(tmp_path): np.set_printoptions(**default_print_options) nexus_helper.process_nexus_master_file(None) - with open( - os.path.join(tmp_path, "nexus_test.log"), "r", encoding="utf-8" - ) as logfile: + with open(os.path.join(tmp_path, "nexus_test.log"), encoding="utf-8") as logfile: log = logfile.readlines() with open( os.path.join(dirpath, "Ref_nexus_test.log"), - "r", encoding="utf-8", ) as reffile: ref = reffile.readlines() @@ -292,7 +289,7 @@ def test_c_option(tmp_path): logger.setLevel(logging.INFO) handler = logging.FileHandler(tmp_file, "w") - with open(ref_file, encoding="utf-8", mode="r") as ref_f: + with open(ref_file, encoding="utf-8") as ref_f: ref = ref_f.readlines() handler = logging.FileHandler(tmp_file, "w") @@ -304,7 +301,7 @@ def test_c_option(tmp_path): nexus_helper = HandleNexus(logger, None, None, "/NXbeam") nexus_helper.process_nexus_master_file(None) - with open(tmp_file, encoding="utf-8", mode="r") as tmp_f: + with open(tmp_file, encoding="utf-8") as tmp_f: tmp = tmp_f.readlines() assert tmp == ref @@ -318,7 +315,7 @@ def test_c_option(tmp_path): nexus_helper = HandleNexus(logger, None, None, "/NXdetector/data") nexus_helper.process_nexus_master_file(None) - with open(tmp_file, encoding="utf-8", mode="r") as tmp_f: + with open(tmp_file, encoding="utf-8") as tmp_f: tmp = tmp_f.readlines() assert tmp[0] == "INFO: entry/instrument/analyser/data\n" @@ -331,7 +328,7 @@ def test_c_option(tmp_path): nexus_helper = HandleNexus(logger, None, None, "/NXdata@signal") nexus_helper.process_nexus_master_file(None) - with open(tmp_file, encoding="utf-8", mode="r") as tmp_f: + with open(tmp_file, encoding="utf-8") as tmp_f: tmp = tmp_f.readlines() assert tmp[0] == "INFO: entry/data@signal\n" @@ -353,7 +350,7 @@ def test_d_option(tmp_path): nexus_helper = HandleNexus(logger, None, "/entry/instrument/analyser/data", None) nexus_helper.process_nexus_master_file(None) - with open(tmp_file, encoding="utf-8", mode="r") as tmp_f: + with open(tmp_file, encoding="utf-8") as tmp_f: tmp = tmp_f.readlines() assert ( diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 7f437742e..7f1162e2e 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -29,11 +29,8 @@ except ImportError: pytest.skip("nomad not installed", allow_module_level=True) -from typing import Any from pynxtools.nomad.parser import NexusParser -from pynxtools.nomad.schema import nexus_metainfo_package -from pynxtools.nomad.utils import _rename_nx_for_nomad as rename_nx_for_nomad def test_nexus_example(): From 05868ba2f8cbae2762b52506ab7006a9e83838df Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 16 Jun 2025 16:00:58 +0200 Subject: [PATCH 090/118] drop support for python 3.8 --- .github/workflows/pytest.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d7bfb3b9f..ad20de041 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml 
index e5d741c2f..fc35561bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,9 +12,8 @@ authors = [ description = "Extend NeXus for experiments and characterization in Materials Science and Materials Engineering and serve as a NOMAD parser implementation for NeXus." readme = "README.md" license = { file = "LICENSE" } -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", From f82977c061d33b35d31a788f39dcfcc752584bfd Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 18 Jun 2025 10:12:03 +0200 Subject: [PATCH 091/118] implement UP035: deprecation of typing Dict, List, Set, Tuple --- docs/how-tos/build-a-plugin.md | 23 +++++--- docs/how-tos/use-multi-format-reader.md | 42 ++++++++++++--- docs/learn/multi-format-reader.md | 40 ++++++++++++-- examples/mock-data-reader/reader.py | 9 ++-- src/pynxtools/__init__.py | 3 +- src/pynxtools/dataconverter/convert.py | 12 ++--- src/pynxtools/dataconverter/helpers.py | 21 ++++---- src/pynxtools/dataconverter/nexus_tree.py | 54 +++++++++---------- .../dataconverter/readers/base/reader.py | 6 +-- .../dataconverter/readers/example/reader.py | 6 +-- .../dataconverter/readers/json_map/reader.py | 6 +-- .../dataconverter/readers/json_yml/reader.py | 12 ++--- .../dataconverter/readers/multi/reader.py | 54 +++++++++---------- src/pynxtools/dataconverter/readers/utils.py | 36 ++++++------- src/pynxtools/dataconverter/template.py | 5 +- src/pynxtools/dataconverter/validation.py | 32 ++++------- src/pynxtools/eln_mapper/eln.py | 24 ++++----- src/pynxtools/eln_mapper/reader_eln.py | 13 +++-- src/pynxtools/eln_mapper/schema_eln.py | 28 +++++----- src/pynxtools/nexus/nexus.py | 6 +-- src/pynxtools/nomad/parser.py | 10 ++-- src/pynxtools/nomad/schema.py | 18 +++---- src/pynxtools/nomad/utils.py | 4 +- src/pynxtools/testing/nexus_conversion.py | 25 ++++----- src/pynxtools/testing/nomad_example.py | 6 +-- tests/dataconverter/test_nexus_tree.py | 4 +- tests/dataconverter/test_readers.py | 5 +- tests/eln_mapper/test_eln_mapper.py | 9 ++-- 28 files changed, 283 insertions(+), 230 deletions(-) diff --git a/docs/how-tos/build-a-plugin.md b/docs/how-tos/build-a-plugin.md index 571a01d1a..24d604425 100644 --- a/docs/how-tos/build-a-plugin.md +++ b/docs/how-tos/build-a-plugin.md @@ -6,10 +6,10 @@ Your current data is not supported yet by the built-in pynxtools readers or the Don't worry, the following how-to will guide you through the steps of writing a reader for your own data. - ## Getting started You should start by creating a clean repository that implements the following structure (for a plugin called ```pynxtools-plugin```): + ``` pynxtools-plugin ├── .github/workflows @@ -30,6 +30,7 @@ pynxtools-plugin ``` To identify `pynxtools-plugin` as a plugin for pynxtools, an entry point must be established (in the `pyproject.toml` file): + ``` [project.entry-points."pynxtools.reader"] mydatareader = "pynxtools_plugin.reader:MyDataReader" @@ -44,7 +45,6 @@ Here, we will focus mostly on the `reader.py` file and how to build a reader. Fo - ## Writing a Reader After you have established the main structure, you can start writing your reader. The new reader shall be placed in `reader.py`. 
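As a quick sanity check that the plugin wiring works (a hedged sketch, not part of the reader itself), you can list the readers registered under the `pynxtools.reader` entry-point group with the standard library. Shown for Python 3.10+, where `importlib.metadata.entry_points` accepts a `group` keyword:

```python
from importlib.metadata import entry_points

# After installing pynxtools-plugin (e.g., via pip install -e .), the reader
# name declared in pyproject.toml should appear in this list.
print([ep.name for ep in entry_points(group="pynxtools.reader")])
```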
@@ -53,7 +53,7 @@ Then implement the reader function: ```python title="reader.py" """MyDataReader implementation for the DataConverter to convert mydata to NeXus.""" -from typing import Tuple, Any +from typing import Any from pynxtools.dataconverter.readers.base.reader import BaseReader @@ -67,8 +67,8 @@ class MyDataReader(BaseReader): def read( self, template: dict = None, - file_paths: Tuple[str] = None, - objects: Tuple[Any] = None + file_paths: tuple[str] = None, + objects: tuple[Any] = None ) -> dict: """Reads data from given file and returns a filled template dictionary""" # Here, you must provide functionality to fill the the template, see below. @@ -80,8 +80,8 @@ class MyDataReader(BaseReader): # This has to be set to allow the convert script to use this reader. Set it to "MyDataReader". READER = MyDataReader - ``` + ### The reader template dictionary The read function takes a [`Template`](https://github.com/FAIRmat-NFDI/pynxtools/blob/master/src/pynxtools/dataconverter/template.py) dictionary, which is used to map from the measurement (meta)data to the concepts defined in the NeXus application definition. The template contains keys that match the concepts in the provided NXDL file. @@ -98,11 +98,13 @@ Example for a template entry: ``` For a given NXDL schema, you can generate an empty template with the command + ```console user@box:~$ dataconverter generate-template --nxdl NXmynxdl ``` #### Naming of groups + In case the NXDL does not define a `name` for the group the requested data belongs to, the template dictionary will list it as `/NAME_IN_NXDL[name_in_output_nexus]`. You can choose any name you prefer instead of the suggested `name_in_output_nexus` (see [here](../learn/nexus-rules.md) for the naming conventions). This allows the reader function to repeat groups defined in the NXDL to be outputted to the NeXus file. ```json @@ -112,6 +114,7 @@ In case the NXDL does not define a `name` for the group the requested data belon ``` #### Attributes + For attributes defined in the NXDL, the reader template dictionary will have the assosciated key with a "@" prefix to the attributes name at the end of the path: ```json @@ -121,6 +124,7 @@ For attributes defined in the NXDL, the reader template dictionary will have the ``` #### Units + If there is a field defined in the NXDL, the converter expects a filled in /data/@units entry in the template dictionary corresponding to the right /data field unless it is specified as NX_UNITLESS in the NXDL. Otherwise, a warning will be shown. ```json @@ -131,6 +135,7 @@ If there is a field defined in the NXDL, the converter expects a filled in /data ``` #### Links + You can also define links by setting the value to sub dictionary object with key `link`: ```python @@ -138,10 +143,12 @@ template["/entry/instrument/source"] = {"link": "/path/to/source/data"} ``` ### Building off of the BaseReader + When building off the [`BaseReader`](https://github.com/FAIRmat-NFDI/pynxtools/blob/master/src/pynxtools/dataconverter/readers/base/reader.py), the developer has the most flexibility. Any new reader must implement the `read` function, which must return a filled template object. ### Building off of the MultiFormatReader + While building on the ```BaseReader``` allows for the most flexibility, in most cases it is desirable to implement a reader that can read in multiple file formats and then populate the template based on the read data. 
For this purpose, `pynxtools` has the [**`MultiFormatReader`**](https://github.com/FAIRmat-NFDI/pynxtools/blob/master/src/pynxtools/dataconverter/readers/multi/reader.py), which can be readily extended for your own data.

You can find an extensive how-to guide to build off the `MultiFormatReader` [here](./use-multi-format-reader.md).

@@ -149,9 +156,11 @@ You can find an extensive how-to guide to build off the `MultiFormatReader` [her
## Calling the reader from the command line

The dataconverter can be executed using:
+
```console
user@box:~$ dataconverter --reader mydatareader --nxdl NXmynxdl --output path_to_output.nxs
```
+
Here, the ``--reader`` flag must match the reader name defined in `[project.entry-points."pynxtools.reader"]` in the pyproject.toml file. The NXDL name passed to ``--nxdl`` must be a valid NeXus NXDL/XML file in `pynxtools.definitions`.

Aside from this default structure, there are many more flags that can be passed to the
dataconverter call. Here is its API:

@@ -162,4 +171,4 @@ dataconverter call. Here is its API:
    :prog_name: dataconverter
    :depth: 2
    :style: table
-    :list_subcommands: True
\ No newline at end of file
+    :list_subcommands: True
diff --git a/docs/how-tos/use-multi-format-reader.md b/docs/how-tos/use-multi-format-reader.md
index ebb795c9b..8058c8b4b 100644
--- a/docs/how-tos/use-multi-format-reader.md
+++ b/docs/how-tos/use-multi-format-reader.md
@@ -1,4 +1,5 @@
# How to use the built-in MultiFormatReader
+
While building on the ```BaseReader``` allows for the most flexibility, in most cases it is desirable to implement a reader that can read in multiple file formats and then populate the template based on the read data. For this purpose, `pynxtools` has the [**`MultiFormatReader`**](https://github.com/FAIRmat-NFDI/pynxtools/blob/master/src/pynxtools/dataconverter/readers/multi/reader.py), which can be readily extended for your own data. In this how-to guide, we will focus on an implementation using a concrete example. If you are also interested in the general structure of the `MultiFormatReader`, you can find more information [here](../learn/multi-format-reader.md).

## Getting started

@@ -71,9 +72,10 @@ Note that in order to be recognized as a valid application definition, this file
We first start by implementing the class and its ``__init__`` call:
+
```python title="reader.py"
"""MyDataReader implementation for the DataConverter to convert mydata to NeXus."""

-from typing import Tuple, Any
+from typing import Any

from pynxtools.dataconverter.readers.multi.reader import ParseJsonCallbacks, MultiFormatReader

@@ -97,12 +99,15 @@ class MyDataReader(MultiFormatReader):

READER = MyDataReader
```
+
Note that here we are adding handlers for three types of data file extensions:
+
1. `".hdf5"`, `".h5"`: This will be used to parse in the (meta)data from the instrument's HDF5 file.
2. `".yml"`, `".yaml"`: This will be used to parse in the (meta)data from the ELN file.
3. `".json"`: This will be used to read in the **config file**, which is used to map from the (meta)data concepts from the instrument and ELN data to the concepts in the NXDL file.

## Reading in the instrument's data and metadata
+
First, we will have a look at the HDF5 file.
This mock HDF5 file was generated w Here, we see that we have a `data` group with x and y values, as well as some additional metadata for the instrument. Here is one way to implement the method to read in the data: + ```python title="reader.py" import h5py @@ -132,9 +138,11 @@ def handle_hdf5_file(filepath): return {} ``` + Note that here we are returning an empty dictionary because we don't want to fill the template just yet, but only read in the HDF5 data for now. We will use the config file later to fill the template with the read-in data. Note that it is also possible to return a dictionary here to update the template directly. `self.hdf5_data` will look like this: + ```python { "data/x_values": array([-10. , -9.7979798 , -9.5959596 , ..., 10. ]), @@ -147,9 +155,12 @@ Note that here we are returning an empty dictionary because we don't want to fil "metadata/instrument/detector/count_time_units": s", } ``` + ## Reading in ELN data + As we can see in the application definition `NXsimple` above, there are some concepts defined for which there is no equivalent metadata in the HDF5 file. We are therefore using a YAML ELN file to add additional metadata. The ELN file `eln_data.yaml` looks like this: + ```yaml title="eln_data.yaml" title: My experiment user: @@ -179,7 +190,7 @@ CONVERT_DICT = { "sample": "SAMPLE[sample]", } -def handle_eln_file(self, file_path: str) -> Dict[str, Any]: +def handle_eln_file(self, file_path: str) -> dict[str, Any]: self.eln_data = parse_yml( file_path, convert_dict=CONVERT_DICT, @@ -188,7 +199,9 @@ def handle_eln_file(self, file_path: str) -> Dict[str, Any]: return {} ``` + When this method is called, `self.eln_data` will look like this: + ```python { "/ENTRY[entry]/title": "My experiment", @@ -200,9 +213,11 @@ When this method is called, `self.eln_data` will look like this: "/ENTRY[entry]/SAMPLE[sample]/temperature/@units": "K" } ``` + Note that here we are using `parent_key="/ENTRY[entry]"` as well as a `CONVERT_DICT`, meaning that each key in `self.eln_data` will start with `"/ENTRY[entry]"` and some of the paths will be converted to match the template notation. This will be important later. ## Parsing the config file + Next up, we can make use of the config file, which is a JSON file that tells the reader how to map the concepts from the HDF5 and ELN files in order to populate the template designed to match `NXsimple`. The choices made in the config file define how semantics from the source (data file) and target (NeXus application definition) side are mapped. Essentially, the config file should contain all keys that are present in the NXDL. In our case, the config file looks like this: ```json title="config_file.json" @@ -235,11 +250,13 @@ Next up, we can make use of the config file, which is a JSON file that tells the } } ``` + Note that here we are using `@`-prefixes which are used to fill the template from the different data sources. We dicuss this below in more detail. We also implement a method for setting the config file in the reader: + ```python title="reader.py" -def set_config_file(self, file_path: str) -> Dict[str, Any]: +def set_config_file(self, file_path: str) -> dict[str, Any]: if self.config_file is not None: logger.info( f"Config file already set. Replaced by the new file {file_path}." 
@@ -247,12 +264,14 @@ def set_config_file(self, file_path: str) -> dict[str, Any]:
    self.config_file = file_path

    return {}
-```
+```

## Filling the template from the read-in data
+
Finally, after reading in all of the data and metadata as well as designing the config file, we can start filling the template. For this, we must implement functions that are called using the reader's **callbacks**.

We will start with the `@attrs` prefix, associated with the `attrs_callback`. We must implement the `get_attr` method:
+
```python title="reader.py"
def get_attr(self, key: str, path: str) -> Any:
    """
    Get the metadata that was stored in the main file.
    """
    if self.hdf5_data is None:
        return None

    return self.hdf5_data.get(path)
```
+
This method (and all similar callback methods) has two inputs:
+
1. **`key`**, which is a key in the config file. Note that here, the generic `"/ENTRY/"` gets replaced by `f"/ENTRY[{entry_name}]/"`, where `entry_name` is one of the entries returned by the `self.get_entry_names` method.
2. **`path`**, which is the part of the config value that comes after the `@attrs:` prefix. For example, for the config value `"@attrs:my-metadata"`, the extracted path is `my-metadata`.

For the `get_attr` method, we are making use of the `path`. For example, for the config value `"@attrs:metadata/instrument/version"`, the extracted path is `metadata/instrument/version`, which is also one of the keys of the `self.hdf5_data` dictionary.

For the ELN data, we must implement the `get_eln_data` function that gets called from the `eln_callback` when using the `@eln` prefix:
+
```python title="reader.py"
def get_eln_data(self, key: str, path: str) -> Any:
    """Returns data from the given eln path."""
    if self.eln_data is None:
        return None

    return self.eln_data.get(key)
```
+
Here, we are making use of the fact that we have used `CONVERT_DICT` in the `parse_yml` function above. Thus, the keys of the `self.eln_data` dictionary are exactly the same as those in the config file (for example, the config key `"/ENTRY[entry]/USER[user]/address"` also exists in `self.eln_data`). Therefore, we can just get this data using the `key` coming from the config file.

Finally, we also need to address the `@data` prefix, which gets used in the `data_callback` to populate the NXdata group in the template. Note that here we use the same `@data` prefix to fill the `x_values` as well as the `data` (from `y_values`) fields. We achieve this by using the path that follows `@data:` in the config file:
+
```python title="reader.py"
def get_data(self, key: str, path: str) -> Any:
    """Returns measurement data from the given hdf5 path."""
    if path.endswith(("x_values", "y_values")):
        return self.hdf5_data.get(f"data/{path}")
    else:
        logger.warning(f"No data found at path {path}")
```

## Bringing it all together
+
Et voilà! That's all we need to read in our data and populate the `NXsimple` template. Our final reader looks like this:

```python title="reader.py"
import logging
-from typing import Dict, Any
+from typing import Any

import h5py

from pynxtools.dataconverter.readers.multi.reader import MultiFormatReader

@@ -331,7 +356,7 @@ class MyDataReader(MultiFormatReader):

-    def set_config_file(self, file_path: str) -> Dict[str, Any]:
+    def set_config_file(self, file_path: str) -> dict[str, Any]:
        if self.config_file is not None:
            logger.info(
                f"Config file already set. Replaced by the new file {file_path}."
@@ -339,7 +364,7 @@ class MyDataReader(MultiFormatReader):
        self.config_file = file_path
        return {}

-    def handle_hdf5_file(self, filepath) -> Dict[str, Any]:
+    def handle_hdf5_file(self, filepath) -> dict[str, Any]:
        def recursively_read_group(group, path=""):
            result = {}
            for key, item in group.items():
@@ -358,7 +383,7 @@ class MyDataReader(MultiFormatReader):

        return {}

-    def handle_eln_file(self, file_path: str) -> Dict[str, Any]:
+    def handle_eln_file(self, file_path: str) -> dict[str, Any]:
        self.eln_data = parse_yml(
            file_path,
            convert_dict=CONVERT_DICT,
@@ -394,6 +419,7 @@ READER = MyDataReader
```

## Using the reader
+
We can call our reader using the following command

```console
diff --git a/docs/learn/multi-format-reader.md b/docs/learn/multi-format-reader.md
index 40efc33ef..47a1ede4c 100644
--- a/docs/learn/multi-format-reader.md
+++ b/docs/learn/multi-format-reader.md
@@ -15,9 +15,10 @@ Here, we will explain the inner workings of the `MultiFormatReader`. Note that t

## The basic structure

For extending the `MultiFormatReader`, the following basic structure must be implemented:
+
```python title="multi/reader.py"
"""MyDataReader implementation for the DataConverter to convert mydata to NeXus."""

-from typing import Tuple, Any
+from typing import Any

from pynxtools.dataconverter.readers.multi.reader import MultiFormatReader

@@ -38,24 +39,28 @@ READER = MyDataReader
```

In order to understand the capabilities of the `MultiFormatReader` and which methods need to be implemented when extending it, we will have a look at its ```read``` method:
+
```python title="multi/reader.py"
    def read(
        self,
        template: dict = None,
-        file_paths: Tuple[str] = None,
-        objects: Optional[Tuple[Any]] = None,
+        file_paths: tuple[str] = None,
+        objects: Optional[tuple[Any]] = None,
        **kwargs,
    ) -> dict:
        self.kwargs = kwargs

        self.config_file = self.kwargs.get("config_file", self.config_file)
        self.overwrite_keys = self.kwargs.get("overwrite_keys", self.overwrite_keys)
```
+
## Template initialization and processing order
+
An empty `Template` object is initialized and later gets filled from the data files.
+
```python title="multi/reader.py"
        template = Template(overwrite_keys=self.overwrite_keys)

-        def get_processing_order(path: str) -> Tuple[int, Union[str, int]]:
+        def get_processing_order(path: str) -> tuple[int, Union[str, int]]:
            """
            Returns the processing order of the file.
            """
@@ -66,10 +71,12 @@ An empty `Template` object is initialized and later gets filled from the data f
        sorted_paths = sorted(file_paths, key=get_processing_order)
```
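For orientation, the `Template` behaves like a dictionary keyed by template paths, so the rest of the `read` method can simply assign values to it. A minimal sketch (the key below is illustrative, not taken from a specific application definition):

```python
from pynxtools.dataconverter.template import Template

# overwrite_keys mirrors the reader attribute used in the excerpt above.
template = Template(overwrite_keys=True)
template["/ENTRY[entry]/title"] = "My experiment"
```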
an HDF5 file, a method for handling this type of file should be added, i.e., `self.extensions[".hdf5"] = self.handle_hdf5`. Note that these methods should also implement any logic depending on the provided data, i.e., it may not be sufficient to rely on the filename suffix, but the reader may also need to check for different file versions, binary signature, mimetype, etc. Any of these methods should take as input only the file path, e.g. + ```python title="multi/reader.py" -def handle_eln_file(self, file_path: str) -> Dict[str, Any] +def handle_eln_file(self, file_path: str) -> dict[str, Any] ``` + These methods must return a dictionary. One possibility is to return a dictionary that directly fills the template (see the `template.update` call above) with the data from the file. Another option is to return an empty dictionary (i.e., not fill the template at this stage) and only later fill the template from a config file (see below). Note that for several input formats, standardized parser functions already exist within the `MultiFormatReader`. For example, YAML files can be parsed using the `pynxtools.dataconverter.readers.utils.parse_yml` function. ## Setting default values in the template + ```python title="multi/reader.py" template.update(self.setup_template()) ``` + Next, the `setup_template` method can be implemented, which is used to populate the template with initial data that does not come from the files themselves. This may be used to set fixed information, e.g., about the reader. As an example, `NXentry/program_name` (which is defined as the name of program used to generate the NeXus file) scan be set to `pynxtools-plugin` by making `setup_template` return a dictionary of the form + ```json { "/ENTRY[my_entry]/program_name": "pynxtools-plugin", @@ -108,20 +121,25 @@ Next, the `setup_template` method can be implemented, which is used to populate ``` ## Handling objects + ```python title="multi/reader.py" if objects is not None: template.update(self.handle_objects(objects)) ``` + Aside from data files, it is also possible to directly pass any Python objects to the `read` function (e.g., a numpy array with measurement data). In order to exploit this, the `handle_objects` method must implemented, which should return a dictionary that populates the template. ## Parsing the config file + ```python title="multi/reader.py" if self.config_file is not None: self.config_dict = parse_flatten_json( self.config_file, create_link_dict=False ) ``` + Next up, we can make use of the config file, which is a JSON file that tells the reader which input data to use to populate the template. In other words, the config.json is used for ontology mapping between the input file paths and the NeXus application definition. Essentially, the config file should contain all keys that are present in the NXDL. A subset of a typical config file may look like this: + ```json { "/ENTRY/title": "@attrs:metadata/title", @@ -148,6 +166,7 @@ Next up, we can make use of the config file, which is a JSON file that tells the } } ``` + Here, the `parse_flatten_json` method is used that allows us to write the config dict in the structured manner above and internally flattens it (so that it has a similar structure as the Template). 
In the config file, one can @@ -158,12 +177,15 @@ In the config file, one can Note that in order to use a `link_callback` (see below), `create_link_dict` must be set to `False`, which means that at this stage, config values of the form `"@link:"/path/to/source/data"` get NOT yet converted to `{"link": "/path/to/source/data"}`. ## Data post processing + ```python title="multi/reader.py" self.post_process() ``` + In case there is the need for any post-processing on the data and/or config dictionary _after_ they have been read, the `post_process` method can be implemented. For example, this can be helpful if there are multiple entities of a given NX_CLASS (for example, multiple detectors) on the same level and the config dict shall be set up to fill the template with all of these entities. ## Filling the template from the read-in data + ```python title="multi/reader.py" if self.config_dict: suppress_warning = kwargs.pop("suppress_warning", False) @@ -178,9 +200,11 @@ In case there is the need for any post-processing on the data and/or config dict return template ``` + As a last step, the template is being filled from the config dict using the data. If there is more than one entry, the `get_entry_names` method must be implemented, which shall return a list of all entry names. The `fill_from_config` method iterates through all of the them and replaces the generic `/ENTRY/` in the config file by keys of the form `/ENTRY[my-entry]/` to fill the template. Here, we are using **callbacks**, which are used to bring in data based on `@`-prefixes in the config file. These are defined in the reader's ``__init__`` call using the `pynxtools.dataconverter.readers.multi.ParseJsonCallbacks` class: + ```python title="multi/reader.py" self.callbacks = ParseJsonCallbacks( attrs_callback=self.get_attr, @@ -189,7 +213,9 @@ self.callbacks = ParseJsonCallbacks( dims=self.get_data_dims, ) ``` + The `ParseJsonCallbacks` class has an attribute called `special_key_map` that makes use of these callbacks to populate the template based on the starting prefix of the config dict value: + ```python title="multi/reader.py" self.special_key_map = { "@attrs": attrs_callback if attrs_callback is not None else self.identity, @@ -198,6 +224,7 @@ self.special_key_map = { "@eln": eln_callback if eln_callback is not None else self.identity, } ``` + That means, if the config file has an entry ```{"/ENTRY/title": "@attrs:metadata/title"}```, the `get_attr` method of the reader gets called and should return an attribute from the given path, i.e., in this case from `metadata/title`. By default, the MultiFormatReader supports the following special prefixes: @@ -212,12 +239,15 @@ The destinction between data and metadata is somewhat arbitrary here. The reason In addition, the reader can also implement the `get_data_dims` method, which is used to return a list of the data dimensions (see below for more details). All of `get_attr`, `get_data`, and `get_eln_data` (as well as any similar method that might be implemented) should have the same call signature: + ```python def get_data(self, key: str, path: str) -> Any: ``` + Here, `key` is the config dict key (e.g., `"/ENTRY[my-entry]/data/data"`) and path is the path that comes _after_ the prefix in the config file. In the example config file above, `path` would be `mydata`. With these two inputs, the reader should be able to return the correct data for this template key. 
### Special rules + - **Lists as config value**: It is possible to write a list of possible configurations of the sort ```json "/ENTRY/title":"['@attrs:my_title', '@eln', 'no title']" diff --git a/examples/mock-data-reader/reader.py b/examples/mock-data-reader/reader.py index 0aa7120a8..0c4d92cf0 100644 --- a/examples/mock-data-reader/reader.py +++ b/examples/mock-data-reader/reader.py @@ -17,7 +17,8 @@ """An example reader implementation based on the MultiFormatReader.""" import logging -from typing import Dict, Any +from typing import Any + import h5py from pynxtools.dataconverter.readers.multi.reader import MultiFormatReader @@ -51,7 +52,7 @@ def __init__(self, *args, **kwargs): ".h5": self.handle_hdf5_file, } - def set_config_file(self, file_path: str) -> Dict[str, Any]: + def set_config_file(self, file_path: str) -> dict[str, Any]: if self.config_file is not None: logger.info( f"Config file already set. Replaced by the new file {file_path}." @@ -59,7 +60,7 @@ def set_config_file(self, file_path: str) -> Dict[str, Any]: self.config_file = file_path return {} - def handle_hdf5_file(self, filepath) -> Dict[str, Any]: + def handle_hdf5_file(self, filepath) -> dict[str, Any]: def recursively_read_group(group, path=""): result = {} for key, item in group.items(): @@ -78,7 +79,7 @@ def recursively_read_group(group, path=""): return {} - def handle_eln_file(self, file_path: str) -> Dict[str, Any]: + def handle_eln_file(self, file_path: str) -> dict[str, Any]: self.eln_data = parse_yml( file_path, convert_dict=CONVERT_DICT, diff --git a/src/pynxtools/__init__.py b/src/pynxtools/__init__.py index 7694be63b..89371446d 100644 --- a/src/pynxtools/__init__.py +++ b/src/pynxtools/__init__.py @@ -20,7 +20,6 @@ import os import re from datetime import datetime -from typing import Dict from pynxtools._build_wrapper import get_vcs_version from pynxtools.definitions.dev_tools.globals.nxdl import get_nxdl_version @@ -29,7 +28,7 @@ MAIN_BRANCH_NAME = "fairmat" -NX_DOC_BASES: Dict[str, str] = { +NX_DOC_BASES: dict[str, str] = { "https://github.com/nexusformat/definitions.git": "https://manual.nexusformat.org/classes", "https://github.com/FAIRmat-NFDI/nexus_definitions.git": "https://fairmat-nfdi.github.io/nexus_definitions/classes", } diff --git a/src/pynxtools/dataconverter/convert.py b/src/pynxtools/dataconverter/convert.py index 242b5868f..eaff73988 100644 --- a/src/pynxtools/dataconverter/convert.py +++ b/src/pynxtools/dataconverter/convert.py @@ -26,7 +26,7 @@ import sys from gettext import gettext from pathlib import Path -from typing import List, Literal, Optional, Tuple +from typing import Literal, Optional import click import lxml.etree as ET @@ -82,7 +82,7 @@ def get_reader(reader_name) -> BaseReader: return module.READER # type: ignore[attr-defined] -def get_names_of_all_readers() -> List[str]: +def get_names_of_all_readers() -> list[str]: """Helper function to populate a list of all available readers""" path_prefix = ( f"{os.path.dirname(__file__)}{os.sep}" if os.path.dirname(__file__) else "" @@ -192,7 +192,7 @@ def transfer_data_into_template( # pylint: disable=too-many-arguments,too-many-locals,W1203 def convert( - input_file: Tuple[str, ...], + input_file: tuple[str, ...], reader: str, nxdl: str, output: str, @@ -203,7 +203,7 @@ def convert( Parameters ---------- - input_file : Tuple[str] + input_file : tuple[str] Tuple of files or file reader: str Name of reader such as xps @@ -352,8 +352,8 @@ def main_cli(): ) # pylint: disable=too-many-arguments def convert_cli( - files: Tuple[str, ...], 
- input_file: Tuple[str, ...], + files: tuple[str, ...], + input_file: tuple[str, ...], reader: str, nxdl: str, output: str, diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 22982ab41..72be2792b 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -21,10 +21,11 @@ import logging import os import re +from collections.abc import Sequence from datetime import datetime, timezone from enum import Enum, auto -from functools import lru_cache -from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, cast +from functools import cache, lru_cache +from typing import Any, Callable, Optional, Union, cast import h5py import lxml.etree as ET @@ -302,7 +303,7 @@ def is_appdef(xml_elem: ET._Element) -> bool: return get_appdef_root(xml_elem).attrib.get("category") == "application" -def get_all_parents_for(xml_elem: ET._Element) -> List[ET._Element]: +def get_all_parents_for(xml_elem: ET._Element) -> list[ET._Element]: """ Get all parents from the nxdl (via extends keyword) @@ -310,7 +311,7 @@ def get_all_parents_for(xml_elem: ET._Element) -> List[ET._Element]: xml_elem (ET._Element): The element to get the parents for. Returns: - List[ET._Element]: The list of parents xml nodes. + list[ET._Element]: The list of parents xml nodes. """ root = get_appdef_root(xml_elem) inheritance_chain = [] @@ -412,7 +413,7 @@ def get_all_defined_required_children_for_elem(xml_element): return list_of_children_to_add -visited_paths: List[str] = [] +visited_paths: list[str] = [] def get_all_defined_required_children(nxdl_path, nxdl_name): @@ -605,7 +606,7 @@ def get_name_from_data_dict_entry(entry: str) -> str: ENTRY[entry] -> entry """ - @lru_cache(maxsize=None) + @cache def get_regex(): return re.compile(r"(?<=\[)(.*?)(?=\])") @@ -630,7 +631,7 @@ def convert_data_dict_path_to_hdf5_path(path) -> str: return hdf5path -def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: +def is_value_valid_element_of_enum(value, elist) -> tuple[bool, list]: """Checks whether a value has to be specific from the NXDL enumeration and returns options.""" for elem in elist: enums = get_enums(elem) @@ -784,8 +785,8 @@ def is_valid_data_field( return value -@lru_cache(maxsize=None) -def path_in_data_dict(nxdl_path: str, data_keys: Tuple[str, ...]) -> List[str]: +@cache +def path_in_data_dict(nxdl_path: str, data_keys: tuple[str, ...]) -> list[str]: """Checks if there is an accepted variation of path in the dictionary & returns the path.""" found_keys = [] for key in data_keys: @@ -1081,7 +1082,7 @@ def transform_to_intended_dt(str_value: Any) -> Optional[Any]: for sym in symbol_list_for_data_seperation: if sym in str_value: parts = str_value.split(sym) - modified_parts: List = [] + modified_parts: list = [] for part in parts: part = transform_to_intended_dt(part) if isinstance(part, (int, float)): diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 950afe5bb..98f123710 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -29,7 +29,7 @@ """ from functools import lru_cache, reduce -from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union +from typing import Any, Literal, Optional, Union import lxml.etree as ET from anytree.node.nodemixin import NodeMixin @@ -130,7 +130,7 @@ class NexusNode(NodeMixin): This is set automatically on init and will be True if the `nameTYPE` is "any" or "partial" and False 
otherwise. Defaults to False. - inheritance (List[InstanceOf[ET._Element]]): + inheritance (list[InstanceOf[ET._Element]]): The inheritance chain of the node. The first element of the list is the xml representation of this node. All following elements are the xml nodes of the node if these are @@ -142,12 +142,12 @@ class NexusNode(NodeMixin): for a tree, i.e., setting the parent of a node is enough to add it to the tree and to its parent's children. For the root this is None. - is_a: List["NexusNode"]: + is_a: list["NexusNode"]: A list of NexusNodes the current node represents. This is used for attaching siblings to the current node, e.g., if the parent appdef has a field `DATA(NXdata)` and the current appdef has a field `my_data(NXdata)` the relation `my_data` `is_a` `DATA` is set. - parent_of: List["NexusNode"]: + parent_of: list["NexusNode"]: The inverse of the above `is_a`. In the example case `DATA` `parent_of` `my_data`. nxdl_base: str @@ -159,9 +159,9 @@ class NexusNode(NodeMixin): name_type: Optional[Literal["specified", "any", "partial"]] = "specified" optionality: Literal["required", "recommended", "optional"] = "required" variadic: bool = False - inheritance: List[ET._Element] - is_a: List["NexusNode"] - parent_of: List["NexusNode"] + inheritance: list[ET._Element] + is_a: list["NexusNode"] + parent_of: list["NexusNode"] nxdl_base: str def _set_optionality(self): @@ -194,7 +194,7 @@ def __init__( optionality: Literal["required", "recommended", "optional"] = "required", variadic: Optional[bool] = None, parent: Optional["NexusNode"] = None, - inheritance: Optional[List[Any]] = None, + inheritance: Optional[list[Any]] = None, nxdl_base: Optional[str] = None, ) -> None: super().__init__() @@ -222,14 +222,14 @@ def get_path(self) -> str: str: The full path up to the parent of the current node. """ current_node = self - names: List[str] = [] + names: list[str] = [] while current_node.parent is not None: names.insert(0, current_node.name) current_node = current_node.parent return "/" + "/".join(names) def search_add_child_for_multiple( - self, names: Tuple[str, ...] + self, names: tuple[str, ...] ) -> Optional["NexusNode"]: """ Searchs and adds a child with one of the names in `names` to the current node. @@ -237,7 +237,7 @@ def search_add_child_for_multiple( The found child is then returned. Args: - name (Tuple[str, ...]): + name (tuple[str, ...]): A tuple of names of the child to search for. Returns: @@ -311,7 +311,7 @@ def get_all_direct_children_names( nx_class: Optional[str] = None, depth: Optional[int] = None, only_appdef: bool = False, - ) -> Set[str]: + ) -> set[str]: """ Get all children names of the current node up to a certain depth. Only `field`, `group` `choice` or `attribute` are considered as children. @@ -341,7 +341,7 @@ def get_all_direct_children_names( ValueError: If depth is not int or negativ. Returns: - Set[str]: A set of children names. + set[str]: A set of children names. """ if depth is not None and (not isinstance(depth, int) or depth < 0): @@ -376,7 +376,7 @@ def required_fields_and_attrs_names( self, prev_path: str = "", level: Literal["required", "recommended", "optional"] = "required", - ) -> List[str]: + ) -> list[str]: """ Gets all required fields and attributes names of the current node and its children. @@ -394,7 +394,7 @@ def required_fields_and_attrs_names( Defaults to "required". Returns: - List[str]: A list of required fields and attributes names. + list[str]: A list of required fields and attributes names. 
""" lvl_map = { "required": ("required",), @@ -425,7 +425,7 @@ def required_fields_and_attrs_names( return req_children - def get_docstring(self, depth: Optional[int] = None) -> Dict[str, str]: + def get_docstring(self, depth: Optional[int] = None) -> dict[str, str]: """ Gets the docstrings of the current node and its parents up to a certain depth. @@ -439,7 +439,7 @@ def get_docstring(self, depth: Optional[int] = None) -> Dict[str, str]: ValueError: If depth is not int or negativ. Returns: - List[str]: A list of docstrings one for each parent doc. + list[str]: A list of docstrings one for each parent doc. """ if depth is not None and depth < 0: raise ValueError("Depth must be a positive integer or None") @@ -486,7 +486,7 @@ def get_link(self) -> str: return f"{doc_base}/{nx_file}.html#{anchor}" - def _build_inheritance_chain(self, xml_elem: ET._Element) -> List[ET._Element]: + def _build_inheritance_chain(self, xml_elem: ET._Element) -> list[ET._Element]: """ Builds the inheritance chain based on the given xml node and the inheritance chain of this node. @@ -495,7 +495,7 @@ def _build_inheritance_chain(self, xml_elem: ET._Element) -> List[ET._Element]: xml_elem (ET._Element): The xml element to build the inheritance chain for. Returns: - List[ET._Element]: + list[ET._Element]: The list of xml nodes representing the inheritance chain. This represents the direct field or group inside the specific xml file. """ @@ -694,7 +694,7 @@ class NexusGroup(NexusNode): Args: nx_class (str): - occurence_limits (Tuple[Optional[int], Optional[int]]): + occurence_limits (tuple[Optional[int], Optional[int]]): Denotes the minimum and maximum number of occurrences of the group. First element denotes the minimum, second one the maximum. If the respective value is None, then there is no limit. @@ -703,7 +703,7 @@ class NexusGroup(NexusNode): """ nx_class: str - occurrence_limits: Tuple[ + occurrence_limits: tuple[ # TODO: Use Annotated[int, Field(strict=True, ge=0)] for py>3.8 Optional[int], Optional[int], @@ -837,7 +837,7 @@ class NexusEntity(NexusNode): Also the base classes of these entities are considered. If it is not present in any of the xml nodes, it will be set to `NX_CHAR`. Defaults to "NX_CHAR". - items (Optional[List[str]]): + items (Optional[list[str]]): This is a restriction of the field value to a list of items. Only applies to nodes of dtype `NX_CHAR`. This is set automatically on init based on the values found in the nxdl file. @@ -848,7 +848,7 @@ class NexusEntity(NexusNode): If enumerations are used, the enumeration can be open (i.e., the value is not limited to the enumeration items) or closed (i.e., the value must exactly match one of the enumeration items). This is controlled by the open_enum boolean. By default, it is closed. - shape (Optional[Tuple[Optional[int], ...]]): + shape (Optional[tuple[Optional[int], ...]]): The shape of the entity as given by the dimensions tag. This is set automatically on init based on the values found in the nxdl file. Also the base classes of these entities are considered. 
@@ -862,9 +862,9 @@ class NexusEntity(NexusNode): type: Literal["field", "attribute"] unit: Optional[NexusUnitCategory] = None dtype: NexusType = "NX_CHAR" - items: Optional[List[str]] = None + items: Optional[list[str]] = None open_enum: bool = False - shape: Optional[Tuple[Optional[int], ...]] = None + shape: Optional[tuple[Optional[int], ...]] = None def _check_compatibility_with(self, xml_elem: ET._Element) -> bool: """Check compatibility of this node with an XML element from the (possible) inheritance""" @@ -958,7 +958,7 @@ def _check_dimensions_fit(xml_elem: ET._Element) -> bool: return True elem_dim = elem_dimensions.findall("nx:dim", namespaces=namespaces) elem_dimension_rank = rank if rank is not None else len(rank) - dims: List[Optional[int]] = [None] * int(rank) + dims: list[Optional[int]] = [None] * int(rank) for dim in elem_dim: idx = int(dim.attrib["index"]) @@ -1074,7 +1074,7 @@ def _set_shape(self): return xml_dim = dimension.findall("nx:dim", namespaces=namespaces) rank = rank if rank is not None else len(xml_dim) - dims: List[Optional[int]] = [None] * int(rank) + dims: list[Optional[int]] = [None] * int(rank) for dim in xml_dim: idx = int(dim.attrib["index"]) if "value" not in dim.attrib: diff --git a/src/pynxtools/dataconverter/readers/base/reader.py b/src/pynxtools/dataconverter/readers/base/reader.py index 2755dc31a..f91f38bfc 100644 --- a/src/pynxtools/dataconverter/readers/base/reader.py +++ b/src/pynxtools/dataconverter/readers/base/reader.py @@ -18,7 +18,7 @@ """The abstract class off of which to implement readers.""" from abc import ABC, abstractmethod -from typing import Any, Tuple +from typing import Any class BaseReader(ABC): @@ -44,8 +44,8 @@ class BaseReader(ABC): def read( self, template: dict = None, - file_paths: Tuple[str] = None, - objects: Tuple[Any] = None, + file_paths: tuple[str] = None, + objects: tuple[Any] = None, ) -> dict: """Reads data from given file and returns a filled template dictionary""" return template diff --git a/src/pynxtools/dataconverter/readers/example/reader.py b/src/pynxtools/dataconverter/readers/example/reader.py index 71bd91ac7..7601f6aa7 100644 --- a/src/pynxtools/dataconverter/readers/example/reader.py +++ b/src/pynxtools/dataconverter/readers/example/reader.py @@ -19,7 +19,7 @@ import json import os -from typing import Any, Tuple +from typing import Any import numpy as np @@ -37,8 +37,8 @@ class ExampleReader(BaseReader): def read( self, template: dict = None, - file_paths: Tuple[str] = None, - objects: Tuple[Any] = None, + file_paths: tuple[str] = None, + objects: tuple[Any] = None, **_, ) -> dict: """Reads data from given file and returns a filled template dictionary""" diff --git a/src/pynxtools/dataconverter/readers/json_map/reader.py b/src/pynxtools/dataconverter/readers/json_map/reader.py index 6fc5a1397..948d1f43c 100644 --- a/src/pynxtools/dataconverter/readers/json_map/reader.py +++ b/src/pynxtools/dataconverter/readers/json_map/reader.py @@ -19,7 +19,7 @@ import json import pickle -from typing import Any, Tuple +from typing import Any import numpy as np import xarray @@ -166,8 +166,8 @@ class JsonMapReader(BaseReader): def read( self, template: dict = None, - file_paths: Tuple[str] = None, - objects: Tuple[Any] = None, + file_paths: tuple[str] = None, + objects: tuple[Any] = None, ) -> dict: """ Reads data from given file and returns a filled template dictionary. 
diff --git a/src/pynxtools/dataconverter/readers/json_yml/reader.py b/src/pynxtools/dataconverter/readers/json_yml/reader.py index a77d9601d..7b14469cd 100644 --- a/src/pynxtools/dataconverter/readers/json_yml/reader.py +++ b/src/pynxtools/dataconverter/readers/json_yml/reader.py @@ -18,7 +18,7 @@ """An example reader implementation for the DataConverter.""" import os -from typing import Any, Callable, Dict, List, Tuple +from typing import Any, Callable from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.template import Template @@ -30,15 +30,15 @@ class YamlJsonReader(BaseReader): # pylint: disable=too-few-public-methods # Whitelist for the NXDLs that the reader supports and can process - supported_nxdls: List[str] = [] - extensions: Dict[str, Callable[[Any], dict]] = {} - kwargs: Dict[str, Any] = None + supported_nxdls: list[str] = [] + extensions: dict[str, Callable[[Any], dict]] = {} + kwargs: dict[str, Any] = None def read( self, template: dict = None, - file_paths: Tuple[str] = None, - objects: Tuple[Any] = None, + file_paths: tuple[str] = None, + objects: tuple[Any] = None, **kwargs, ) -> dict: """ diff --git a/src/pynxtools/dataconverter/readers/multi/reader.py b/src/pynxtools/dataconverter/readers/multi/reader.py index 4a11f5db7..dfede666c 100644 --- a/src/pynxtools/dataconverter/readers/multi/reader.py +++ b/src/pynxtools/dataconverter/readers/multi/reader.py @@ -21,7 +21,7 @@ import logging import os import re -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Optional, Union from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.readers.utils import ( @@ -85,15 +85,15 @@ class ParseJsonCallbacks: The callback to retrieve links under the specified key. eln_callback (Callable[[str], Any]): The callback to retrieve eln values under the specified key. - dims (List[str]): + dims (list[str]): The dimension labels of the data. Defaults to None. entry_name (str): The current entry name to use. """ - special_key_map: Dict[str, Callable[[str, str], Any]] + special_key_map: dict[str, Callable[[str, str], Any]] entry_name: str - dims: Callable[[str, str], List[str]] + dims: Callable[[str, str], list[str]] def __init__( self, @@ -101,7 +101,7 @@ def __init__( data_callback: Optional[Callable[[str, str], Any]] = None, link_callback: Optional[Callable[[str, str], Any]] = None, eln_callback: Optional[Callable[[str, str], Any]] = None, - dims: Optional[Callable[[str, str], List[str]]] = None, + dims: Optional[Callable[[str, str], list[str]]] = None, entry_name: str = "entry", ): self.special_key_map = { @@ -114,7 +114,7 @@ def __init__( self.dims = dims if dims is not None else lambda *_, **__: [] self.entry_name = entry_name - def link_callback(self, key: str, value: str) -> Dict[str, Any]: + def link_callback(self, key: str, value: str) -> dict[str, Any]: """ Modify links to dictionaries with the correct entry name. 
""" @@ -136,10 +136,10 @@ def apply_special_key(self, precursor, key, value): def resolve_special_keys( - new_entry_dict: Dict[str, Any], + new_entry_dict: dict[str, Any], key: str, value: Any, - optional_groups_to_remove: List[str], + optional_groups_to_remove: list[str], callbacks: ParseJsonCallbacks, suppress_warning: bool = False, ) -> None: @@ -165,7 +165,7 @@ def try_convert(value: str) -> Union[str, float, int, bool]: return value - def parse_config_value(value: str) -> Tuple[str, Any]: + def parse_config_value(value: str) -> tuple[str, Any]: """ Separates the prefixes (denoted by "@") from the rest of the value. @@ -177,7 +177,7 @@ def parse_config_value(value: str) -> Tuple[str, Any]: Returns ------- - Tuple[str, Any] + tuple[str, Any] Tuple like (prefix, path). """ @@ -193,7 +193,7 @@ def parse_config_value(value: str) -> Tuple[str, Any]: new_entry_dict[key] = value return - prefixes: List[Tuple[str, str]] = [] + prefixes: list[tuple[str, str]] = [] try: # Safely evaluate the string to a list @@ -237,8 +237,8 @@ def parse_config_value(value: str) -> Tuple[str, Any]: def fill_from_config( - config_dict: Dict[str, Any], - entry_names: List[str], + config_dict: dict[str, Any], + entry_names: list[str], callbacks: Optional[ParseJsonCallbacks] = None, suppress_warning: bool = False, ) -> dict: @@ -256,7 +256,7 @@ def has_missing_main(key: str) -> bool: return True return False - def dict_sort_key(keyval: Tuple[str, Any]) -> bool: + def dict_sort_key(keyval: tuple[str, Any]) -> bool: """ The function to sort the dict by. This just sets False for keys starting with "!" to put them at the beginning. @@ -271,7 +271,7 @@ def dict_sort_key(keyval: Tuple[str, Any]) -> bool: # Use default callbacks if none are explicitly provided callbacks = ParseJsonCallbacks() - optional_groups_to_remove: List[str] = [] + optional_groups_to_remove: list[str] = [] new_entry_dict = {} for entry_name in entry_names: callbacks.entry_name = entry_name @@ -319,13 +319,13 @@ class MultiFormatReader(BaseReader): """ # Whitelist for the NXDLs that the reader supports and can process - supported_nxdls: List[str] = [] - extensions: Dict[str, Callable[[Any], dict]] = {} - kwargs: Optional[Dict[str, Any]] = None + supported_nxdls: list[str] = [] + extensions: dict[str, Callable[[Any], dict]] = {} + kwargs: Optional[dict[str, Any]] = None overwrite_keys: bool = True - processing_order: Optional[List[str]] = None + processing_order: Optional[list[str]] = None config_file: Optional[str] = None - config_dict: Dict[str, Any] + config_dict: dict[str, Any] def __init__(self, config_file: Optional[str] = None): self.callbacks = ParseJsonCallbacks( @@ -337,7 +337,7 @@ def __init__(self, config_file: Optional[str] = None): self.config_file = config_file self.config_dict = {} - def setup_template(self) -> Dict[str, Any]: + def setup_template(self) -> dict[str, Any]: """ Setups the initial data in the template. This may be used to set fixed information, e.g., about the reader. @@ -345,7 +345,7 @@ def setup_template(self) -> Dict[str, Any]: return {} # pylint: disable=unused-argument - def handle_objects(self, objects: Tuple[Any]) -> Dict[str, Any]: + def handle_objects(self, objects: tuple[Any]) -> dict[str, Any]: """ Handles the objects passed into the reader. """ @@ -372,13 +372,13 @@ def get_eln_data(self, key: str, path: str) -> Any: """ return {} - def get_data_dims(self, key: str, path: str) -> List[str]: + def get_data_dims(self, key: str, path: str) -> list[str]: """ Returns the dimensions of the data from the given path. 
""" return [] - def get_entry_names(self) -> List[str]: + def get_entry_names(self) -> list[str]: """ Returns a list of entry names which should be constructed from the data. Defaults to creating a single entry named "entry". @@ -393,8 +393,8 @@ def post_process(self) -> None: def read( self, template: dict = None, - file_paths: Tuple[str] = None, - objects: Optional[Tuple[Any]] = None, + file_paths: tuple[str] = None, + objects: Optional[tuple[Any]] = None, **kwargs, ) -> dict: """ @@ -407,7 +407,7 @@ def read( template = Template(overwrite_keys=self.overwrite_keys) - def get_processing_order(path: str) -> Tuple[int, Union[str, int]]: + def get_processing_order(path: str) -> tuple[int, Union[str, int]]: """ Returns the processing order of the file. """ diff --git a/src/pynxtools/dataconverter/readers/utils.py b/src/pynxtools/dataconverter/readers/utils.py index e87e63b68..4905e257c 100644 --- a/src/pynxtools/dataconverter/readers/utils.py +++ b/src/pynxtools/dataconverter/readers/utils.py @@ -23,7 +23,7 @@ from collections.abc import Mapping from dataclasses import dataclass, replace from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Optional, Union import yaml @@ -81,14 +81,14 @@ def is_value_unit_pair(val: Any) -> bool: return False -def uniquify_keys(ldic: list) -> List[Any]: +def uniquify_keys(ldic: list) -> list[Any]: """Uniquifys keys in a list of tuple lists containing key value pairs. Args: ldic (list): List of lists of length two, containing key value pairs. Returns: - List[Any]: Uniquified list, where duplicate keys are appended with 1, 2, etc. + list[Any]: Uniquified list, where duplicate keys are appended with 1, 2, etc. """ dic: dict = {} for key, val in ldic: @@ -107,7 +107,7 @@ def uniquify_keys(ldic: list) -> List[Any]: return list(map(list, dic.items())) -def parse_section(key: str, val: Any, settings: FlattenSettings) -> List[Any]: +def parse_section(key: str, val: Any, settings: FlattenSettings) -> list[Any]: """Parse a section, i.e. an entry containing a list of entries. Args: @@ -116,12 +116,12 @@ def parse_section(key: str, val: Any, settings: FlattenSettings) -> List[Any]: settings (FlattenSettings): The flattening settings. Returns: - List[Any]: A list of list tuples containing key, value pairs. + list[Any]: A list of list tuples containing key, value pairs. """ if not is_section(val): return [(key, val)] - groups: List[Any] = [] + groups: list[Any] = [] for group in val: groups.extend( flatten_and_replace( @@ -142,7 +142,7 @@ def flatten_and_replace(settings: FlattenSettings) -> dict: Returns: dict: Flattened dictionary """ - items: List[Any] = [] + items: list[Any] = [] for key, val in settings.dic.items(): if settings.ignore_keys and key in settings.ignore_keys: continue @@ -175,14 +175,14 @@ def parse_yml( convert_dict: Optional[dict] = None, replace_nested: Optional[dict] = None, parent_key: str = "/ENTRY[entry]", -) -> Dict[str, Any]: +) -> dict[str, Any]: """Parses a metadata yaml file into a dictionary. Args: file_path (str): The file path of the yml file. Returns: - Dict[str, Any]: The dictionary containing the data readout from the yml. + dict[str, Any]: The dictionary containing the data readout from the yml. 
""" if convert_dict is None: convert_dict = {} @@ -207,17 +207,17 @@ def parse_yml( def flatten_json( - json_data: Dict[str, Any], + json_data: dict[str, Any], base_key: Optional[str] = None, replacement_key: Optional[str] = None, dont_flatten_link_dict: bool = False, create_link_dict: bool = True, -) -> Dict[str, Any]: +) -> dict[str, Any]: """ Flattens a json dict into a flat dictionary of absolute paths. Args: - json_data (Dict[str, Any]): The dictionary read from the json file. + json_data (dict[str, Any]): The dictionary read from the json file. base_key (Optional[str], optional): A base key to prefix to all keys. Defaults to None. @@ -232,7 +232,7 @@ def flatten_json( Defaults to True. Returns: - Dict[str, Any]: The flattened dict + dict[str, Any]: The flattened dict """ if ( dont_flatten_link_dict @@ -288,14 +288,14 @@ def update_config(key, value, rkey): return flattened_config -def parse_json(file_path: Union[str, Path]) -> Dict[str, Any]: +def parse_json(file_path: Union[str, Path]) -> dict[str, Any]: """Parses a metadata json file into a dictionary. Args: file_path (str): The file path of the json file. Returns: - Dict[str, Any]: The dictionary containing the data readout from the json. + dict[str, Any]: The dictionary containing the data readout from the json. """ with open(file_path, encoding="utf-8") as file: return json.load(file) @@ -304,7 +304,7 @@ def parse_json(file_path: Union[str, Path]) -> Dict[str, Any]: def parse_flatten_json( file_path: Union[str, Path], create_link_dict: bool = True, -) -> Dict[str, Any]: +) -> dict[str, Any]: """ Parses a metadata json file into a dictionary and flattens it into a flat dictionary of absolute paths. @@ -316,13 +316,13 @@ def parse_flatten_json( Defaults to True. Returns: - Dict[str, Any]: + dict[str, Any]: The flattened dictionary containing the data readout from the json. """ return flatten_json(parse_json(file_path), create_link_dict=create_link_dict) -def handle_objects(objects: Tuple[Any]) -> Dict[str, Any]: +def handle_objects(objects: tuple[Any]) -> dict[str, Any]: """Handle objects and generate template entries from them""" if objects is None: return {} diff --git a/src/pynxtools/dataconverter/template.py b/src/pynxtools/dataconverter/template.py index 89490cbd4..e3e96a305 100644 --- a/src/pynxtools/dataconverter/template.py +++ b/src/pynxtools/dataconverter/template.py @@ -21,7 +21,6 @@ import json import logging import re -from typing import Set from pynxtools.dataconverter import helpers @@ -201,12 +200,12 @@ def rename_entry(self, old_name: str, new_name: str, deepcopy=True): internal_dict[f"/ENTRY[{new_name}]{rest_of_path}"] = value del internal_dict[key] - def get_all_entry_names(self) -> Set[str]: + def get_all_entry_names(self) -> set[str]: """ Get all entry names in the template. Returns: - Set[str]: A set of entry names. + set[str]: A set of entry names. 
""" entry_names = set() for key in self: diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 3e02c97c7..763e4555f 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -19,20 +19,10 @@ import copy import re from collections import defaultdict +from collections.abc import Iterable, Mapping, MutableMapping from functools import reduce from operator import getitem -from typing import ( - Any, - Dict, - Iterable, - List, - Literal, - Mapping, - MutableMapping, - Optional, - Tuple, - Union, -) +from typing import Any, Literal, Optional, Union import h5py import lxml.etree as ET @@ -121,7 +111,7 @@ def default_to_regular_dict(d): return default_to_regular_dict(data_tree) -def split_class_and_name_of(name: str) -> Tuple[Optional[str], str]: +def split_class_and_name_of(name: str) -> tuple[Optional[str], str]: """ Return the class and the name of a data dict entry of the form `split_class_and_name_of("ENTRY[entry]")`, which will return `("ENTRY", "entry")`. @@ -132,7 +122,7 @@ def split_class_and_name_of(name: str) -> Tuple[Optional[str], str]: name (str): The data dict entry Returns: - Tuple[Optional[str], str]: + tuple[Optional[str], str]: First element is the class name of the entry, second element is the name. The class name will be None if it is not present. """ @@ -149,7 +139,7 @@ def split_class_and_name_of(name: str) -> Tuple[Optional[str], str]: def best_namefit_of( name: str, nodes: Iterable[NexusNode], - expected_types: List[str], + expected_types: list[str], check_types: bool = False, ) -> Optional[NexusNode]: """ @@ -247,7 +237,7 @@ def validate_dict_against( bool: True if the mapping is valid according to `appdef`, False otherwise. """ - def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]: + def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> list[str]: variations = [] prefix = f"{'@' if node.type == 'attribute' else ''}" @@ -840,7 +830,7 @@ def recurse_tree( node: NexusNode, keys: Mapping[str, Any], prev_path: str = "", - ignore_names: Optional[List[str]] = None, + ignore_names: Optional[list[str]] = None, ): for child in node.children: if ignore_names is not None and child.name in ignore_names: @@ -877,7 +867,7 @@ def find_instance_name_conflicts(mapping: MutableMapping[str, str]) -> None: The mapping containing the data to validate. This should be a dict of `/` separated paths, such as "/ENTRY[entry1]/SAMPLE[sample1]/name". - keys_to_remove (List[str]): + keys_to_remove (list[str]): List of keys that will be removed from the template. This is extended here in the case of conflicts. @@ -885,7 +875,7 @@ def find_instance_name_conflicts(mapping: MutableMapping[str, str]) -> None: pattern = re.compile(r"(?P[^\[\]/]+)\[(?P[^\]]+)\]") # Tracks instance usage with respect to their parent group - instance_usage: Dict[Tuple[str, str], List[Tuple[str, str]]] = defaultdict(list) + instance_usage: dict[tuple[str, str], list[tuple[str, str]]] = defaultdict(list) for key in mapping: matches = list(pattern.finditer(key)) @@ -1098,7 +1088,7 @@ def check_type_with_tree( def startswith_with_variations( large_str: str, baseline_str: str - ) -> Tuple[bool, int]: + ) -> tuple[bool, int]: """ Recursively check if the large_str starts with baseline_str or an allowed equivalent (i.e. .../AXISNAME[energy]/... matches .../energy/...). 
@@ -1295,7 +1285,7 @@ def check_reserved_prefix( "choice": handle_choice, } - keys_to_remove: List[str] = [] + keys_to_remove: list[str] = [] tree = generate_tree_from(appdef) collector.clear() diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index c548dfd22..feada224f 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -20,7 +20,7 @@ import logging import re from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional +from typing import Any, Optional import yaml @@ -34,10 +34,10 @@ logger = logging.getLogger("pynxtools") -NODES_TO_SKIP: List[str] = ["definition"] +NODES_TO_SKIP: list[str] = ["definition"] -def clean_filters(filter_list: Optional[List[str]]) -> Optional[List[str]]: +def clean_filters(filter_list: Optional[list[str]]) -> Optional[list[str]]: """ Clean list of filters by converting keys from data converter style path" to NXDL style path: @@ -48,7 +48,7 @@ def clean_filters(filter_list: Optional[List[str]]) -> Optional[List[str]]: return [convert_data_converter_dict_to_nxdl_path(key) for key in filter_list] -def _should_skip_iteration(node: NexusNode, filter_list: Optional[List[str]]) -> bool: +def _should_skip_iteration(node: NexusNode, filter_list: Optional[list[str]]) -> bool: """Filter those nodes that are _not_ in filter_list. Parameters @@ -70,7 +70,7 @@ def __init__( output_file: Optional[str] = None, skip_top_levels: int = 0, optionality: Optional[str] = "required", - filter: Optional[List[str]] = None, + filter: Optional[list[str]] = None, ) -> None: self.nxdl = nxdl self.output_file = output_file @@ -79,7 +79,7 @@ def __init__( self.filter = clean_filters(filter) self.out_file = self._generate_output_file_name(output_file) - self.recursive_dict: Dict[str, Any] = {} + self.recursive_dict: dict[str, Any] = {} if self.skip_top_levels == 1: logger.warning( @@ -101,7 +101,7 @@ def _generate_output_file_name(self, output_file: str): """ return "" - def _generate_eln_header(self) -> Dict: + def _generate_eln_header(self) -> dict: """ Generate a header for YAML ELN. @@ -115,7 +115,7 @@ def _generate_eln_header(self) -> Dict: def _construct_group_structure( self, node: NexusGroup, - recursive_dict: Dict, + recursive_dict: dict, recursion_level: int, ) -> bool: """ @@ -142,7 +142,7 @@ def _construct_group_structure( @abstractmethod def _construct_entity_structure( - self, node: NexusEntity, recursive_dict: Dict, recursion_level: int + self, node: NexusEntity, recursive_dict: dict, recursion_level: int ) -> bool: """Handle NeXus field or attribute. @@ -161,7 +161,7 @@ def _construct_entity_structure( return True def _recurse_tree( - self, node: NexusNode, recursive_dict: Dict, recursion_level: int + self, node: NexusNode, recursive_dict: dict, recursion_level: int ) -> None: """Recurse the NeXus node and add the parsed elements to the recursive dict. @@ -169,14 +169,14 @@ def _recurse_tree( ---------- node : NexusNode NeXus node to recurse. - recursive_dict : Dict + recursive_dict : dict A dict that store hierarchical structure of schema ELN. 
recursion_level: int Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ def _handle_unknown_type( - node: NexusNode, section_dict: Dict, recursion_level: int + node: NexusNode, section_dict: dict, recursion_level: int ): # This should normally not happen if # the handling map includes all types allowed in NexusNode.type diff --git a/src/pynxtools/eln_mapper/reader_eln.py b/src/pynxtools/eln_mapper/reader_eln.py index 36206c051..2a02ae146 100644 --- a/src/pynxtools/eln_mapper/reader_eln.py +++ b/src/pynxtools/eln_mapper/reader_eln.py @@ -19,7 +19,6 @@ # import re -from typing import Dict, List from pynxtools.dataconverter.nexus_tree import NexusEntity, NexusGroup, NexusNode from pynxtools.eln_mapper.eln import ElnGenerator @@ -66,7 +65,7 @@ def _generate_output_file_name(self, output_file: str): return out_file def _construct_group_structure( - self, node: NexusGroup, recursive_dict: Dict, recursion_level: int + self, node: NexusGroup, recursive_dict: dict, recursion_level: int ) -> None: """Handle NeXus group, to construct group structure as follows: : @@ -79,8 +78,8 @@ def _construct_group_structure( ---------- node: NexusGroup NeXus group to recurse - recursive_dict : Dict - Dict into which the group is recursively added + recursive_dict : dict + dict into which the group is recursively added recursion_level: int Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ @@ -105,7 +104,7 @@ def _construct_group_structure( self._recurse_tree(node, recursive_dict[group_name], recursion_level + 1) def _construct_entity_structure( - self, node: NexusEntity, recursive_dict: Dict, recursion_level: int + self, node: NexusEntity, recursive_dict: dict, recursion_level: int ): """Handle NeXus field or attribute, to construct structure like: : @@ -121,8 +120,8 @@ def _construct_entity_structure( ---------- node: NexusEntity NeXus field/attribute to recurse - recursive_dict : Dict - Dict into which the entity is recursively added + recursive_dict : dict + dict into which the entity is recursively added recursion_level: int Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index d34218ba5..c2295e040 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -18,12 +18,12 @@ # import re -from typing import Dict, List, Tuple, Union +from typing import Union from pynxtools.dataconverter.nexus_tree import NexusEntity, NexusGroup, NexusNode from pynxtools.eln_mapper.eln import ElnGenerator -NEXUS_TO_NOMAD_QUANTITY: Dict[str, Tuple[str, str]] = { +NEXUS_TO_NOMAD_QUANTITY: dict[str, tuple[str, str]] = { "NX_BINARY": ("bytes", "NumberEditQuantity"), "NX_BOOLEAN": ("bool", "BoolEditQuantity"), "NX_CHAR": ("str", "StringEditQuantity"), @@ -37,7 +37,7 @@ "NX_UINT": ("int", "NumberEditQuantity"), } -DEFAULT_UNITS: Dict[str, Union[str, None]] = { +DEFAULT_UNITS: dict[str, Union[str, None]] = { "NX_ANGLE": "degree", "NX_ANY": None, "NX_AREA": "m**2", @@ -73,7 +73,7 @@ "NX_WAVENUMBER": "1 / m", } -DEFAULT_READER: Dict[str, str] = { +DEFAULT_READER: dict[str, str] = { "NXafm": "spm", "NXapm": "apm", "NXellipsometry": "ellips", @@ -88,7 +88,7 @@ } -def construct_description(node: NexusNode, concept_dict: Dict) -> None: +def construct_description(node: NexusNode, concept_dict: dict) -> None: """Collect doc from concept doc (and inherited docs).""" inherited_docstrings = node.get_docstring() @@ 
-139,7 +139,7 @@ def _generate_output_file_name(self, output_file: str): return out_file - def _generate_eln_header(self) -> Dict: + def _generate_eln_header(self) -> dict: """Generate the header for the NOMAD ELN""" # Basic building blocks of ELN @@ -164,7 +164,7 @@ def _generate_eln_header(self) -> Dict: reader = DEFAULT_READER.get(self.nxdl, "") - m_annotations: Dict = { + m_annotations: dict = { "m_annotations": { "template": {"reader": reader, "nxdl": self.nxdl}, "eln": {"hide": []}, @@ -175,7 +175,7 @@ def _generate_eln_header(self) -> Dict: return sections[root_name] def _construct_group_structure( - self, node: NexusGroup, recursive_dict: Dict, recursion_level: int + self, node: NexusGroup, recursive_dict: dict, recursion_level: int ) -> None: """Handle NeXus group, to construct group structure as follows: : @@ -188,8 +188,8 @@ def _construct_group_structure( ---------- node: NexusGroup NeXus group to recurse - recursive_dict : Dict - Dict into which the group is recursively added + recursive_dict : dict + dict into which the group is recursively added recursion_level: int Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ @@ -226,7 +226,7 @@ def _construct_group_structure( self._recurse_tree(node, section, recursion_level + 1) def _construct_entity_structure( - self, node: NexusEntity, recursive_dict: Dict, recursion_level: int + self, node: NexusEntity, recursive_dict: dict, recursion_level: int ): """Handle NeXus field or attribute, to construct structure like: : @@ -242,8 +242,8 @@ def _construct_entity_structure( ---------- node: NexusEntity NeXus field/attribute to recurse - recursive_dict : Dict - Dict into which the entity is recursively added + recursive_dict : dict + dict into which the entity is recursively added recursion_level: int Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ @@ -278,7 +278,7 @@ def _construct_entity_structure( entity_dict["type"] = entity_type - display_dict: Dict[str, Union[bool, str]] = {"visible": True} + display_dict: dict[str, Union[bool, str]] = {"visible": True} if unit: entity_dict["unit"] = unit display_dict["unit"] = unit diff --git a/src/pynxtools/nexus/nexus.py b/src/pynxtools/nexus/nexus.py index 3679be299..19d7186d7 100644 --- a/src/pynxtools/nexus/nexus.py +++ b/src/pynxtools/nexus/nexus.py @@ -4,8 +4,8 @@ import logging import os import sys -from functools import lru_cache -from typing import Any, List, Optional, Union +from functools import cache, lru_cache +from typing import Any, Optional, Union import click import h5py @@ -370,7 +370,7 @@ def get_hdf_path(hdf_info): # pylint: disable=too-many-arguments,too-many-locals -@lru_cache(maxsize=None) +@cache def get_inherited_hdf_nodes( nx_name: str = None, elem: ET._Element = None, diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 7badeecfb..851526662 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -16,7 +16,7 @@ # limitations under the License. # -from typing import Dict, Optional, Set +from typing import Optional import lxml.etree as ET import numpy as np @@ -437,13 +437,13 @@ def get_sub_elements(self, elem: MSection, type_filter: str = None): filtered.append(individual) return filtered - def _get_chemical_formulas(self) -> Set[str]: + def _get_chemical_formulas(self) -> set[str]: """ Parses the descriptive chemical formula from a nexus entry. 
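+
+        Illustrative return value (hypothetical data): entries whose
+        samples declare the formulas "H2O" and "CO2" would yield
+        {"H2O", "CO2"}.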
""" material = self.archive.m_setdefault("results.material") - element_set: Set[str] = set() - chemical_formulas: Set[str] = set() + element_set: set[str] = set() + chemical_formulas: set[str] = set() # DEBUG added here 'sample' only to test that I think the root cause # of the bug is that when the appdef defines at the level of the HDF5 @@ -526,7 +526,7 @@ def parse( mainfile: str, archive: EntryArchive, logger=None, - child_archives: Dict[str, EntryArchive] = None, + child_archives: dict[str, EntryArchive] = None, ) -> None: if DEBUG_PYNXTOOLS_WITH_NOMAD: import debugpy # will connect to debugger if in debug mode diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 81220544d..999273e0f 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -26,7 +26,7 @@ # noinspection PyPep8Naming import xml.etree.ElementTree as ET -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Optional, Union import h5py import numpy as np @@ -107,7 +107,7 @@ # TO DO the validation still show some problems. Most notably there are a few higher # dimensional fields with non number types, which the metainfo does not support -section_definitions: Dict[str, Section] = dict() +section_definitions: dict[str, Section] = dict() logger_ = get_logger(__name__) @@ -301,7 +301,7 @@ def normalize(self, archive, logger): act_array.extend(new_items) -BASESECTIONS_MAP: Dict[str, Any] = { +BASESECTIONS_MAP: dict[str, Any] = { "NXfabrication": [basesections.Instrument], "NXsample": [CompositeSystem], "NXsample_component": [Component], @@ -313,7 +313,7 @@ def normalize(self, archive, logger): VALIDATE = False -XML_PARENT_MAP: Dict[ET.Element, ET.Element] +XML_PARENT_MAP: dict[ET.Element, ET.Element] PACKAGE_NAME = "pynxtools.nomad.schema" @@ -481,8 +481,8 @@ def nxdata_ensure_definition( if isinstance(def_or_name, str): # check enums for or actual values of signals and axes # TODO: also check symbol table dimensions - acceptable_data: List[str] = [] - acceptable_axes: List[str] = [] + acceptable_data: list[str] = [] + acceptable_axes: list[str] = [] # set filter string according chk_name = def_or_name.split("_errors")[0] if chk_name in acceptable_data: @@ -528,7 +528,7 @@ class nexus definition. return section -def _get_enumeration(xml_node: ET.Element) -> Tuple[Optional[MEnum], Optional[bool]]: +def _get_enumeration(xml_node: ET.Element) -> tuple[Optional[MEnum], Optional[bool]]: """ Get the enumeration field from xml node """ @@ -608,7 +608,7 @@ def _create_attributes( nx_enum, nx_enum_open = _get_enumeration(attribute) if nx_enum and not nx_enum_open: nx_type = nx_enum - nx_shape: List[str] = [] + nx_shape: list[str] = [] else: nx_type = get_nx_type(attribute.get("type", "NX_CHAR")) # type: ignore has_bound = False @@ -1417,7 +1417,7 @@ def build_nomad_topology(archive): build_nomad_topology(archive) -NORMALIZER_MAP: Dict[str, Any] = { +NORMALIZER_MAP: dict[str, Any] = { _rename_nx_for_nomad("NXfabrication"): normalize_fabrication, _rename_nx_for_nomad("NXsample"): normalize_sample, _rename_nx_for_nomad("NXsample_component"): normalize_sample_component, diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index bbfceddaf..ba45c4dbf 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -16,7 +16,7 @@ # limitations under the License. 
# -from typing import Dict, Optional +from typing import Optional import numpy as np @@ -55,7 +55,7 @@ "NX_CHAR_OR_NUMBER": m_float64, # TODO: fix this mapping } -FIELD_STATISTICS: Dict[str, list] = { +FIELD_STATISTICS: dict[str, list] = { "suffix": ["__mean", "__std", "__min", "__max", "__size", "__ndim"], "function": [np.mean, np.std, np.min, np.max, np.size, np.ndim], "type": [np.float64, np.float64, None, None, np.int32, np.int32], diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index d1509f975..3fc02c721 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -20,7 +20,7 @@ import logging import os from glob import glob -from typing import Dict, List, Literal, Optional, Tuple +from typing import Literal, Optional try: from nomad.client import parse @@ -78,7 +78,7 @@ def __init__( Pytest fixture variable, used to clean up the files generated during the test. caplog : _pytest.logging.LogCaptureFixture Pytest fixture variable, used to capture the log messages during the test. - kwargs : Dict[str, Any] + kwargs : dict[str, Any] Any additional keyword arguments to be passed to the readers' read function. """ @@ -176,15 +176,15 @@ def convert_to_nexus( def check_reproducibility_of_nexus(self, **kwargs): """Reproducibility test for the generated nexus file.""" - reader_ignore_lines: List[str] = kwargs.get("ignore_lines", []) - reader_ignore_sections: Dict[str, List[str]] = kwargs.get("ignore_sections", {}) + reader_ignore_lines: list[str] = kwargs.get("ignore_lines", []) + reader_ignore_sections: dict[str, list[str]] = kwargs.get("ignore_sections", {}) - IGNORE_LINES: List[str] = reader_ignore_lines + [ + IGNORE_LINES: list[str] = reader_ignore_lines + [ "DEBUG - value: v", "DEBUG - value: https://github.com/FAIRmat-NFDI/nexus_definitions/blob/", "DEBUG - ===== GROUP (// [NXroot::]):", ] - IGNORE_SECTIONS: Dict[str, List[str]] = { + IGNORE_SECTIONS: dict[str, list[str]] = { **reader_ignore_sections, "ATTRS (//@HDF5_Version)": ["DEBUG - value:"], "ATTRS (//@file_name)": ["DEBUG - value:"], @@ -195,7 +195,7 @@ def check_reproducibility_of_nexus(self, **kwargs): SECTION_SEPARATOR = "DEBUG - ===== " - def should_skip_line(gen_l: str, ref_l: str, ignore_lines: List[str]) -> bool: + def should_skip_line(gen_l: str, ref_l: str, ignore_lines: list[str]) -> bool: """Check if both lines start with any ignored prefix.""" return any( gen_l.startswith(ignore) and ref_l.startswith(ignore) @@ -204,14 +204,15 @@ def should_skip_line(gen_l: str, ref_l: str, ignore_lines: List[str]) -> bool: def load_logs( gen_log_path: str, ref_log_path: str - ) -> Tuple[List[str], List[str]]: + ) -> tuple[list[str], list[str]]: """Load log files and return their contents as lists of lines.""" - with open(gen_log_path, encoding="utf-8") as gen, open( - ref_log_path, encoding="utf-8" - ) as ref: + with ( + open(gen_log_path, encoding="utf-8") as gen, + open(ref_log_path, encoding="utf-8") as ref, + ): return gen.readlines(), ref.readlines() - def compare_logs(gen_lines: List[str], ref_lines: List[str]) -> None: + def compare_logs(gen_lines: list[str], ref_lines: list[str]) -> None: """Compare log lines, ignoring specific differences.""" if len(gen_lines) != len(ref_lines): raise AssertionError( diff --git a/src/pynxtools/testing/nomad_example.py b/src/pynxtools/testing/nomad_example.py index bdc01e0d5..a9fa58b5a 100644 --- a/src/pynxtools/testing/nomad_example.py +++ b/src/pynxtools/testing/nomad_example.py @@ -19,7 +19,7 @@ 
import os import tempfile -from typing import Any, Dict, List +from typing import Any import pytest @@ -68,14 +68,14 @@ def get_file_parameter(example_path: str): yield pytest.param(os.path.join(root, file), id=file) -def parse_nomad_examples(mainfile: str) -> Dict[str, Any]: +def parse_nomad_examples(mainfile: str) -> dict[str, Any]: """Parse a NOMAD example file and return its dictionary representation. Args: mainfile (str): The path to the NOMAD example file to be parsed. Returns: - Dict[str, Any]: A dictionary representation of the parsed NOMAD example. + dict[str, Any]: A dictionary representation of the parsed NOMAD example. Raises: FileNotFoundError: If the mainfile does not exist. diff --git a/tests/dataconverter/test_nexus_tree.py b/tests/dataconverter/test_nexus_tree.py index 6af67b7ad..67797c46c 100644 --- a/tests/dataconverter/test_nexus_tree.py +++ b/tests/dataconverter/test_nexus_tree.py @@ -1,4 +1,4 @@ -from typing import Any, List, Tuple, get_args +from typing import Any, get_args from anytree import Resolver @@ -40,7 +40,7 @@ def test_correct_extension_of_tree(): nxtest = generate_tree_from("NXtest") nxtest_extended = generate_tree_from("NXtest_extended") - def get_node_fields(tree: NexusNode) -> List[Tuple[str, Any]]: + def get_node_fields(tree: NexusNode) -> list[tuple[str, Any]]: return list( filter( lambda x: not x[0].startswith("_") diff --git a/tests/dataconverter/test_readers.py b/tests/dataconverter/test_readers.py index 91dc9344c..69c4fbfbe 100644 --- a/tests/dataconverter/test_readers.py +++ b/tests/dataconverter/test_readers.py @@ -21,7 +21,6 @@ import logging import os import xml.etree.ElementTree as ET -from typing import List import pytest from _pytest.mark.structures import ParameterSet @@ -43,12 +42,12 @@ def get_reader_name_from_reader_object(reader) -> str: return "" -def get_readers_file_names() -> List[str]: +def get_readers_file_names() -> list[str]: """Helper function to parametrize paths of all the reader Python files""" return sorted(glob.glob("pynxtools/dataconverter/readers/*/reader.py")) -def get_all_readers() -> List[ParameterSet]: +def get_all_readers() -> list[ParameterSet]: """Scans through the reader list and returns them for pytest parametrization""" readers = [] diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index d23a661df..d490fd830 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -18,7 +18,6 @@ # import os -from typing import Dict, List, Tuple import yaml from click import testing @@ -26,19 +25,19 @@ from pynxtools.eln_mapper import eln_mapper -def check_keys_from_two_dict(dict1: Dict, dict2: Dict, path: str = ""): +def check_keys_from_two_dict(dict1: dict, dict2: dict, path: str = ""): """Compare keys of two dicts and report all differences. Parameters ---------- - dict1 : Dict + dict1 : dict First dictionary to compare. - dict2 : Dict + dict2 : dict Second dictionary to compare. path : str, optional Current key path being checked (used for recursive calls). 
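+
+    For example (illustrative): comparing {"a": 1, "c": 3} with
+    {"b": 2, "c": 3} reports "a" (missing in dict2) and "b"
+    (missing in dict1).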
""" - differences: List[Tuple[str, str]] = [] + differences: list[tuple[str, str]] = [] keys1 = set(dict1.keys()) keys2 = set(dict2.keys()) From 1bfa08880d0fb2813759a669ee5d230426bb8e7b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 27 May 2025 23:22:59 +0200 Subject: [PATCH 092/118] add unit registry --- .gitignore | 2 + src/pynxtools/units/__init__.py | 129 ++++++ src/pynxtools/units/constants_en.txt | 72 ++++ src/pynxtools/units/default_en.txt | 597 +++++++++++++++++++++++++++ 4 files changed, 800 insertions(+) create mode 100644 src/pynxtools/units/__init__.py create mode 100644 src/pynxtools/units/constants_en.txt create mode 100644 src/pynxtools/units/default_en.txt diff --git a/.gitignore b/.gitignore index 3ec5e7e5f..ebdaf8b0c 100644 --- a/.gitignore +++ b/.gitignore @@ -203,6 +203,8 @@ cython_debug/ !mkdocs-requirements.txt !src/pynxtools/nexus-version.txt !src/pynxtools/remote_definitions_url.txt +!src/pynxtools/units/constants_en.txt +!src/pynxtools/units/default_en.txt build/ nexusparser.egg-info/PKG-INFO .python-version diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py new file mode 100644 index 000000000..33c36d044 --- /dev/null +++ b/src/pynxtools/units/__init__.py @@ -0,0 +1,129 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""A unit registry for NeXus units""" + +import os +from typing import Optional, Dict, Any +from pint import UnitRegistry +from pint.errors import UndefinedUnitError, DefinitionSyntaxError + +try: + from nomad.units import ureg +except ImportError as exc: + ureg = UnitRegistry(os.path.join(os.path.dirname(__file__), "default_en.txt")) + + +class NXUnitSet: + """ + Maps from `NX_` tokens to dimensionality. 
+
+    The mapping values are interpreted as follows:
+
+    - None -> disables dimensionality check
+    - '1' -> dimensionless quantities
+    - 'transformation' -> specially handled elsewhere
+    """
+
+    mapping: dict[str, Optional[str]] = {
+        "NX_ANGLE": "[angle]",
+        "NX_ANY": None,
+        "NX_AREA": "[area]",
+        "NX_CHARGE": "[charge]",
+        "NX_COUNT": "1",
+        "NX_CROSS_SECTION": "[area]",
+        "NX_CURRENT": "[current]",
+        "NX_DIMENSIONLESS": "1",
+        "NX_EMITTANCE": "[length] * [angle]",
+        "NX_ENERGY": "[energy]",
+        "NX_FLUX": "1 / [time] / [area]",
+        "NX_FREQUENCY": "[frequency]",
+        "NX_LENGTH": "[length]",
+        "NX_MASS": "[mass]",
+        "NX_MASS_DENSITY": "[mass] / [volume]",
+        "NX_MOLECULAR_WEIGHT": "[mass] / [substance]",
+        "NX_PERIOD": "[time]",
+        "NX_PER_AREA": "1 / [area]",
+        "NX_PER_LENGTH": "1 / [length]",
+        "NX_POWER": "[power]",
+        "NX_PRESSURE": "[pressure]",
+        "NX_PULSES": "1",
+        "NX_SCATTERING_LENGTH_DENSITY": "1 / [area]",
+        "NX_SOLID_ANGLE": "[angle] * [angle]",
+        "NX_TEMPERATURE": "[temperature]",
+        "NX_TIME": "[time]",
+        "NX_TIME_OF_FLIGHT": "[time]",
+        "NX_TRANSFORMATION": "transformation",
+        "NX_UNITLESS": "1",
+        "NX_VOLTAGE": "[energy] / [current] / [time]",
+        "NX_VOLUME": "[volume]",
+        "NX_WAVELENGTH": "[length]",
+        "NX_WAVENUMBER": "1 / [length]",
+    }
+
+    _dimensionalities: dict[str, Optional[Any]] = {}
+
+    @staticmethod
+    def normalize(value: str) -> str:
+        """Normalize the given token to 'NX_' prefix form."""
+        value = value.upper()
+        if not value.startswith("NX_"):
+            value = "NX_" + value
+        return value
+
+    @classmethod
+    def is_nx_token(cls, value: str) -> bool:
+        """Check if a given token is one of the known NX tokens."""
+        return cls.normalize(value) in cls.mapping
+
+    @classmethod
+    def get_dimensionality(cls, token: str) -> Optional[Any]:
+        """Get the dimensionality object for a given NX token."""
+        token = cls.normalize(token)
+        if token in cls._dimensionalities:
+            return cls._dimensionalities[token]
+
+        definition = cls.mapping.get(token)
+        if definition is None or definition == "transformation":
+            cls._dimensionalities[token] = None
+        elif definition == "1":
+            cls._dimensionalities[token] = ureg("").dimensionality
+        else:
+            try:
+                cls._dimensionalities[token] = ureg.get_dimensionality(definition)
+            except (UndefinedUnitError, DefinitionSyntaxError):
+                cls._dimensionalities[token] = None
+
+        return cls._dimensionalities[token]
+
+    @classmethod
+    def matches(cls, expected_token: str, actual_unit: str) -> bool:
+        """Check whether the actual unit matches the expected NX token by comparing dimensionalities."""
+        if expected_token in ["NX_ANY", "NX_UNITLESS"]:
+            return True
+
+        expected_dim = cls.get_dimensionality(expected_token)
+        if expected_dim is None:
+            return True
+
+        if str(expected_dim) == "dimensionless" and actual_unit:
+            return False
+
+        try:
+            actual_dim = (1 * ureg(actual_unit)).dimensionality
+        except (UndefinedUnitError, DefinitionSyntaxError):
+            return False
+
+        return actual_dim == expected_dim
diff --git a/src/pynxtools/units/constants_en.txt b/src/pynxtools/units/constants_en.txt
new file mode 100644
index 000000000..19ad0379c
--- /dev/null
+++ b/src/pynxtools/units/constants_en.txt
@@ -0,0 +1,72 @@
+# Default Pint constants definition file
+# Based on the International System of Units
+# Language: english
+# Source: https://physics.nist.gov/cuu/Constants/
+#         https://physics.nist.gov/PhysRefData/XrayTrans/Html/search.html
+# :copyright: 2013,2019 by Pint Authors, see AUTHORS for more details.
+ +#### MATHEMATICAL CONSTANTS #### +# As computed by Maxima with fpprec:50 + +pi = 3.1415926535897932384626433832795028841971693993751 = π # pi +tansec = 4.8481368111333441675396429478852851658848753880815e-6 # tangent of 1 arc-second ~ arc_second/radian +ln10 = 2.3025850929940456840179914546843642076011014886288 # natural logarithm of 10 +wien_x = 4.9651142317442763036987591313228939440555849867973 # solution to (x-5)*exp(x)+5 = 0 => x = W(5/exp(5))+5 +wien_u = 2.8214393721220788934031913302944851953458817440731 # solution to (u-3)*exp(u)+3 = 0 => u = W(3/exp(3))+3 + +#### DEFINED EXACT CONSTANTS #### + +speed_of_light = 299792458 m/s = c = c_0 # since 1983 +planck_constant = 6.62607015e-34 J s = h # since May 2019 +elementary_charge = 1.602176634e-19 C = e # since May 2019 +avogadro_number = 6.02214076e23 # since May 2019 +boltzmann_constant = 1.380649e-23 J K^-1 = k = k_B # since May 2019 +standard_gravity = 9.80665 m/s^2 = g_0 = g0 = g_n = gravity # since 1901 +standard_atmosphere = 1.01325e5 Pa = atm = atmosphere # since 1954 +conventional_josephson_constant = 4.835979e14 Hz / V = K_J90 # since Jan 1990 +conventional_von_klitzing_constant = 2.5812807e4 ohm = R_K90 # since Jan 1990 + +#### DERIVED EXACT CONSTANTS #### +# Floating-point conversion may introduce inaccuracies + +zeta = c / (cm/s) = ζ +dirac_constant = h / (2 * π) = ħ = h_bar = atomic_unit_of_action = a_u_action +avogadro_constant = avogadro_number * mol^-1 = N_A +molar_gas_constant = k * N_A = R +faraday_constant = e * N_A +conductance_quantum = 2 * e ** 2 / h = G_0 +magnetic_flux_quantum = h / (2 * e) = Φ_0 = Phi_0 +josephson_constant = 2 * e / h = K_J +von_klitzing_constant = h / e ** 2 = R_K +stefan_boltzmann_constant = 2 / 15 * π ** 5 * k ** 4 / (h ** 3 * c ** 2) = σ = sigma +first_radiation_constant = 2 * π * h * c ** 2 = c_1 +second_radiation_constant = h * c / k = c_2 +wien_wavelength_displacement_law_constant = h * c / (k * wien_x) +wien_frequency_displacement_law_constant = wien_u * k / h + +#### MEASURED CONSTANTS #### +# Recommended CODATA-2018 values +# To some extent, what is measured and what is derived is a bit arbitrary. +# The choice of measured constants is based on convenience and on available uncertainty. +# The uncertainty in the last significant digits is given in parentheses as a comment. 
+ +newtonian_constant_of_gravitation = 6.67430e-11 m^3/(kg s^2) = _ = gravitational_constant # (15) +rydberg_constant = 1.0973731568160e7 * m^-1 = R_∞ = R_inf # (21) +electron_g_factor = -2.00231930436256 = g_e # (35) +atomic_mass_constant = 1.66053906660e-27 kg = m_u # (50) +electron_mass = 9.1093837015e-31 kg = m_e = atomic_unit_of_mass = a_u_mass # (28) +proton_mass = 1.67262192369e-27 kg = m_p # (51) +neutron_mass = 1.67492749804e-27 kg = m_n # (95) +K_alpha_Cu_d_220 = 0.80232719 # (22) +K_alpha_Mo_d_220 = 0.36940604 # (19) +K_alpha_W_d_220 = 0.108852175 # (98) + +#### DERIVED CONSTANTS #### + +fine_structure_constant = (2 * h * R_inf / (m_e * c)) ** 0.5 = α = alpha +vacuum_permeability = 2 * α * h / (e ** 2 * c) = µ_0 = mu_0 = mu0 = magnetic_constant +vacuum_permittivity = e ** 2 / (2 * α * h * c) = ε_0 = epsilon_0 = eps_0 = eps0 = electric_constant +impedance_of_free_space = 2 * α * h / e ** 2 = Z_0 = characteristic_impedance_of_vacuum +coulomb_constant = α * h_bar * c / e ** 2 = k_C +classical_electron_radius = α * h_bar / (m_e * c) = r_e +thomson_cross_section = 8 / 3 * π * r_e ** 2 = σ_e = sigma_e \ No newline at end of file diff --git a/src/pynxtools/units/default_en.txt b/src/pynxtools/units/default_en.txt new file mode 100644 index 000000000..0c07409b9 --- /dev/null +++ b/src/pynxtools/units/default_en.txt @@ -0,0 +1,597 @@ +# Default Pint units definition file +# Based on the International System of Units +# Language: english +# :copyright: 2013,2019 by Pint Authors, see AUTHORS for more details. + +# Syntax +# ====== +# Units +# ----- +# = [= ] [= ] [ = ] [...] +# +# The canonical name and aliases should be expressed in singular form. +# Pint automatically deals with plurals built by adding 's' to the singular form; plural +# forms that don't follow this rule should be instead explicitly listed as aliases. +# +# If a unit has no symbol and one wants to define aliases, then the symbol should be +# conventionally set to _. +# +# Example: +# millennium = 1e3 * year = _ = millennia +# +# +# Prefixes +# -------- +# - = [= ] [= ] [ = ] [...] +# +# Example: +# deca- = 1e+1 = da- = deka- +# +# +# Derived dimensions +# ------------------ +# [dimension name] = +# +# Example: +# [density] = [mass] / [volume] +# +# Note that primary dimensions don't need to be declared; they can be +# defined for the first time in a unit definition. +# E.g. see below `meter = [length]` +# +# +# Additional aliases +# ------------------ +# @alias = [ = ] [...] +# +# Used to add aliases to already existing unit definitions. +# Particularly useful when one wants to enrich definitions +# from defaults_en.txt with custom aliases. 
+# +# Example: +# @alias meter = my_meter + +# See also: https://pint.readthedocs.io/en/latest/defining.html + +@defaults + group = international + system = mks +@end + + +#### PREFIXES #### + +# decimal prefixes +yocto- = 1e-24 = y- +zepto- = 1e-21 = z- +atto- = 1e-18 = a- +femto- = 1e-15 = f- +pico- = 1e-12 = p- +nano- = 1e-9 = n- +micro- = 1e-6 = µ- = u- +milli- = 1e-3 = m- +centi- = 1e-2 = c- +deci- = 1e-1 = d- +deca- = 1e+1 = da- = deka- +hecto- = 1e2 = h- +kilo- = 1e3 = k- +mega- = 1e6 = M- +giga- = 1e9 = G- +tera- = 1e12 = T- +peta- = 1e15 = P- +exa- = 1e18 = E- +zetta- = 1e21 = Z- +yotta- = 1e24 = Y- + +# binary_prefixes +kibi- = 2**10 = Ki- +mebi- = 2**20 = Mi- +gibi- = 2**30 = Gi- +tebi- = 2**40 = Ti- +pebi- = 2**50 = Pi- +exbi- = 2**60 = Ei- +zebi- = 2**70 = Zi- +yobi- = 2**80 = Yi- + +#### BASE UNITS #### + +meter = [length] = m = metre +second = [time] = s = sec +ampere = [current] = A = amp +candela = [luminosity] = cd = candle +gram = [mass] = g +mole = [substance] = mol +kelvin = [temperature]; offset: 0 = K = degK = °K = degree_Kelvin = degreeK # older names supported for compatibility +radian = [angle] = rad +bit = [information] +pixel = [digital_image_resolution] = px = pel + + +#### CONSTANTS #### + +@import constants_en.txt + + +#### UNITS #### +# Common and less common, grouped by quantity. +# Conversion factors are exact (except when noted), +# although floating-point conversion may introduce inaccuracies + +# Angle +degree = π / 180 * radian = ° = deg = arcdeg = arcdegree = angular_degree +arcminute = degree / 60 = arcmin = arc_minute = angular_minute +arcsecond = arcminute / 60 = arcsec = arc_second = angular_second +milliarcsecond = 1e-3 * arcsecond = mas +grade = π / 200 * radian = grad = gon +mil = π / 32000 * radian + +# Solid angle +steradian = radian ** 2 = sr +square_degree = (π / 180) ** 2 * sr = sq_deg = sqdeg + +# Information +byte = 8 * bit = B = octet +baud = bit / second = Bd = bps + +# Length +angstrom = 1e-10 * meter = Å = ångström = Å +micron = micrometer = µ +fermi = femtometer +atomic_unit_of_length = h_bar / (alpha * m_e * c) = bohr = a_0 = a0 = bohr_radius = a_u_length +planck_length = (h_bar * gravitational_constant / c ** 3) ** 0.5 + +# Mass +metric_ton = 1e3 * kilogram = tonne +unified_atomic_mass_unit = atomic_mass_constant = u = amu +dalton = atomic_mass_constant = Da +grain = 64.79891 * milligram = gr +gamma_mass = microgram +carat = 200 * milligram = ct = karat +planck_mass = (h_bar * c / gravitational_constant) ** 0.5 + +# Time +minute = 60 * second +hour = 60 * minute = hr +atomic_unit_of_time = h_bar / E_h = a_u_time +planck_time = (h_bar * gravitational_constant / c ** 5) ** 0.5 + +# Temperature +degree_Celsius = kelvin; offset: 273.15 = °C = celsius = degC = degreeC +degree_Rankine = 5 / 9 * kelvin; offset: 0 = °R = rankine = degR = degreeR +degree_Fahrenheit = 5 / 9 * kelvin; offset: 233.15 + 200 / 9 = °F = fahrenheit = degF = degreeF +degree_Reaumur = 4 / 5 * kelvin; offset: 273.15 = °Re = reaumur = degRe = degreeRe = degree_Réaumur = réaumur +atomic_unit_of_temperature = E_h / k = a_u_temp +planck_temperature = (h_bar * c ** 5 / gravitational_constant / k ** 2) ** 0.5 + +# Area +[area] = [length] ** 2 +barn = 1e-28 * meter ** 2 = b +darcy = centipoise * centimeter ** 2 / (second * atmosphere) + +# Volume +[volume] = [length] ** 3 +liter = decimeter ** 3 = l = L = litre +lambda = microliter = λ + +# Frequency +[frequency] = 1 / [time] +hertz = 1 / second = Hz +revolutions_per_minute = 1 / minute = rpm +revolutions_per_second = 1 / 
second = rps +counts_per_second = 1 / second = cps + +# Wavenumber +[wavenumber] = 1 / [length] +reciprocal_centimeter = 1 / cm = cm_1 = kayser + +# Speed +[speed] = [length] / [time] +mile_per_hour = mile / hour = mph = MPH +kilometer_per_hour = kilometer / hour = kph = KPH +kilometer_per_second = kilometer / second = kps +meter_per_second = meter / second = mps +foot_per_second = foot / second = fps + +# Acceleration +[acceleration] = [speed] / [time] +galileo = centimeter / second ** 2 = Gal + +# Force +[force] = [mass] * [acceleration] +newton = kilogram * meter / second ** 2 = N +dyne = gram * centimeter / second ** 2 = dyn +force_kilogram = g_0 * kilogram = pond +force_gram = g_0 * gram = gf = gram_force +force_metric_ton = g_0 * metric_ton = tf = metric_ton_force = force_t = t_force +atomic_unit_of_force = E_h / a_0 = a_u_force + +# Energy +[energy] = [force] * [length] +joule = newton * meter = J +erg = dyne * centimeter +watt_hour = watt * hour = Wh = watthour +electron_volt = e * volt = eV +rydberg = h * c * R_inf = Ry +hartree = 2 * rydberg = Ha = E_h = hartree_energy = atomic_unit_of_energy = a_u_energy +calorie = 4.184 * joule = cal = thermochemical_calorie = cal_th +international_calorie = 4.1868 * joule = cal_it = international_steam_table_calorie +fifteen_degree_calorie = 4.1855 * joule = cal_15 + +# Power +[power] = [energy] / [time] +watt = joule / second = W +volt_ampere = volt * ampere = VA +horsepower = 550 * foot * force_pound / second = hp = UK_horsepower = hydraulic_horsepower +metric_horsepower = 75 * force_kilogram * meter / second +electrical_horsepower = 746 * watt +standard_liter_per_minute = atmosphere * liter / minute = slpm = slm +conventional_watt_90 = K_J90 ** 2 * R_K90 / (K_J ** 2 * R_K) * watt = W_90 + +# Momentum +[momentum] = [length] * [mass] / [time] + +# Density (as auxiliary for pressure) +[density] = [mass] / [volume] +mercury = 13.5951 * kilogram / liter = Hg = Hg_0C = Hg_32F = conventional_mercury +water = 1.0 * kilogram / liter = H2O = conventional_water +mercury_60F = 13.5568 * kilogram / liter = Hg_60F # approximate +water_39F = 0.999972 * kilogram / liter = water_4C # approximate +water_60F = 0.999001 * kilogram / liter # approximate + +# Pressure +[pressure] = [force] / [area] +pascal = newton / meter ** 2 = Pa +barye = dyne / centimeter ** 2 = Ba = barie = barad = barrie = baryd +bar = 1e5 * pascal +torr = atm / 760 +pound_force_per_square_inch = force_pound / inch ** 2 = psi +kip_per_square_inch = kip / inch ** 2 = ksi +millimeter_Hg = millimeter * Hg * g_0 = mmHg = mm_Hg = millimeter_Hg_0C +centimeter_Hg = centimeter * Hg * g_0 = cmHg = cm_Hg = centimeter_Hg_0C +inch_Hg = inch * Hg * g_0 = inHg = in_Hg = inch_Hg_32F +inch_Hg_60F = inch * Hg_60F * g_0 +inch_H2O_39F = inch * water_39F * g_0 +inch_H2O_60F = inch * water_60F * g_0 +foot_H2O = foot * water * g_0 = ftH2O = feet_H2O +centimeter_H2O = centimeter * water * g_0 = cmH2O = cm_H2O +atomic_unit_of_pressure = E_h / bohr_radius ** 3 = a_u_pressure + +# Viscosity +[viscosity] = [pressure] * [time] +poise = 0.1 * Pa * second = P +reyn = psi * second + +# Kinematic viscosity +[kinematic_viscosity] = [area] / [time] +stokes = centimeter ** 2 / second = St + +# Fluidity +[fluidity] = 1 / [viscosity] +rhe = 1 / poise + +# Amount of substance +particle = 1 / N_A = _ = molec = molecule + +# Concentration +[concentration] = [substance] / [volume] +molar = mole / liter = M + +# Catalytic activity +[activity] = [substance] / [time] +katal = mole / second = kat +enzyme_unit = micromole / minute = U 
= enzymeunit + +# Entropy +[entropy] = [energy] / [temperature] +clausius = calorie / kelvin = Cl + +# Molar entropy +[molar_entropy] = [entropy] / [substance] +entropy_unit = calorie / kelvin / mole = eu + +# Radiation +becquerel = counts_per_second = Bq +curie = 3.7e10 * becquerel = Ci +rutherford = 1e6 * becquerel = Rd +gray = joule / kilogram = Gy +sievert = joule / kilogram = Sv +rem = 0.01 * sievert +roentgen = 2.58e-4 * coulomb / kilogram = _ = röntgen # approximate, depends on medium + +# Luminance +[luminance] = [luminosity] / [area] +nit = candela / meter ** 2 +stilb = candela / centimeter ** 2 +lambert = 1 / π * candela / centimeter ** 2 + +# Luminous flux +[luminous_flux] = [luminosity] * [angle] ** 2 +lumen = candela * steradian = lm + +# Illuminance +[illuminance] = [luminous_flux] / [area] +lux = lumen / meter ** 2 = lx + +# Intensity +[intensity] = [power] / [area] +atomic_unit_of_intensity = 0.5 * ε_0 * c * atomic_unit_of_electric_field ** 2 = a_u_intensity + +# Current +biot = 10 * ampere = Bi +abampere = biot = abA +atomic_unit_of_current = e / atomic_unit_of_time = a_u_current +mean_international_ampere = mean_international_volt / mean_international_ohm = A_it +US_international_ampere = US_international_volt / US_international_ohm = A_US +conventional_ampere_90 = K_J90 * R_K90 / (K_J * R_K) * ampere = A_90 +planck_current = (c ** 6 / gravitational_constant / k_C) ** 0.5 + +# Charge +[charge] = [current] * [time] +coulomb = ampere * second = C +abcoulomb = 10 * C = abC +faraday = e * N_A * mole +conventional_coulomb_90 = K_J90 * R_K90 / (K_J * R_K) * coulomb = C_90 + +# Electric potential +[electric_potential] = [energy] / [charge] +volt = joule / coulomb = V +abvolt = 1e-8 * volt = abV +mean_international_volt = 1.00034 * volt = V_it # approximate +US_international_volt = 1.00033 * volt = V_US # approximate +conventional_volt_90 = K_J90 / K_J * volt = V_90 + +# Electric field +[electric_field] = [electric_potential] / [length] +atomic_unit_of_electric_field = e * k_C / a_0 ** 2 = a_u_electric_field + +# Electric displacement field +[electric_displacement_field] = [charge] / [area] + +# Resistance +[resistance] = [electric_potential] / [current] +ohm = volt / ampere = Ω +abohm = 1e-9 * ohm = abΩ +mean_international_ohm = 1.00049 * ohm = Ω_it = ohm_it # approximate +US_international_ohm = 1.000495 * ohm = Ω_US = ohm_US # approximate +conventional_ohm_90 = R_K / R_K90 * ohm = Ω_90 = ohm_90 + +# Resistivity +[resistivity] = [resistance] * [length] + +# Conductance +[conductance] = [current] / [electric_potential] +siemens = ampere / volt = S = mho +absiemens = 1e9 * siemens = abS = abmho + +# Capacitance +[capacitance] = [charge] / [electric_potential] +farad = coulomb / volt = F +abfarad = 1e9 * farad = abF +conventional_farad_90 = R_K90 / R_K * farad = F_90 + +# Inductance +[inductance] = [magnetic_flux] / [current] +henry = weber / ampere = H +abhenry = 1e-9 * henry = abH +conventional_henry_90 = R_K / R_K90 * henry = H_90 + +# Magnetic flux +[magnetic_flux] = [electric_potential] * [time] +weber = volt * second = Wb +unit_pole = µ_0 * biot * centimeter + +# Magnetic field +[magnetic_field] = [magnetic_flux] / [area] +tesla = weber / meter ** 2 = T +gamma = 1e-9 * tesla = γ +gauss = 1e-4 * tesla = G + +# Magnetic field strength +[magnetic_field_strength] = [current] / [length] + +# Electric dipole moment +[electric_dipole] = [charge] * [length] +debye = 1e-9 / ζ * coulomb * angstrom = D # formally 1 D = 1e-10 Fr*Å, but we generally want to use it outside the Gaussian 
context + +# Electric quadrupole moment +[electric_quadrupole] = [charge] * [area] +buckingham = debye * angstrom + +# Magnetic dipole moment +[magnetic_dipole] = [current] * [area] +bohr_magneton = e * h_bar / (2 * m_e) = µ_B = mu_B +nuclear_magneton = e * h_bar / (2 * m_p) = µ_N = mu_N + +# Pixel density +[pixel_density] = [digital_image_resolution] / [length] +pixels_per_inch = px / inch = PPI = ppi +pixels_per_centimeter = px / cm = PPCM = ppcm + +#### UNIT GROUPS #### +# Mostly for length, area, volume, mass, force +# (customary or specialized units) + +@group USCSLengthInternational + thou = 1e-3 * inch = th = mil_length + inch = yard / 36 = in = international_inch = inches = international_inches + hand = 4 * inch + foot = yard / 3 = ft = international_foot = feet = international_feet + yard = 0.9144 * meter = yd = international_yard # since Jul 1959 + mile = 1760 * yard = mi = international_mile + + square_inch = inch ** 2 = sq_in = square_inches + square_foot = foot ** 2 = sq_ft = square_feet + square_yard = yard ** 2 = sq_yd + square_mile = mile ** 2 = sq_mi + + cubic_inch = in ** 3 = cu_in + cubic_foot = ft ** 3 = cu_ft = cubic_feet + cubic_yard = yd ** 3 = cu_yd +@end + +@group USCSLengthSurvey + link = 1e-2 * chain = li = survey_link + survey_foot = 1200 / 3937 * meter = sft + fathom = 6 * survey_foot + rod = 16.5 * survey_foot = rd = pole = perch + chain = 4 * rod + furlong = 40 * rod = fur + cables_length = 120 * fathom + survey_mile = 5280 * survey_foot = smi = us_statute_mile + league = 3 * survey_mile + + square_rod = rod ** 2 = sq_rod = sq_pole = sq_perch + acre = 10 * chain ** 2 + square_survey_mile = survey_mile ** 2 = _ = section + square_league = league ** 2 + + acre_foot = acre * survey_foot = _ = acre_feet +@end + +@group USCSLiquidVolume + minim = pint / 7680 + fluid_dram = pint / 128 = fldr = fluidram = US_fluid_dram = US_liquid_dram + fluid_ounce = pint / 16 = floz = US_fluid_ounce = US_liquid_ounce + gill = pint / 4 = gi = liquid_gill = US_liquid_gill + pint = quart / 2 = liquid_pint = US_pint + fifth = gallon / 5 = _ = US_liquid_fifth + quart = gallon / 4 = qt = liquid_quart = US_liquid_quart + gallon = 231 * cubic_inch = gal = liquid_gallon = US_liquid_gallon +@end + +@group Avoirdupois + dram = pound / 256 = dr = avoirdupois_dram = avdp_dram = drachm + ounce = pound / 16 = oz = avoirdupois_ounce = avdp_ounce + pound = 7e3 * grain = lb = avoirdupois_pound = avdp_pound + stone = 14 * pound + quarter = 28 * stone + bag = 94 * pound + hundredweight = 100 * pound = cwt = short_hundredweight + long_hundredweight = 112 * pound + ton = 2e3 * pound = _ = short_ton + long_ton = 2240 * pound + slug = g_0 * pound * second ** 2 / foot + slinch = g_0 * pound * second ** 2 / inch = blob = slugette + + force_ounce = g_0 * ounce = ozf = ounce_force + force_pound = g_0 * pound = lbf = pound_force + force_ton = g_0 * ton = _ = ton_force = force_short_ton = short_ton_force + force_long_ton = g_0 * long_ton = _ = long_ton_force + kip = 1e3 * force_pound + poundal = pound * foot / second ** 2 = pdl +@end + +@group AvoirdupoisUK using Avoirdupois + UK_hundredweight = long_hundredweight = UK_cwt + UK_ton = long_ton + UK_force_ton = force_long_ton = _ = UK_ton_force +@end + +@group AvoirdupoisUS using Avoirdupois + US_hundredweight = hundredweight = US_cwt + US_ton = ton + US_force_ton = force_ton = _ = US_ton_force +@end + +@group Troy + pennyweight = 24 * grain = dwt + troy_ounce = 480 * grain = toz = ozt + troy_pound = 12 * troy_ounce = tlb = lbt +@end + +@group ImperialVolume + 
imperial_minim = imperial_fluid_ounce / 480 + imperial_fluid_scruple = imperial_fluid_ounce / 24 + imperial_fluid_drachm = imperial_fluid_ounce / 8 = imperial_fldr = imperial_fluid_dram + imperial_fluid_ounce = imperial_pint / 20 = imperial_floz = UK_fluid_ounce + imperial_gill = imperial_pint / 4 = imperial_gi = UK_gill + imperial_cup = imperial_pint / 2 = imperial_cp = UK_cup + imperial_pint = imperial_gallon / 8 = imperial_pt = UK_pint + imperial_quart = imperial_gallon / 4 = imperial_qt = UK_quart + imperial_gallon = 4.54609 * liter = imperial_gal = UK_gallon + imperial_peck = 2 * imperial_gallon = imperial_pk = UK_pk + imperial_bushel = 8 * imperial_gallon = imperial_bu = UK_bushel + imperial_barrel = 36 * imperial_gallon = imperial_bbl = UK_bbl +@end + + +#### CONVERSION CONTEXTS #### + +@context(n=1) spectroscopy = sp + # n index of refraction of the medium. + [length] <-> [frequency]: speed_of_light / n / value + [frequency] -> [energy]: planck_constant * value + [energy] -> [frequency]: value / planck_constant + # allow wavenumber / kayser + [wavenumber] <-> [length]: 1 / value +@end + +@context boltzmann + [temperature] -> [energy]: boltzmann_constant * value + [energy] -> [temperature]: value / boltzmann_constant +@end + +@context energy + [energy] -> [energy] / [substance]: value * N_A + [energy] / [substance] -> [energy]: value / N_A + [energy] -> [mass]: value / c ** 2 + [mass] -> [energy]: value * c ** 2 +@end + +@context(mw=0,volume=0,solvent_mass=0) chemistry = chem + # mw is the molecular weight of the species + # volume is the volume of the solution + # solvent_mass is the mass of solvent in the solution + + # moles -> mass require the molecular weight + [substance] -> [mass]: value * mw + [mass] -> [substance]: value / mw + + # moles/volume -> mass/volume and moles/mass -> mass/mass + # require the molecular weight + [substance] / [volume] -> [mass] / [volume]: value * mw + [mass] / [volume] -> [substance] / [volume]: value / mw + [substance] / [mass] -> [mass] / [mass]: value * mw + [mass] / [mass] -> [substance] / [mass]: value / mw + + # moles/volume -> moles requires the solution volume + [substance] / [volume] -> [substance]: value * volume + [substance] -> [substance] / [volume]: value / volume + + # moles/mass -> moles requires the solvent (usually water) mass + [substance] / [mass] -> [substance]: value * solvent_mass + [substance] -> [substance] / [mass]: value / solvent_mass + + # moles/mass -> moles/volume require the solvent mass and the volume + [substance] / [mass] -> [substance]/[volume]: value * solvent_mass / volume + [substance] / [volume] -> [substance] / [mass]: value / solvent_mass * volume + +@end + + +#### SYSTEMS OF UNITS #### + +@system SI + second + meter + kilogram + ampere + kelvin + mole + candela +@end + +@system mks using international + meter + kilogram + second +@end + +@system atomic using international + # based on unit m_e, e, h_bar, k_C, k + bohr: meter + electron_mass: gram + atomic_unit_of_time: second + atomic_unit_of_current: ampere + atomic_unit_of_temperature: kelvin +@end From 9f3a9bd3bb76d5718f428f156b83a39706ec27f4 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 27 May 2025 23:23:34 +0200 Subject: [PATCH 093/118] test units in validation --- src/pynxtools/dataconverter/helpers.py | 6 ++++ src/pynxtools/dataconverter/nexus_tree.py | 37 ++--------------------- src/pynxtools/dataconverter/validation.py | 35 +++++++++++++++++++-- tests/dataconverter/test_validation.py | 36 
+++++++++++++++++++--- 4 files changed, 71 insertions(+), 43 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index 72be2792b..f7d6f99a8 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -49,6 +49,7 @@ class ValidationProblem(Enum): DifferentVariadicNodesWithTheSameName = auto() UnitWithoutDocumentation = auto() + InvalidUnit = auto() InvalidEnum = auto() OpenEnumWithNewItem = auto() MissingRequiredGroup = auto() @@ -99,6 +100,11 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar logger.info( f"The unit, {path} = {value}, is being written but has no documentation." ) + if log_type == ValidationProblem.InvalidUnit: + value = cast(Any, value) + logger.warning( + f"The unit '{args[0]}' at {path} does not match with the unit category {value.unit} of '{value.name}'." + ) elif log_type == ValidationProblem.InvalidEnum: logger.warning( f"The value at {path} should be one of the following: {value}." diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 98f123710..16efd90c0 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -47,6 +47,7 @@ get_nx_namefit, is_name_type, ) +from pynxtools.units import NXUnitSet NexusType = Literal[ "NX_BINARY", @@ -65,41 +66,7 @@ "NX_UINT", ] -NexusUnitCategory = Literal[ - "NX_ANGLE", - "NX_ANY", - "NX_AREA", - "NX_CHARGE", - "NX_COUNT", - "NX_CROSS_SECTION", - "NX_CURRENT", - "NX_DIMENSIONLESS", - "NX_EMITTANCE", - "NX_ENERGY", - "NX_FLUX", - "NX_FREQUENCY", - "NX_LENGTH", - "NX_MASS", - "NX_MASS_DENSITY", - "NX_MOLECULAR_WEIGHT", - "NX_PERIOD", - "NX_PER_AREA", - "NX_PER_LENGTH", - "NX_POWER", - "NX_PRESSURE", - "NX_PULSES", - "NX_SCATTERING_LENGTH_DENSITY", - "NX_SOLID_ANGLE", - "NX_TEMPERATURE", - "NX_TIME", - "NX_TIME_OF_FLIGHT", - "NX_TRANSFORMATION", - "NX_UNITLESS", - "NX_VOLTAGE", - "NX_VOLUME", - "NX_WAVELENGTH", - "NX_WAVENUMBER", -] +NexusUnitCategory = Literal[tuple(NXUnitSet.mapping.keys())] # This is the NeXus namespace for finding tags. # It's updated from the nxdl file when `generate_tree_from` is called. diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 763e4555f..3e1a9c139 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -27,6 +27,7 @@ import h5py import lxml.etree as ET import numpy as np +import pint from pynxtools.dataconverter.helpers import ( Collector, @@ -42,6 +43,7 @@ generate_tree_from, ) from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nx_namefit +from pynxtools.units import NXUnitSet, ureg def validate_hdf_group_against(appdef: str, data: h5py.Group): @@ -216,6 +218,24 @@ def best_namefit_of( return best_match +def is_valid_unit_for_node(node: NexusNode, unit: str, unit_path: str) -> None: + """ + Check if a given unit matches the unit category for a node. 
+ """ + # Need to use a list as `NXtransformation` is a special use case + node_unit_categories = ( + ["NX_LENGTH", "NX_ANGLE", "NX_UNITLESS"] + if node.unit == "NX_TRANSFORMATION" + else [node.unit] + ) + + for node_unit_category in node_unit_categories: + if NXUnitSet.matches(node_unit_category, unit): + return + + collector.collect_and_log(unit_path, ValidationProblem.InvalidUnit, node, unit) + + def validate_dict_against( appdef: str, mapping: MutableMapping[str, Any], ignore_undocumented: bool = False ) -> bool: @@ -606,15 +626,19 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): _ = check_reserved_prefix(variant_path, mapping, "field") # Check unit category - if node.unit is not None: - remove_from_not_visited(f"{prev_path}/{variant}/@units") + if node.unit is not None and node.unit != "NX_UNITLESS": + unit_path = f"{variant_path}/@units" + remove_from_not_visited(unit_path) if f"{variant}@units" not in keys: collector.collect_and_log( variant_path, ValidationProblem.MissingUnit, node.unit, ) - # TODO: Check unit with pint + break + + unit = keys[f"{variant}@units"] + is_valid_unit_for_node(node, unit, unit_path) field_attributes = get_field_attributes(variant, keys) field_attributes = _follow_link(field_attributes, variant_path) @@ -1344,6 +1368,11 @@ def check_reserved_prefix( mapping[not_visited_key], ) + if node.unit is not None and node.unit != "NX_UNITLESS": + is_valid_unit_for_node( + node, mapping[not_visited_key], not_visited_key + ) + # parent key will be checked on its own if it exists, because it is in the list continue diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index da835cb9b..ddee7a9b8 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -120,7 +120,7 @@ def listify_template(data_dict: Template): ] = 2 TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value"] = 2.0 # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units"] = ( - "nm" # pylint: disable=E1126 + "eV" # pylint: disable=E1126 ) TEMPLATE["optional"][ "/ENTRY[my_entry]/NXODD_name[nxodd_name]/DATA[float_value_no_attr]" @@ -132,14 +132,14 @@ def listify_template(data_dict: Template): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value"] = True # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value/@units"] = "" TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value"] = 2 # pylint: disable=E1126 -TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "eV" # pylint: disable=E1126 +TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value/@units"] = "nm" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value"] = np.array( [1, 2, 3], # pylint: disable=E1126 dtype=np.int8, ) # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/posint_value/@units"] = ( - "kg" # pylint: disable=E1126 + "mm" # pylint: disable=E1126 ) TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value"] = ( "just chars" # pylint: disable=E1126 @@ -166,7 +166,7 @@ def listify_template(data_dict: Template): ] = 2 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value"] = 2 # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/int_value/@units"] = ( - "eV" # pylint: 
disable=E1126 + "m" # pylint: disable=E1126 ) TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value"] = ( np.array( @@ -176,7 +176,7 @@ def listify_template(data_dict: Template): ) # pylint: disable=E1126 TEMPLATE["required"][ "/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/posint_value/@units" -] = "kg" # pylint: disable=E1126 +] = "cm" # pylint: disable=E1126 TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name[nxodd_two_name]/char_value"] = ( "just chars" # pylint: disable=E1126 ) @@ -1126,6 +1126,32 @@ def listify_template(data_dict: Template): ], id="wrong-value-array-in-attribute", ), + pytest.param( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units", + "m", + ), + [ + "The unit 'm' at /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units does not match with the unit category NX_ENERGY of 'float_value'." + ], + id="appdef-invalid-units", + ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/duration", + 2, + ), + "/ENTRY[my_entry]/duration/@units", + "kg", + ), + [ + "The unit 'kg' at /ENTRY[my_entry]/duration/@units does not match with the unit category NX_TIME of 'duration'." + ], + id="baseclass-invalid-units", + ), pytest.param( remove_from_dict( TEMPLATE, From b7903dc7122e9af6a0388d7cc9354c56c961997b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 27 May 2025 23:27:25 +0200 Subject: [PATCH 094/118] further refine unit matching --- src/pynxtools/units/__init__.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 33c36d044..798d34d66 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -109,21 +109,33 @@ def get_dimensionality(cls, token: str) -> Optional[Any]: return cls._dimensionalities[token] @classmethod - def matches(cls, expected_token: str, actual_unit: str) -> bool: - """Check whether the actual unit matches the expected NX token by comparing dimensionalities.""" - if expected_token in ["NX_ANY", "NX_UNITLESS"]: - return True + def matches(cls, unit_category: str, unit: str) -> bool: + """Check whether the actual unit matches the expected unit_categorys by comparing dimensionalities.""" + + def is_valid_unit(unit): + """Check if unit is generally valid.""" + if not unit: + return False + try: + ureg(unit) + return True + except (UndefinedUnitError, DefinitionSyntaxError): + return False - expected_dim = cls.get_dimensionality(expected_token) - if expected_dim is None: + if unit_category in ("NX_ANY"): + return is_valid_unit(unit) + + expected_dim = cls.get_dimensionality(unit_category) + if expected_dim is None and not unit: return True - if expected_dim is "dimensionless" and actual_unit: + if expected_dim == "dimensionless" and unit: return False - try: - actual_dim = (1 * ureg(actual_unit)).dimensionality - except (UndefinedUnitError, DefinitionSyntaxError): + # At this point, we expect a valid unit. 
+ if not is_valid_unit(unit): return False + actual_dim = (1 * ureg(unit)).dimensionality + return actual_dim == expected_dim From 78d228757704bcf2e948078eeeb853a3bbbdf99c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 28 May 2025 00:03:14 +0200 Subject: [PATCH 095/118] implement pynxtools ureg in nomad integration --- src/pynxtools/nomad/parser.py | 2 +- src/pynxtools/nomad/schema.py | 71 +++-------------------------------- 2 files changed, 7 insertions(+), 66 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 851526662..3ba6788b4 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -33,7 +33,6 @@ from nomad.metainfo import MEnum, MSection from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name from nomad.parsing import MatchingParser - from nomad.units import ureg from nomad.utils import get_logger from pint.errors import UndefinedUnitError except ImportError as exc: @@ -49,6 +48,7 @@ get_quantity_base_name, ) from pynxtools.nomad.utils import _rename_nx_for_nomad as rename_nx_for_nomad +from pynxtools.units import ureg def _to_group_name(nx_node: ET.Element): diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 999273e0f..5283ea4c4 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -74,7 +74,6 @@ from nomad.metainfo.metainfo import resolve_variadic_name from nomad.normalizing.common import nomad_atoms_from_ase_atoms from nomad.normalizing.topology import add_system, add_system_info - from nomad.units import ureg from nomad.utils import get_logger, hash, strip except ImportError as exc: @@ -83,6 +82,7 @@ ) from exc from pynxtools import NX_DOC_BASES, get_definitions_url +from pynxtools.units import NXUnitSet, ureg from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path from pynxtools.nomad.utils import ( FIELD_STATISTICS, @@ -327,68 +327,6 @@ def get_nx_type(nx_type: str) -> Optional[Datatype]: return None -class NXUnitSet: - """ - maps from `NX_` token to dimensionality - None -> disable dimensionality check - '1' -> dimensionless quantities - 'transformation' -> Specially handled in metainfo - """ - - mapping: dict = { - "NX_ANGLE": "[angle]", - "NX_ANY": None, - "NX_AREA": "[area]", - "NX_CHARGE": "[charge]", - "NX_COUNT": "1", - "NX_CROSS_SECTION": "[area]", - "NX_CURRENT": "[current]", - "NX_DIMENSIONLESS": "1", - "NX_EMITTANCE": "[length] * [angle]", - "NX_ENERGY": "[energy]", - "NX_FLUX": "1 / [time] / [area]", - "NX_FREQUENCY": "[frequency]", - "NX_LENGTH": "[length]", - "NX_MASS": "[mass]", - "NX_MASS_DENSITY": "[mass] / [volume]", - "NX_MOLECULAR_WEIGHT": "[mass] / [substance]", - "NX_PERIOD": "[time]", - "NX_PER_AREA": "1 / [area]", - "NX_PER_LENGTH": "1 / [length]", - "NX_POWER": "[power]", - "NX_PRESSURE": "[pressure]", - "NX_PULSES": "1", - "NX_SCATTERING_LENGTH_DENSITY": "1 / [area]", - "NX_SOLID_ANGLE": "[angle] * [angle]", - "NX_TEMPERATURE": "[temperature]", - "NX_TIME": "[time]", - "NX_TIME_OF_FLIGHT": "[time]", - "NX_TRANSFORMATION": "transformation", - "NX_UNITLESS": "1", - "NX_VOLTAGE": "[energy] / [current] / [time]", - "NX_VOLUME": "[volume]", - "NX_WAVELENGTH": "[length]", - "NX_WAVENUMBER": "1 / [length]", - } - - @staticmethod - def normalise(value: str) -> str: - """ - Normalise the given token - """ - value = value.upper() - if not value.startswith("NX_"): - value = "NX_" + value - return value - - @staticmethod - 
def is_nx_token(value: str) -> bool:
-        """
-        Check if a given token is one of NX tokens
-        """
-        return NXUnitSet.normalise(value) in NXUnitSet.mapping.keys()
-
-
 # def _to_camel_case(snake_str: str, upper: bool = False) -> str:
 #     """
 #     Take as input a snake case variable and return a camel case one
@@ -747,8 +685,11 @@ def _create_field(xml_node: ET.Element, container: Section) -> Quantity:
     # dimensionality
     nx_dimensionality = xml_attrs.get("units", None)
     if nx_dimensionality:
-        dimensionality = NXUnitSet.mapping.get(nx_dimensionality)
-        if not dimensionality and nx_dimensionality != "NX_ANY":
+        if nx_dimensionality == "NX_TRANSFORMATION":
+            # TODO: Remove workaround for NX_TRANSFORMATTION
+            nx_dimensionality = "NX_ANY"
+        dimensionality = NXUnitSet.get_dimensionality(nx_dimensionality)
+        if dimensionality is None and nx_dimensionality != "NX_ANY":
             try:
                 quantity = 1 * ureg(nx_dimensionality)
                 if quantity.dimensionality == "dimensionless":

From e993a6aee6d1772336a8435da6df651271f19 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Wed, 28 May 2025 00:05:38 +0200
Subject: [PATCH 096/118] remove unneeded import

---
 src/pynxtools/dataconverter/validation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py
index 3e1a9c139..20d38b8bf 100644
--- a/src/pynxtools/dataconverter/validation.py
+++ b/src/pynxtools/dataconverter/validation.py
@@ -27,7 +27,6 @@
 import h5py
 import lxml.etree as ET
 import numpy as np
-import pint
 
 from pynxtools.dataconverter.helpers import (
     Collector,

From 95680f88cf39f78af33a028e7cde49eb8104e119 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Wed, 28 May 2025 00:22:14 +0200
Subject: [PATCH 097/118] remove subkeys from the not-visited list if a whole group is None

---
 src/pynxtools/dataconverter/validation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py
index 20d38b8bf..d154dd0f7 100644
--- a/src/pynxtools/dataconverter/validation.py
+++ b/src/pynxtools/dataconverter/validation.py
@@ -457,6 +457,10 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str):
             and node.optionality == "required"
             and node.type in missing_type_err
         ):
+            # Remove any subkeys from further checking.
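+            # e.g. every key under a missing required group is dropped from the
+            # not-visited list here instead of being reported key by key later.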
+ for key in mapping: + if key.startswith(f"{prev_path}/{node.name}"): + remove_from_not_visited(key) collector.collect_and_log( f"{prev_path}/{node.name}", missing_type_err.get(node.type), From 8833c6ab813a8af436afb27866b832c9a1d393e8 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 28 May 2025 18:46:03 +0200 Subject: [PATCH 098/118] add support for unit examples --- src/pynxtools/dataconverter/nexus_tree.py | 36 ++++++++++++++++- src/pynxtools/nomad/schema.py | 16 ++------ src/pynxtools/units/__init__.py | 49 +++++++++++------------ tests/dataconverter/test_validation.py | 1 - 4 files changed, 61 insertions(+), 41 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 16efd90c0..0db0539f4 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -66,7 +66,41 @@ "NX_UINT", ] -NexusUnitCategory = Literal[tuple(NXUnitSet.mapping.keys())] +NexusUnitCategory = Literal[ + "NX_ANGLE", + "NX_ANY", + "NX_AREA", + "NX_CHARGE", + "NX_COUNT", + "NX_CROSS_SECTION", + "NX_CURRENT", + "NX_DIMENSIONLESS", + "NX_EMITTANCE", + "NX_ENERGY", + "NX_FLUX", + "NX_FREQUENCY", + "NX_LENGTH", + "NX_MASS", + "NX_MASS_DENSITY", + "NX_MOLECULAR_WEIGHT", + "NX_PERIOD", + "NX_PER_AREA", + "NX_PER_LENGTH", + "NX_POWER", + "NX_PRESSURE", + "NX_PULSES", + "NX_SCATTERING_LENGTH_DENSITY", + "NX_SOLID_ANGLE", + "NX_TEMPERATURE", + "NX_TIME", + "NX_TIME_OF_FLIGHT", + "NX_TRANSFORMATION", + "NX_UNITLESS", + "NX_VOLTAGE", + "NX_VOLUME", + "NX_WAVELENGTH", + "NX_WAVENUMBER", +] # This is the NeXus namespace for finding tags. # It's updated from the nxdl file when `generate_tree_from` is called. diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5283ea4c4..3779368b2 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -690,19 +690,9 @@ def _create_field(xml_node: ET.Element, container: Section) -> Quantity: nx_dimensionality = "NX_ANY" dimensionality = NXUnitSet.get_dimensionality(nx_dimensionality) if dimensionality is None and nx_dimensionality != "NX_ANY": - try: - quantity = 1 * ureg(nx_dimensionality) - if quantity.dimensionality == "dimensionless": - dimensionality = "1" - else: - dimensionality = str(quantity.dimensionality) - except ( - pint.errors.UndefinedUnitError, - pint.errors.DefinitionSyntaxError, - ) as err: - raise NotImplementedError( - f"Unit {nx_dimensionality} is not supported for {name}." - ) from err + raise NotImplementedError( + f"Unit {nx_dimensionality} is not supported for {name}." + ) else: dimensionality = None diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 798d34d66..638e4f8f1 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -30,7 +30,7 @@ class NXUnitSet: """ - Maps from `NX_` tokens to dimensionality. + Maps from `NX_` unit_categories (or unit examples) to dimensionality. 
- None -> disables dimensionality check - '1' -> dimensionless quantities @@ -75,42 +75,39 @@ class NXUnitSet: _dimensionalities: Dict[str, Optional[Any]] = {} - @staticmethod - def normalize(value: str) -> str: - """Normalize the given token to 'NX_' prefix form.""" - value = value.upper() - if not value.startswith("NX_"): - value = "NX_" + value - return value - - @classmethod - def is_nx_token(cls, value: str) -> bool: - """Check if a given token is one of the known NX tokens.""" - return cls.normalize(value) in cls.mapping - @classmethod - def get_dimensionality(cls, token: str) -> Optional[Any]: - """Get the dimensionality object for a given NX token.""" - token = cls.normalize(token) - if token in cls._dimensionalities: - return cls._dimensionalities[token] + def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: + """Get the dimensionality object for a given NeXus unit category or example.""" + if nx_unit in cls._dimensionalities: + return cls._dimensionalities[nx_unit] - definition = cls.mapping.get(token) + definition = cls.mapping.get(nx_unit) if definition is None or definition == "transformation": - cls._dimensionalities[token] = None + if definition is None: + try: + quantity = 1 * ureg(nx_unit) + if quantity.dimensionality == "dimensionless": + cls._dimensionalities[nx_unit] = "1" + else: + cls._dimensionalities[nx_unit] = str(quantity.dimensionality) + except (UndefinedUnitError, DefinitionSyntaxError): + cls._dimensionalities[nx_unit] = None elif definition == "1": - cls._dimensionalities[token] = ureg("").dimensionality + cls._dimensionalities[nx_unit] = ureg("").dimensionality else: try: - cls._dimensionalities[token] = ureg.get_dimensionality(definition) + cls._dimensionalities[nx_unit] = ureg.get_dimensionality(definition) except (UndefinedUnitError, DefinitionSyntaxError) as e: - cls._dimensionalities[token] = None + cls._dimensionalities[nx_unit] = None - return cls._dimensionalities[token] + return cls._dimensionalities[nx_unit] @classmethod def matches(cls, unit_category: str, unit: str) -> bool: - """Check whether the actual unit matches the expected unit_categorys by comparing dimensionalities.""" + """ + Check whether the actual unit matches the expected unit category (or example) + by comparing dimensionalities. 
+ """ def is_valid_unit(unit): """Check if unit is generally valid.""" diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index ddee7a9b8..b0b38a9dd 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1455,7 +1455,6 @@ def listify_template(data_dict: Template): ], id="group-with-correct-concept-and-non-concept-sibling", ), - # This can be re-used later when we have proper unit checking pytest.param( alter_dict( alter_dict( From 31388ed82b13139512278f95cc25bda338849cfd Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 2 Jun 2025 11:46:41 +0200 Subject: [PATCH 099/118] workarounds for NX_TRANSFORMATION --- src/pynxtools/dataconverter/validation.py | 12 +++--- src/pynxtools/units/__init__.py | 15 ++++--- tests/dataconverter/test_validation.py | 51 +++++++++++++++++------ 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index d154dd0f7..298c40f42 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -632,7 +632,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): if node.unit is not None and node.unit != "NX_UNITLESS": unit_path = f"{variant_path}/@units" remove_from_not_visited(unit_path) - if f"{variant}@units" not in keys: + if f"{variant}@units" not in keys and node.unit != "NX_TRANSFORMATION": collector.collect_and_log( variant_path, ValidationProblem.MissingUnit, @@ -640,7 +640,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): ) break - unit = keys[f"{variant}@units"] + unit = keys.get(f"{variant}@units") is_valid_unit_for_node(node, unit, unit_path) field_attributes = get_field_attributes(variant, keys) @@ -847,9 +847,11 @@ def is_documented(key: str, tree: NexusNode) -> bool: and node.unit is not None and f"{key}/@units" not in mapping ): - collector.collect_and_log( - f"{key}", ValidationProblem.MissingUnit, node.unit - ) + # Workaround for NX_UNITLESS of NX_TRANSFORMATION unit category + if node.unit != "NX_TRANSFORMATION": + collector.collect_and_log( + f"{key}", ValidationProblem.MissingUnit, node.unit + ) return True diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 638e4f8f1..71eaef1bd 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -20,7 +20,7 @@ import os from typing import Optional, Dict, Any from pint import UnitRegistry -from pint.errors import UndefinedUnitError, DefinitionSyntaxError +from pint.errors import UndefinedUnitError, DefinitionSyntaxError, DimensionalityError try: from nomad.units import ureg @@ -109,14 +109,19 @@ def matches(cls, unit_category: str, unit: str) -> bool: by comparing dimensionalities. """ - def is_valid_unit(unit): + def is_valid_unit(unit: str): """Check if unit is generally valid.""" if not unit: return False try: ureg(unit) return True - except (UndefinedUnitError, DefinitionSyntaxError): + except ( + UndefinedUnitError, + DefinitionSyntaxError, + AttributeError, + DimensionalityError, + ): return False if unit_category in ("NX_ANY"): @@ -126,8 +131,8 @@ def is_valid_unit(unit): if expected_dim is None and not unit: return True - if expected_dim == "dimensionless" and unit: - return False + if str(expected_dim) == "dimensionless": + return True if unit is None else False # At this point, we expect a valid unit. 
if not is_valid_unit(unit): diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index b0b38a9dd..21cd2b23d 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1152,6 +1152,44 @@ def listify_template(data_dict: Template): ], id="baseclass-invalid-units", ), + pytest.param( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/MONOCHROMATOR[monochromator]/energy_dispersion", + 0.5, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/MONOCHROMATOR[monochromator]/energy_dispersion/@units", + "J/mm", + ), + [], + id="baseclass-valid-units-xample", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", + "m", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", + "degree", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", + 1.0, + ), + [], + id="nxtransformations-unit", + ), pytest.param( remove_from_dict( TEMPLATE, @@ -1455,19 +1493,6 @@ def listify_template(data_dict: Template): ], id="group-with-correct-concept-and-non-concept-sibling", ), - pytest.param( - alter_dict( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/MONOCHROMATOR[monochromator]/energy_dispersion", - 0.5, - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/MONOCHROMATOR[monochromator]/energy_dispersion/@units", - "J/mm", - ), - [], - id="baseclass-unit-example", - ), pytest.param( alter_dict( alter_dict( From 3992227b6f6988f2cfcf49daf32e435b3bc31c6b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 5 Jun 2025 16:35:36 +0200 Subject: [PATCH 100/118] test NXtransformation units based on the presence of transformation_type --- src/pynxtools/dataconverter/helpers.py | 8 ++-- src/pynxtools/dataconverter/validation.py | 51 +++++++++++++++++------ tests/dataconverter/test_validation.py | 47 ++++++++++++++++----- 3 files changed, 80 insertions(+), 26 deletions(-) diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index f7d6f99a8..0c28410fd 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -102,9 +102,11 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar ) if log_type == ValidationProblem.InvalidUnit: value = cast(Any, value) - logger.warning( - f"The unit '{args[0]}' at {path} does not match with the unit category {value.unit} of '{value.name}'." - ) + log_text = f"The unit '{args[0]}' at {path} does not match with the unit category {value.unit} of '{value.name}'." + if len(args) == 2 and args[1] is not None: + log_text += f" Based on the 'transformation_type' of the field {path.replace('/@units', '')}, it should match with '{args[1]}'." + logger.warning(log_text) + elif log_type == ValidationProblem.InvalidEnum: logger.warning( f"The value at {path} should be one of the following: {value}." 
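
A minimal sketch (not part of the patch itself) of the check this commit wires
up, assuming the `NXUnitSet.matches` semantics from the units patches above;
the unit strings are illustrative only:

    from pynxtools.units import NXUnitSet

    # @transformation_type selects the expected category for NX_TRANSFORMATION
    category_map = {"translation": "NX_LENGTH", "rotation": "NX_ANGLE"}
    expected = category_map.get("rotation", "NX_UNITLESS")  # -> "NX_ANGLE"

    NXUnitSet.matches(expected, "degree")  # True: degree carries [angle]
    NXUnitSet.matches(expected, "m")       # False: meter carries [length]

In the validator itself this lookup is driven by the sibling
`@transformation_type` key, falling back to NX_UNITLESS when the attribute is
absent or names neither a translation nor a rotation.
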
diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 298c40f42..ae481799d 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -217,22 +217,33 @@ def best_namefit_of( return best_match -def is_valid_unit_for_node(node: NexusNode, unit: str, unit_path: str) -> None: +def is_valid_unit_for_node( + node: NexusNode, unit: str, unit_path: str, hints: Dict[str, Any] +) -> None: """ Check if a given unit matches the unit category for a node. """ # Need to use a list as `NXtransformation` is a special use case - node_unit_categories = ( - ["NX_LENGTH", "NX_ANGLE", "NX_UNITLESS"] - if node.unit == "NX_TRANSFORMATION" - else [node.unit] - ) + if node.unit == "NX_TRANSFORMATION": + if (transformation_type := hints.get("transformation_type")) is not None: + category_map: Dict[str, str] = { + "translation": "NX_LENGTH", + "rotation": "NX_ANGLE", + } + node_unit_category = category_map.get(transformation_type, "NX_UNITLESS") + else: + node_unit_category = "NX_UNITLESS" + log_input = node_unit_category + else: + node_unit_category = node.unit + log_input = None - for node_unit_category in node_unit_categories: - if NXUnitSet.matches(node_unit_category, unit): - return + if NXUnitSet.matches(node_unit_category, unit): + return - collector.collect_and_log(unit_path, ValidationProblem.InvalidUnit, node, unit) + collector.collect_and_log( + unit_path, ValidationProblem.InvalidUnit, node, unit, log_input + ) def validate_dict_against( @@ -641,7 +652,14 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): break unit = keys.get(f"{variant}@units") - is_valid_unit_for_node(node, unit, unit_path) + # Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute + if ( + transformation_type := keys.get(f"{variant}@transformation_type") + ) is not None: + hints = {"transformation_type": transformation_type} + else: + hints = {} + is_valid_unit_for_node(node, unit, unit_path, hints) field_attributes = get_field_attributes(variant, keys) field_attributes = _follow_link(field_attributes, variant_path) @@ -1374,8 +1392,17 @@ def check_reserved_prefix( ) if node.unit is not None and node.unit != "NX_UNITLESS": + # Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute + if ( + transformation_type := mapping.get( + not_visited_key.replace("/@units", "/@transformation_type") + ) + ) is not None: + hints = {"transformation_type": transformation_type} + else: + hints = {} is_valid_unit_for_node( - node, mapping[not_visited_key], not_visited_key + node, mapping[not_visited_key], not_visited_key, hints ) # parent key will be checked on its own if it exists, because it is in the list diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 21cd2b23d..1b58bd2e8 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1171,23 +1171,48 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", - 1.0, + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", + 1.0, + ), + 
"/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", + "translation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", + "m", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", + "rotation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", + "degree", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", - "m", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]", 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", - "degree", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@transformation_type", + "rotation", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", - 1.0, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units", + "m", ), - [], + [ + "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units " + "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. Based on the 'transformation_type' of the field " + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation], " + "it should match with 'NX_ANGLE'." 
+ ], id="nxtransformations-unit", ), pytest.param( From cfe3bc960e9d5767729e228c82d93dff54206179 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 11 Jun 2025 15:28:00 +0200 Subject: [PATCH 101/118] allow empty units for NX_ANY --- src/pynxtools/units/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 71eaef1bd..66e09f83d 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -125,7 +125,8 @@ def is_valid_unit(unit: str): return False if unit_category in ("NX_ANY"): - return is_valid_unit(unit) + # Note: we allow empty string units here + return is_valid_unit(unit) or unit == "" expected_dim = cls.get_dimensionality(unit_category) if expected_dim is None and not unit: From 975d8a372830ccee0b353008046dfe6fd1a7c593 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 11 Jun 2025 16:33:40 +0200 Subject: [PATCH 102/118] check NX_UNITLESS as well --- src/pynxtools/dataconverter/validation.py | 40 ++++++++++++++++------- src/pynxtools/units/__init__.py | 31 +++++++++++++++--- tests/dataconverter/test_validation.py | 21 +++++++++++- 3 files changed, 75 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index ae481799d..7ed086a53 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -221,7 +221,21 @@ def is_valid_unit_for_node( node: NexusNode, unit: str, unit_path: str, hints: Dict[str, Any] ) -> None: """ - Check if a given unit matches the unit category for a node. + Validate whether a unit string is compatible with the expected unit category for a given NeXus node. + + This function checks if the provided `unit` string matches the expected unit dimensionality + defined in the node's `unit` field. Special logic is applied for "NX_TRANSFORMATION", where + the dimensionality depends on the `transformation_type` hint. + + If the unit does not match the expected dimensionality, a validation problem is logged. + + Args: + node (NexusNode): The node containing unit metadata to validate against. + unit (str): The unit string to validate (e.g., "m", "eV", "1", ""). + unit_path (str): The path to the unit in the NeXus template, used for logging. + hints (Dict[str, Any]): Additional metadata used during validation. For example, + hints["transformation_type"] may be used to determine the expected unit category + if the node represents a transformation. 
""" # Need to use a list as `NXtransformation` is a special use case if node.unit == "NX_TRANSFORMATION": @@ -640,16 +654,20 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): _ = check_reserved_prefix(variant_path, mapping, "field") # Check unit category - if node.unit is not None and node.unit != "NX_UNITLESS": + if node.unit is not None: unit_path = f"{variant_path}/@units" - remove_from_not_visited(unit_path) - if f"{variant}@units" not in keys and node.unit != "NX_TRANSFORMATION": - collector.collect_and_log( - variant_path, - ValidationProblem.MissingUnit, - node.unit, - ) - break + if node.unit != "NX_UNITLESS": + remove_from_not_visited(unit_path) + if ( + f"{variant}@units" not in keys + and node.unit != "NX_TRANSFORMATION" + ): + collector.collect_and_log( + variant_path, + ValidationProblem.MissingUnit, + node.unit, + ) + break unit = keys.get(f"{variant}@units") # Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute @@ -1391,7 +1409,7 @@ def check_reserved_prefix( mapping[not_visited_key], ) - if node.unit is not None and node.unit != "NX_UNITLESS": + if node.unit is not None: # and node.unit != "NX_UNITLESS": # Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute if ( transformation_type := mapping.get( diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 66e09f83d..383244fed 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -77,7 +77,16 @@ class NXUnitSet: @classmethod def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: - """Get the dimensionality object for a given NeXus unit category or example.""" + """ + Get the dimensionality object for a given NeXus unit category or example. + + Args: + nx_unit (str): The NeXus unit category or a specific unit string. + + Returns: + Optional[Any]: The dimensionality object, '1' for dimensionless, + or None if undefined. + """ if nx_unit in cls._dimensionalities: return cls._dimensionalities[nx_unit] @@ -87,7 +96,7 @@ def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: try: quantity = 1 * ureg(nx_unit) if quantity.dimensionality == "dimensionless": - cls._dimensionalities[nx_unit] = "1" + cls._dimensionalities[nx_unit] = ureg("").dimensionality else: cls._dimensionalities[nx_unit] = str(quantity.dimensionality) except (UndefinedUnitError, DefinitionSyntaxError): @@ -105,8 +114,19 @@ def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: @classmethod def matches(cls, unit_category: str, unit: str) -> bool: """ - Check whether the actual unit matches the expected unit category (or example) - by comparing dimensionalities. + Check whether the actual unit matches the expected unit category or example. + + This is determined by comparing dimensionalities. Special handling is + included for NX_ANY (accepts any valid unit or empty string) and for + dimensionless cases. + + Args: + unit_category (str): The expected NeXus unit category. + unit (str): The actual unit string to validate. + + Returns: + bool: True if the actual unit matches the expected dimensionality; + False otherwise. """ def is_valid_unit(unit: str): @@ -129,11 +149,12 @@ def is_valid_unit(unit: str): return is_valid_unit(unit) or unit == "" expected_dim = cls.get_dimensionality(unit_category) + if expected_dim is None and not unit: return True if str(expected_dim) == "dimensionless": - return True if unit is None else False + return True if not unit else False # At this point, we expect a valid unit. 
if not is_valid_unit(unit): diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 1b58bd2e8..5288096d4 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1163,7 +1163,26 @@ def listify_template(data_dict: Template): "J/mm", ), [], - id="baseclass-valid-units-xample", + id="baseclass-valid-unit-example", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/SAMPLE[sample1]]/changer_position", + 1, + ), + "/ENTRY[my_entry]/SAMPLE[sample1]]/changer_position/@units", + "mm", + ), + "/ENTRY[my_entry]/SAMPLE[sample2]]/changer_position", + 1, + ), + [ + "The unit 'mm' at /ENTRY[my_entry]/SAMPLE[sample1]]/changer_position/@units does not match with the unit category NX_UNITLESS of 'changer_position'." + ], + id="baseclass-unitless-field", ), pytest.param( alter_dict( From d4e261000855d0d184ca4d8db35c8233bdd1688b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 11 Jun 2025 17:23:44 +0200 Subject: [PATCH 103/118] pass dimensionality as str for nomad schema --- src/pynxtools/nomad/schema.py | 5 +++-- src/pynxtools/units/__init__.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 3779368b2..5e89b6629 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -31,7 +31,6 @@ import h5py import numpy as np import pandas as pd -import pint from ase import Atoms from ase.data import atomic_numbers from scipy.spatial import cKDTree @@ -689,7 +688,9 @@ def _create_field(xml_node: ET.Element, container: Section) -> Quantity: # TODO: Remove workaround for NX_TRANSFORMATTION nx_dimensionality = "NX_ANY" dimensionality = NXUnitSet.get_dimensionality(nx_dimensionality) - if dimensionality is None and nx_dimensionality != "NX_ANY": + if dimensionality is not None: + dimensionality = str(dimensionality) + elif nx_dimensionality != "NX_ANY": raise NotImplementedError( f"Unit {nx_dimensionality} is not supported for {name}." 
) diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 383244fed..4be2b447b 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -91,18 +91,18 @@ def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: return cls._dimensionalities[nx_unit] definition = cls.mapping.get(nx_unit) - if definition is None or definition == "transformation": + if definition == "1": + cls._dimensionalities[nx_unit] = ureg("").dimensionality + elif definition is None or definition == "transformation": if definition is None: try: quantity = 1 * ureg(nx_unit) if quantity.dimensionality == "dimensionless": cls._dimensionalities[nx_unit] = ureg("").dimensionality else: - cls._dimensionalities[nx_unit] = str(quantity.dimensionality) + cls._dimensionalities[nx_unit] = quantity.dimensionality except (UndefinedUnitError, DefinitionSyntaxError): cls._dimensionalities[nx_unit] = None - elif definition == "1": - cls._dimensionalities[nx_unit] = ureg("").dimensionality else: try: cls._dimensionalities[nx_unit] = ureg.get_dimensionality(definition) From acd3258f59ff3f0ef9c239e2fe2144f7c2c02d0c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 11 Jun 2025 17:30:45 +0200 Subject: [PATCH 104/118] remove unused import --- src/pynxtools/dataconverter/nexus_tree.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 0db0539f4..98f123710 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -47,7 +47,6 @@ get_nx_namefit, is_name_type, ) -from pynxtools.units import NXUnitSet NexusType = Literal[ "NX_BINARY", From 258b4c99d8382ef2c57400e2cf2d3168fe2c2f68 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:40:39 +0200 Subject: [PATCH 105/118] add count as a unit --- src/pynxtools/units/default_en.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/units/default_en.txt b/src/pynxtools/units/default_en.txt index 0c07409b9..f3994da8d 100644 --- a/src/pynxtools/units/default_en.txt +++ b/src/pynxtools/units/default_en.txt @@ -105,6 +105,7 @@ kelvin = [temperature]; offset: 0 = K = degK = °K = degree_Kelvin = degreeK # radian = [angle] = rad bit = [information] pixel = [digital_image_resolution] = px = pel +count = [] #### CONSTANTS #### @@ -594,4 +595,4 @@ pixels_per_centimeter = px / cm = PPCM = ppcm atomic_unit_of_time: second atomic_unit_of_current: ampere atomic_unit_of_temperature: kelvin -@end +@end \ No newline at end of file From 962e1226e704b344885b390159d24fdeb8cf207d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 12 Jun 2025 16:08:30 +0200 Subject: [PATCH 106/118] use count for cps --- src/pynxtools/units/default_en.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/units/default_en.txt b/src/pynxtools/units/default_en.txt index f3994da8d..86ff810e1 100644 --- a/src/pynxtools/units/default_en.txt +++ b/src/pynxtools/units/default_en.txt @@ -179,7 +179,7 @@ lambda = microliter = λ hertz = 1 / second = Hz revolutions_per_minute = 1 / minute = rpm revolutions_per_second = 1 / second = rps -counts_per_second = 1 / second = cps +counts_per_second = count / second = cps # Wavenumber [wavenumber] = 1 / [length] From d104e2f02f2da04685729ef88c2c265739f83884 Mon Sep 17 00:00:00 2001 From: 
Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 13 Jun 2025 09:25:11 +0200 Subject: [PATCH 107/118] workaround for NX_LENGTH and pixels --- src/pynxtools/dataconverter/validation.py | 2 +- src/pynxtools/units/__init__.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 7ed086a53..de035a940 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -1409,7 +1409,7 @@ def check_reserved_prefix( mapping[not_visited_key], ) - if node.unit is not None: # and node.unit != "NX_UNITLESS": + if node.unit is not None: # Special case: NX_TRANSFORMATION unit depends on `@transformation_type` attribute if ( transformation_type := mapping.get( diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 4be2b447b..4f8ff9d34 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -160,6 +160,10 @@ def is_valid_unit(unit: str): if not is_valid_unit(unit): return False + # Workaround for pixels as units in transformations + if ureg.Unit(unit) == ureg.Unit("pixel") and str(expected_dim) == "[length]": + return True + actual_dim = (1 * ureg(unit)).dimensionality return actual_dim == expected_dim From ea3be37c4e228a55c7da390f4957709c1720d37f Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 13 Jun 2025 17:33:19 +0200 Subject: [PATCH 108/118] add a test for a unit for a transformation with no transformation_type --- src/pynxtools/units/__init__.py | 2 +- tests/dataconverter/test_validation.py | 44 ++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 4f8ff9d34..42d95cc18 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -42,7 +42,7 @@ class NXUnitSet: "NX_ANY": None, "NX_AREA": "[area]", "NX_CHARGE": "[charge]", - "NX_COUNT": "1", + "NX_COUNT": "[count]", "NX_CROSS_SECTION": "[area]", "NX_CURRENT": "[current]", "NX_DIMENSIONLESS": "1", diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 5288096d4..50b8ee810 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1195,27 +1195,35 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", - 1.0, + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", + "translation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", + "m", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", - "translation", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", - "m", + 
"/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", + "rotation", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", - 1.0, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", + "degree", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", - "rotation", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", - "degree", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", - 1.0, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]/@units", + "m", ), "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]", 1.0, @@ -1227,10 +1235,14 @@ def listify_template(data_dict: Template): "m", ), [ + "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]/@units " + "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. Based on the 'transformation_type' of the field " + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit], " + "it should match with 'NX_UNITLESS'.", "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units " "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. Based on the 'transformation_type' of the field " "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation], " - "it should match with 'NX_ANGLE'." 
+ "it should match with 'NX_ANGLE'.", ], id="nxtransformations-unit", ), From d2f44d598ded718b5c21875eef95d7b7743552e3 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 13 Jun 2025 17:47:40 +0200 Subject: [PATCH 109/118] split up tests for NX_TRANSFORMATION unit --- src/pynxtools/dataconverter/validation.py | 11 ++- tests/dataconverter/test_validation.py | 96 +++++++++++++++-------- 2 files changed, 69 insertions(+), 38 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index de035a940..66ab0b484 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -658,9 +658,10 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str): unit_path = f"{variant_path}/@units" if node.unit != "NX_UNITLESS": remove_from_not_visited(unit_path) - if ( - f"{variant}@units" not in keys - and node.unit != "NX_TRANSFORMATION" + if f"{variant}@units" not in keys and ( + node.unit != "NX_TRANSFORMATION" + or mapping.get(f"{variant_path}/@transformations_type") + in ("translation", "rotation") ): collector.collect_and_log( variant_path, @@ -884,7 +885,9 @@ def is_documented(key: str, tree: NexusNode) -> bool: and f"{key}/@units" not in mapping ): # Workaround for NX_UNITLESS of NX_TRANSFORMATION unit category - if node.unit != "NX_TRANSFORMATION": + if node.unit != "NX_TRANSFORMATION" or mapping.get( + f"{key}/@transformations_type" + ) in ("translation", "rotation"): collector.collect_and_log( f"{key}", ValidationProblem.MissingUnit, node.unit ) diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index 50b8ee810..dc5555296 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1191,38 +1191,14 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - alter_dict( - alter_dict( - alter_dict( - alter_dict( - alter_dict( - alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", - 1.0, - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", - "translation", - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", - "m", - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", - 1.0, - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", - "rotation", - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", - "degree", - ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]", - 1.0, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", + "translation", ), - 
"/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]/@units", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", "m", ), "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]", @@ -1235,16 +1211,68 @@ def listify_template(data_dict: Template): "m", ), [ - "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]/@units " - "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. Based on the 'transformation_type' of the field " - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit], " - "it should match with 'NX_UNITLESS'.", "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units " "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. Based on the 'transformation_type' of the field " "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation], " "it should match with 'NX_ANGLE'.", ], - id="nxtransformations-unit", + id="nxtransformations-translation-units", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", + "rotation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", + "degree", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@transformation_type", + "translation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@units", + "degree", + ), + [ + "The unit 'degree' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@units " + "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. 
Based on the 'transformation_type' of the field " + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation], " + "it should match with 'NX_LENGTH'.", + ], + id="nxtransformations-rotation-units", + ), + pytest.param( + alter_dict( + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]/@units", + "m", + ), + [ + "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit]/@units " + "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. Based on the 'transformation_type' of the field " + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[direction_with_unit], " + "it should match with 'NX_UNITLESS'.", + ], + id="nxtransformations-direction-units", ), pytest.param( remove_from_dict( From d50914fb21c78a4dbeab96a765d61ef2f9956156 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Sat, 14 Jun 2025 00:34:42 +0200 Subject: [PATCH 110/118] fix for transformation dimensionalities --- src/pynxtools/units/__init__.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 42d95cc18..de25625cc 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -93,16 +93,18 @@ def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: definition = cls.mapping.get(nx_unit) if definition == "1": cls._dimensionalities[nx_unit] = ureg("").dimensionality - elif definition is None or definition == "transformation": - if definition is None: - try: - quantity = 1 * ureg(nx_unit) - if quantity.dimensionality == "dimensionless": - cls._dimensionalities[nx_unit] = ureg("").dimensionality - else: - cls._dimensionalities[nx_unit] = quantity.dimensionality - except (UndefinedUnitError, DefinitionSyntaxError): - cls._dimensionalities[nx_unit] = None + elif definition is None: + try: + quantity = 1 * ureg(nx_unit) + if quantity.dimensionality == ureg("").dimensionality: + cls._dimensionalities[nx_unit] = ureg("").dimensionality + else: + cls._dimensionalities[nx_unit] = quantity.dimensionality + except (UndefinedUnitError, DefinitionSyntaxError): + cls._dimensionalities[nx_unit] = None + elif definition == "transformation": + cls._dimensionalities[nx_unit] = None + else: try: cls._dimensionalities[nx_unit] = ureg.get_dimensionality(definition) From 8d233c9755b29f43b9e2a347d4075ca16902500b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 16 Jun 2025 09:27:00 +0200 Subject: [PATCH 111/118] add a check that NOMAD and pynxtools units are the same --- ...equirements.yml => nomad-compatibility.yml} | 18 +++++++++++++++--- src/pynxtools/units/constants_en.txt | 2 +- src/pynxtools/units/default_en.txt | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) rename .github/workflows/{nomad-requirements.yml => nomad-compatibility.yml} (77%) diff --git a/.github/workflows/nomad-requirements.yml 
b/.github/workflows/nomad-compatibility.yml
similarity index 77%
rename from .github/workflows/nomad-requirements.yml
rename to .github/workflows/nomad-compatibility.yml
index 567d82c04..ab220d87e 100644
--- a/.github/workflows/nomad-requirements.yml
+++ b/.github/workflows/nomad-compatibility.yml
@@ -1,4 +1,4 @@
-name: NOMAD dependencies compatibility
+name: NOMAD compatibility
 
 on:
   push:
@@ -16,7 +16,7 @@ env:
   python-version: 3.11
 
 jobs:
-  validate_dependencies:
+  validate_compatibility:
     runs-on: ubuntu-latest
 
     steps:
@@ -28,13 +28,25 @@ jobs:
           git clone --depth 1 --branch develop --recurse-submodules https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git nomad
           git submodule update --init --recursive --depth 1
 
+      - name: Compare unit definition files
+        run: |
+          diff --unified=3 nomad/units/default_en.txt src/pynxtools/units/default_en.txt || {
+            echo "::error file=src/pynxtools/units/default_en.txt::default_en.txt differs from NOMAD";
+            exit 1;
+          }
+
+          diff --unified=3 nomad/units/constants_en.txt src/pynxtools/units/constants_en.txt || {
+            echo "::error file=src/pynxtools/units/constants_en.txt::constants_en.txt differs from NOMAD";
+            exit 1;
+          }
+
       - name: Add pynxtools dependency in NOMAD test_plugins.txt
         working-directory: ./nomad
         run: |
           echo "" >> test_plugins.txt
           echo "pynxtools@git+https://github.com/FAIRmat-NFDI/pynxtools.git@${{ github.head_ref || github.ref_name }}" >> test_plugins.txt
       - name: Install uv and set the python version to ${{ env.python-version }}
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           python-version: ${{ env.python-version }}
       - name: Generate (dev-)requirements.txt from modified pyproject.toml
diff --git a/src/pynxtools/units/constants_en.txt b/src/pynxtools/units/constants_en.txt
index 19ad0379c..6ec8d2dbc 100644
--- a/src/pynxtools/units/constants_en.txt
+++ b/src/pynxtools/units/constants_en.txt
@@ -69,4 +69,4 @@ vacuum_permittivity = e ** 2 / (2 * α * h * c) = ε_0 = epsilon_0 = eps_0 = eps
 impedance_of_free_space = 2 * α * h / e ** 2 = Z_0 = characteristic_impedance_of_vacuum
 coulomb_constant = α * h_bar * c / e ** 2 = k_C
 classical_electron_radius = α * h_bar / (m_e * c) = r_e
-thomson_cross_section = 8 / 3 * π * r_e ** 2 = σ_e = sigma_e
\ No newline at end of file
+thomson_cross_section = 8 / 3 * π * r_e ** 2 = σ_e = sigma_e
diff --git a/src/pynxtools/units/default_en.txt b/src/pynxtools/units/default_en.txt
index 86ff810e1..523d8bb2e 100644
--- a/src/pynxtools/units/default_en.txt
+++ b/src/pynxtools/units/default_en.txt
@@ -595,4 +595,4 @@ pixels_per_centimeter = px / cm = PPCM = ppcm
 atomic_unit_of_time: second
 atomic_unit_of_current: ampere
 atomic_unit_of_temperature: kelvin
-@end
\ No newline at end of file
+@end

From 76baf8497ef8420841a400345ba5ddcbd14b9fee Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Mon, 16 Jun 2025 09:38:50 +0200
Subject: [PATCH 112/118] add tests for missing units in translations and rotations

---
 src/pynxtools/dataconverter/validation.py |  4 +-
 tests/dataconverter/test_validation.py    | 72 +++++++++++++++--------
 2 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py
index 66ab0b484..6de6e69d6 100644
--- a/src/pynxtools/dataconverter/validation.py
+++ b/src/pynxtools/dataconverter/validation.py
@@ -660,7 +660,7 @@ def handle_field(node: NexusNode, keys: Mapping[str, Any], prev_path: str):
             remove_from_not_visited(unit_path)
             if f"{variant}@units" not in keys and (
                 node.unit !=
"NX_TRANSFORMATION" - or mapping.get(f"{variant_path}/@transformations_type") + or mapping.get(f"{variant_path}/@transformation_type") in ("translation", "rotation") ): collector.collect_and_log( @@ -886,7 +886,7 @@ def is_documented(key: str, tree: NexusNode) -> bool: ): # Workaround for NX_UNITLESS of NX_TRANSFORMATION unit category if node.unit != "NX_TRANSFORMATION" or mapping.get( - f"{key}/@transformations_type" + f"{key}/@transformation_type" ) in ("translation", "rotation"): collector.collect_and_log( f"{key}", ValidationProblem.MissingUnit, node.unit diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index dc5555296..6ee2a9cdc 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -1191,30 +1191,40 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", - 1.0, + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", + "translation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", + "m", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@transformation_type", - "translation", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation]/@units", - "m", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@transformation_type", + "rotation", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]", - 1.0, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units", + "m", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@transformation_type", - "rotation", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation_no_units]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units", - "m", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation_no_units]/@transformation_type", + "translation", ), [ "The unit 'm' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation]/@units " "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. 
Based on the 'transformation_type' of the field " "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_translation], " "it should match with 'NX_ANGLE'.", + "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[translation_no_units] requires a unit " + "in the unit category NX_TRANSFORMATION.", ], id="nxtransformations-translation-units", ), @@ -1225,30 +1235,40 @@ def listify_template(data_dict: Template): alter_dict( alter_dict( alter_dict( - TEMPLATE, - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", - 1.0, + alter_dict( + alter_dict( + TEMPLATE, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]", + 1.0, + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", + "rotation", + ), + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", + "degree", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@transformation_type", - "rotation", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation]/@units", - "degree", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@transformation_type", + "translation", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]", - 1.0, + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@units", + "degree", ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@transformation_type", - "translation", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation_no_units]", + 1.0, ), - "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@units", - "degree", + "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation_no_units]/@transformation_type", + "rotation", ), [ "The unit 'degree' at /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation]/@units " "does not match with the unit category NX_TRANSFORMATION of 'AXISNAME'. 
Based on the 'transformation_type' of the field "
                 "/ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[wrong_rotation], "
                 "it should match with 'NX_LENGTH'.",
+                "Field /ENTRY[my_entry]/INSTRUMENT[my_instrument]/SOURCE[source]/TRANSFORMATIONS[transformations]/AXISNAME[rotation_no_units] requires a unit "
+                "in the unit category NX_TRANSFORMATION.",
             ],
             id="nxtransformations-rotation-units",
         ),

From b815c208b0de8754c9706d22f5e61299f24cb2ca Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Mon, 16 Jun 2025 09:40:37 +0200
Subject: [PATCH 113/118] fix paths

---
 .github/workflows/nomad-compatibility.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nomad-compatibility.yml b/.github/workflows/nomad-compatibility.yml
index ab220d87e..039ccab1e 100644
--- a/.github/workflows/nomad-compatibility.yml
+++ b/.github/workflows/nomad-compatibility.yml
@@ -30,12 +30,12 @@
       - name: Compare unit definition files
         run: |
-          diff --unified=3 nomad/units/default_en.txt src/pynxtools/units/default_en.txt || {
+          diff --unified=3 nomad/nomad/units/default_en.txt src/pynxtools/units/default_en.txt || {
             echo "::error file=src/pynxtools/units/default_en.txt::default_en.txt differs from NOMAD";
             exit 1;
           }
 
-          diff --unified=3 nomad/units/constants_en.txt src/pynxtools/units/constants_en.txt || {
+          diff --unified=3 nomad/nomad/units/constants_en.txt src/pynxtools/units/constants_en.txt || {
             echo "::error file=src/pynxtools/units/constants_en.txt::constants_en.txt differs from NOMAD";
             exit 1;
           }

From 6d28203aba7c475f8258649fc760a49d1ff21638 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Mon, 16 Jun 2025 15:59:07 +0200
Subject: [PATCH 114/118] don't check not-visited keys where the value is None

---
 src/pynxtools/dataconverter/validation.py | 8 ++++----
 src/pynxtools/units/__init__.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py
index 6de6e69d6..b497fb734 100644
--- a/src/pynxtools/dataconverter/validation.py
+++ b/src/pynxtools/dataconverter/validation.py
@@ -482,10 +482,6 @@ def handle_group(node: NexusGroup, keys: Mapping[str, Any], prev_path: str):
             and node.optionality == "required"
             and node.type in missing_type_err
         ):
-            # Remove any subkeys from further checking.
-            for key in mapping:
-                if key.startswith(f"{prev_path}/{node.name}"):
-                    remove_from_not_visited(key)
             collector.collect_and_log(
                 f"{prev_path}/{node.name}",
                 missing_type_err.get(node.type),
@@ -1366,6 +1362,10 @@ def check_reserved_prefix(
     check_attributes_of_nonexisting_field(tree)
 
     for not_visited_key in not_visited:
+        if mapping.get(not_visited_key) is None:
+            # This value is not really set. Skip checking its validity.
+ continue + # TODO: remove again if "@target"/"@reference" is sorted out by NIAC always_allowed_attributes = ("@target", "@reference") if not_visited_key.endswith(always_allowed_attributes): diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index de25625cc..221516175 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -42,7 +42,7 @@ class NXUnitSet: "NX_ANY": None, "NX_AREA": "[area]", "NX_CHARGE": "[charge]", - "NX_COUNT": "[count]", + "NX_COUNT": "1", "NX_CROSS_SECTION": "[area]", "NX_CURRENT": "[current]", "NX_DIMENSIONLESS": "1", From e873e0b7da084f3234ad595adea79e24f66e352d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 17 Jun 2025 11:40:55 +0200 Subject: [PATCH 115/118] use older uv setup for now --- .github/workflows/nomad-compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nomad-compatibility.yml b/.github/workflows/nomad-compatibility.yml index 039ccab1e..33c0f5c4f 100644 --- a/.github/workflows/nomad-compatibility.yml +++ b/.github/workflows/nomad-compatibility.yml @@ -46,7 +46,7 @@ jobs: echo "" >> test_plugins.txt echo "pynxtools@git+https://github.com/FAIRmat-NFDI/pynxtools.git@${{ github.head_ref || github.ref_name }}" >> test_plugins.txt - name: Install uv and set the python version to ${{ env.python-version }} - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@v5 with: python-version: ${{ env.python-version }} - name: Generate (dev-)requirements.txt from modified pyproject.toml From 7cd93fa96009a7d07cc12ab2a099792950e94356 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Fri, 20 Jun 2025 12:43:00 +0200 Subject: [PATCH 116/118] ruff and mypy fixes --- src/pynxtools/dataconverter/validation.py | 6 +++--- src/pynxtools/nomad/schema.py | 2 +- src/pynxtools/units/__init__.py | 9 +++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index b497fb734..37b68f35e 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -218,7 +218,7 @@ def best_namefit_of( def is_valid_unit_for_node( - node: NexusNode, unit: str, unit_path: str, hints: Dict[str, Any] + node: NexusNode, unit: str, unit_path: str, hints: dict[str, Any] ) -> None: """ Validate whether a unit string is compatible with the expected unit category for a given NeXus node. @@ -233,14 +233,14 @@ def is_valid_unit_for_node( node (NexusNode): The node containing unit metadata to validate against. unit (str): The unit string to validate (e.g., "m", "eV", "1", ""). unit_path (str): The path to the unit in the NeXus template, used for logging. - hints (Dict[str, Any]): Additional metadata used during validation. For example, + hints (dict[str, Any]): Additional metadata used during validation. For example, hints["transformation_type"] may be used to determine the expected unit category if the node represents a transformation. 
""" # Need to use a list as `NXtransformation` is a special use case if node.unit == "NX_TRANSFORMATION": if (transformation_type := hints.get("transformation_type")) is not None: - category_map: Dict[str, str] = { + category_map: dict[str, str] = { "translation": "NX_LENGTH", "rotation": "NX_ANGLE", } diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5e89b6629..35c9cced7 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -81,7 +81,6 @@ ) from exc from pynxtools import NX_DOC_BASES, get_definitions_url -from pynxtools.units import NXUnitSet, ureg from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path from pynxtools.nomad.utils import ( FIELD_STATISTICS, @@ -90,6 +89,7 @@ _rename_nx_for_nomad, get_quantity_base_name, ) +from pynxtools.units import NXUnitSet, ureg # URL_REGEXP from # https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url diff --git a/src/pynxtools/units/__init__.py b/src/pynxtools/units/__init__.py index 221516175..ef0fcb868 100644 --- a/src/pynxtools/units/__init__.py +++ b/src/pynxtools/units/__init__.py @@ -18,9 +18,10 @@ """A unit registry for NeXus units""" import os -from typing import Optional, Dict, Any +from typing import Any, Optional + from pint import UnitRegistry -from pint.errors import UndefinedUnitError, DefinitionSyntaxError, DimensionalityError +from pint.errors import DefinitionSyntaxError, DimensionalityError, UndefinedUnitError try: from nomad.units import ureg @@ -37,7 +38,7 @@ class NXUnitSet: - 'transformation' -> specially handled elsewhere """ - mapping: Dict[str, Optional[str]] = { + mapping: dict[str, Optional[str]] = { "NX_ANGLE": "[angle]", "NX_ANY": None, "NX_AREA": "[area]", @@ -73,7 +74,7 @@ class NXUnitSet: "NX_WAVENUMBER": "1 / [length]", } - _dimensionalities: Dict[str, Optional[Any]] = {} + _dimensionalities: dict[str, Optional[Any]] = {} @classmethod def get_dimensionality(cls, nx_unit: str) -> Optional[Any]: From 5e86fe5cd54b61ef336d6f97cf6f733a7c3d13f2 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 30 Jun 2025 10:22:08 +0200 Subject: [PATCH 117/118] update CITATION.cff --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index 45c9ff763..305923e84 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ message: If you use this software, please cite it using the metadata from this file. type: software -version: 0.10.8 +version: 0.10.9 authors: - given-names: Sherjeel family-names: Shabih From 90eadbe8d67ed3b117b1ee6f12b28344881ef59d Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 30 Jun 2025 10:36:16 +0200 Subject: [PATCH 118/118] add units files to MANIFEST --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index e0bb29d2a..79f8564b5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,4 +11,5 @@ include src/pynxtools/definitions/*.xsd include src/pynxtools/nexus-version.txt include src/pynxtools/remote_definitions_url.txt include src/pynxtools/definitions/NXDL_VERSION +include src/pynxtools/units/*.txt graft src/pynxtools/nomad/examples \ No newline at end of file