From 1f27802325d6dbc64b37183f8a74cd55d212f666 Mon Sep 17 00:00:00 2001 From: Theodore Chang Date: Mon, 23 Sep 2024 23:20:38 +0200 Subject: [PATCH 01/22] Use quantity name directly --- src/pynxtools/nomad/parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 8f26bdc61..2e65c6686 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -234,7 +234,7 @@ def _populate_data( "setting attribute attempt before creating quantity" ) current.m_set_quantity_attribute( - metainfo_def, attr_name, attr_value, quantity=quantity + quantity.name, attr_name, attr_value ) except Exception as e: self._logger.warning( @@ -308,26 +308,26 @@ def _populate_data( try: current.m_set(metainfo_def, field) current.m_set_quantity_attribute( - metainfo_def, "m_nx_data_path", hdf_node.name, quantity=field + data_instance_name, "m_nx_data_path", hdf_node.name ) current.m_set_quantity_attribute( - metainfo_def, "m_nx_data_file", self.nxs_fname, quantity=field + data_instance_name, "m_nx_data_file", self.nxs_fname ) if field_stats is not None: # TODO _add_additional_attributes function has created these nx_data_* # attributes speculatively already so if the field_stats is None # this will cause unpopulated attributes in the GUI current.m_set_quantity_attribute( - metainfo_def, "nx_data_mean", field_stats[0], quantity=field + data_instance_name, "nx_data_mean", field_stats[0] ) current.m_set_quantity_attribute( - metainfo_def, "nx_data_var", field_stats[1], quantity=field + data_instance_name, "nx_data_var", field_stats[1] ) current.m_set_quantity_attribute( - metainfo_def, "nx_data_min", field_stats[2], quantity=field + data_instance_name, "nx_data_min", field_stats[2] ) current.m_set_quantity_attribute( - metainfo_def, "nx_data_max", field_stats[3], quantity=field + data_instance_name, "nx_data_max", field_stats[3] ) except Exception as e: self._logger.warning( From 
c7fa0c144c39423fbbe805b92dc85bd704986467 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:16:56 +0200 Subject: [PATCH 02/22] fix link in docs --- docs/tutorial/converting-data-to-nexus.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/converting-data-to-nexus.md b/docs/tutorial/converting-data-to-nexus.md index 1558562f1..5c5510503 100644 --- a/docs/tutorial/converting-data-to-nexus.md +++ b/docs/tutorial/converting-data-to-nexus.md @@ -27,7 +27,7 @@ We will use the [XPS reader plugin](https://github.com/FAIRmat-NFDI/pynxtools-xp #### Steps -1. Download the example files from here: [Example files](https://download-directory.github.io/?url=https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/sle) +1. Download the example files from here: [Example files](https://download-directory.github.io/?url=https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/specs/sle) 2. **Extract** the zip and copy the files in your current working directory. 
You can find the working directory by typing the following in your terminal: ```console pwd From 6a57a61fe025e2c3266203a331f64e96ddd3d431 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:23:49 +0200 Subject: [PATCH 03/22] remove mpes and sts example README --- examples/README.md | 24 +++++++++--------------- examples/mpes/README.md | 35 ----------------------------------- examples/sts/README.md | 32 -------------------------------- 3 files changed, 9 insertions(+), 82 deletions(-) delete mode 100644 examples/mpes/README.md delete mode 100644 examples/sts/README.md diff --git a/examples/README.md b/examples/README.md index f9ba28974..6ff784932 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,20 +1,14 @@ ## Getting started -We offer examples of how you can convert your data (raw data, numerical data, metadata), +Here, we provide examples of how you can convert your data (raw data, numerical data, metadata), from your acquisition software or electronic lab notebook (ELN), into a NeXus/HDF5 file -using the [dataconverter](../pynxtools/dataconverter) tool. -This tool offers parsers/readers/data extractors for various experimental techniques via -technique specific plugins. +using the [built-in readers of pynxtools](https://fairmat-nfdi.github.io/pynxtools/reference/built-in-readers.html). -The examples contain code snippets for creating a NeXus/HDF5 file for the experimental technique -according to a standardized NeXus application definition (e.g. NXem, NXmpes, NXellipsometry, -NXapm, NXopt, NXxps, NXraman). -Respective [Jupyter Notebooks](https://jupyter.org/) are used for running these examples. +There is also [documentation](https://fairmat-nfdi.github.io/pynxtools/learn/dataconverter-and-readers.html) of the [dataconverter](../pynxtools/dataconverter) available. 
You can write a [reader](../pynxtools/dataconverter/readers) for your experimental technique +if it is not supported yet, see documentation [here](https://fairmat-nfdi.github.io/pynxtools/how-tos/build-a-plugin.html). -There is also a documentation of the [dataconverter](../pynxtools/dataconverter) available. -You can also write a [reader](../pynxtools/dataconverter/readers) for your experimental technique -if it is not supported yet. Feel also free to [contact](../README.md#questions-suggestions) -us if you need help. - -For giving specific feedback to specific parsers/readers/data extractors please contact the -respective developers directly and checkout the domain-specific pynxtools plugins: +Note that `pynxtools` offers a number of FAIRmat-supported parsers/readers/data extractors for various experimental techniques via +technique specific plugins. You can find the list [here](https://fairmat-nfdi.github.io/pynxtools/reference/plugins.html). You can find +examples for using each of them in the individual repositories and in their documentation. +For giving feedback to specific parsers/readers/data extractors, please checkout the domain-specific `pynxtools` plugins and their examples +or contact the respective developers directly. diff --git a/examples/mpes/README.md b/examples/mpes/README.md deleted file mode 100644 index 2fe4d27a1..000000000 --- a/examples/mpes/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# What is MPES? - -The [NXmpes](https://fairmat-experimental.github.io/nexus-fairmat-proposal/9636feecb79bb32b828b1a9804269573256d7696/classes/contributed_definitions/NXmpes.html#nxmpes) application definition is an umbrella definition for all photo-emission related techniques, such as ARPES or XPS. - -# How to use it? - -This is an example to use the dataconvert with the `mpes` reader and the `NXmpes` application definition. 
-If you want to use some example data you can find small example files in [`tests/data/dataconverter/readers/mpes`](https://github.com/FAIRmat-NFDI/pynxtools/tree/master/tests/data/dataconverter/readers/mpes). - -```shell -dataconverter --reader mpes \\ - --nxdl NXmpes \\ - xarray_saved_small_calibration \\ - config_file.json \\ - eln_data.yaml \\ - --output mpes_example.nxs -``` - -The reader is a tailored parser for research data in a common format. This particular example is able to read and map hdf5 files, as well as json and yaml files. Feel free to contact FAIRmat if you want to create a parser for your research data. - -For XPS data, you may install the [`pnyxtoools-xps`](https://github.com/FAIRmat-NFDI/pynxtools-xps) plugin and use its [example data in XML format](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/xml) with the command - -```shell -dataconverter --reader xps \\ - --nxdl NXmpes \\ - eln_data.yaml \\ - In-situ_PBTTT_XPS_SPECS.xml \\ - --output xps_example.nxs -``` - -# Are there detailed examples? - -Yes, [here](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/-/tree/develop/docker/mpes) you can find exhaustive examples how to use `pynxtools` for your ARPES research data pipeline. - -There is also an [example](https://gitlab.mpcdf.mpg.de/nomad-lab/north/xps) for using `pynxtools` and `pnyxtoools-xps` for an XPS pipeline. diff --git a/examples/sts/README.md b/examples/sts/README.md deleted file mode 100644 index 9a0d0409e..000000000 --- a/examples/sts/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# STS Reader -***Note: Though the reader name is STS reader it also supports STM type experiment. This is the first version of the reader according to the NeXus application definition [NXsts](https://github.com/FAIRmat-NFDI/nexus_definitions/blob/fairmat/contributed_definitions/NXsts.nxdl.xml) which is a generic template of concepts' definition for STS and STM experiments. 
Later on, both application definitions and readers specific to the STM, STS and AFM will be available. To stay upto date keep visiting this page time to time. From now onwards we will mention STS referring both STM and STS.*** - -Main goal of STS Reader is to transform different file formats from diverse STS lab into STS community standard [STS application definition](https://github.com/FAIRmat-NFDI/nexus_definitions/blob/fairmat/contributed_definitions/NXsts.nxdl.xml), community defined template that define indivisual concept associated with STS experiment constructed by SPM community. -## STS Example -It has diverse examples from several versions (Generic 5e and Generic 4.5) of Nanonis software for STS experiments at [https://gitlab.mpcdf.mpg.de](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/-/tree/develop/docker/sts). But, to utilize that examples one must have an account at https://gitlab.mpcdf.mpg.de. If still you want to try the examples from the sts reader out, please reach out to [Rubel Mozumder](mozumder@physik.hu-berlin.de) or the docker container (discussed below). - -To get a detailed overview of the sts reader implementation visit [pynxtools-stm](https://github.com/FAIRmat-NFDI/pynxtools-stm). - -## STS deocker image -STS docker image contains all prerequisite tools (e.g. jupyter-notebook) and library to run STS reader. To use the image user needs to [install docker engine](https://docs.docker.com/engine/install/). 
- -STS Image: `gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/sts-jupyter:latest` - -To run the STS image as a docker container copy the code below in a file `docker-compose.yaml` - -```docker -# docker-compose.yaml - -version: "3.9" - -services: - sts: - image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/sts-jupyter:latest - ports: - - 8888:8888 - volumes: - - ./example:/home/jovyan/work_dir - working_dir: /home/jovyan/work_dir -``` - -and launch the file from the same directory with `docker compose up` command. From 557f6df78b4b07c9d1dfdc3fcba34905083e939c Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:16:12 +0200 Subject: [PATCH 04/22] update links in examples README --- examples/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/README.md b/examples/README.md index 6ff784932..a8269f2c9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,8 +3,7 @@ Here, we provide examples of how you can convert your data (raw data, numerical from your acquisition software or electronic lab notebook (ELN), into a NeXus/HDF5 file using the [built-in readers of pynxtools](https://fairmat-nfdi.github.io/pynxtools/reference/built-in-readers.html). -There is also [documentation](https://fairmat-nfdi.github.io/pynxtools/learn/dataconverter-and-readers.html) of the [dataconverter](../pynxtools/dataconverter) available. You can write a [reader](../pynxtools/dataconverter/readers) for your experimental technique -if it is not supported yet, see documentation [here](https://fairmat-nfdi.github.io/pynxtools/how-tos/build-a-plugin.html). +There is also [documentation](https://fairmat-nfdi.github.io/pynxtools/learn/dataconverter-and-readers.html) of the [dataconverter](../src/pynxtools/dataconverter/README.md) available. 
You can write a reader plugin if the data for your experimental technique is not supported yet, see documentation [here](https://fairmat-nfdi.github.io/pynxtools/how-tos/build-a-plugin.html). Note that `pynxtools` offers a number of FAIRmat-supported parsers/readers/data extractors for various experimental techniques via technique specific plugins. You can find the list [here](https://fairmat-nfdi.github.io/pynxtools/reference/plugins.html). You can find From 6f7b703b7c89bcf6702691533bc0d01cc2455a9a Mon Sep 17 00:00:00 2001 From: sanbrock <45483558+sanbrock@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:17:57 +0200 Subject: [PATCH 05/22] replace BasicEln (#451) * code for being added when NOMAD GUI supports mixed use of use_full_storage in quantities * removing NXsuffices; removing nexus section and using it inside data * fixing tests * fix for handling problem where NO NXentry found * rename group names if they are used in the BaseSection class * restructure nomad tests * temporarily install nomad feature branch in tests * add test for schema * ignore myp error on nexus_schema.NeXus * use renaming function in tests * bring XML_NAMESPACES back to schema * rename to __XML_NAMESPACES * include field and attribute renaming, capitalization * small docs change * remove capitalization * temporarily install nomad feature branch in tests * capitalise NX class names in NOMAD * instead of using the buggy m_set_quantity_attribute of a section, use m_set_attribute directly on the quantity * update nomad branch to check against * code simplification according to review suggestion --------- Co-authored-by: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> --- .github/workflows/pytest.yml | 2 +- src/pynxtools/nexus/nexus.py | 7 +- src/pynxtools/nomad/parser.py | 72 +++++------------ src/pynxtools/nomad/schema.py | 99 +++++++++++++---------- src/pynxtools/nomad/utils.py | 65 ++++++++++++++-- tests/data/nomad/NXlauetof.hdf5 | Bin 0 -> 21472 bytes 
tests/nexus/test_nexus.py | 8 +- tests/nomad/test_metainfo_schema.py | 117 ++++++++++++++++++++++++++++ tests/nomad/test_parsing.py | 109 +++++--------------------- 9 files changed, 282 insertions(+), 197 deletions(-) create mode 100644 tests/data/nomad/NXlauetof.hdf5 create mode 100644 tests/nomad/test_metainfo_schema.py diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 034091f82..1a14e2804 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -36,7 +36,7 @@ jobs: - name: Install nomad if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.12'}}" run: | - uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git + uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@Sprint_Nomad_BaseSection - name: Install pynx run: | uv pip install ".[dev]" diff --git a/src/pynxtools/nexus/nexus.py b/src/pynxtools/nexus/nexus.py index c4f06d4fe..679d4f94d 100644 --- a/src/pynxtools/nexus/nexus.py +++ b/src/pynxtools/nexus/nexus.py @@ -5,8 +5,7 @@ import os import sys from functools import lru_cache - -from typing import Optional, Union, List, Any +from typing import Any, List, Optional, Union import click import h5py @@ -16,6 +15,7 @@ from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( add_base_classes, check_attr_name_nxdl, + decode_or_not, get_best_child, get_hdf_info_parent, get_local_name_from_xml, @@ -29,7 +29,6 @@ try_find_units, walk_elist, write_doc_string, - decode_or_not, ) @@ -378,6 +377,8 @@ def get_inherited_hdf_nodes( # let us start with the given definition file if hdf_node is None: raise ValueError("hdf_node must not be None") + if nx_name == "NO NXentry found": + return (None, [], []) elist = [] # type: ignore[var-annotated] add_base_classes(elist, nx_name, elem) nxdl_elem_path = [elist[0]] diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 2e65c6686..ea3dd3c3e 100644 --- 
a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -24,7 +24,8 @@ try: from ase.data import chemical_symbols from nomad.atomutils import Formula - from nomad.datamodel import EntryArchive + from nomad.datamodel import EntryArchive, EntryMetadata + from nomad.datamodel.data import EntryData from nomad.datamodel.results import Material, Results from nomad.metainfo import MSection from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name @@ -39,23 +40,8 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus - -__REPLACEMENT_FOR_NX = "BS" -__REPLACEMENT_LEN = len(__REPLACEMENT_FOR_NX) - - -def _rename_nx_to_nomad(name: str) -> Optional[str]: - """ - Rename the NXDL name to NOMAD. - For example: NXdata -> BSdata, - except NXobject -> NXobject - """ - if name == "NXobject": - return name - if name is not None: - if name.startswith("NX"): - return name.replace("NX", __REPLACEMENT_FOR_NX) - return name +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX +from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad def _to_group_name(nx_node: ET.Element): @@ -63,9 +49,7 @@ def _to_group_name(nx_node: ET.Element): Normalise the given group name """ # assuming always upper() is incorrect, e.g. NXem_msr is a specific one not EM_MSR! 
- grp_nm = nx_node.attrib.get( - "name", nx_node.attrib["type"][__REPLACEMENT_LEN:].upper() - ) + grp_nm = nx_node.attrib.get("name", nx_node.attrib["type"][2:].upper()) return grp_nm @@ -109,6 +93,8 @@ def _to_section( # no need to change section for quantities and attributes return current + nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True) + # for groups, get the definition from the package new_def = current.m_def.all_sub_sections[nomad_def_name] @@ -233,9 +219,7 @@ def _populate_data( raise Warning( "setting attribute attempt before creating quantity" ) - current.m_set_quantity_attribute( - quantity.name, attr_name, attr_value - ) + quantity.m_set_attribute(attr_name, attr_value) except Exception as e: self._logger.warning( "error while setting attribute", @@ -307,28 +291,16 @@ def _populate_data( # may need to check if the given unit is in the allowable list try: current.m_set(metainfo_def, field) - current.m_set_quantity_attribute( - data_instance_name, "m_nx_data_path", hdf_node.name - ) - current.m_set_quantity_attribute( - data_instance_name, "m_nx_data_file", self.nxs_fname - ) + field.m_set_attribute("m_nx_data_path", hdf_node.name) + field.m_set_attribute("m_nx_data_file", self.nxs_fname) if field_stats is not None: # TODO _add_additional_attributes function has created these nx_data_* # attributes speculatively already so if the field_stats is None # this will cause unpopulated attributes in the GUI - current.m_set_quantity_attribute( - data_instance_name, "nx_data_mean", field_stats[0] - ) - current.m_set_quantity_attribute( - data_instance_name, "nx_data_var", field_stats[1] - ) - current.m_set_quantity_attribute( - data_instance_name, "nx_data_min", field_stats[2] - ) - current.m_set_quantity_attribute( - data_instance_name, "nx_data_max", field_stats[3] - ) + field.m_set_attribute("nx_data_mean", field_stats[0]) + field.m_set_attribute("nx_data_var", field_stats[1]) + field.m_set_attribute("nx_data_min", field_stats[2]) + 
field.m_set_attribute("nx_data_max", field_stats[3]) except Exception as e: self._logger.warning( "error while setting field", @@ -349,7 +321,8 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 hdf_path: str = hdf_info["hdf_path"] hdf_node = hdf_info["hdf_node"] if nx_def is not None: - nx_def = _rename_nx_to_nomad(nx_def) + nx_def = rename_nx_for_nomad(nx_def) + if nx_path is None: return @@ -489,8 +462,9 @@ def parse( child_archives: Dict[str, EntryArchive] = None, ) -> None: self.archive = archive - self.archive.m_create(nexus_schema.NeXus) # type: ignore # pylint: disable=no-member - self.nx_root = self.archive.nexus + self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member + + self.archive.data = self.nx_root self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() @@ -500,14 +474,10 @@ def parse( # TODO: domain experiment could also be registered if archive.metadata is None: - return + archive.metadata = EntryMetadata() # Normalise experiment type - app_def: str = "" - for var in dir(archive.nexus): - if getattr(archive.nexus, var, None) is not None: - app_def = var - break + app_def = str(self.nx_root).split("(")[1].split(")")[0].split(",")[0] if archive.metadata.entry_type is None: archive.metadata.entry_type = app_def archive.metadata.domain = "nexus" diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 5fbbb6ac8..98dd86f85 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -31,7 +31,8 @@ try: from nomad import utils - from nomad.datamodel import EntryArchive + from nomad.datamodel import EntryArchive, EntryMetadata + from nomad.datamodel.data import EntryData from nomad.datamodel.metainfo.basesections import ( BaseSection, Component, @@ -73,7 +74,7 @@ from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path -from pynxtools.nomad.utils import 
__REPLACEMENT_FOR_NX, __rename_nx_to_nomad +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_for_nomad # __URL_REGEXP from # https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url @@ -82,6 +83,7 @@ r"(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+" r'(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))' ) + # noinspection HttpUrlsUsage __XML_NAMESPACES = {"nx": "http://definition.nexusformat.org/nxdl/3.1"} @@ -93,11 +95,11 @@ __logger = get_logger(__name__) __BASESECTIONS_MAP: Dict[str, Any] = { - "BSfabrication": [Instrument], - "BSsample": [CompositeSystem], - "BSsample_component": [Component], - "BSidentifier": [EntityReference], - # "BSobject": BaseSection, + __rename_nx_for_nomad("NXfabrication"): [Instrument], + __rename_nx_for_nomad("NXsample"): [CompositeSystem], + __rename_nx_for_nomad("NXsample_component"): [Component], + __rename_nx_for_nomad("NXidentifier"): [EntityReference], + # "object": BaseSection, } @@ -293,8 +295,6 @@ def __to_section(name: str, **kwargs) -> Section: class nexus definition. 
""" - # name = __rename_nx_to_nomad(name) - if name in __section_definitions: section = __section_definitions[name] section.more.update(**kwargs) @@ -372,7 +372,7 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit todo: account for more attributes of attribute, e.g., default, minOccurs """ for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES): - name = attribute.get("name") + "__attribute" + name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True) nx_enum = __get_enumeration(attribute) if nx_enum: @@ -465,7 +465,8 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity: # name assert "name" in xml_attrs, "Expecting name to be present" - name = xml_attrs["name"] + "__field" + + name = __rename_nx_for_nomad(xml_attrs["name"], is_field=True) # type nx_type = xml_attrs.get("type", "NX_CHAR") @@ -548,10 +549,11 @@ def __create_group(xml_node: ET.Element, root_section: Section): xml_attrs = group.attrib assert "type" in xml_attrs, "Expecting type to be present" - nx_type = __rename_nx_to_nomad(xml_attrs["type"]) + nx_type = __rename_nx_for_nomad(xml_attrs["type"]) nx_name = xml_attrs.get("name", nx_type) - group_section = Section(validate=VALIDATE, nx_kind="group", name=nx_name) + section_name = __rename_nx_for_nomad(nx_name, is_group=True) + group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name) __attach_base_section(group_section, root_section, __to_section(nx_type)) __add_common_properties(group, group_section) @@ -559,10 +561,11 @@ def __create_group(xml_node: ET.Element, root_section: Section): nx_name = xml_attrs.get( "name", nx_type.replace(__REPLACEMENT_FOR_NX, "").upper() ) + subsection_name = __rename_nx_for_nomad(nx_name, is_group=True) group_subsection = SubSection( section_def=group_section, nx_kind="group", - name=nx_name, + name=subsection_name, repeats=__if_repeats(nx_name, xml_attrs.get("maxOccurs", "0")), variable=__if_template(nx_name), ) @@ -604,7 
+607,7 @@ def __create_class_section(xml_node: ET.Element) -> Section: nx_type = xml_attrs["type"] nx_category = xml_attrs["category"] - nx_name = __rename_nx_to_nomad(nx_name) + nx_name = __rename_nx_for_nomad(nx_name) class_section: Section = __to_section( nx_name, nx_kind=nx_type, nx_category=nx_category ) @@ -612,7 +615,7 @@ def __create_class_section(xml_node: ET.Element) -> Section: nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection]) if "extends" in xml_attrs: - nx_base_sec = __to_section(__rename_nx_to_nomad(xml_attrs["extends"])) + nx_base_sec = __to_section(__rename_nx_for_nomad(xml_attrs["extends"])) class_section.base_sections = [nx_base_sec] + [ cls.m_def for cls in nomad_base_sec_cls ] @@ -779,7 +782,9 @@ def init_nexus_metainfo(): # We take the application definitions and create a common parent section that allows # to include nexus in an EntryArchive. - nexus_section = Section(validate=VALIDATE, name=__GROUPING_NAME) + nexus_section = Section( + validate=VALIDATE, name=__GROUPING_NAME, label=__GROUPING_NAME + ) # try: # load_nexus_schema('') @@ -791,10 +796,6 @@ def init_nexus_metainfo(): # pass nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) - EntryArchive.nexus = SubSection(name="nexus", section_def=nexus_section) - EntryArchive.nexus.init_metainfo() - EntryArchive.m_def.sub_sections.append(EntryArchive.nexus) - nexus_metainfo_package.section_definitions.append(nexus_section) # We need to initialize the metainfo definitions. 
This is usually done automatically, @@ -813,6 +814,9 @@ def init_nexus_metainfo(): sections.append(section) for section in sections: + # TODO: add when quantities with mixed use_full_storage are supported by GUI + # if not (str(section).startswith("nexus.")): + # continue __add_additional_attributes(section) for quantity in section.quantities: __add_additional_attributes(quantity) @@ -827,16 +831,20 @@ def init_nexus_metainfo(): init_nexus_metainfo() -def normalize_BSfabrication(self, archive, logger): - """Normalizer for BSfabrication section.""" - current_cls = __section_definitions["BSfabrication"].section_cls +def normalize_fabrication(self, archive, logger): + """Normalizer for fabrication section.""" + current_cls = __section_definitions[ + __rename_nx_for_nomad("NXfabrication") + ].section_cls super(current_cls, self).normalize(archive, logger) self.lab_id = "Hello" -def normalize_BSsample_component(self, archive, logger): - """Normalizer for BSsample_component section.""" - current_cls = __section_definitions["BSsample_component"].section_cls +def normalize_sample_component(self, archive, logger): + """Normalizer for sample_component section.""" + current_cls = __section_definitions[ + __rename_nx_for_nomad("NXsample_component") + ].section_cls if self.name__field: self.name = self.name__field if self.mass__field: @@ -845,24 +853,31 @@ def normalize_BSsample_component(self, archive, logger): super(current_cls, self).normalize(archive, logger) -def normalize_BSsample(self, archive, logger): - """Normalizer for BSsample section.""" - current_cls = __section_definitions["BSsample"].section_cls +def normalize_sample(self, archive, logger): + """Normalizer for sample section.""" + current_cls = __section_definitions[__rename_nx_for_nomad("NXsample")].section_cls if self.name__field: self.name = self.name__field - # one could also copy local ids to BSidentifier for search purposes + # one could also copy local ids to identifier for search purposes 
super(current_cls, self).normalize(archive, logger) -def normalize_BSidentifier(self, archive, logger): - """Normalizer for BSidentifier section.""" +def normalize_identifier(self, archive, logger): + """Normalizer for identifier section.""" def create_Entity(lab_id, archive, f_name): + # TODO: use this instead of BasicEln() when use_full_storage is properly supported by the GUI + # entitySec = Entity() + # entitySec.lab_id = lab_id + # entity = EntryArchive ( + # data = entitySec, + # m_context=archive.m_context, + # metadata=EntryMetadata(entry_type = "identifier"), #upload_id=archive.m_context.upload_id, + # ) + # with archive.m_context.raw_file(f_name, 'w') as f_obj: + # json.dump(entity.m_to_dict(with_meta=True), f_obj) entity = BasicEln() entity.lab_id = lab_id - entity.entity = Entity() - entity.entity.lab_id = lab_id - with archive.m_context.raw_file(f_name, "w") as f_obj: json.dump( {"data": entity.m_to_dict(with_meta=True, include_derived=True)}, @@ -880,7 +895,9 @@ def get_entry_reference(archive, f_name): return f"/entries/{entry_id}/archive#/data" - current_cls = __section_definitions["BSidentifier"].section_cls + current_cls = __section_definitions[ + __rename_nx_for_nomad("NXidentifier") + ].section_cls # super(current_cls, self).normalize(archive, logger) if self.identifier__field: logger.info(f"{self.identifier__field} - identifier received") @@ -896,10 +913,10 @@ def get_entry_reference(archive, f_name): __NORMALIZER_MAP: Dict[str, Any] = { - "BSfabrication": normalize_BSfabrication, - "BSsample": normalize_BSsample, - "BSsample_component": normalize_BSsample_component, - "BSidentifier": normalize_BSidentifier, + __rename_nx_for_nomad("NXfabrication"): normalize_fabrication, + __rename_nx_for_nomad("NXsample"): normalize_sample, + __rename_nx_for_nomad("NXsample_component"): normalize_sample_component, + __rename_nx_for_nomad("NXidentifier"): normalize_identifier, } # Handling nomad BaseSection and other inherited Section from BaseSection diff 
--git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 203e52bc7..794a94e60 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -18,18 +18,67 @@ from typing import Optional -__REPLACEMENT_FOR_NX = "BS" +__REPLACEMENT_FOR_NX = "" +# This is a list of NeXus group names that are not allowed because they are defined as quantities in the BaseSection class. +UNALLOWED_GROUP_NAMES = {"name", "datetime", "lab_id", "description"} -def __rename_nx_to_nomad(name: str) -> Optional[str]: + +def __rename_classes_in_nomad(nx_name: str) -> Optional[str]: + """ + Modify group names that conflict with NOMAD due to being defined as quantities + in the BaseSection class by appending '__group' to those names. + + Some quantity names are reserved in the BaseSection class (or even higher up in metainfo), + and thus require renaming to avoid collisions. + + Args: + nx_name (str): The original group name. + + Returns: + Optional[str]: The modified group name with '__group' appended if it's in + UNALLOWED_GROUP_NAMES, or the original name if no change is needed. + """ + return nx_name + "__group" if nx_name in UNALLOWED_GROUP_NAMES else nx_name + + +def __rename_nx_for_nomad( + name: str, + is_group: bool = False, + is_field: bool = False, + is_attribute: bool = False, +) -> Optional[str]: """ - Rename the NXDL name to NOMAD. - For example: NXdata -> BSdata, - except NXobject -> NXobject + Rename NXDL names for compatibility with NOMAD, applying specific rules + based on the type of the NeXus concept (group, field, or attribute). + + - NXobject is unchanged. + - NX-prefixed names (e.g., NXdata) are renamed by replacing 'NX' with a custom string. + - Group names are passed to __rename_classes_in_nomad(), and the result is capitalized. + - Fields and attributes have '__field' or '__attribute' appended, respectively. + + Args: + name (str): The NXDL name. + is_group (bool): Whether the name represents a group. 
+ is_field (bool): Whether the name represents a field. + is_attribute (bool): Whether the name represents an attribute. + + Returns: + Optional[str]: The renamed NXDL name, with group names capitalized, + or None if input is invalid. """ if name == "NXobject": return name - if name is not None: - if name.startswith("NX"): - return name.replace("NX", __REPLACEMENT_FOR_NX) + + if name and name.startswith("NX"): + name = __REPLACEMENT_FOR_NX + name[2:] + name = name[0].upper() + name[1:] + + if is_group: + name = __rename_classes_in_nomad(name) + elif is_field: + name += "__field" + elif is_attribute: + name += "__attribute" + return name diff --git a/tests/data/nomad/NXlauetof.hdf5 b/tests/data/nomad/NXlauetof.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..40b524d6e711b24e0cbc66df5feed7bdd2b0b44f GIT binary patch literal 21472 zcmeHP%X$}LvF0#6U8=6`{&iJN^<(tE z;^NKICqFr9;g!u=N303?$hV*9dU;n-IM2UV=^;bu6H1?ssfUb}weYudM4uq}jjt7< zO7!(ROG_4TvV2bIXY1ur^^|48C_PjN6qgEX*x@qunzi&f=^2CJv_gLyGOUk9{$ap0 zV;y^|{!SiO0(VGmlKwvXL@}}*4A`7ev7addy8h;=A5X}yakt!75c=IFsIj61cC+I- zmQ`zmVz)i3Wj7&bS#@W_ZMmV_ZYe(2cU`~kmR1)tpuy}n#dt4&k@5|#9g1soX9W1Y z^;~;zowAgN$yg_>vw(7`T&a0>5OBGVGa32DLw}{lNR0%KmvkdEI7o}+os#mF$`#-F zZ`bvmx)hlt?+9{{Jk2%A^7?IgVRc2eg&b;6yMVb=RvwjQN%=qrC6#(e>I3aoSE<~q z!OYJomt%EPfd|%8cS5HYwtXe%7|GMik=x}iLOEq2)MbcY)W!9jpggCgaJAWs(h*Mqcf3noMC@ABOS|E#LxJ@gFkeePNlt3+3?)WMo8u6DP=jU8yjxTwLMXC%H2Se z%8M%^s5P9XEyA{V)NpGJ5jNaFY`C5yAiCIS`^`XDeg2VKJn%k)kOWfE@$8oE zi%W}wn?EfIo+s?qriTozx~RKBXt!#R^PLUH$LhicQ~>}$uDv01QE95@s+O)pkxtvQ z{fZPVCc{TkPH10%);qq_BSt_Yp=mXzJ}s1>zmq&3ca&#;>o&V#13*zS`;qexlEZ#z z(Ftn4+X>x=j=)$9JEYz~s^V+J|0AVXZ`*gF^EL)+qG^Y|yCv*et?k#{)+WMpqZ{B8 zg16#4+{V#(2nB#Wm=E{Zeba6>p%H~m+igu5dIY@C z#ujL!?LnQ7d^Zf877wd+U8{GY-ED=~5715D37~5aZLjNy`*r}mhmJWeXrC`?Uc2@{ zpy_}r!QHOTRn724`O8FSc?(zXeG@o-aIfP!UJ$yq2abQw>prl7;@R7qJ^)w0cWL(0+{N7N z#oSk-v_%2Xy;3jX&Rv=Y5dNjn 
zbJMxGX*PEFZI*!{zELtlV1&R3fe`||2xMT!g}$HXYrM{prHR!Ju_vu>KBhpIr;bY^aKD0{)`959eF0~Gga%GM=e3+-p~Y^C$^Tt;^aW&Mp ztH;z4mN!ZHBK2#M%6I62`M#t44o#dHkH_a#0p@#3`RBYKU}(><$!BO^PVZl|9y!?h zrd%&0Fvp=2k-USiZ(@vV+MjtY6kR^q0H>_H zq-3n)a$R0c(=01#Jzm;5=|9i4NVH7!{h4R<-ev#rit<@rNQxI8J?|pGam$O_id3av zB7ZZO|C0LyG=4axjN~2Mf8m_g6o2LEJ=U-P+A63H(`$!%KS~Y@0_?xa)ZuwbUukg# zuLs4}Br9r7QWd~8yb7-2QF-=5g7SL8af2SmM0PTm-AR)pyTd(%NZ!Hi4#k@6ZinK; ze(mlxEvyM0m{I;vAfVeF&o8Q^M+`!$c4%=IjA7{JM=Qoj^ z3}$yHu#seU_#DYQxZUBq)D*AJ(tNC6yL(Ctz>GIGdvqNh1laEMcztbUxEOGpIeACb zQ>E%6I~mOG`E`xZRQdFKNDn?|Sw>cxEO|9Nw2LQ@6OEj;P`2_q#CR94;l~2(1PL%j5X4XhxiO5wpgu0AK%>v-r<9Usyln`lSIk# zfx$f-im*oX=X@~Ez(-xQ-tfUo17DTs=lCF)fp3=RYkaWKz?Ucb_k3W`z}GK%91ui> zI3uo87@N1|-m~U@!p$$ZB0V@J<6>NI!!y1~adBJ^J^+4>=kpY(n&SEI4Eh+w^GbiC zcz%M8=<%O2jV5?^$EQmEwE;Zw%Z%sb z(D1}9%QW%iZ#m-qik%IYb&^w`s!At<8C=#C2+r$>kr#no=>Z@Y}0ydo*DL; zCh2<`dV=4y4A1Unho&d!Qqt5DA?4-aj(F_veGs?1^Rm2JFUuSJ82d%*wye(T-|psz zh9`b&lqR0IbvUSgO4!K1;#PLQe#}};!V^Tn6~!xg!c(rqslsrN+P%McCB9zwxs+3s zGSi4x;#)#0-X5oStz|XxvKr%!d-se#2rzI?68*)3f#DZM?pLoGiF&2S!D~aaQyil- z?es!yO#8*-#lG<<4-JnmQpO{Gw6kA4=4qnZh+&D3QT(L)o$Ap14hl|FKc-`SQhw*i z?mWM!+*^#J_c2S^my5l0J;(lOep&yeH!mn#zST?rx#jxRtM3!_O7}aRq1o9CfSjgY z&Bg`||NlU~m=ofq?ANMLmC8o;rQ@)bDh~1aGk!ytCJySeGqx1fvEjwzk5k1(wwjub zpWLO13ohkFaoGOiak(~0?ZR)amERiMjhFcUp~e*4v`Rr4NLV*@zI?=TD4(M7Q0JIK z+uwA;3n{6piRj>@yge>;qPzUdv`u}ccLM4xSq88^CQUS@xdj)Vbv4Y1N`=% z>q#58cs;nq!}X+%+j)@9xaDO%Z(si&)NbTi9~%Dx$mjKa*SJ|K~NReEhbG_8FHVJVx?P%l0*{bhaxG f;g1sNpV^Y+#p6<(66x~b?>Vnj{xg_7JR Date: Thu, 17 Oct 2024 10:31:24 +0200 Subject: [PATCH 06/22] add dev install notes for verify_nexus in validation how-to --- docs/how-tos/validate-nexus-file.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/how-tos/validate-nexus-file.md b/docs/how-tos/validate-nexus-file.md index 2534bdb33..a3f936fda 100644 --- a/docs/how-tos/validate-nexus-file.md +++ b/docs/how-tos/validate-nexus-file.md @@ -120,6 +120,23 @@ Options: --help Show this message and exit. 
``` +*Development verion installation* + +If this installation procedure above does not work, you can use the devlopment installation by using git: +``` +python -m venv .py39 +source .py39/bin/activate +git clone https://github.com/FAIRmat-NFDI/pynxtools.git +cd pynxtools/ +git checkout hdf-based-validation +git submodule sync --recursive +git submodule update --init --recursive --jobs=4 +python -m pip install --upgrade pip +python -m pip install -e . +python -m pip install -e ".[dev]" +verify_nexus --help +``` + ### Using *verify_nexus* From 204deed688b1da056a4884a41b810e13616c304c Mon Sep 17 00:00:00 2001 From: Ron <139139971+RonHildebrandt@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:58:05 +0200 Subject: [PATCH 07/22] Apply suggestions from code review typos Co-authored-by: Laurenz Rettig <53396064+rettigl@users.noreply.github.com> --- docs/how-tos/validate-nexus-file.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/how-tos/validate-nexus-file.md b/docs/how-tos/validate-nexus-file.md index a3f936fda..fd640d5c2 100644 --- a/docs/how-tos/validate-nexus-file.md +++ b/docs/how-tos/validate-nexus-file.md @@ -120,9 +120,9 @@ Options: --help Show this message and exit.
``` -*Development verion installation* +*Development version installation* -If this installation procedure above does not work, you can use the devlopment installation by using git: +If this installation procedure above does not work, you can use the development installation by using git: ``` python -m venv .py39 source .py39/bin/activate From 58ce701e94af75e7646bc2e4601bde5ee5153dbf Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:23:27 +0000 Subject: [PATCH 08/22] bump ruff to v0.6 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b2b22876d..d421d75eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ docs = [ ] dev = [ "mypy", - "ruff==0.5.5", + "ruff>=0.6", "pytest", "pytest-timeout", "pytest-cov", From 654324d76760529f21500cc3140b61c9436ecb2b Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:39:58 +0200 Subject: [PATCH 09/22] update dev-requirements --- dev-requirements.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 2035d46d1..8c5095f55 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -21,7 +21,11 @@ click==8.1.7 click-default-group==1.2.4 # via pynxtools (pyproject.toml) colorama==0.4.6 - # via mkdocs-material + # via + # click + # mkdocs + # mkdocs-material + # pytest contourpy==1.3.0 # via matplotlib coverage==7.6.1 @@ -170,7 +174,7 @@ regex==2024.9.11 # via mkdocs-material requests==2.32.3 # via mkdocs-material -ruff==0.5.5 +ruff==0.7.0 # via pynxtools (pyproject.toml) scipy==1.14.1 # via ase From 6d9583d3d63e9fb244f79728fccd554346ababd9 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:49:10 +0200 Subject: [PATCH 10/22] update ruff in pre-commit --- 
.pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 81a689386..8f97e0730 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.5 + rev: v0.7.0 hooks: # Run the linter. - id: ruff From 62f8b2574fae1d0d5f6975cea54000a79c7298b1 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Oct 2024 16:01:40 +0200 Subject: [PATCH 11/22] update dev-requirements --- dev-requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 8c5095f55..ef602710f 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile --extra=dev --extra=docs --output-file=dev-requirements.txt pyproject.toml +# uv pip compile --universal -p 3.11 --extra=dev --extra=docs --output-file=dev-requirements.txt pyproject.toml anytree==2.12.1 # via pynxtools (pyproject.toml) ase==3.23.0 @@ -186,6 +186,8 @@ structlog==24.4.0 # via pynxtools (pyproject.toml) termcolor==2.4.0 # via mkdocs-macros-plugin +tomli==2.0.2 ; python_full_version == '3.11' + # via coverage types-pytz==2024.2.0.20240913 # via pynxtools (pyproject.toml) types-pyyaml==6.0.12.20240808 From 6cc76a1b745241780e6f7281fd3ac97c826a17bc Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 22 Oct 2024 16:03:55 +0200 Subject: [PATCH 12/22] update dev-requirements --- dev-requirements.txt | 65 +++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index ef602710f..42ff92ac9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -10,7 +10,7 @@ certifi==2024.8.30 # via 
requests cfgv==3.4.0 # via pre-commit -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests click==8.1.7 # via @@ -28,23 +28,27 @@ colorama==0.4.6 # pytest contourpy==1.3.0 # via matplotlib -coverage==7.6.1 +coverage==7.6.4 # via pytest-cov cycler==0.12.1 # via matplotlib -distlib==0.3.8 +distlib==0.3.9 # via virtualenv -filelock==3.16.0 +filelock==3.16.1 # via virtualenv -fonttools==4.53.1 +fonttools==4.54.1 # via matplotlib ghp-import==2.1.0 # via mkdocs -h5py==3.11.0 +h5py==3.12.1 # via pynxtools (pyproject.toml) -identify==2.6.0 +hjson==3.1.0 + # via + # mkdocs-macros-plugin + # super-collections +identify==2.6.1 # via pre-commit -idna==3.8 +idna==3.10 # via requests importlib-metadata==8.5.0 # via pynxtools (pyproject.toml) @@ -68,7 +72,7 @@ markdown==3.7 # pymdown-extensions markdown-include==0.8.1 # via pynxtools (pyproject.toml) -markupsafe==2.1.5 +markupsafe==3.0.2 # via # jinja2 # mkdocs @@ -88,15 +92,15 @@ mkdocs-click==0.8.1 # via pynxtools (pyproject.toml) mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-macros-plugin==1.0.5 +mkdocs-macros-plugin==1.3.6 # via pynxtools (pyproject.toml) -mkdocs-material==9.5.34 +mkdocs-material==9.5.42 # via pynxtools (pyproject.toml) mkdocs-material-extensions==1.3.1 # via # pynxtools (pyproject.toml) # mkdocs-material -mypy==1.11.2 +mypy==1.12.1 # via pynxtools (pyproject.toml) mypy-extensions==1.0.0 # via mypy @@ -116,31 +120,34 @@ packaging==24.1 # via # matplotlib # mkdocs + # mkdocs-macros-plugin # pytest # xarray paginate==0.5.7 # via mkdocs-material -pandas==2.2.2 +pandas==2.2.3 # via # pynxtools (pyproject.toml) # xarray pathspec==0.12.1 - # via mkdocs -pillow==10.4.0 + # via + # mkdocs + # mkdocs-macros-plugin +pillow==11.0.0 # via matplotlib -platformdirs==4.3.2 +platformdirs==4.3.6 # via # mkdocs-get-deps # virtualenv pluggy==1.5.0 # via pytest -pre-commit==3.8.0 +pre-commit==4.0.1 # via pynxtools (pyproject.toml) pygments==2.18.0 # via mkdocs-material -pymdown-extensions==10.9 
+pymdown-extensions==10.11.2 # via mkdocs-material -pyparsing==3.1.4 +pyparsing==3.2.0 # via matplotlib pytest==8.3.3 # via @@ -184,31 +191,33 @@ six==1.16.0 # python-dateutil structlog==24.4.0 # via pynxtools (pyproject.toml) -termcolor==2.4.0 +super-collections==0.5.3 + # via mkdocs-macros-plugin +termcolor==2.5.0 # via mkdocs-macros-plugin tomli==2.0.2 ; python_full_version == '3.11' # via coverage -types-pytz==2024.2.0.20240913 +types-pytz==2024.2.0.20241003 # via pynxtools (pyproject.toml) -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via pynxtools (pyproject.toml) -types-requests==2.32.0.20240907 +types-requests==2.32.0.20241016 # via pynxtools (pyproject.toml) typing-extensions==4.12.2 # via mypy -tzdata==2024.1 +tzdata==2024.2 # via pandas urllib3==2.2.3 # via # requests # types-requests -uv==0.4.9 +uv==0.4.25 # via pynxtools (pyproject.toml) -virtualenv==20.26.4 +virtualenv==20.27.0 # via pre-commit -watchdog==5.0.2 +watchdog==5.0.3 # via mkdocs xarray==2024.9.0 # via pynxtools (pyproject.toml) -zipp==3.20.1 +zipp==3.20.2 # via importlib-metadata From b1064bc7f41acd31130634972afc3d4d17e3f47c Mon Sep 17 00:00:00 2001 From: sanbrock Date: Mon, 28 Oct 2024 22:33:28 +0100 Subject: [PATCH 13/22] carefull name generation for new BasicELN archive --- src/pynxtools/nomad/schema.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 98dd86f85..9aa755a3e 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -16,6 +16,7 @@ # limitations under the License. 
# +import hashlib import json import os import os.path @@ -905,8 +906,10 @@ def get_entry_reference(archive, f_name): EntityReference.normalize(self, archive, logger) if not self.reference: logger.info(f"{self.lab_id} to be created") - - f_name = f"{current_cls.__name__}_{self.lab_id}.archive.json" + f_name = re.split("([0-9a-zA-Z.]+)", self.lab_id)[1] + if len(f_name) != len(self.lab_id): + f_name = f_name + hashlib.md5(self.lab_id.encode()).hexdigest() + f_name = f"{current_cls.__name__}_{f_name}.archive.json" create_Entity(self.lab_id, archive, f_name) self.reference = get_entry_reference(archive, f_name) logger.info(f"{self.reference} - referenced directly") From 7a17d13385ed499eb33bdece88d58c76de908f79 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 29 Oct 2024 02:16:04 +0100 Subject: [PATCH 14/22] processing NXroot, too --- src/pynxtools/nexus/nexus.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/pynxtools/nexus/nexus.py b/src/pynxtools/nexus/nexus.py index 679d4f94d..8188ac551 100644 --- a/src/pynxtools/nexus/nexus.py +++ b/src/pynxtools/nexus/nexus.py @@ -85,6 +85,11 @@ def decode_if_string( def get_nxdl_entry(hdf_info): """Get the nxdl application definition for an HDF5 node""" entry = hdf_info + if ( + "NX_class" in entry["hdf_node"].attrs.keys() + and decode_if_string(entry["hdf_node"].attrs["NX_class"]) == "NXroot" + ): + return "NXroot" while ( isinstance(entry["hdf_node"], h5py.Dataset) or "NX_class" not in entry["hdf_node"].attrs.keys() @@ -97,7 +102,7 @@ def get_nxdl_entry(hdf_info): nxdef = entry["hdf_node"]["definition"][()] return nxdef.decode() except KeyError: # 'NO Definition referenced' - return "NXentry" + return "NXroot" def get_nx_class_path(hdf_info): @@ -398,6 +403,8 @@ def get_inherited_hdf_nodes( path = hdf_path for pind in range(len(path)): + if len(path) == 1 and path[0] == "": + return (["NXroot"], ["/"], elist) hdf_info2 = [hdf_path, hdf_node, hdf_class_path] [ hdf_path, @@ -803,9 +810,7 @@ 
def not_yet_visited(self, root, name): def full_visit(self, root, hdf_node, name, func): """visiting recursivly all children, but avoiding endless cycles""" - # print(name) - if len(name) > 0: - func(name, hdf_node) + func(name, hdf_node) if isinstance(hdf_node, h5py.Group): for ch_name, child in hdf_node.items(): full_name = ch_name if len(name) == 0 else name + "/" + ch_name From 96477620e4a3ffa65cf20161ea64889643539f19 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 29 Oct 2024 02:29:54 +0100 Subject: [PATCH 15/22] fix nexus test file --- tests/data/nexus/Ref_nexus_test.log | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/data/nexus/Ref_nexus_test.log b/tests/data/nexus/Ref_nexus_test.log index bd8b70692..d37370392 100644 --- a/tests/data/nexus/Ref_nexus_test.log +++ b/tests/data/nexus/Ref_nexus_test.log @@ -1,3 +1,32 @@ +DEBUG - ===== GROUP (// [NO NXentry found::]): +DEBUG - classpath: None +DEBUG - NOT IN SCHEMA +DEBUG - +DEBUG - ===== ATTRS (//@HDF5_Version) +DEBUG - value: 1.10.5 +DEBUG - classpath: None +DEBUG - NOT IN SCHEMA +DEBUG - +DEBUG - ===== ATTRS (//@file_name) +DEBUG - value: /home/tommaso/Desktop/NeXus/Test/201805_WSe2_arpes.nxs +DEBUG - classpath: None +DEBUG - NOT IN SCHEMA +DEBUG - +DEBUG - ===== ATTRS (//@file_time) +DEBUG - value: 2020-06-04T19:19:48.464472 +DEBUG - classpath: None +DEBUG - NOT IN SCHEMA +DEBUG - +DEBUG - ===== ATTRS (//@h5py_version) +DEBUG - value: 2.10.0 +DEBUG - classpath: None +DEBUG - NOT IN SCHEMA +DEBUG - +DEBUG - ===== ATTRS (//@nexusformat_version) +DEBUG - value: 0.5.2 +DEBUG - classpath: None +DEBUG - NOT IN SCHEMA +DEBUG - DEBUG - ===== GROUP (//entry [NXarpes::/NXentry]): DEBUG - classpath: ['NXentry'] DEBUG - classes: From 443c2092d1ef30abcaad4105ca2152abebfdd088 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 29 Oct 2024 09:56:40 +0100 Subject: [PATCH 16/22] populate NXroot and its attributes --- src/pynxtools/nexus/nexus.py | 2 +- src/pynxtools/nomad/parser.py | 
20 +++++++++++++------- src/pynxtools/nomad/schema.py | 4 +++- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/pynxtools/nexus/nexus.py b/src/pynxtools/nexus/nexus.py index 8188ac551..177074df4 100644 --- a/src/pynxtools/nexus/nexus.py +++ b/src/pynxtools/nexus/nexus.py @@ -404,7 +404,7 @@ def get_inherited_hdf_nodes( for pind in range(len(path)): if len(path) == 1 and path[0] == "": - return (["NXroot"], ["/"], elist) + return ([""], ["/"], elist) hdf_info2 = [hdf_path, hdf_node, hdf_class_path] [ hdf_path, diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index ea3dd3c3e..67a533381 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -159,15 +159,21 @@ def _collect_class(self, current: MSection): self._sample_class_refs[class_name].append(current) def _populate_data( - self, depth: int, nx_path: list, nx_def: str, hdf_node, current: MSection + self, depth: int, nx_path: list, nx_def: str, hdf_node, current: MSection, attr ): """ Populate attributes and fields """ - if depth < len(nx_path): + if attr: # it is an attribute of either field or group - nx_attr = nx_path[depth] - nx_parent: ET.Element = nx_path[depth - 1] + if nx_path[0] == "/": + nx_attr = nx_path[1] + nx_parent = nx_attr.getparent() + nx_root = True + else: + nx_root = False + nx_attr = nx_path[depth] + nx_parent: ET.Element = nx_path[depth - 1] if isinstance(nx_attr, str): if nx_attr != "units": @@ -191,7 +197,7 @@ def _populate_data( current = _to_section(attr_name, nx_def, nx_attr, current) try: - if nx_parent.tag.endswith("group"): + if nx_root or nx_parent.tag.endswith("group"): current.m_set_section_attribute(attr_name, attr_value) else: parent_html_name = nx_path[-2].get("name") @@ -323,7 +329,7 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 if nx_def is not None: nx_def = rename_nx_for_nomad(nx_def) - if nx_path is None: + if nx_path is None or nx_path == "/": return current: MSection = 
_to_section(None, nx_def, None, self.nx_root) @@ -340,7 +346,7 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 if nx_node.tag.endswith("group"): current.m_set_section_attribute("m_nx_data_path", current_hdf_path) current.m_set_section_attribute("m_nx_data_file", self.nxs_fname) - self._populate_data(depth, nx_path, nx_def, hdf_node, current) + self._populate_data(depth, nx_path, nx_def, hdf_node, current, attr) def get_sub_element_names(self, elem: MSection): return elem.m_def.all_aliases.keys() diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 9aa755a3e..43a6639c8 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -745,7 +745,9 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: for section in sections: package.section_definitions.append(section) - if section.nx_category == "application": + if section.nx_category == "application" or ( + section.nx_category == "base" and section.nx_name == "NXroot" + ): nexus_section.sub_sections.append( SubSection(section_def=section, name=section.name) ) From 1f0ff23ce351586a3d009d58091b271a461edba5 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 29 Oct 2024 10:11:45 +0100 Subject: [PATCH 17/22] linting --- src/pynxtools/nomad/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 67a533381..cb56de77f 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -166,14 +166,14 @@ def _populate_data( """ if attr: # it is an attribute of either field or group + nx_root = False if nx_path[0] == "/": nx_attr = nx_path[1] nx_parent = nx_attr.getparent() nx_root = True else: - nx_root = False nx_attr = nx_path[depth] - nx_parent: ET.Element = nx_path[depth - 1] + nx_parent = nx_path[depth - 1] if isinstance(nx_attr, str): if nx_attr != "units": From 1c8f05cf7e356a94890b65c370b885f3b634ae0e Mon Sep 
17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:32:50 +0100 Subject: [PATCH 18/22] skip some NXroot attributes in generic test framework --- src/pynxtools/testing/nexus_conversion.py | 43 +++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index ca732c413..cc5d62ea1 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -14,7 +14,10 @@ from pynxtools.dataconverter.convert import get_reader, transfer_data_into_template -from pynxtools.dataconverter.helpers import get_nxdl_root_and_path +from pynxtools.dataconverter.helpers import ( + get_nxdl_root_and_path, + add_default_root_attributes, +) from pynxtools.dataconverter.validation import validate_dict_against from pynxtools.dataconverter.writer import Writer from pynxtools.nexus.nexus import HandleNexus @@ -115,6 +118,9 @@ def convert_to_nexus( ) assert self.caplog.text == "" + add_default_root_attributes( + data=read_data, filename=os.path.basename(self.created_nexus) + ) Writer(read_data, nxdl_file, self.created_nexus).write() if NOMAD_AVAILABLE: @@ -133,7 +139,19 @@ def check_reproducibility_of_nexus(self): IGNORE_LINES = [ "DEBUG - value: v", "DEBUG - value: https://github.com/FAIRmat-NFDI/nexus_definitions/blob/", + "DEBUG - ===== GROUP (// [NXroot::]):", ] + SECTION_IGNORE = { + "ATTRS (//@file_name)": ["DEBUG - value:"], + "ATTRS (//@file_time)": ["DEBUG - value:"], + "ATTRS (//@file_update_time)": ["DEBUG - value:"], + "ATTRS (//@h5py_version)": ["DEBUG - value:"], + } + + section = None + section_ignore_lines = [] + section_separator = "DEBUG - ===== " + ref_log = get_log_file(self.ref_nexus_file, "ref_nexus.log", self.tmp_path) gen_log = get_log_file(self.created_nexus, "gen_nexus.log", self.tmp_path) with open(gen_log, "r", encoding="utf-8") as gen, open( @@ -142,14 +160,33 
@@ def check_reproducibility_of_nexus(self): gen_lines = gen.readlines() ref_lines = ref.readlines() if len(gen_lines) != len(ref_lines): - assert False, "Log files are different" + assert False, ( + f"Log files are different: mismatched line counts. " + f"Generated file has {len(gen_lines)} lines, " + f"while reference file has {len(ref_lines)} lines." + ) for ind, (gen_l, ref_l) in enumerate(zip(gen_lines, ref_lines)): + skip_it = False + if gen_l.startswith(section_separator) and ref_l.startswith( + section_separator + ): + section = gen_l.rsplit(section_separator)[-1].strip() + section_ignore_lines = SECTION_IGNORE.get(section, []) if gen_l != ref_l: # skip ignored lines (mainly version conflicts) for ignore_line in IGNORE_LINES: if gen_l.startswith(ignore_line) and ref_l.startswith(ignore_line): + skip_it = True break - else: + if not skip_it: + # skip ignored lines for this section + for ignore_line in section_ignore_lines: + if gen_l.startswith(ignore_line) and ref_l.startswith( + ignore_line + ): + skip_it = True + break + if not skip_it: assert False, ( f"Log files are different at line {ind}" f" generated: {gen_l} \n referenced : {ref_l}" From c7d58a8c154a7abbca18dd6b28b8dfaae7490a28 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:39:21 +0100 Subject: [PATCH 19/22] ignore HDF5 version attribute in NXroot --- src/pynxtools/testing/nexus_conversion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index cc5d62ea1..9a3e40fb7 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -142,6 +142,7 @@ def check_reproducibility_of_nexus(self): "DEBUG - ===== GROUP (// [NXroot::]):", ] SECTION_IGNORE = { + "ATTRS (//@HDF5_version)": ["DEBUG - value:"], "ATTRS (//@file_name)": ["DEBUG - value:"], "ATTRS (//@file_time)": ["DEBUG - value:"], "ATTRS 
(//@file_update_time)": ["DEBUG - value:"], From 3a9d9e32a6d4f259e7bc9ebf2e9a3b35e890d002 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 31 Oct 2024 12:16:16 +0100 Subject: [PATCH 20/22] clean up ref log comparison --- src/pynxtools/testing/nexus_conversion.py | 87 ++++++++++++----------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index 9a3e40fb7..a55b8c378 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -148,47 +148,54 @@ def check_reproducibility_of_nexus(self): "ATTRS (//@file_update_time)": ["DEBUG - value:"], "ATTRS (//@h5py_version)": ["DEBUG - value:"], } + SECTION_SEPARATOR = "DEBUG - ===== " - section = None - section_ignore_lines = [] - section_separator = "DEBUG - ===== " - - ref_log = get_log_file(self.ref_nexus_file, "ref_nexus.log", self.tmp_path) - gen_log = get_log_file(self.created_nexus, "gen_nexus.log", self.tmp_path) - with open(gen_log, "r", encoding="utf-8") as gen, open( - ref_log, "r", encoding="utf-8" - ) as ref: - gen_lines = gen.readlines() - ref_lines = ref.readlines() - if len(gen_lines) != len(ref_lines): - assert False, ( - f"Log files are different: mismatched line counts. " - f"Generated file has {len(gen_lines)} lines, " - f"while reference file has {len(ref_lines)} lines." 
+ def should_skip_line(gen_l: str, ref_l: str, ignore_lines: list[str]) -> bool: + """Check if both lines start with any ignored prefix.""" + return any( + gen_l.startswith(ignore) and ref_l.startswith(ignore) + for ignore in ignore_lines ) - for ind, (gen_l, ref_l) in enumerate(zip(gen_lines, ref_lines)): - skip_it = False - if gen_l.startswith(section_separator) and ref_l.startswith( - section_separator - ): - section = gen_l.rsplit(section_separator)[-1].strip() - section_ignore_lines = SECTION_IGNORE.get(section, []) - if gen_l != ref_l: - # skip ignored lines (mainly version conflicts) - for ignore_line in IGNORE_LINES: - if gen_l.startswith(ignore_line) and ref_l.startswith(ignore_line): - skip_it = True - break - if not skip_it: - # skip ignored lines for this section - for ignore_line in section_ignore_lines: - if gen_l.startswith(ignore_line) and ref_l.startswith( - ignore_line - ): - skip_it = True - break - if not skip_it: + + def load_logs( + gen_log_path: str, ref_log_path: str + ) -> tuple[list[str], list[str]]: + """Load log files and return their contents as lists of lines.""" + with open(gen_log_path, "r", encoding="utf-8") as gen, open( + ref_log_path, "r", encoding="utf-8" + ) as ref: + return gen.readlines(), ref.readlines() + + def compare_logs(gen_lines: list[str], ref_lines: list[str]) -> None: + """Compare log lines, ignoring specific differences.""" + if len(gen_lines) != len(ref_lines): + assert False, ( + f"Log files are different: mismatched line counts. " + f"Generated file has {len(gen_lines)} lines, " + f"while reference file has {len(ref_lines)} lines." 
+ ) + + section_ignore_lines = [] + section = None + for ind, (gen_l, ref_l) in enumerate(zip(gen_lines, ref_lines)): + if gen_l.startswith(SECTION_SEPARATOR) and ref_l.startswith( + SECTION_SEPARATOR + ): + section = gen_l.rsplit(SECTION_SEPARATOR)[-1].strip() + section_ignore_lines = SECTION_IGNORE.get(section, []) + + # Compare lines if not in ignore list + if gen_l != ref_l and not should_skip_line( + gen_l, ref_l, IGNORE_LINES + section_ignore_lines + ): assert False, ( - f"Log files are different at line {ind}" - f" generated: {gen_l} \n referenced : {ref_l}" + f"Log files are different at line {ind}\n" + f"generated: {gen_l}\nreferenced: {ref_l}" ) + + ref_log_path = get_log_file(self.ref_nexus_file, "ref_nexus.log", self.tmp_path) + gen_log_path = get_log_file(self.created_nexus, "gen_nexus.log", self.tmp_path) + gen_lines, ref_lines = load_logs(gen_log_path, ref_log_path) + + # Compare logs + compare_logs(gen_lines, ref_lines) From 4af0896396b6778a8f9497d61f10f7c13707c008 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 31 Oct 2024 12:29:35 +0100 Subject: [PATCH 21/22] check against NOMAD develop branch again --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 1a14e2804..034091f82 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -36,7 +36,7 @@ jobs: - name: Install nomad if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.12'}}" run: | - uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@Sprint_Nomad_BaseSection + uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git - name: Install pynx run: | uv pip install ".[dev]" From ae26766d19ccb7d722749cb60f1463bc6c0219e2 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 31 Oct 2024 
12:45:46 +0100 Subject: [PATCH 22/22] update citation --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index 974376912..1886951c7 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,7 +4,7 @@ message: If you use this software, please cite it using the metadata from this file. type: software -version: 0.7.4 +version: 0.8.0 authors: - given-names: Sherjeel family-names: Shabih