From 68a694ca6abb0b4d1a3ea04f74b7a23dbc53dc96 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 31 Mar 2025 16:15:38 +0200 Subject: [PATCH 01/17] remodel eln_mapper to use the NeXusTree --- src/pynxtools/eln_mapper/README.md | 8 +- src/pynxtools/eln_mapper/eln.py | 333 +++++++++--------- src/pynxtools/eln_mapper/eln_mapper.py | 81 ++++- src/pynxtools/eln_mapper/scheme_eln.py | 308 ---------------- tests/data/eln_mapper/eln.yaml | 29 +- .../data/eln_mapper/scan.scheme.archive.yaml | 259 ++++++++++++-- tests/eln_mapper/test_eln_mapper.py | 6 +- 7 files changed, 496 insertions(+), 528 deletions(-) delete mode 100644 src/pynxtools/eln_mapper/scheme_eln.py diff --git a/src/pynxtools/eln_mapper/README.md b/src/pynxtools/eln_mapper/README.md index 99b3b4369..47e7e1316 100644 --- a/src/pynxtools/eln_mapper/README.md +++ b/src/pynxtools/eln_mapper/README.md @@ -1,8 +1,12 @@ # ELN generator + This is a helper tool for generating ELN files that can be used to add metadata to the dataconverter routine. + Two types of ELN are supported (by passing the flag `eln-type`): -- **eln**: The simple ELN generator that can be used in a console or jupyter-notebook. -- **scheme_eln**: Scheme based ELN generator that can be used in NOMAD and the ELN can be used as a custom scheme in NOMAD. + +- **`reader`**: The simple ELN generator that can be used in a console or jupyter-notebook, e.g. by the `pynxtools` dataconverter. +- **`schema`**: Scheme based ELN generator that can be used in NOMAD and the ELN can be used as a custom scheme in NOMAD. Here you can find more information about the tool: + - [API documentation](https://fairmat-nfdi.github.io/pynxtools/reference/cli-api.html#generate_eln) \ No newline at end of file diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index 0c13928ff..e23d5693f 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -1,7 +1,3 @@ -"""For functions that directly or indirectly help to for rendering ELN. -Note that this not schema eln that is rendered to Nomad rather the eln that -is generated by schema eln.""" - # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. @@ -21,175 +17,188 @@ import os import re -import xml.etree.ElementTree as ET -from typing import Any, Dict +import logging +from typing import Any, List, Dict, Optional import yaml -from pynxtools.dataconverter.helpers import generate_template_from_nxdl -from pynxtools.dataconverter.template import Template -from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path - - -def retrieve_nxdl_file(nexus_def: str) -> str: - """Retrive full path of nexus file. - - Parameters - ---------- - nexus_def : str - Name of nexus definition e.g. NXmpes - - Returns - ------- - str - Returns full path of file e.g. /NXmpes.nxdl.xml - - Raises - ------ - ValueError - Need correct definition name, e.g. NXmpes not NXmpes.nxdl.xml - """ - definition_path = get_nexus_definitions_path() - - def_path = os.path.join( - definition_path, "contributed_definitions", f"{nexus_def}.nxdl.xml" - ) - if os.path.exists(def_path): - return def_path - - def_path = os.path.join( - definition_path, "base_definitions", f"{nexus_def}.nxdl.xml" - ) - - if os.path.exists(def_path): - return def_path - - def_path = os.path.join(definition_path, "applications", f"{nexus_def}.nxdl.xml") - if os.path.exists(def_path): - return def_path +from pynxtools.dataconverter.helpers import convert_data_converter_dict_to_nxdl_path +from pynxtools.dataconverter.nexus_tree import ( + NexusEntity, + NexusGroup, + NexusNode, + generate_tree_from, +) - raise ValueError( - "Incorrect definition is rendered, try with correct definition name." - ) +logger = logging.getLogger("pynxtools") +NODES_TO_SKIP: List[str] = {"definition"} -def get_empty_template(nexus_def: str) -> Template: - """Generate eln in yaml file. - - Parameters - ---------- - nexus_def : str - Name of NeXus definition e.g. NXmpes - Return - ------ - Template +def clean_filters(filter_list: Optional[List[str]]) -> Optional[List[str]]: """ + Clean list of filters by converting keys from data converter style path" + to NXDL style path: + /ENTRY[entry]/sample -> /ENTRY/sample + """ + if filter_list is None: + return + return [convert_data_converter_dict_to_nxdl_path(key) for key in filter_list] - nxdl_file = retrieve_nxdl_file(nexus_def) - nxdl_root = ET.parse(nxdl_file).getroot() - template = Template() - generate_template_from_nxdl(nxdl_root, template) - - return template - - -def take_care_of_special_concepts(key: str): - """For some special concepts such as @units.""" - - def unit_concept(): - return {"value": None, "unit": None} - - if key == "@units": - return unit_concept() - - -def get_recursive_dict( - concatenated_key: str, recursive_dict: Dict[str, Any], level_to_skip: int -) -> None: - """Get recursive dict for concatenated string of keys. - Parameters - ---------- - concatenated_key : str - String of keys separated by slash - recursive_dict : dict - Dict to recursively stroring data. - level_to_skip : int - Integer to skip the level of hierarchical level - """ - # splitig keys like: '/entry[ENTRY]/position[POSITION]/xx'. - # skiping the first empty '' and top parts as directed by users. - key_li = concatenated_key.split("/")[level_to_skip + 1 :] - # list of key for special consideration - sp_key_li = ["@units"] - last_key = "" - last_dict = {} - for key in key_li: - if "[" in key and "/" not in key: - key = re.findall( - r"\[(.*?)\]", - key, - )[0].capitalize() - if not key: - continue - last_key = key - last_dict = recursive_dict - if key in recursive_dict: - if recursive_dict[key] is None: - recursive_dict[key] = {} - recursive_dict = recursive_dict[key] - - else: - if key in sp_key_li: - recursive_dict.update(take_care_of_special_concepts(key)) - else: - recursive_dict = recursive_dict[key] - else: - if key in sp_key_li: - recursive_dict.update(take_care_of_special_concepts(key)) - else: - recursive_dict[key] = {} - recursive_dict = recursive_dict[key] - # For special key cleaning parts occurs inside take_care_of_special_concepts func. - if last_key not in sp_key_li: - last_dict[last_key] = None - - -def generate_eln(nexus_def: str, eln_file: str = "", level_to_skip: int = 1) -> None: - """Genrate eln from application definition. +def _should_skip_iteration(node: NexusNode, filter_list: Optional[List[str]]) -> bool: + """Filter those nodes that are _not_ in filter_list. Parameters ---------- - nexus_def : str - _description_ - eln_file : str - _description_ - - Returns: - None + node : NexusNode + The node to investigate. """ - - template = get_empty_template(nexus_def) - recursive_dict: Dict[str, Any] = {} - for key, _ in template.items(): - get_recursive_dict(key, recursive_dict, level_to_skip) - - name_split = eln_file.rsplit(".") - if not eln_file: - if nexus_def[0:2] == "NX": - raw_name = nexus_def[2:] - eln_file = raw_name + ".yaml" - - elif len(name_split) == 1: - eln_file = eln_file + ".yaml" - - elif len(name_split) == 2 and name_split[1] == "yaml": - pass - else: - raise ValueError( - "Eln file should come with 'yaml' extension or without extension." - ) - - with open(eln_file, encoding="utf-8", mode="w") as eln_f: - yaml.dump(recursive_dict, sort_keys=False, stream=eln_f) + if filter_list is None: + return False + if node.get_path() in filter_list: + return False + return True + + +class ElnGenerator: + def __init__( + self, + nxdl: str, + output_file: Optional[str] = None, + skip_top_levels: int = 1, + optionality: Optional[str] = "required", + filter: Optional[List[str]] = None, + ) -> None: + self.nxdl = nxdl + self.output_file = output_file + self.skip_top_levels = skip_top_levels + self.optionality = optionality + self.filter = clean_filters(filter) + + self.out_file = self._generate_output_file_name(output_file) + self.recursive_dict: Dict[str, Any] = {} + + if self.skip_top_levels >= 1: + logger.warning( + f"The first {self.skip_top_levels} levels of the NeXus tree " + "are skipped, is this intentional?" + ) + + def _generate_output_file_name(self, output_file: str): + """ + Generate the output file name of the schema ELN generator. + + To be implemented by the different subclasses of ElnGenerator. + """ + return "" + + def _generate_eln_header(self) -> Dict: + """ + Generate a header for YAML ELN. + + Returns the header section of the ELN, which is to be filled from + the application definition. + To be implemented by the different subclasses of ElnGenerator. + """ + return self.recursive_dict + + def _construct_group_structure( + self, node: NexusGroup, recursive_dict: Dict, recursion_level: int + ) -> bool: + """ + Handle NeXus group. + + To be extended by the different subclasses of ElnGenerator. The return value indicates + where the subclass should continue with this function after the super() call. + """ + # Skip top levels in iteration + if recursion_level <= self.skip_top_levels: + self._recurse_tree(node, recursive_dict, recursion_level + 1) + return False # early exit + + if self.filter is not None and all( + _should_skip_iteration(child, self.filter) for child in node.children + ): + self._recurse_tree(node, recursive_dict, recursion_level + 1) + return False # early exit + + return True + + def _construct_entity_structure( + self, node: NexusEntity, recursive_dict: Dict, recursion_level: int + ) -> bool: + """Handle NeXus field or attribute. + + To be extended by the different subclasses of ElnGenerator. The return value indicates + where the subclass should continue with this function after the super() call. + """ + # Skip top levels in iteration + if recursion_level <= self.skip_top_levels: + self._recurse_tree(node, recursive_dict, recursion_level + 1) + return False # early exit + + if self.filter is not None and _should_skip_iteration(node, self.filter): + self._recurse_tree(node, recursive_dict, recursion_level + 1) + return False # early exit + + return True + + def _recurse_tree( + self, node: NexusNode, recursive_dict: Dict, recursion_level: int + ) -> None: + """Recurse the NeXus node and add the parsed elements to the recursive dict. + + Parameters + ---------- + node : NexusNode + NeXus node to recurse. + recursive_dict : Dict + A dict that store hierarchical structure of schema ELN. + recursion_level: int + Recursion level in the tree, used to (optionally) skip upper levels like NXentry + """ + + def _handle_unknown_type(node: NexusNode, section_dict: Dict): + # This should normally not happen if + # the handling map includes all types allowed in NexusNode.type + # Still, it's good to have a fallback + # TODO: Raise error or log the issue? + pass + + handling_map = { + "group": self._construct_group_structure, + "field": self._construct_entity_structure, + "attribute": self._construct_entity_structure, + } + + lvl_map = { + "required": ("required",), + "recommended": ("recommended", "required"), + "optional": ("optional", "recommended", "required"), + "all": ("optional", "recommended", "required"), + } + + for child in node.children: + if child.name in NODES_TO_SKIP: + continue + if child.optionality not in lvl_map[self.optionality]: + continue + + handling_map.get(child.type, _handle_unknown_type)( + child, recursive_dict, recursion_level + ) + + def _write_yaml(self): + """Write the final dict into a YAML file""" + with open(self.out_file, mode="w", encoding="utf-8") as out_f: + yaml.dump(self.recursive_dict, sort_keys=False, stream=out_f) + logger.info(f"Schema ELN file {self.out_file} was created successfully.") + + def generate_eln(self) -> None: + """Generate ELN file.""" + tree = generate_tree_from(self.nxdl) + + top_level_section = self._generate_eln_header() + self._recurse_tree(tree, top_level_section, recursion_level=0) + self._write_yaml() diff --git a/src/pynxtools/eln_mapper/eln_mapper.py b/src/pynxtools/eln_mapper/eln_mapper.py index 983ed3627..64c5417bf 100644 --- a/src/pynxtools/eln_mapper/eln_mapper.py +++ b/src/pynxtools/eln_mapper/eln_mapper.py @@ -16,9 +16,13 @@ # limitations under the License. # +from typing import Union, Optional +from pathlib import Path + import click -from pynxtools.eln_mapper.eln import generate_eln -from pynxtools.eln_mapper.scheme_eln import generate_scheme_eln + +from pynxtools.eln_mapper.reader_eln import ReaderElnGenerator +from pynxtools.eln_mapper.schema_eln import NomadElnGenerator @click.command() @@ -29,35 +33,86 @@ ) @click.option( "--skip-top-levels", - default=1, + default=0, required=False, type=int, show_default=True, help=( - "To skip the level of parent hierarchy level. E.g. for default 1 the part " + "To skip the level of parent hierarchy level. E.g. for default the part " "Entry[ENTRY] from /Entry[ENTRY]/Instrument[INSTRUMENT]/... will be skiped." ), ) @click.option( "--output-file", required=False, - default="eln_data", - help=("Name of file that is neede to generated output file."), + default=None, + help=("Name of file that is needed to generated output file."), ) @click.option( "--eln-type", required=True, - type=click.Choice(["eln", "scheme_eln"], case_sensitive=False), + type=click.Choice(["reader", "schema"], case_sensitive=False), default="eln", - help=("Choose a type of ELN output (eln or scheme_eln)."), + help=("Choose a type of ELN output (reader or schema)."), +) +@click.option( + "--optionality", + required=False, + type=click.Choice( + ["required", "recommended", "optional", "all"], case_sensitive=False + ), + default="required", + help=( + "Level of requiredness to generate. If any of ('required', 'recommended', 'optional', " + "only those concepts matching this requiredness level are created. If 'all', all optional " + "concepts from the base classes are also created." + ), ) -def get_eln(nxdl: str, skip_top_levels: int, output_file: str, eln_type: str): +@click.option( + "--filter-file", + required=False, + default=None, + help=( + "JSON configuration file to filter NeXus concepts (based on the presence of the '@eln' keyword). " + "This is a positive filter, i.e., all concepts in the filter file will be included in the ELN." + ), +) +def get_eln( + nxdl: str, + skip_top_levels: int, + output_file: Optional[str], + eln_type: str, + optionality: Optional[str], + filter_file: Optional[Union[str, Path]], +): """Helper tool for generating ELN files in YAML format.""" + filter = None + if filter_file: + filter = [] + from pynxtools.dataconverter.readers.utils import parse_flatten_json + + filter_dict = parse_flatten_json(filter_file) + for key, value in filter_dict.items(): + if isinstance(value, list): + if any( + isinstance(item, str) and item.startswith("@eln") for item in value + ): + filter += [key] + elif isinstance(value, str): + if value.startswith("@eln"): + filter += [key] + eln_type = eln_type.lower() - if eln_type == "eln": - generate_eln(nxdl, output_file, skip_top_levels) - elif eln_type == "scheme_eln": - generate_scheme_eln(nxdl, eln_file_name=output_file) + if eln_type == "reader": + eln_generator = ReaderElnGenerator( + nxdl, output_file, skip_top_levels, optionality, filter + ) + elif eln_type == "schema": + eln_generator = NomadElnGenerator( + nxdl, output_file, skip_top_levels, optionality, filter + ) + + eln_generator.generate_eln() if __name__ == "__main__": diff --git a/src/pynxtools/eln_mapper/scheme_eln.py b/src/pynxtools/eln_mapper/scheme_eln.py deleted file mode 100644 index 6c04b4eac..000000000 --- a/src/pynxtools/eln_mapper/scheme_eln.py +++ /dev/null @@ -1,308 +0,0 @@ -"""This module intended to generate schema eln which usually randeredto NOMAD.""" - -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from typing import Any, Dict - -import lxml.etree as ET -import yaml - -from pynxtools.dataconverter.helpers import remove_namespace_from_tag -from pynxtools.eln_mapper.eln import retrieve_nxdl_file - -NEXUS_TYPE_TO_NUMPY_TYPE = { - "NX_CHAR": { - "convert_typ": "str", - "component_nm": "StringEditQuantity", - "default_unit_display": "", - }, - "NX_BOOLEAN": { - "convert_typ": "bool", - "component_nm": "BoolEditQuantity", - "default_unit_display": "", - }, - "NX_DATE_TIME": { - "convert_typ": "Datetime", - "component_nm": "DateTimeEditQuantity", - "default_unit_display": "", - }, - "NX_FLOAT": { - "convert_typ": "np.float64", - "component_nm": "NumberEditQuantity", - "default_unit_display": "", - }, - "NX_INT": { - "convert_typ": "int", - "component_nm": "NumberEditQuantity", - "default_unit_display": "", - }, - "NX_NUMBER": { - "convert_typ": "np.float64", - "component_nm": "NumberEditQuantity", - "default_unit_display": "", - }, - "": { - "convert_typ": "", - "component_nm": "", - "default_unit_display": "", - }, -} - - -def construct_field_structure(fld_elem, quntities_dict): - """Construct field structure such as unit, value. - Parameters - ---------- - elem : _type_ - _description_ - quntities_dict : _type_ - _description_ - """ - elm_attr = fld_elem.attrib - fld_nm = elm_attr["name"].lower() - quntities_dict[fld_nm] = {} - fld_dict = quntities_dict[fld_nm] - - # handle type - if "type" in elm_attr: - nx_fld_typ = elm_attr["type"] - else: - nx_fld_typ = "NX_CHAR" - - if nx_fld_typ in NEXUS_TYPE_TO_NUMPY_TYPE: - cov_fld_typ = NEXUS_TYPE_TO_NUMPY_TYPE[nx_fld_typ]["convert_typ"] - - fld_dict["type"] = cov_fld_typ - if "units" in elm_attr: - fld_dict["unit"] = f"" - fld_dict["value"] = "" - - # handle m_annotation - m_annotation = { - "m_annotations": { - "eln": { - "component": NEXUS_TYPE_TO_NUMPY_TYPE[nx_fld_typ]["component_nm"], - "defaultDisplayUnit": ( - NEXUS_TYPE_TO_NUMPY_TYPE[nx_fld_typ]["default_unit_display"] - ), - } - } - } - fld_dict.update(m_annotation) - - # handle description - construct_decription(fld_elem, fld_dict) - - -def construct_decription(elm: ET._Element, concept_dict: Dict) -> None: - """Collect doc from concept doc.""" - desc_text = "" - for child_elm in elm: - tag = remove_namespace_from_tag(child_elm.tag) - if tag == "doc": - desc_text = child_elm.text - desc_text = " ".join([x.strip() for x in desc_text.split("\n")]) - break - - concept_dict["description"] = desc_text - - -def construct_group_structure(grp_elm: ET._Element, subsections: Dict) -> None: - """To construct group structure as follows: - : - section: - m_annotations: - eln: - overview: true - - Parameters - ---------- - elm : ET._Element - Group element - subsections : Dict - Dict to include group recursively - """ - - default_m_annot = {"m_annotations": {"eln": {"overview": True}}} - - elm_attrib = grp_elm.attrib - grp_desig = "" - if "name" in elm_attrib: - grp_desig = elm_attrib["name"].capitalize() - elif "type" in elm_attrib: - grp_desig = elm_attrib["type"][2:].capitalize() - - subsections[grp_desig] = {} - grp_dict = subsections[grp_desig] - - # add setion in group - grp_dict["section"] = {} - section = grp_dict["section"] - section.update(default_m_annot) - - # pass the grp elment for recursive search - scan_xml_element_recursively(grp_elm, section) - - -def _should_skip_iteration(elm: ET._Element) -> bool: - """Define some elements here that should be skipped. - - Parameters - ---------- - elm : ET._Element - The element to investigate to skip - """ - attr = elm.attrib - elm_type = "" - if "type" in attr: - elm_type = attr["type"] - if elm_type in ["NXentry"]: - return True - return False - - -def scan_xml_element_recursively( - nxdl_element: ET._Element, - recursive_dict: Dict, - root_name: str = "", - reader_name: str = "", - is_root: bool = False, -) -> None: - """Scan xml elements, and pass the element to the type of element handaler. - - Parameters - ---------- - nxdl_element : ET._Element - This xml element that will be scanned through the descendants. - recursive_dict : Dict - A dict that store hierarchical structure of scheme eln. - root_name : str, optional - Name of root that user want to see to name their application, e.g. MPES, - by default 'ROOT_NAME' - reader_name : Prefered name of the reader. - is_root : bool, optional - Declar the elment as root or not, by default False - """ - - if is_root: - # Note for later: crate a new function to handle root part - nxdl = "NX.nxdl" - recursive_dict[root_name] = { - "base_sections": [ - "nomad.datamodel.metainfo.eln.NexusDataConverter", - "nomad.datamodel.data.EntryData", - ] - } - - m_annotations: Dict = { - "m_annotations": { - "template": {"reader": reader_name, "nxdl": nxdl}, - "eln": {"hide": []}, - } - } - - recursive_dict[root_name].update(m_annotations) - - recursive_dict = recursive_dict[root_name] - - # Define quantities for taking care of field - quantities: Dict = None - subsections: Dict = None - for elm in nxdl_element: - tag = remove_namespace_from_tag(elm.tag) - # To skip NXentry group but only consider the child elments - if _should_skip_iteration(elm): - scan_xml_element_recursively(elm, recursive_dict) - continue - if tag == "field": - if quantities is None: - recursive_dict["quantities"] = {} - quantities = recursive_dict["quantities"] - construct_field_structure(elm, quantities) - if tag == "group": - if subsections is None: - recursive_dict["sub_sections"] = {} - subsections = recursive_dict["sub_sections"] - construct_group_structure(elm, subsections) - - -def get_eln_recursive_dict(recursive_dict: Dict, nexus_full_file: str) -> None: - """Develop a recursive dict that has hierarchical structure of scheme eln. - - Parameters - ---------- - recursive_dict : Dict - A dict that store hierarchical structure of scheme eln. - nexus_full_file : str - Full path of NeXus file e.g. /paNXmpes.nxdl.xml - """ - - nxdl_root = ET.parse(nexus_full_file).getroot() - root_name = ( - nxdl_root.attrib["name"][2:] if "name" in nxdl_root.attrib else "" - ) - recursive_dict["definitions"] = {"name": "", "sections": {}} - sections = recursive_dict["definitions"]["sections"] - - scan_xml_element_recursively(nxdl_root, sections, root_name=root_name, is_root=True) - - -def generate_scheme_eln(nexus_def: str, eln_file_name: str = None) -> None: - """Generate schema eln that should go to Nomad while running the reader. - The output file will be .scheme.archive.yaml - - Parameters - ---------- - nexus_def : str - Name of nexus definition e.g. NXmpes - eln_file_name : str - Name of output file e.g. mpes - - Returns: - None - """ - - file_parts: list = [] - out_file_ext = "scheme.archive.yaml" - raw_name = "" - out_file = "" - - nxdl_file = retrieve_nxdl_file(nexus_def) - - if eln_file_name is None: - # raw_name from e.g. //NXmpes.nxdl.xml - raw_name = nxdl_file.split("/")[-1].split(".")[0][2:] - out_file = ".".join([raw_name, out_file_ext]) - else: - file_parts = eln_file_name.split(".") - if len(file_parts) == 1: - raw_name = file_parts[0] - out_file = ".".join([raw_name, out_file_ext]) - elif len(file_parts) == 4 and ".".join(file_parts[1:]) == out_file_ext: - out_file = eln_file_name - elif nexus_def[0:2] == "NX": - raw_name = nexus_def[2:] - out_file = ".".join([raw_name, out_file_ext]) - else: - raise ValueError("Check for correct NeXus definition and output file name.") - - recursive_dict: Dict[str, Any] = {} - get_eln_recursive_dict(recursive_dict, nxdl_file) - - with open(out_file, mode="w", encoding="utf-8") as out_f: - yaml.dump(recursive_dict, sort_keys=False, stream=out_f) diff --git a/tests/data/eln_mapper/eln.yaml b/tests/data/eln_mapper/eln.yaml index ad42ae7d5..bf54b7ff5 100644 --- a/tests/data/eln_mapper/eln.yaml +++ b/tests/data/eln_mapper/eln.yaml @@ -1,14 +1,17 @@ -Data: - data: null - rotation_angle: null -Instrument: - Detector: - data: null -Monitor: - data: null -Sample: - rotation_angle: null -definition: null -end_time: null -start_time: null title: null +start_time: null +end_time: null +instrument: + detector: + data: + value: null + unit: null +sample: + rotation_angle: + value: null + unit: null +monitor: + data: + value: null + unit: null +data: null diff --git a/tests/data/eln_mapper/scan.scheme.archive.yaml b/tests/data/eln_mapper/scan.scheme.archive.yaml index c8544598e..0dd49d8cc 100644 --- a/tests/data/eln_mapper/scan.scheme.archive.yaml +++ b/tests/data/eln_mapper/scan.scheme.archive.yaml @@ -1,14 +1,14 @@ definitions: name: sections: - scan: + ELN for SCAN: base_sections: - - nomad.datamodel.metainfo.eln.NexusDataConverter + - pynxtools.nomad.dataconverter.NexusDataConverter - nomad.datamodel.data.EntryData m_annotations: template: reader: - nxdl: NX.nxdl + nxdl: NXscan eln: hide: [] quantities: @@ -17,77 +17,282 @@ definitions: m_annotations: eln: component: StringEditQuantity - defaultDisplayUnit: - description: '' + description: Extended title for entry start_time: type: Datetime m_annotations: eln: component: DateTimeEditQuantity - defaultDisplayUnit: - description: '' + description: Starting time of measurement end_time: type: Datetime m_annotations: eln: component: DateTimeEditQuantity - defaultDisplayUnit: - description: '' - definition: - type: str - m_annotations: - eln: - component: StringEditQuantity - defaultDisplayUnit: - description: Official NeXus NXDL schema to which this file conforms + description: Ending time of measurement sub_sections: - Instrument: + instrument: section: m_annotations: eln: overview: true + description: Collection of the components of the instrument or beamline. + Template of instrument descriptions comprising various beamline components. + Each component will also be a NeXus group defined by its distance from + the sample. Negative distances represent beamline components that are + before the sample while positive distances represent components that + are after the sample. This device allows the unique identification of + beamline components in a way that is valid for both reactor and pulsed + instrumentation. sub_sections: - Detector: + detector: section: m_annotations: eln: overview: true + description: A detector, detector bank, or multidetector. quantities: data: type: int + value: m_annotations: eln: component: NumberEditQuantity - defaultDisplayUnit: - description: '' - Sample: + description: Data values from the detector. The rank and dimension + ordering should follow a principle of slowest to fastest measurement + axes and may be explicitly specified in application definitions. + Mechanical scanning of objects (e.g. sample position/angle, + incident beam energy, etc) tends to be the slowest part of + an experiment and so any such scan axes should be allocated + to the first dimensions of the array. Note that in some cases + it may be useful to represent a 2D set of scan points as a + single scan-axis in the data array, especially if the scan + pattern doesn't fit a rectangular array nicely. Repetition + of an experiment in a time series tends to be used similar + to a slow scan axis and so will often be in the first dimension + of the data array. The next fastest axes are typically the + readout of the detector. A point detector will not add any + dimensions (as it is just a single value per scan point) to + the data array, a strip detector will add one dimension, an + imaging detector will add two dimensions (e.g. X, Y axes) + and detectors outputting higher dimensional data will add + the corresponding number of dimensions. Note that the detector + dimensions don't necessarily have to be written in order of + the actual readout speeds - the slowest to fastest rule principle + is only a guide. Finally, detectors that operate in a time-of-flight + mode, such as a neutron spectrometer or a silicon drift detector + (used for X-ray fluorescence) tend to have their dimension(s) + added to the last dimensions in the data array. The type of + each dimension should should follow the order of scan points, + detector pixels, then time-of-flight (i.e. spectroscopy, spectrometry). + The rank and dimension sizes (see symbol list) shown here + are merely illustrative of coordination between related datasets. + sample: section: m_annotations: eln: overview: true + description: Any information on the sample. This could include scanned + variables that are associated with one of the data dimensions, e.g. + the magnetic field, or logged data, e.g. monitored temperature vs elapsed + time. quantities: rotation_angle: type: np.float64 + unit: degree + value: m_annotations: eln: component: NumberEditQuantity - defaultDisplayUnit: - description: '' - Monitor: + defaultDisplayUnit: degree + description: Optional rotation angle for the case when the powder + diagram has been obtained through an omega-2theta scan like from + a traditional single detector powder diffractometer. Note, it is + recommended to use NXtransformations instead. + monitor: section: m_annotations: eln: overview: true + description: A monitor of incident beam data. It is similar to the :ref:`NXdata` + groups containing monitor data and its associated axis coordinates, + e.g. time_of_flight or wavelength in pulsed neutron instruments. However, + it may also include integrals, or scalar monitor counts, which are often + used in both in both pulsed and steady-state instrumentation. quantities: data: type: int + value: m_annotations: eln: component: NumberEditQuantity - defaultDisplayUnit: - description: '' - Data: + description: Monitor data + data: section: m_annotations: eln: overview: true + description: 'The data group .. note:: Before the NIAC2016 meeting [#]_, + at least one :ref:`NXdata` group was required in each :ref:`NXentry` + group. At the NIAC2016 meeting, it was decided to make :ref:`NXdata` + an optional group in :ref:`NXentry` groups for data files that do not + use an application definition. It is recommended strongly that all NeXus + data files provide a NXdata group. It is permissible to omit the NXdata + group only when defining the default plot is not practical or possible + from the available data. For example, neutron event data may not have + anything that makes a useful plot without extensive processing. Certain + application definitions override this decision and require an :ref:`NXdata` + group in the :ref:`NXentry` group. The ``minOccurs=0`` attribute in + the application definition will indicate the :ref:`NXdata` group is + optional, otherwise, it is required. .. [#] NIAC2016: https://www.nexusformat.org/NIAC2016.html, + https://github.com/nexusformat/NIAC/issues/16' + entry: + section: + m_annotations: + eln: + overview: true + description: (**required**) :ref:`NXentry` describes the measurement. + The top-level NeXus group which contains all the data and associated + information that comprise a single measurement. It is mandatory that + there is at least one group of this type in the NeXus file. + quantities: + title: + type: str + m_annotations: + eln: + component: StringEditQuantity + description: Extended title for entry + start_time: + type: Datetime + m_annotations: + eln: + component: DateTimeEditQuantity + description: Starting time of measurement + end_time: + type: Datetime + m_annotations: + eln: + component: DateTimeEditQuantity + description: Ending time of measurement + sub_sections: + instrument: + section: + m_annotations: + eln: + overview: true + description: Collection of the components of the instrument or beamline. + Template of instrument descriptions comprising various beamline + components. Each component will also be a NeXus group defined + by its distance from the sample. Negative distances represent + beamline components that are before the sample while positive + distances represent components that are after the sample. This + device allows the unique identification of beamline components + in a way that is valid for both reactor and pulsed instrumentation. + sub_sections: + detector: + section: + m_annotations: + eln: + overview: true + description: A detector, detector bank, or multidetector. + quantities: + data: + type: int + value: + m_annotations: + eln: + component: NumberEditQuantity + description: Data values from the detector. The rank and + dimension ordering should follow a principle of slowest + to fastest measurement axes and may be explicitly specified + in application definitions. Mechanical scanning of objects + (e.g. sample position/angle, incident beam energy, etc) + tends to be the slowest part of an experiment and so + any such scan axes should be allocated to the first + dimensions of the array. Note that in some cases it + may be useful to represent a 2D set of scan points as + a single scan-axis in the data array, especially if + the scan pattern doesn't fit a rectangular array nicely. + Repetition of an experiment in a time series tends to + be used similar to a slow scan axis and so will often + be in the first dimension of the data array. The next + fastest axes are typically the readout of the detector. + A point detector will not add any dimensions (as it + is just a single value per scan point) to the data array, + a strip detector will add one dimension, an imaging + detector will add two dimensions (e.g. X, Y axes) and + detectors outputting higher dimensional data will add + the corresponding number of dimensions. Note that the + detector dimensions don't necessarily have to be written + in order of the actual readout speeds - the slowest + to fastest rule principle is only a guide. Finally, + detectors that operate in a time-of-flight mode, such + as a neutron spectrometer or a silicon drift detector + (used for X-ray fluorescence) tend to have their dimension(s) + added to the last dimensions in the data array. The + type of each dimension should should follow the order + of scan points, detector pixels, then time-of-flight + (i.e. spectroscopy, spectrometry). The rank and dimension + sizes (see symbol list) shown here are merely illustrative + of coordination between related datasets. + sample: + section: + m_annotations: + eln: + overview: true + description: Any information on the sample. This could include scanned + variables that are associated with one of the data dimensions, + e.g. the magnetic field, or logged data, e.g. monitored temperature + vs elapsed time. + quantities: + rotation_angle: + type: np.float64 + unit: degree + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: degree + description: Optional rotation angle for the case when the powder + diagram has been obtained through an omega-2theta scan like + from a traditional single detector powder diffractometer. + Note, it is recommended to use NXtransformations instead. + monitor: + section: + m_annotations: + eln: + overview: true + description: A monitor of incident beam data. It is similar to the + :ref:`NXdata` groups containing monitor data and its associated + axis coordinates, e.g. time_of_flight or wavelength in pulsed + neutron instruments. However, it may also include integrals, or + scalar monitor counts, which are often used in both in both pulsed + and steady-state instrumentation. + quantities: + data: + type: int + value: + m_annotations: + eln: + component: NumberEditQuantity + description: Monitor data + data: + section: + m_annotations: + eln: + overview: true + description: 'The data group .. note:: Before the NIAC2016 meeting + [#]_, at least one :ref:`NXdata` group was required in each :ref:`NXentry` + group. At the NIAC2016 meeting, it was decided to make :ref:`NXdata` + an optional group in :ref:`NXentry` groups for data files that + do not use an application definition. It is recommended strongly + that all NeXus data files provide a NXdata group. It is permissible + to omit the NXdata group only when defining the default plot is + not practical or possible from the available data. For example, + neutron event data may not have anything that makes a useful plot + without extensive processing. Certain application definitions + override this decision and require an :ref:`NXdata` group in the + :ref:`NXentry` group. The ``minOccurs=0`` attribute in the application + definition will indicate the :ref:`NXdata` group is optional, + otherwise, it is required. .. [#] NIAC2016: https://www.nexusformat.org/NIAC2016.html, + https://github.com/nexusformat/NIAC/issues/16' diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index 8fe8b7214..8a682eee7 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -62,11 +62,11 @@ def test_reader_eln(tmp_path): "--nxdl", "NXscan", "--skip-top-levels", - 1, + 0, "--output-file", test_file, "--eln-type", - "eln", + "reader", ], ) @@ -95,7 +95,7 @@ def test_scheme_eln(tmp_path): cli_run = testing.CliRunner() cli_run.invoke( eln_mapper.get_eln, - ["--nxdl", "NXscan", "--output-file", test_file, "--eln-type", "scheme_eln"], + ["--nxdl", "NXscan", "--output-file", test_file, "--eln-type", "schema"], ) with open(ref_file, encoding="utf-8", mode="r") as ref_f: ref_dict = yaml.safe_load(ref_f) From 83fa296160e50675a5bf2b34c52683f0870f742a Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 31 Mar 2025 16:40:14 +0200 Subject: [PATCH 02/17] do not generate root attributes in ELN --- src/pynxtools/dataconverter/nexus_tree.py | 13 +- src/pynxtools/eln_mapper/eln.py | 13 +- src/pynxtools/eln_mapper/eln_mapper.py | 2 +- src/pynxtools/eln_mapper/reader_eln.py | 153 +++++++++++ src/pynxtools/eln_mapper/schema_eln.py | 304 ++++++++++++++++++++++ tests/eln_mapper/test_eln_mapper.py | 2 +- 6 files changed, 477 insertions(+), 10 deletions(-) create mode 100644 src/pynxtools/eln_mapper/reader_eln.py create mode 100644 src/pynxtools/eln_mapper/schema_eln.py diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 7eb8ed79b..0eafac87c 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -932,7 +932,7 @@ def populate_tree_from_parents(node: NexusNode): populate_tree_from_parents(child_node) -def generate_tree_from(appdef: str) -> NexusNode: +def generate_tree_from(appdef: str, set_root_attr: bool = True) -> NexusNode: """ Generates a NexusNode tree from an application definition. NexusNode is based on anytree nodes and anytree's functions can be used @@ -940,6 +940,7 @@ def generate_tree_from(appdef: str) -> NexusNode: Args: appdef (str): The application definition name to generate the NexusNode tree from. + set_root_attr (bool): Whether or not to set the root attributes. Returns: NexusNode: The tree representing the application definition. @@ -968,6 +969,7 @@ def add_children_to(parent: NexusNode, xml_elem: ET._Element) -> None: add_children_to(current_elem, child) appdef_xml_root, _ = get_nxdl_root_and_path(appdef) + global namespaces namespaces = {"nx": appdef_xml_root.nsmap[None]} @@ -985,10 +987,11 @@ def add_children_to(parent: NexusNode, xml_elem: ET._Element) -> None: inheritance=appdef_inheritance_chain, ) # Set root attributes - nx_root, _ = get_nxdl_root_and_path("NXroot") - for root_attrib in nx_root.findall("nx:attribute", namespaces=namespaces): - child = tree.add_node_from(root_attrib) - child.optionality = "optional" + if set_root_attr: + nx_root, _ = get_nxdl_root_and_path("NXroot") + for root_attrib in nx_root.findall("nx:attribute", namespaces=namespaces): + child = tree.add_node_from(root_attrib) + child.optionality = "optional" entry = appdef_xml_root.find("nx:group[@type='NXentry']", namespaces=namespaces) add_children_to(tree, entry) diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index e23d5693f..5d8a55a31 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -1,3 +1,5 @@ +"""Define general structure of the ELN mapper.""" + # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. @@ -79,9 +81,14 @@ def __init__( self.out_file = self._generate_output_file_name(output_file) self.recursive_dict: Dict[str, Any] = {} - if self.skip_top_levels >= 1: + if self.skip_top_levels == 1: + logger.warning( + f"The first level below NXentry of the NeXus tree " + "are skipped, is this intentional?" + ) + elif self.skip_top_levels > 1: logger.warning( - f"The first {self.skip_top_levels} levels of the NeXus tree " + f"The first {self.skip_top_levels - 1} levels of the NeXus tree " "are skipped, is this intentional?" ) @@ -197,7 +204,7 @@ def _write_yaml(self): def generate_eln(self) -> None: """Generate ELN file.""" - tree = generate_tree_from(self.nxdl) + tree = generate_tree_from(self.nxdl, set_root_attr=False) top_level_section = self._generate_eln_header() self._recurse_tree(tree, top_level_section, recursion_level=0) diff --git a/src/pynxtools/eln_mapper/eln_mapper.py b/src/pynxtools/eln_mapper/eln_mapper.py index 64c5417bf..39b91778c 100644 --- a/src/pynxtools/eln_mapper/eln_mapper.py +++ b/src/pynxtools/eln_mapper/eln_mapper.py @@ -1,4 +1,4 @@ -"""This module Generate ELN in a hierarchical format according to NEXUS definition.""" +"""This module generates ELN file in a hierarchical format according to a NeXus application definition.""" # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. diff --git a/src/pynxtools/eln_mapper/reader_eln.py b/src/pynxtools/eln_mapper/reader_eln.py new file mode 100644 index 000000000..05f9e4c55 --- /dev/null +++ b/src/pynxtools/eln_mapper/reader_eln.py @@ -0,0 +1,153 @@ +"""For functions that directly or indirectly help to for rendering ELN. +Note that this not schema eln that is rendered to Nomad rather the eln that +is generated by schema eln.""" +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +from typing import List, Dict + +from pynxtools.dataconverter.nexus_tree import ( + NexusEntity, + NexusGroup, + NexusNode, +) +from pynxtools.eln_mapper.eln import ElnGenerator + + +class ReaderElnGenerator(ElnGenerator): + """Class for creating YAML files to be used directly in the pynxtools dataconverter.""" + + def _generate_output_file_name(self, output_file: str): + """ + Generate the output file name of the schema ELN generator. + + The output file name will be: + - .eln_data.yaml or + - if output_file already ends on eln_data.yaml + + If no output_file is given, the output will be .eln_data.yaml, + where is the name of the application definition without the leading NX + (e.g., for NXmpes, the file is called mpes.scheme.archive.yaml). + + """ + file_parts: list = [] + out_file_ext = "eln.yaml" + raw_name = "" + out_file = "" + + if self.output_file is None: + out_file = ".".join([self.nxdl[2:], out_file_ext]) + else: + if output_file.endswith(out_file_ext): + out_file = self.output_file + else: + file_parts = output_file.split(".") + if len(file_parts) == 1: + raw_name = file_parts[0] + out_file = ".".join([raw_name, out_file_ext]) + elif len(file_parts) == 2 and ".".join(file_parts[1:]) == out_file_ext: + out_file = output_file + else: + raise ValueError( + "Check for correct NeXus definition and output file name." + ) + + return out_file + + def _construct_group_structure( + self, node: NexusGroup, recursive_dict: Dict, recursion_level: int + ) -> None: + """Handle NeXus group, to construct group structure as follows: + : + section: + m_annotations: + eln: + overview: true + + Parameters + ---------- + node: NexusGroup + NeXus group to recurse + recursive_dict : Dict + Dict into which the group is recursively added + recursion_level: int + Recursion level in the tree, used to (optionally) skip upper levels like NXentry + """ + if not super()._construct_group_structure( + node, recursive_dict, recursion_level + ): + return + + group_name = node.name + + if node.variadic: + # TODO: allow variadic names?! + group_name = group_name.lower() + + if not node.children: + recursive_dict[group_name] = None + return + + recursive_dict[group_name] = {} + + # pass the grp elment for recursive search + self._recurse_tree(node, recursive_dict[group_name], recursion_level + 1) + + def _construct_entity_structure( + self, node: NexusEntity, recursive_dict: Dict, recursion_level: int + ): + """Handle NeXus field or attribute, to construct structure like: + : + type: np.float64 (matching with the node's type) + unit: (matching with the node's unit) + m_annotations: + eln: + component: NumberEditQuantity (matching with the node's type) + defaultDisplayUnit: (matching with the node's unit) + description: node docs + + Parameters + ---------- + node: NexusEntity + NeXus field/attribute to recurse + recursive_dict : Dict + Dict into which the entity is recursively added + recursion_level: int + Recursion level in the tree, used to (optionally) skip upper levels like NXentry + """ + + if not super()._construct_entity_structure( + node, recursive_dict, recursion_level + ): + return + + entity_name = node.name.lower() + + if node.type == "attribute": + entity_name = f"/@{entity_name}" + + if not node.children and not node.unit: + recursive_dict[entity_name] = None + return + + recursive_dict[entity_name] = {} + + if node.unit: + recursive_dict[entity_name].update({"value": None, "unit": None}) + + self._recurse_tree(node, recursive_dict[entity_name], recursion_level + 1) diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py new file mode 100644 index 000000000..12d920e36 --- /dev/null +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -0,0 +1,304 @@ +"""Generate schema ELN files which can be passed to NOMAD to define an ELN.""" + +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +from typing import List, Dict + +from pynxtools.dataconverter.nexus_tree import ( + NexusEntity, + NexusGroup, + NexusNode, +) +from pynxtools.eln_mapper.eln import ElnGenerator + +DEFAULT_UNITS: Dict[str, str | None] = { + "NX_ANGLE": "degree", + "NX_ANY": None, + "NX_AREA": "m**2", + "NX_CHARGE": "C", + "NX_COUNT": None, + "NX_CROSS_SECTION": "m**2", + "NX_CURRENT": "A", + "NX_DIMENSIONLESS": None, + "NX_EMITTANCE": "m * rad", + "NX_ENERGY": "eV", + "NX_FLUX": "1 / (m**2 * s)", + "NX_FREQUENCY": "Hz", + "NX_LENGTH": "m", + "NX_MASS": "kg", + "NX_MASS_DENSITY": "kg / m**3", + "NX_MOLECULAR_WEIGHT": "g / mol", + "NX_PERIOD": "s", + "NX_PER_AREA": "1 / m**2", + "NX_PER_LENGTH": "1 / m", + "NX_POWER": "W", + "NX_PRESSURE": "Pa", + "NX_PULSES": None, + "NX_SCATTERING_LENGTH_DENSITY": "m / m**3", + "NX_SOLID_ANGLE": "sr", + "NX_TEMPERATURE": "K", + "NX_TIME": "s", + "NX_TIME_OF_FLIGHT": "s", + "NX_TRANSFORMATION": None, # Unit is either m or degree or None + "NX_UNITLESS": "", # Explicitly unitless + "NX_VOLTAGE": "V", + "NX_VOLUME": "m**3", + "NX_WAVELENGTH": "nm", + "NX_WAVENUMBER": "1 / m", +} + +NEXUS_TYPE_TO_PYTHON_TYPE = { + "NX_CHAR": { + "convert_type": "str", + "component_name": "StringEditQuantity", + }, + "NX_BOOLEAN": { + "convert_type": "bool", + "component_name": "BoolEditQuantity", + }, + "NX_DATE_TIME": { + "convert_type": "Datetime", + "component_name": "DateTimeEditQuantity", + }, + "NX_FLOAT": { + "convert_type": "np.float64", + "component_name": "NumberEditQuantity", + }, + "NX_INT": { + "convert_type": "int", + "component_name": "NumberEditQuantity", + }, + "NX_NUMBER": { + "convert_type": "np.float64", + "component_name": "NumberEditQuantity", + }, + "": { + "convert_type": "", + "component_nm": "", + }, +} + + +def construct_description(node: NexusNode, concept_dict: Dict) -> None: + """Collect doc from concept doc (and inherited docs).""" + inherited_docstrings = node.get_docstring() + + for doc in inherited_docstrings[::-1]: + if doc: + doc = re.sub(r"\s+", " ", doc).strip() + concept_dict["description"] = doc + break + + +class NomadElnGenerator(ElnGenerator): + """Class for creating NOMAD ELN schemas from NeXus application definitions.""" + + def _generate_output_file_name(self, output_file: str): + """ + Generate the output file name of the schema ELN generator. + + The output file name will be: + - .scheme.archive.yaml or + - if output_file already ends on scheme.archive.yaml + + If no output_file is given, the output will be .scheme.archive.yaml, + where is the name of the application definition without the leading NX + (e.g., for NXmpes, the file is called mpes.scheme.archive.yaml). + + """ + file_parts: list = [] + out_file_ext = "scheme.archive.yaml" + raw_name = "" + out_file = "" + + if self.output_file is None: + out_file = ".".join([self.nxdl[2:], out_file_ext]) + else: + if output_file.endswith(out_file_ext): + out_file = self.output_file + else: + file_parts = output_file.split(".") + if len(file_parts) == 1: + raw_name = file_parts[0] + out_file = ".".join([raw_name, out_file_ext]) + elif len(file_parts) == 4 and ".".join(file_parts[1:]) == out_file_ext: + out_file = output_file + else: + raise ValueError( + "Check for correct NeXus definition and output file name." + ) + + return out_file + + def _generate_eln_header(self) -> Dict: + """Generate the header for the NOMAD ELN""" + + # Basic building blocks of ELN + self.recursive_dict["definitions"] = { + "name": "", + "sections": {}, + } + sections = self.recursive_dict["definitions"]["sections"] + + root_name = f"ELN for {self.nxdl.lstrip('NX').upper()}" + sections[root_name] = {} + + # Note for later: create a new function to handle root part + sections[root_name].update( + { + "base_sections": [ + "pynxtools.nomad.dataconverter.NexusDataConverter", + "nomad.datamodel.data.EntryData", + ] + } + ) + + m_annotations: Dict = { + "m_annotations": { + "template": {"reader": "", "nxdl": self.nxdl}, + "eln": {"hide": []}, + } + } + sections[root_name].update(m_annotations) + + return sections[root_name] + + def _construct_group_structure( + self, node: NexusGroup, recursive_dict: Dict, recursion_level: int + ) -> None: + """Handle NeXus group, to construct group structure as follows: + : + section: + m_annotations: + eln: + overview: true + + Parameters + ---------- + node: NexusGroup + NeXus group to recurse + recursive_dict : Dict + Dict into which the group is recursively added + recursion_level: int + Recursion level in the tree, used to (optionally) skip upper levels like NXentry + """ + if not super()._construct_group_structure( + node, recursive_dict, recursion_level + ): + return + + # if subsections is None: + if "sub_sections" not in recursive_dict: + recursive_dict["sub_sections"] = {} + subsections = recursive_dict["sub_sections"] + + default_m_annot = {"m_annotations": {"eln": {"overview": True}}} + + group_name = node.name + if node.variadic: + if node.name_type == "any": + group_name = ( + group_name.lower() + ) # this is just a suggestion for easier use + + subsections[group_name] = {} + group_dict = subsections[group_name] + + # add section in group + group_dict["section"] = {} + section = group_dict["section"] + section.update(default_m_annot) + + # handle description + construct_description(node, section) + + # pass the grp elment for recursive search + self._recurse_tree(node, section, recursion_level + 1) + + def _construct_entity_structure( + self, node: NexusEntity, recursive_dict: Dict, recursion_level: int + ): + """Handle NeXus field or attribute, to construct structure like: + : + type: np.float64 (matching with the node's type) + unit: (matching with the node's unit) + m_annotations: + eln: + component: NumberEditQuantity (matching with the node's type) + defaultDisplayUnit: (matching with the node's unit) + description: node docs + + Parameters + ---------- + node: NexusEntity + NeXus field/attribute to recurse + recursive_dict : Dict + Dict into which the entity is recursively added + recursion_level: int + Recursion level in the tree, used to (optionally) skip upper levels like NXentry + """ + if not super()._construct_entity_structure( + node, recursive_dict, recursion_level + ): + return + + if "quantities" not in recursive_dict: + recursive_dict["quantities"] = {} + quantities_dict = recursive_dict["quantities"] + + entity_name = node.name + if node.variadic: + if node.name_type == "any": + entity_name = ( + entity_name.lower() + ) # this is just a suggestion for easier use + + quantities_dict[entity_name] = {} + entity_dict = quantities_dict[entity_name] + + # handle type + nx_field_type = node.dtype + convert_dict = NEXUS_TYPE_TO_PYTHON_TYPE.get(nx_field_type) + + if convert_dict: + entity_type = convert_dict["convert_type"] + + entity_dict["type"] = entity_type + + unit = None + if node.unit: + unit = DEFAULT_UNITS.get(node.unit) + if unit: + entity_dict["unit"] = unit + entity_dict["value"] = "" + + # handle m_annotation + eln_dict = { + "component": convert_dict["component_name"], + } + if unit: + eln_dict["defaultDisplayUnit"] = unit + m_annotation = {"m_annotations": {"eln": eln_dict}} + + entity_dict.update(m_annotation) + + # handle description + construct_description(node, entity_dict) + + self._recurse_tree(node, entity_dict, recursion_level + 1) diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index 8a682eee7..c19e5a156 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -1,4 +1,4 @@ -"""This test is dedicated generate_eln converter tool.""" +"""This test is dedicated to the generate_eln converter tool.""" # Copyright The NOMAD Authors. # From 3c02e30eefef6bcc31fcffac570bb2e2d712c47e Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 31 Mar 2025 16:54:06 +0200 Subject: [PATCH 03/17] use abstract base classes --- src/pynxtools/eln_mapper/eln.py | 14 +++++++++----- src/pynxtools/eln_mapper/eln_mapper.py | 3 +++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index 5d8a55a31..ab2b7e2bd 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -17,9 +17,9 @@ # limitations under the License. # -import os import re import logging +from abc import ABC, abstractmethod from typing import Any, List, Dict, Optional import yaml @@ -34,7 +34,7 @@ logger = logging.getLogger("pynxtools") -NODES_TO_SKIP: List[str] = {"definition"} +NODES_TO_SKIP: List[str] = ["definition"] def clean_filters(filter_list: Optional[List[str]]) -> Optional[List[str]]: @@ -44,7 +44,7 @@ def clean_filters(filter_list: Optional[List[str]]) -> Optional[List[str]]: /ENTRY[entry]/sample -> /ENTRY/sample """ if filter_list is None: - return + return None return [convert_data_converter_dict_to_nxdl_path(key) for key in filter_list] @@ -63,7 +63,7 @@ def _should_skip_iteration(node: NexusNode, filter_list: Optional[List[str]]) -> return True -class ElnGenerator: +class ElnGenerator(ABC): def __init__( self, nxdl: str, @@ -110,6 +110,7 @@ def _generate_eln_header(self) -> Dict: """ return self.recursive_dict + @abstractmethod def _construct_group_structure( self, node: NexusGroup, recursive_dict: Dict, recursion_level: int ) -> bool: @@ -132,6 +133,7 @@ def _construct_group_structure( return True + @abstractmethod def _construct_entity_structure( self, node: NexusEntity, recursive_dict: Dict, recursion_level: int ) -> bool: @@ -166,7 +168,9 @@ def _recurse_tree( Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ - def _handle_unknown_type(node: NexusNode, section_dict: Dict): + def _handle_unknown_type( + node: NexusNode, section_dict: Dict, recursion_level: int + ): # This should normally not happen if # the handling map includes all types allowed in NexusNode.type # Still, it's good to have a fallback diff --git a/src/pynxtools/eln_mapper/eln_mapper.py b/src/pynxtools/eln_mapper/eln_mapper.py index 39b91778c..61f87da98 100644 --- a/src/pynxtools/eln_mapper/eln_mapper.py +++ b/src/pynxtools/eln_mapper/eln_mapper.py @@ -103,6 +103,9 @@ def get_eln( filter += [key] eln_type = eln_type.lower() + + eln_generator: Union[ReaderElnGenerator, NomadElnGenerator] + if eln_type == "reader": eln_generator = ReaderElnGenerator( nxdl, output_file, skip_top_levels, optionality, filter From 2a5b1256757a850c9e7157f8cd07ef5360304190 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:12:10 +0200 Subject: [PATCH 04/17] bug fix for filter options --- src/pynxtools/eln_mapper/eln.py | 8 ++++---- src/pynxtools/eln_mapper/eln_mapper.py | 9 +++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index ab2b7e2bd..0d213b0a1 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -68,7 +68,7 @@ def __init__( self, nxdl: str, output_file: Optional[str] = None, - skip_top_levels: int = 1, + skip_top_levels: int = 0, optionality: Optional[str] = "required", filter: Optional[List[str]] = None, ) -> None: @@ -128,8 +128,9 @@ def _construct_group_structure( if self.filter is not None and all( _should_skip_iteration(child, self.filter) for child in node.children ): - self._recurse_tree(node, recursive_dict, recursion_level + 1) - return False # early exit + if not all([child.type == "group" for child in node.children]): + self._recurse_tree(node, recursive_dict, recursion_level) + return False # early exit return True @@ -187,7 +188,6 @@ def _handle_unknown_type( "required": ("required",), "recommended": ("recommended", "required"), "optional": ("optional", "recommended", "required"), - "all": ("optional", "recommended", "required"), } for child in node.children: diff --git a/src/pynxtools/eln_mapper/eln_mapper.py b/src/pynxtools/eln_mapper/eln_mapper.py index 61f87da98..310d80077 100644 --- a/src/pynxtools/eln_mapper/eln_mapper.py +++ b/src/pynxtools/eln_mapper/eln_mapper.py @@ -38,7 +38,7 @@ type=int, show_default=True, help=( - "To skip the level of parent hierarchy level. E.g. for default the part " + "To skip the level of parent hierarchy level. For example, by default the part " "Entry[ENTRY] from /Entry[ENTRY]/Instrument[INSTRUMENT]/... will be skiped." ), ) @@ -58,14 +58,11 @@ @click.option( "--optionality", required=False, - type=click.Choice( - ["required", "recommended", "optional", "all"], case_sensitive=False - ), + type=click.Choice(["required", "recommended", "optional"], case_sensitive=False), default="required", help=( "Level of requiredness to generate. If any of ('required', 'recommended', 'optional', " - "only those concepts matching this requiredness level are created. If 'all', all optional " - "concepts from the base classes are also created." + "only those concepts matching this requiredness level are created." ), ) @click.option( From cb00a6d2bc9571d91a64397354892c58c1d698b4 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:19:38 +0200 Subject: [PATCH 05/17] fix for python>=3.9 --- src/pynxtools/eln_mapper/schema_eln.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index 12d920e36..27c55cdaf 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -18,7 +18,7 @@ # import re -from typing import List, Dict +from typing import List, Dict, Union from pynxtools.dataconverter.nexus_tree import ( NexusEntity, @@ -27,7 +27,7 @@ ) from pynxtools.eln_mapper.eln import ElnGenerator -DEFAULT_UNITS: Dict[str, str | None] = { +DEFAULT_UNITS: Dict[str, Union[str, None]] = { "NX_ANGLE": "degree", "NX_ANY": None, "NX_AREA": "m**2", From bd3a3dcd54d8a5b952deba0fdb2502f66c276da8 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 1 Apr 2025 08:46:41 +0200 Subject: [PATCH 06/17] map all NeXus types to NOMAD quantities --- src/pynxtools/eln_mapper/schema_eln.py | 83 ++++++++++---------------- 1 file changed, 32 insertions(+), 51 deletions(-) diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index 27c55cdaf..f1ac83bd3 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -27,6 +27,19 @@ ) from pynxtools.eln_mapper.eln import ElnGenerator +NEXUS_TO_NOMAD_QUANTITY = { + "NX_BINARY": ("bytes", "NumberEditQuantity"), + "NX_BOOLEAN": ("bool", "BoolEditQuantity"), + "NX_CHAR": ("str", "StringEditQuantity"), + "NX_CHAR_OR_NUMBER": ("np.float64", "NumberEditQuantity"), + "NX_COMPLEX": ("numpy.complex64", "NumberEditQuantity"), + "NX_DATE_TIME": ("Datetime", "DateTimeEditQuantity"), + "NX_FLOAT": ("np.float64", "NumberEditQuantity"), + "NX_INT": ("int", "NumberEditQuantity"), + "NX_NUMBER": ("np.float64", "NumberEditQuantity"), + "NX_POSINT": ("int", "NumberEditQuantity"), +} + DEFAULT_UNITS: Dict[str, Union[str, None]] = { "NX_ANGLE": "degree", "NX_ANY": None, @@ -63,37 +76,6 @@ "NX_WAVENUMBER": "1 / m", } -NEXUS_TYPE_TO_PYTHON_TYPE = { - "NX_CHAR": { - "convert_type": "str", - "component_name": "StringEditQuantity", - }, - "NX_BOOLEAN": { - "convert_type": "bool", - "component_name": "BoolEditQuantity", - }, - "NX_DATE_TIME": { - "convert_type": "Datetime", - "component_name": "DateTimeEditQuantity", - }, - "NX_FLOAT": { - "convert_type": "np.float64", - "component_name": "NumberEditQuantity", - }, - "NX_INT": { - "convert_type": "int", - "component_name": "NumberEditQuantity", - }, - "NX_NUMBER": { - "convert_type": "np.float64", - "component_name": "NumberEditQuantity", - }, - "": { - "convert_type": "", - "component_nm": "", - }, -} - def construct_description(node: NexusNode, concept_dict: Dict) -> None: """Collect doc from concept doc (and inherited docs).""" @@ -273,30 +255,29 @@ def _construct_entity_structure( entity_dict = quantities_dict[entity_name] # handle type - nx_field_type = node.dtype - convert_dict = NEXUS_TYPE_TO_PYTHON_TYPE.get(nx_field_type) - - if convert_dict: - entity_type = convert_dict["convert_type"] + default_types = ("str", "StringEditQuantity") + entity_type, component_name = NEXUS_TO_NOMAD_QUANTITY.get( + node.dtype, default_types + ) - entity_dict["type"] = entity_type + unit = None + if node.unit: + unit = DEFAULT_UNITS.get(node.unit) - unit = None - if node.unit: - unit = DEFAULT_UNITS.get(node.unit) - if unit: - entity_dict["unit"] = unit - entity_dict["value"] = "" + entity_dict["type"] = entity_type + if unit: + entity_dict["unit"] = unit + entity_dict["value"] = "" - # handle m_annotation - eln_dict = { - "component": convert_dict["component_name"], - } - if unit: - eln_dict["defaultDisplayUnit"] = unit - m_annotation = {"m_annotations": {"eln": eln_dict}} + # handle m_annotation + eln_dict = { + "component": component_name, + } + if unit: + eln_dict["defaultDisplayUnit"] = unit + m_annotation = {"m_annotations": {"eln": eln_dict}} - entity_dict.update(m_annotation) + entity_dict.update(m_annotation) # handle description construct_description(node, entity_dict) From 2d8371076d4933c5435e315d97f0ff6ecde86a37 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 1 Apr 2025 09:06:08 +0200 Subject: [PATCH 07/17] print a more informative error message in eln_mapper tests --- .../data/eln_mapper/scan.scheme.archive.yaml | 153 +----------------- tests/eln_mapper/test_eln_mapper.py | 41 ++++- 2 files changed, 36 insertions(+), 158 deletions(-) diff --git a/tests/data/eln_mapper/scan.scheme.archive.yaml b/tests/data/eln_mapper/scan.scheme.archive.yaml index 0dd49d8cc..90634a48f 100644 --- a/tests/data/eln_mapper/scan.scheme.archive.yaml +++ b/tests/data/eln_mapper/scan.scheme.archive.yaml @@ -14,18 +14,21 @@ definitions: quantities: title: type: str + value: m_annotations: eln: component: StringEditQuantity description: Extended title for entry start_time: type: Datetime + value: m_annotations: eln: component: DateTimeEditQuantity description: Starting time of measurement end_time: type: Datetime + value: m_annotations: eln: component: DateTimeEditQuantity @@ -146,153 +149,3 @@ definitions: the application definition will indicate the :ref:`NXdata` group is optional, otherwise, it is required. .. [#] NIAC2016: https://www.nexusformat.org/NIAC2016.html, https://github.com/nexusformat/NIAC/issues/16' - entry: - section: - m_annotations: - eln: - overview: true - description: (**required**) :ref:`NXentry` describes the measurement. - The top-level NeXus group which contains all the data and associated - information that comprise a single measurement. It is mandatory that - there is at least one group of this type in the NeXus file. - quantities: - title: - type: str - m_annotations: - eln: - component: StringEditQuantity - description: Extended title for entry - start_time: - type: Datetime - m_annotations: - eln: - component: DateTimeEditQuantity - description: Starting time of measurement - end_time: - type: Datetime - m_annotations: - eln: - component: DateTimeEditQuantity - description: Ending time of measurement - sub_sections: - instrument: - section: - m_annotations: - eln: - overview: true - description: Collection of the components of the instrument or beamline. - Template of instrument descriptions comprising various beamline - components. Each component will also be a NeXus group defined - by its distance from the sample. Negative distances represent - beamline components that are before the sample while positive - distances represent components that are after the sample. This - device allows the unique identification of beamline components - in a way that is valid for both reactor and pulsed instrumentation. - sub_sections: - detector: - section: - m_annotations: - eln: - overview: true - description: A detector, detector bank, or multidetector. - quantities: - data: - type: int - value: - m_annotations: - eln: - component: NumberEditQuantity - description: Data values from the detector. The rank and - dimension ordering should follow a principle of slowest - to fastest measurement axes and may be explicitly specified - in application definitions. Mechanical scanning of objects - (e.g. sample position/angle, incident beam energy, etc) - tends to be the slowest part of an experiment and so - any such scan axes should be allocated to the first - dimensions of the array. Note that in some cases it - may be useful to represent a 2D set of scan points as - a single scan-axis in the data array, especially if - the scan pattern doesn't fit a rectangular array nicely. - Repetition of an experiment in a time series tends to - be used similar to a slow scan axis and so will often - be in the first dimension of the data array. The next - fastest axes are typically the readout of the detector. - A point detector will not add any dimensions (as it - is just a single value per scan point) to the data array, - a strip detector will add one dimension, an imaging - detector will add two dimensions (e.g. X, Y axes) and - detectors outputting higher dimensional data will add - the corresponding number of dimensions. Note that the - detector dimensions don't necessarily have to be written - in order of the actual readout speeds - the slowest - to fastest rule principle is only a guide. Finally, - detectors that operate in a time-of-flight mode, such - as a neutron spectrometer or a silicon drift detector - (used for X-ray fluorescence) tend to have their dimension(s) - added to the last dimensions in the data array. The - type of each dimension should should follow the order - of scan points, detector pixels, then time-of-flight - (i.e. spectroscopy, spectrometry). The rank and dimension - sizes (see symbol list) shown here are merely illustrative - of coordination between related datasets. - sample: - section: - m_annotations: - eln: - overview: true - description: Any information on the sample. This could include scanned - variables that are associated with one of the data dimensions, - e.g. the magnetic field, or logged data, e.g. monitored temperature - vs elapsed time. - quantities: - rotation_angle: - type: np.float64 - unit: degree - value: - m_annotations: - eln: - component: NumberEditQuantity - defaultDisplayUnit: degree - description: Optional rotation angle for the case when the powder - diagram has been obtained through an omega-2theta scan like - from a traditional single detector powder diffractometer. - Note, it is recommended to use NXtransformations instead. - monitor: - section: - m_annotations: - eln: - overview: true - description: A monitor of incident beam data. It is similar to the - :ref:`NXdata` groups containing monitor data and its associated - axis coordinates, e.g. time_of_flight or wavelength in pulsed - neutron instruments. However, it may also include integrals, or - scalar monitor counts, which are often used in both in both pulsed - and steady-state instrumentation. - quantities: - data: - type: int - value: - m_annotations: - eln: - component: NumberEditQuantity - description: Monitor data - data: - section: - m_annotations: - eln: - overview: true - description: 'The data group .. note:: Before the NIAC2016 meeting - [#]_, at least one :ref:`NXdata` group was required in each :ref:`NXentry` - group. At the NIAC2016 meeting, it was decided to make :ref:`NXdata` - an optional group in :ref:`NXentry` groups for data files that - do not use an application definition. It is recommended strongly - that all NeXus data files provide a NXdata group. It is permissible - to omit the NXdata group only when defining the default plot is - not practical or possible from the available data. For example, - neutron event data may not have anything that makes a useful plot - without extensive processing. Certain application definitions - override this decision and require an :ref:`NXdata` group in the - :ref:`NXentry` group. The ``minOccurs=0`` attribute in the application - definition will indicate the :ref:`NXdata` group is optional, - otherwise, it is required. .. [#] NIAC2016: https://www.nexusformat.org/NIAC2016.html, - https://github.com/nexusformat/NIAC/issues/16' diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index c19e5a156..097772517 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -18,7 +18,7 @@ # import os -from typing import Dict +from typing import Dict, List, Tuple import yaml from click import testing @@ -26,20 +26,45 @@ from pynxtools.eln_mapper import eln_mapper -def check_keys_from_two_dict(dict1: Dict, dict2: Dict): - """Compare keys of two dicts. +def check_keys_from_two_dict(dict1: Dict, dict2: Dict, path: str = ""): + """Compare keys of two dicts and report all differences. Parameters ---------- dict1 : Dict - Dict-1 to compare the key with Dict-2 + First dictionary to compare. dict2 : Dict - Dict-2 to compare the key with Dict-1 + Second dictionary to compare. + path : str, optional + Current key path being checked (used for recursive calls). """ - for (key1, val1), (key2, val2) in zip(dict1.items(), dict2.items()): - assert key1 == key2, "Test and Ref yaml file have different keys." + differences: List[Tuple[str, str]] = [] + + keys1 = set(dict1.keys()) + keys2 = set(dict2.keys()) + + # Find missing and extra keys + missing_in_dict2 = keys1 - keys2 + missing_in_dict1 = keys2 - keys1 + + for key in missing_in_dict2: + differences.append((f"{path}.{key}".lstrip("."), "Missing in dict2")) + + for key in missing_in_dict1: + differences.append((f"{path}.{key}".lstrip("."), "Missing in dict1")) + + # Check common keys recursively + for key in keys1 & keys2: + val1, val2 = dict1[key], dict2[key] if isinstance(val1, dict) and isinstance(val2, dict): - check_keys_from_two_dict(val1, val2) + check_keys_from_two_dict(val1, val2, path=f"{path}.{key}".lstrip(".")) + + # Raise error if there are differences + if differences: + error_message = "Key mismatches found:\n" + "\n".join( + f"- {key}: {msg}" for key, msg in differences + ) + raise AssertionError(error_message) def test_reader_eln(tmp_path): From afa6cf4d14ffa8eb0a7f99d3e427afeba1c3b4a5 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Tue, 1 Apr 2025 12:04:27 +0200 Subject: [PATCH 08/17] use default readers to build valid ELN for direct upload --- src/pynxtools/dataconverter/nexus_tree.py | 79 +++++++++++++++++++++-- src/pynxtools/eln_mapper/schema_eln.py | 34 +++++++--- 2 files changed, 101 insertions(+), 12 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 0eafac87c..a6bd6e0a1 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -29,11 +29,12 @@ """ from functools import lru_cache, reduce -from typing import Any, List, Literal, Optional, Set, Tuple, Union +from typing import Any, List, Dict, Literal, Optional, Set, Tuple, Union import lxml.etree as ET from anytree.node.nodemixin import NodeMixin +from pynxtools import get_definitions_url from pynxtools.dataconverter.helpers import ( get_all_parents_for, get_nxdl_root_and_path, @@ -430,7 +431,7 @@ def required_fields_and_attrs_names( return req_children - def get_docstring(self, depth: Optional[int] = None) -> List[str]: + def get_docstring(self, depth: Optional[int] = None) -> Dict[str, str]: """ Gets the docstrings of the current node and its parents up to a certain depth. @@ -449,14 +450,84 @@ def get_docstring(self, depth: Optional[int] = None) -> List[str]: if depth is not None and depth < 0: raise ValueError("Depth must be a positive integer or None") - docstrings = [] + docstrings = {} for elem in self.inheritance[:depth][::-1]: doc = elem.find("nx:doc", namespaces=namespaces) + if doc is not None: - docstrings.append(doc.text) + name = elem.attrib.get("name") + if not name: + name = elem.attrib["type"][2:].upper() + docstrings[name] = doc.text return docstrings + # TODO: add a function to the the link to the documentation item + # def get_link(self) -> Optional[str]: + # """ + # Get documentation url + # """ + # def __get_documentation_url( + # xml_node: ET.Element, nx_type: Optional[str] + # ) -> Optional[str]: + # """ + # Get documentation url + # """ + # if nx_type is None: + # return None + + # anchor_segments = [] + # if nx_type != "class": + # anchor_segments.append(nx_type) + + # while True: + # nx_type = xml_node.get("type") + # if nx_type: + # nx_type = nx_type.replace("NX", "") + # segment = xml_node.get("name", nx_type) # type: ignore + # anchor_segments.append(segment.replace("_", "-")) + + # xml_parent = xml_node + # xml_node = __XML_PARENT_MAP.get(xml_node) + # if xml_node is None: + # break + + # definitions_url = get_definitions_url() + + # doc_base = __NX_DOC_BASES.get( + # definitions_url, "https://manual.nexusformat.org/classes" + # ) + # nx_package = xml_parent.get("nxdl_base").split("/")[-1] + # anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) + # nx_file = anchor_segments[-1].replace("-", "_") + # return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" + + # anchor_segments = [] + # if self.type != "class": + # anchor_segments.append(self.type) + + # while True: + # if nx_type: + # nx_type = nx_type.replace("NX", "") + # segment = xml_node.get("name", nx_type) # type: ignore + # anchor_segments.append(segment.replace("_", "-")) + + # for elem in self.inheritance[::-1]: + # xml_node = __XML_PARENT_MAP.get(xml_node) + # if xml_node is None: + # break + + # definitions_url = get_definitions_url() + + # doc_base = __NX_DOC_BASES.get( + # definitions_url, "https://manual.nexusformat.org/classes" + # ) + # nx_package = xml_parent.get("nxdl_base").split("/")[-1] + # anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) + # nx_file = anchor_segments[-1].replace("-", "_") + # return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" + # pass + def _build_inheritance_chain(self, xml_elem: ET._Element) -> List[ET._Element]: """ Builds the inheritance chain based on the given xml node and the inheritance diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index f1ac83bd3..3d8481559 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -18,7 +18,7 @@ # import re -from typing import List, Dict, Union +from typing import List, Dict, Union, Tuple from pynxtools.dataconverter.nexus_tree import ( NexusEntity, @@ -27,7 +27,7 @@ ) from pynxtools.eln_mapper.eln import ElnGenerator -NEXUS_TO_NOMAD_QUANTITY = { +NEXUS_TO_NOMAD_QUANTITY: Dict[str, Tuple[str]] = { "NX_BINARY": ("bytes", "NumberEditQuantity"), "NX_BOOLEAN": ("bool", "BoolEditQuantity"), "NX_CHAR": ("str", "StringEditQuantity"), @@ -38,6 +38,7 @@ "NX_INT": ("int", "NumberEditQuantity"), "NX_NUMBER": ("np.float64", "NumberEditQuantity"), "NX_POSINT": ("int", "NumberEditQuantity"), + "NX_UINT": ("int", "NumberEditQuantity"), } DEFAULT_UNITS: Dict[str, Union[str, None]] = { @@ -76,13 +77,28 @@ "NX_WAVENUMBER": "1 / m", } +DEFAULT_READER: Dict[str, str] = { + "NXafm": "spm", + "NXapm": "apm", + "NXellips": "ellips", + "NXem": "em", + "NXmpes": "mpes", + "NXxps": "xps", + "NXraman": "raman", + "NXspm": "spm", + "NXsts": "spm", + "NXstm": "spm", + "NXxps": "xps", + "NXxrd": "xrd", +} + def construct_description(node: NexusNode, concept_dict: Dict) -> None: """Collect doc from concept doc (and inherited docs).""" inherited_docstrings = node.get_docstring() - for doc in inherited_docstrings[::-1]: - if doc: + for key, doc in list(inherited_docstrings.items())[::-1]: + if doc is not None: doc = re.sub(r"\s+", " ", doc).strip() concept_dict["description"] = doc break @@ -133,12 +149,12 @@ def _generate_eln_header(self) -> Dict: # Basic building blocks of ELN self.recursive_dict["definitions"] = { - "name": "", + "name": f"{self.nxdl.lstrip('NX')} ELN data schema", "sections": {}, } sections = self.recursive_dict["definitions"]["sections"] - root_name = f"ELN for {self.nxdl.lstrip('NX').upper()}" + root_name = f"ELN for {self.nxdl.lstrip('NX')}" sections[root_name] = {} # Note for later: create a new function to handle root part @@ -151,9 +167,11 @@ def _generate_eln_header(self) -> Dict: } ) + reader = DEFAULT_READER.get(self.nxdl, "") + m_annotations: Dict = { "m_annotations": { - "template": {"reader": "", "nxdl": self.nxdl}, + "template": {"reader": reader, "nxdl": self.nxdl}, "eln": {"hide": []}, } } @@ -267,7 +285,7 @@ def _construct_entity_structure( entity_dict["type"] = entity_type if unit: entity_dict["unit"] = unit - entity_dict["value"] = "" + # entity_dict["value"] = "" # handle m_annotation eln_dict = { From e365888e3a05a47b0f15513841ccc5036fe6c2bf Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 2 Apr 2025 18:01:38 +0200 Subject: [PATCH 09/17] add links to generate ELN schema --- src/pynxtools/dataconverter/nexus_tree.py | 105 ++++++++-------------- src/pynxtools/eln_mapper/eln.py | 1 + src/pynxtools/eln_mapper/schema_eln.py | 5 +- 3 files changed, 43 insertions(+), 68 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index a6bd6e0a1..72f7ca9f7 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -149,6 +149,8 @@ class NexusNode(NodeMixin): parent_of: List["NexusNode"]: The inverse of the above `is_a`. In the example case `DATA` `parent_of` `my_data`. + nxdl_base: str + Base of the NXDL file where the XML element for this node is defined """ name: str @@ -159,6 +161,7 @@ class NexusNode(NodeMixin): inheritance: List[ET._Element] is_a: List["NexusNode"] parent_of: List["NexusNode"] + nxdl_base: str def _set_optionality(self): """ @@ -191,12 +194,14 @@ def __init__( variadic: Optional[bool] = None, parent: Optional["NexusNode"] = None, inheritance: Optional[List[Any]] = None, + nxdl_base: Optional[str] = None, ) -> None: super().__init__() self.name = name self.type = type self.name_type = name_type self.optionality = optionality + self.nxdl_base = nxdl_base self.variadic = is_variadic(self.name, self.name_type) if variadic is not None: self.variadic = variadic @@ -462,71 +467,36 @@ def get_docstring(self, depth: Optional[int] = None) -> Dict[str, str]: return docstrings - # TODO: add a function to the the link to the documentation item - # def get_link(self) -> Optional[str]: - # """ - # Get documentation url - # """ - # def __get_documentation_url( - # xml_node: ET.Element, nx_type: Optional[str] - # ) -> Optional[str]: - # """ - # Get documentation url - # """ - # if nx_type is None: - # return None - - # anchor_segments = [] - # if nx_type != "class": - # anchor_segments.append(nx_type) - - # while True: - # nx_type = xml_node.get("type") - # if nx_type: - # nx_type = nx_type.replace("NX", "") - # segment = xml_node.get("name", nx_type) # type: ignore - # anchor_segments.append(segment.replace("_", "-")) - - # xml_parent = xml_node - # xml_node = __XML_PARENT_MAP.get(xml_node) - # if xml_node is None: - # break - - # definitions_url = get_definitions_url() - - # doc_base = __NX_DOC_BASES.get( - # definitions_url, "https://manual.nexusformat.org/classes" - # ) - # nx_package = xml_parent.get("nxdl_base").split("/")[-1] - # anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - # nx_file = anchor_segments[-1].replace("-", "_") - # return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" - - # anchor_segments = [] - # if self.type != "class": - # anchor_segments.append(self.type) - - # while True: - # if nx_type: - # nx_type = nx_type.replace("NX", "") - # segment = xml_node.get("name", nx_type) # type: ignore - # anchor_segments.append(segment.replace("_", "-")) - - # for elem in self.inheritance[::-1]: - # xml_node = __XML_PARENT_MAP.get(xml_node) - # if xml_node is None: - # break - - # definitions_url = get_definitions_url() - - # doc_base = __NX_DOC_BASES.get( - # definitions_url, "https://manual.nexusformat.org/classes" - # ) - # nx_package = xml_parent.get("nxdl_base").split("/")[-1] - # anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - # nx_file = anchor_segments[-1].replace("-", "_") - # return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" - # pass + def get_link(self) -> Optional[str]: + """ + Get documentation url + """ + from pynxtools.nomad.utils import NX_DOC_BASES + + anchor_segments = [self.type] + current_node = self + + while True: + if not current_node: + break + + segment = current_node.name + anchor_segments.append(current_node.name.replace("_", "-")) + current_node = current_node.parent + + definitions_url = get_definitions_url() + doc_base = NX_DOC_BASES.get( + definitions_url, "https://manual.nexusformat.org/classes" + ) + nx_file = self.nxdl_base.split("/definitions/")[-1].split(".nxdl.xml")[0] + + # add the name of the base file at the end, drop the appdef name + anchor_segments = anchor_segments[:-1] + anchor_segments += [self.nxdl_base.split("/")[-1].split(".nxdl.xml")[0].lower()] + + anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) + + return f"{doc_base}/{nx_file}.html#{anchor}" def _build_inheritance_chain(self, xml_elem: ET._Element) -> List[ET._Element]: """ @@ -630,6 +600,7 @@ def add_node_from(self, xml_elem: ET._Element) -> Optional["NexusNode"]: name_type=name_type, type=tag, optionality=default_optionality, + nxdl_base=xml_elem.base, ) elif tag == "group": name = xml_elem.attrib.get("name") @@ -646,6 +617,7 @@ def add_node_from(self, xml_elem: ET._Element) -> Optional["NexusNode"]: nx_class=xml_elem.attrib["type"], inheritance=inheritance_chain, optionality=default_optionality, + nxdl_base=xml_elem.base, ) elif tag == "choice": current_elem = NexusChoice( @@ -653,6 +625,7 @@ def add_node_from(self, xml_elem: ET._Element) -> Optional["NexusNode"]: name=xml_elem.attrib["name"], name_type=name_type, optionality=default_optionality, + nxdl_base=xml_elem.base, ) else: # TODO: Tags: link @@ -1056,6 +1029,7 @@ def add_children_to(parent: NexusNode, xml_elem: ET._Element) -> None: variadic=False, parent=None, inheritance=appdef_inheritance_chain, + nxdl_base=appdef_xml_root.base, ) # Set root attributes if set_root_attr: @@ -1070,5 +1044,4 @@ def add_children_to(parent: NexusNode, xml_elem: ET._Element) -> None: # Add all fields and attributes from the parent appdefs if len(appdef_inheritance_chain) > 1: populate_tree_from_parents(tree) - return tree diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index 0d213b0a1..485c3a0dd 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -92,6 +92,7 @@ def __init__( "are skipped, is this intentional?" ) + @abstractmethod def _generate_output_file_name(self, output_file: str): """ Generate the output file name of the schema ELN generator. diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index 3d8481559..5f0303976 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -225,8 +225,9 @@ def _construct_group_structure( section = group_dict["section"] section.update(default_m_annot) - # handle description + # handle description and link construct_description(node, section) + section["links"] = [node.get_link()] # pass the grp elment for recursive search self._recurse_tree(node, section, recursion_level + 1) @@ -297,7 +298,7 @@ def _construct_entity_structure( entity_dict.update(m_annotation) - # handle description construct_description(node, entity_dict) + entity_dict["links"] = [node.get_link()] self._recurse_tree(node, entity_dict, recursion_level + 1) From 92338db6c063959a8b7eae7da2f2bfc9010a3932 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 2 Apr 2025 19:27:53 +0200 Subject: [PATCH 10/17] do not use deprecated m_annotations --- src/pynxtools/eln_mapper/eln.py | 9 +++++-- src/pynxtools/eln_mapper/reader_eln.py | 2 +- src/pynxtools/eln_mapper/schema_eln.py | 34 +++++++++++++------------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index 485c3a0dd..06c2d578b 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -113,7 +113,10 @@ def _generate_eln_header(self) -> Dict: @abstractmethod def _construct_group_structure( - self, node: NexusGroup, recursive_dict: Dict, recursion_level: int + self, + node: NexusGroup, + recursive_dict: Dict, + recursion_level: int, ) -> bool: """ Handle NeXus group. @@ -129,7 +132,9 @@ def _construct_group_structure( if self.filter is not None and all( _should_skip_iteration(child, self.filter) for child in node.children ): - if not all([child.type == "group" for child in node.children]): + if not node.children or not all( + [child.type == "group" for child in node.children] + ): self._recurse_tree(node, recursive_dict, recursion_level) return False # early exit diff --git a/src/pynxtools/eln_mapper/reader_eln.py b/src/pynxtools/eln_mapper/reader_eln.py index 05f9e4c55..d1a43f2a4 100644 --- a/src/pynxtools/eln_mapper/reader_eln.py +++ b/src/pynxtools/eln_mapper/reader_eln.py @@ -89,7 +89,7 @@ def _construct_group_structure( Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ if not super()._construct_group_structure( - node, recursive_dict, recursion_level + node, recursive_dict, recursion_level=True ): return diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index 5f0303976..36b551bc7 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -202,20 +202,16 @@ def _construct_group_structure( node, recursive_dict, recursion_level ): return - # if subsections is None: if "sub_sections" not in recursive_dict: recursive_dict["sub_sections"] = {} subsections = recursive_dict["sub_sections"] - default_m_annot = {"m_annotations": {"eln": {"overview": True}}} + m_annotations = {"m_annotations": {"display": {"visible": True}}} group_name = node.name - if node.variadic: - if node.name_type == "any": - group_name = ( - group_name.lower() - ) # this is just a suggestion for easier use + if node.name_type == "any": + group_name = group_name.lower() # this is just a suggestion for easier use subsections[group_name] = {} group_dict = subsections[group_name] @@ -223,7 +219,9 @@ def _construct_group_structure( # add section in group group_dict["section"] = {} section = group_dict["section"] - section.update(default_m_annot) + if node.variadic: + section["repeats"] = True + section.update(m_annotations) # handle description and link construct_description(node, section) @@ -284,19 +282,21 @@ def _construct_entity_structure( unit = DEFAULT_UNITS.get(node.unit) entity_dict["type"] = entity_type + + display_dict = {"visible": True} if unit: - entity_dict["unit"] = unit - # entity_dict["value"] = "" + display_dict["unit"] = unit - # handle m_annotation - eln_dict = { - "component": component_name, + m_annotations = { + "m_annotations": { + "eln": { + "component": component_name, + }, + "display": display_dict, + } } - if unit: - eln_dict["defaultDisplayUnit"] = unit - m_annotation = {"m_annotations": {"eln": eln_dict}} - entity_dict.update(m_annotation) + entity_dict.update(m_annotations) construct_description(node, entity_dict) entity_dict["links"] = [node.get_link()] From ff7f1be80c961bef1ed7b68ecc226d2e91c18168 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 2 Apr 2025 19:49:51 +0200 Subject: [PATCH 11/17] fix for tests --- src/pynxtools/eln_mapper/eln.py | 3 + src/pynxtools/eln_mapper/reader_eln.py | 4 +- .../{eln.yaml => scan.eln_data.yaml} | 0 .../data/eln_mapper/scan.scheme.archive.yaml | 72 +++++++++++++------ tests/eln_mapper/test_eln_mapper.py | 4 +- 5 files changed, 59 insertions(+), 24 deletions(-) rename tests/data/eln_mapper/{eln.yaml => scan.eln_data.yaml} (100%) diff --git a/src/pynxtools/eln_mapper/eln.py b/src/pynxtools/eln_mapper/eln.py index 06c2d578b..d6c3867e7 100644 --- a/src/pynxtools/eln_mapper/eln.py +++ b/src/pynxtools/eln_mapper/eln.py @@ -218,4 +218,7 @@ def generate_eln(self) -> None: top_level_section = self._generate_eln_header() self._recurse_tree(tree, top_level_section, recursion_level=0) + if not self.recursive_dict: + logger.error("Could not write YAML file as it would be empty!") + return self._write_yaml() diff --git a/src/pynxtools/eln_mapper/reader_eln.py b/src/pynxtools/eln_mapper/reader_eln.py index d1a43f2a4..cd0ac8357 100644 --- a/src/pynxtools/eln_mapper/reader_eln.py +++ b/src/pynxtools/eln_mapper/reader_eln.py @@ -46,7 +46,7 @@ def _generate_output_file_name(self, output_file: str): """ file_parts: list = [] - out_file_ext = "eln.yaml" + out_file_ext = "eln_data.yaml" raw_name = "" out_file = "" @@ -89,7 +89,7 @@ def _construct_group_structure( Recursion level in the tree, used to (optionally) skip upper levels like NXentry """ if not super()._construct_group_structure( - node, recursive_dict, recursion_level=True + node, recursive_dict, recursion_level ): return diff --git a/tests/data/eln_mapper/eln.yaml b/tests/data/eln_mapper/scan.eln_data.yaml similarity index 100% rename from tests/data/eln_mapper/eln.yaml rename to tests/data/eln_mapper/scan.eln_data.yaml diff --git a/tests/data/eln_mapper/scan.scheme.archive.yaml b/tests/data/eln_mapper/scan.scheme.archive.yaml index 90634a48f..97a18fe39 100644 --- a/tests/data/eln_mapper/scan.scheme.archive.yaml +++ b/tests/data/eln_mapper/scan.scheme.archive.yaml @@ -1,7 +1,7 @@ definitions: - name: + name: scan ELN data schema sections: - ELN for SCAN: + ELN for scan: base_sections: - pynxtools.nomad.dataconverter.NexusDataConverter - nomad.datamodel.data.EntryData @@ -14,31 +14,41 @@ definitions: quantities: title: type: str - value: m_annotations: eln: component: StringEditQuantity + display: + visible: true description: Extended title for entry + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-title-field start_time: type: Datetime - value: m_annotations: eln: component: DateTimeEditQuantity + display: + visible: true description: Starting time of measurement + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-start-time-field end_time: type: Datetime - value: m_annotations: eln: component: DateTimeEditQuantity + display: + visible: true description: Ending time of measurement + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-end-time-field sub_sections: instrument: section: + repeats: true m_annotations: - eln: - overview: true + display: + visible: true description: Collection of the components of the instrument or beamline. Template of instrument descriptions comprising various beamline components. Each component will also be a NeXus group defined by its distance from @@ -47,20 +57,26 @@ definitions: are after the sample. This device allows the unique identification of beamline components in a way that is valid for both reactor and pulsed instrumentation. + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-instrument-group sub_sections: detector: section: + repeats: true m_annotations: - eln: - overview: true + display: + visible: true description: A detector, detector bank, or multidetector. + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-instrument-detector-group quantities: data: type: int - value: m_annotations: eln: component: NumberEditQuantity + display: + visible: true description: Data values from the detector. The rank and dimension ordering should follow a principle of slowest to fastest measurement axes and may be explicitly specified in application definitions. @@ -90,51 +106,65 @@ definitions: detector pixels, then time-of-flight (i.e. spectroscopy, spectrometry). The rank and dimension sizes (see symbol list) shown here are merely illustrative of coordination between related datasets. + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-instrument-detector-data-field sample: section: + repeats: true m_annotations: - eln: - overview: true + display: + visible: true description: Any information on the sample. This could include scanned variables that are associated with one of the data dimensions, e.g. the magnetic field, or logged data, e.g. monitored temperature vs elapsed time. + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-sample-group quantities: rotation_angle: type: np.float64 - unit: degree - value: m_annotations: eln: component: NumberEditQuantity - defaultDisplayUnit: degree + display: + visible: true + unit: degree description: Optional rotation angle for the case when the powder diagram has been obtained through an omega-2theta scan like from a traditional single detector powder diffractometer. Note, it is recommended to use NXtransformations instead. + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-sample-rotation-angle-field monitor: section: + repeats: true m_annotations: - eln: - overview: true + display: + visible: true description: A monitor of incident beam data. It is similar to the :ref:`NXdata` groups containing monitor data and its associated axis coordinates, e.g. time_of_flight or wavelength in pulsed neutron instruments. However, it may also include integrals, or scalar monitor counts, which are often used in both in both pulsed and steady-state instrumentation. + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-monitor-group quantities: data: type: int - value: m_annotations: eln: component: NumberEditQuantity + display: + visible: true description: Monitor data + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-monitor-data-field data: section: + repeats: true m_annotations: - eln: - overview: true + display: + visible: true description: 'The data group .. note:: Before the NIAC2016 meeting [#]_, at least one :ref:`NXdata` group was required in each :ref:`NXentry` group. At the NIAC2016 meeting, it was decided to make :ref:`NXdata` @@ -149,3 +179,5 @@ definitions: the application definition will indicate the :ref:`NXdata` group is optional, otherwise, it is required. .. [#] NIAC2016: https://www.nexusformat.org/NIAC2016.html, https://github.com/nexusformat/NIAC/issues/16' + links: + - https://fairmat-nfdi.github.io/nexus_definitions/classes/applications/NXscan.html#nxscan-entry-data-group diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index 097772517..e894ede8b 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -77,9 +77,9 @@ def test_reader_eln(tmp_path): """ local_dir = os.path.abspath(os.path.dirname(__file__)) - ref_file = os.path.join(local_dir, "../data/eln_mapper/eln.yaml") + ref_file = os.path.join(local_dir, "../data/eln_mapper/scan.eln_data.yaml") - test_file = os.path.join(tmp_path, "eln.yaml") + test_file = os.path.join(tmp_path, "scan.eln_data.yaml") cli_run = testing.CliRunner() cli_run.invoke( eln_mapper.get_eln, From 4901c78eb1329ca27f38f81de73149a0a2c1b549 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 2 Apr 2025 19:58:28 +0200 Subject: [PATCH 12/17] mypy fixes --- src/pynxtools/dataconverter/nexus_tree.py | 6 +++--- src/pynxtools/eln_mapper/schema_eln.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 72f7ca9f7..46e3ac807 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -467,7 +467,7 @@ def get_docstring(self, depth: Optional[int] = None) -> Dict[str, str]: return docstrings - def get_link(self) -> Optional[str]: + def get_link(self) -> str: """ Get documentation url """ @@ -481,7 +481,7 @@ def get_link(self) -> Optional[str]: break segment = current_node.name - anchor_segments.append(current_node.name.replace("_", "-")) + anchor_segments.append(current_node.name.replace("_", "-")) # type: ignore[arg-type] current_node = current_node.parent definitions_url = get_definitions_url() @@ -492,7 +492,7 @@ def get_link(self) -> Optional[str]: # add the name of the base file at the end, drop the appdef name anchor_segments = anchor_segments[:-1] - anchor_segments += [self.nxdl_base.split("/")[-1].split(".nxdl.xml")[0].lower()] + anchor_segments += [self.nxdl_base.split("/")[-1].split(".nxdl.xml")[0].lower()] # type: ignore[list-item] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index 36b551bc7..c55bc527d 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -27,7 +27,7 @@ ) from pynxtools.eln_mapper.eln import ElnGenerator -NEXUS_TO_NOMAD_QUANTITY: Dict[str, Tuple[str]] = { +NEXUS_TO_NOMAD_QUANTITY: Dict[str, Tuple[str, str]] = { "NX_BINARY": ("bytes", "NumberEditQuantity"), "NX_BOOLEAN": ("bool", "BoolEditQuantity"), "NX_CHAR": ("str", "StringEditQuantity"), @@ -283,7 +283,7 @@ def _construct_entity_structure( entity_dict["type"] = entity_type - display_dict = {"visible": True} + display_dict: Dict[str, Union[bool, str]] = {"visible": True} if unit: display_dict["unit"] = unit From 2f033e16ae8772815e5808f05e2d0051b23f47c5 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 3 Apr 2025 09:14:05 +0200 Subject: [PATCH 13/17] fix test for nexus_tree --- tests/dataconverter/test_nexus_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dataconverter/test_nexus_tree.py b/tests/dataconverter/test_nexus_tree.py index 67cf33c44..2b4e0c067 100644 --- a/tests/dataconverter/test_nexus_tree.py +++ b/tests/dataconverter/test_nexus_tree.py @@ -43,7 +43,7 @@ def get_node_fields(tree: NexusNode) -> List[Tuple[str, Any]]: return list( filter( lambda x: not x[0].startswith("_") - and x[0] not in ("inheritance", "is_a", "parent_of"), + and x[0] not in ("inheritance", "is_a", "parent_of", "nxdl_base"), tree.__dict__.items(), ) ) From 44b80354046f5ea1c934efaef4abc9e989f0105f Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 3 Apr 2025 09:15:53 +0200 Subject: [PATCH 14/17] fix test for eln_mapper --- tests/eln_mapper/test_eln_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py index e894ede8b..6f092def1 100644 --- a/tests/eln_mapper/test_eln_mapper.py +++ b/tests/eln_mapper/test_eln_mapper.py @@ -116,7 +116,7 @@ def test_scheme_eln(tmp_path): local_dir = os.path.abspath(os.path.dirname(__file__)) ref_file = os.path.join(local_dir, "../data/eln_mapper/scan.scheme.archive.yaml") - test_file = os.path.join(tmp_path, ".scheme.archive.yaml") + test_file = os.path.join(tmp_path, "scan.scheme.archive.yaml") cli_run = testing.CliRunner() cli_run.invoke( eln_mapper.get_eln, From da02c0067d08329034722c40839711e4ecd98135 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 3 Apr 2025 09:34:40 +0200 Subject: [PATCH 15/17] update default readers --- src/pynxtools/eln_mapper/schema_eln.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index c55bc527d..48560fa9d 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -80,14 +80,13 @@ DEFAULT_READER: Dict[str, str] = { "NXafm": "spm", "NXapm": "apm", - "NXellips": "ellips", + "NXellipsometry": "ellips", "NXem": "em", "NXmpes": "mpes", - "NXxps": "xps", "NXraman": "raman", "NXspm": "spm", - "NXsts": "spm", "NXstm": "spm", + "NXsts": "spm", "NXxps": "xps", "NXxrd": "xrd", } From 63737796f4a448185a83fb2161386b64284a3bd8 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 3 Apr 2025 09:52:48 +0200 Subject: [PATCH 16/17] restructure NOMAD imports --- src/pynxtools/dataconverter/nexus_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index 46e3ac807..79ae5cf68 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -46,6 +46,7 @@ get_nx_namefit, is_name_type, ) +from pynxtools import NX_DOC_BASES NexusType = Literal[ "NX_BINARY", @@ -471,7 +472,6 @@ def get_link(self) -> str: """ Get documentation url """ - from pynxtools.nomad.utils import NX_DOC_BASES anchor_segments = [self.type] current_node = self From 14089cd4181f2c76558d51f5bd3ca0fc6cd0013e Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Thu, 3 Apr 2025 14:03:10 +0200 Subject: [PATCH 17/17] include units for NumberEditQuantity --- src/pynxtools/eln_mapper/schema_eln.py | 1 + tests/data/eln_mapper/scan.scheme.archive.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/src/pynxtools/eln_mapper/schema_eln.py b/src/pynxtools/eln_mapper/schema_eln.py index 48560fa9d..e33600f46 100644 --- a/src/pynxtools/eln_mapper/schema_eln.py +++ b/src/pynxtools/eln_mapper/schema_eln.py @@ -284,6 +284,7 @@ def _construct_entity_structure( display_dict: Dict[str, Union[bool, str]] = {"visible": True} if unit: + entity_dict["unit"] = unit display_dict["unit"] = unit m_annotations = { diff --git a/tests/data/eln_mapper/scan.scheme.archive.yaml b/tests/data/eln_mapper/scan.scheme.archive.yaml index 97a18fe39..5e884f746 100644 --- a/tests/data/eln_mapper/scan.scheme.archive.yaml +++ b/tests/data/eln_mapper/scan.scheme.archive.yaml @@ -123,6 +123,7 @@ definitions: quantities: rotation_angle: type: np.float64 + unit: degree m_annotations: eln: component: NumberEditQuantity