MPES reader changes from mpes refactoring by domna · Pull Request #203 · FAIRmat-NFDI/pynxtools · GitHub
[go: up one dir, main page]
Skip to content

MPES reader changes from mpes refactoring #203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
9f8fe41
Definitions upgraded to latest mpes-refactor
domna Jan 8, 2024
b86a5fe
Adapt vscode settings and update definitions, again
domna Jan 11, 2024
781826b
Updates definitions
domna Jan 17, 2024
26841e4
Updates config file
domna Jan 17, 2024
872fa05
Use nested config file structure
domna Jan 17, 2024
9a3246b
flatten_json cleaned up
domna Jan 17, 2024
4d469fa
Updates definitions
domna Jan 17, 2024
6e56b02
Renames DATA[data] to just data
domna Jan 18, 2024
e72f25e
Use beam_TYPE and source_TYPE
domna Jan 18, 2024
fa4e27f
Add flag to reatin link dicts for flatten_json
domna Jan 18, 2024
669dc52
Properly resolve upper/lower case group fields
domna Jan 18, 2024
faa2792
Fixes a lookup error when keys have been filled in template with a di…
sherjeelshabih Jan 18, 2024
7514c1d
Fixes the case where the reader sends back a template entry with the …
sherjeelshabih Jan 18, 2024
fc4b034
Correct coeffs path for raw data file
domna Jan 18, 2024
57a3900
Fix path in data dict test
domna Jan 18, 2024
c9142e9
Update mpes reference file
domna Jan 18, 2024
adb6926
Updates reference nexus log
domna Jan 18, 2024
7c07f35
Merge branch 'master' into mpes-reader-update
domna Jan 18, 2024
f890df0
Fix nexus regression file
domna Jan 18, 2024
d8a8b36
Don't use removeprefix to support py3.8
domna Jan 18, 2024
96e40a1
Move pressure_gauge to correct location
domna Jan 18, 2024
efb56e8
Corrects link to pressure_gauge
domna Jan 18, 2024
5b45780
Updates reference mpes test log
domna Jan 18, 2024
fa1db7d
Updates path for eln mapping
domna Jan 18, 2024
27fe1b8
Fixes eln mapping
domna Jan 18, 2024
343cd54
Skip test for xps reader
domna Jan 18, 2024
79d845b
Fix typos
domna Jan 18, 2024
9200ef3
Updates generated eln files
domna Jan 18, 2024
5f25fb2
Fix undocumented @units if key is documented
domna Jan 18, 2024
eda5262
Updates mpes reference file
domna Jan 18, 2024
9140ff3
Removed applied fields in calibrations
domna Jan 19, 2024
5ba4f66
Removed photon_energies from source
domna Jan 19, 2024
3a3f86f
Nest electronanalyser under instrument
domna Jan 19, 2024
31f71fe
Use momentum_resolution and spatial_resolution as named concepts
domna Jan 19, 2024
e780737
Fix eln mapping for spatial and momentum resolution in electronanalyser
domna Jan 19, 2024
44d94e7
Updates mpes reference file
domna Jan 19, 2024
689a0ee
Renamings
domna Jan 19, 2024
116f099
Renamings in NXsample
domna Jan 19, 2024
279ba0e
Properly deal with undocumented @units as discussed w/ @sherjeelshabih
domna Jan 19, 2024
3afb459
Updates mpes reference file
domna Jan 19, 2024
3b04d23
Correctly map chemical formula
domna Jan 19, 2024
73728f7
Adds additional mappings
domna Jan 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"[python]": {
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll": true,
"source.organizeImports": true
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
},
"editor.defaultFormatter": "charliermarsh.ruff"
},
Expand Down
36 changes: 31 additions & 5 deletions pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
"""Helper functions commonly used by the convert routine."""

import json
import re
import logging
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from typing import Any, Callable, List, Optional, Tuple, Union
Expand Down Expand Up @@ -386,11 +386,20 @@ def is_valid_data_field(value, nxdl_type, path):
return value


def path_in_data_dict(nxdl_path: str, data: dict) -> Tuple[bool, str]:
def path_in_data_dict(nxdl_path: str, hdf_path: str, data: dict) -> Tuple[bool, str]:
"""Checks if there is an accepted variation of path in the dictionary & returns the path."""
accepted_unfilled_key = None
for key in data.keys():
if nxdl_path == convert_data_converter_dict_to_nxdl_path(key):
if (
nxdl_path == convert_data_converter_dict_to_nxdl_path(key)
or convert_data_dict_path_to_hdf5_path(key) == hdf_path
):
if data[key] is None:
accepted_unfilled_key = key
continue
return True, key
if accepted_unfilled_key:
return True, accepted_unfilled_key
return False, None


Expand Down Expand Up @@ -435,7 +444,12 @@ def all_required_children_are_set(optional_parent_path, data, nxdl_root):
if (
nxdl_key[0 : nxdl_key.rfind("/")] == optional_parent_path
and is_node_required(nxdl_key, nxdl_root)
and data[key] is None
and data[
path_in_data_dict(
nxdl_key, convert_data_dict_path_to_hdf5_path(key), data
)[1]
]
is None
):
return False

Expand Down Expand Up @@ -497,7 +511,9 @@ def ensure_all_required_fields_exist(template, data, nxdl_root):
if entry_name == "@units":
continue
nxdl_path = convert_data_converter_dict_to_nxdl_path(path)
is_path_in_data_dict, renamed_path = path_in_data_dict(nxdl_path, data)
is_path_in_data_dict, renamed_path = path_in_data_dict(
nxdl_path, convert_data_dict_path_to_hdf5_path(path), data
)

renamed_path = path if renamed_path is None else renamed_path
if path in template["lone_groups"]:
Expand Down Expand Up @@ -529,6 +545,16 @@ def try_undocumented(data, nxdl_root: ET.Element):
nxdl_path = convert_data_converter_dict_to_nxdl_path(path)

if entry_name == "@units":
field_path = path.rsplit("/", 1)[0]
if field_path in data.get_documented() and path in data.undocumented:
field_requiredness = get_required_string(
nexus.get_node_at_nxdl_path(
nxdl_path=convert_data_converter_dict_to_nxdl_path(field_path),
elem=nxdl_root,
)
)
data[field_requiredness][path] = data.undocumented[path]
del data.undocumented[path]
continue

if entry_name[0] == "@" and "@" in nxdl_path:
Expand Down
43 changes: 30 additions & 13 deletions pynxtools/dataconverter/readers/mpes/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,20 @@
#
"""MPES reader implementation for the DataConverter."""
import errno
import json
import os
from functools import reduce
from typing import Any
from typing import Tuple
from typing import Any, Tuple

import h5py
import xarray as xr
import yaml

from pynxtools.dataconverter.readers.base.reader import BaseReader
from pynxtools.dataconverter.readers.utils import flatten_and_replace, FlattenSettings
from pynxtools.dataconverter.readers.utils import (
FlattenSettings,
flatten_and_replace,
parse_flatten_json,
)

DEFAULT_UNITS = {
"X": "step",
Expand Down Expand Up @@ -160,19 +162,35 @@ def iterate_dictionary(dic, key_string):
"Instrument": "INSTRUMENT[instrument]",
"Analyzer": "ELECTRONANALYSER[electronanalyser]",
"Manipulator": "MANIPULATOR[manipulator]",
"Beam": "BEAM[beam]",
"Beam": "beam_TYPE[beam]",
"unit": "@units",
"Sample": "SAMPLE[sample]",
"Source": "SOURCE[source]",
"Source": "source_TYPE[source]",
"User": "USER[user]",
"energy_resolution": "energy_resolution/resolution",
"momentum_resolution": "RESOLUTION[momentum_resolution]/resolution",
"temporal_resolution": "RESOLUTION[temporal_resolution]/resolution",
"spatial_resolution": "RESOLUTION[spatial_resolution]/resolution",
"sample_temperature": "temperature_sensor/value",
}

REPLACE_NESTED = {
"SOURCE[source]/Probe": "SOURCE[source]",
"SOURCE[source]/Pump": "SOURCE[source_pump]",
"BEAM[beam]/Probe": "BEAM[beam]",
"BEAM[beam]/Pump": "BEAM[beam_pump]",
"sample_history": "sample_history/description",
"SAMPLE[sample]/chemical_formula": "SAMPLE[sample]/SUBSTANCE[substance]/molecular_formula_hill",
"source_TYPE[source]/Probe": "source_TYPE[source_probe]",
"source_TYPE[source]/Pump": "source_TYPE[source_pump]",
"beam_TYPE[beam]/Probe": "beam_TYPE[beam_probe]",
"beam_TYPE[beam]/Pump": "beam_TYPE[beam_pump]",
"sample_history": "sample_history/notes",
"ELECTRONANALYSER[electronanalyser]/RESOLUTION[momentum_resolution]": (
"ELECTRONANALYSER[electronanalyser]/momentum_resolution"
),
"ELECTRONANALYSER[electronanalyser]/RESOLUTION[spatial_resolution]": (
"ELECTRONANALYSER[electronanalyser]/spatial_resolution"
),
"SAMPLE[sample]/gas_pressure": "INSTRUMENT[instrument]/pressure_gauge/value",
"SAMPLE[sample]/temperature": (
"INSTRUMENT[instrument]/MANIPULATOR[manipulator]/temperature_sensor/value"
),
}


Expand Down Expand Up @@ -208,8 +226,7 @@ def handle_h5_and_json_file(file_paths, objects):
if file_extension == ".h5":
x_array_loaded = h5_to_xarray(file_path)
elif file_extension == ".json":
with open(file_path, encoding="utf-8") as file:
config_file_dict = json.load(file)
config_file_dict = parse_flatten_json(file_path)
elif file_extension in [".yaml", ".yml"]:
with open(file_path, encoding="utf-8") as feln:
eln_data_dict = flatten_and_replace(
Expand Down
100 changes: 97 additions & 3 deletions pynxtools/dataconverter/readers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
# limitations under the License.
#
"""Utility functions for the NeXus reader classes."""
import json
import logging
from dataclasses import dataclass, replace
from typing import List, Any, Dict, Optional, Tuple
import re
from collections.abc import Mapping
import json
from dataclasses import dataclass, replace
from typing import Any, Dict, List, Optional, Tuple

import yaml

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -197,6 +199,83 @@ def parse_yml(
)


short_notation_regex = re.compile(r"\*\{([\w,]+)\}")


def flatten_json(
    json_data: Dict[str, Any],
    base_key: Optional[str] = None,
    replacement_key: Optional[str] = None,
    dont_flatten_link_dict: bool = False,
) -> Dict[str, Any]:
    """
    Flattens a json dict into a flat dictionary of absolute paths.

    Args:
        json_data (Dict[str, Any]): The dictionary read from the json file.
        base_key (Optional[str], optional):
            A base key to prefix to all keys.
            Defaults to None.
        replacement_key (Optional[str], optional):
            A replacement key which replaces all occurrences of * with this string.
            Defaults to None.
        dont_flatten_link_dict (bool):
            If true, the dict will not be flattened if it only contains a link key.
            Defaults to False.

    Returns:
        Dict[str, Any]: The flattened dict
    """
    # Keep {"link": ...} dicts intact (when requested) so downstream code can
    # still recognize them as link entries rather than flattened sub-paths.
    if (
        dont_flatten_link_dict
        and base_key is not None
        and len(json_data) == 1
        and "link" in json_data
    ):
        return {base_key: json_data}

    flattened_config = {}

    def update_config(key, value, rkey):
        # Recurse into sub-dicts, convert "@link:<path>" strings into
        # {"link": <path>} dicts, and store everything else as-is.
        if isinstance(value, dict):
            flattened_config.update(
                flatten_json(
                    value,
                    base_key=key,
                    replacement_key=rkey,
                    dont_flatten_link_dict=dont_flatten_link_dict,
                )
            )
        elif isinstance(value, str) and value.startswith("@link:"):
            flattened_config[key] = {"link": value[6:]}
        else:
            flattened_config[key] = value

    for key, value in json_data.items():
        if base_key is not None:
            key = f"{base_key}/{key}"

        if replacement_key is not None:
            key = key.replace("*", replacement_key)
            if isinstance(value, str):
                value = value.replace("*", replacement_key)

        expand_match = short_notation_regex.search(key)
        if replacement_key is None and expand_match is not None:
            # Short notation `*{a,b,...}` expands one entry into several,
            # substituting each listed name for the wildcard.
            expand_keys = expand_match.group(1).split(",")
            for ekey in expand_keys:
                rkey = key.replace(expand_match.group(0), ekey)

                # Bug fix: substitute into a per-iteration copy. Mutating
                # `value` in place consumed the "*" on the first expansion
                # key, so every later key inherited the FIRST substitution
                # instead of its own.
                evalue = (
                    value.replace("*", ekey) if isinstance(value, str) else value
                )

                update_config(rkey, evalue, ekey)
            continue

        update_config(key, value, None)
    return flattened_config


def parse_json(file_path: str) -> Dict[str, Any]:
"""Parses a metadata json file into a dictionary.

Expand All @@ -210,6 +289,21 @@ def parse_json(file_path: str) -> Dict[str, Any]:
return json.load(file)


def parse_flatten_json(file_path: str) -> Dict[str, Any]:
    """
    Reads a metadata json file and flattens the nested structure
    into a single-level dictionary keyed by absolute paths.

    Args:
        file_path (str): The file path of the json file.

    Returns:
        Dict[str, Any]:
            The flattened dictionary containing the data readout from the json.
    """
    nested_metadata = parse_json(file_path)
    return flatten_json(nested_metadata)


def handle_objects(objects: Tuple[Any]) -> Dict[str, Any]:
"""Handle objects and generate template entries from them"""
if objects is None:
Expand Down
2 changes: 1 addition & 1 deletion pynxtools/definitions
Submodule definitions updated 151 files
Loading
0