Section parsing for json_yml reader by domna · Pull Request #54 · FAIRmat-NFDI/pynxtools · GitHub
Section parsing for json_yml reader #54

Merged · 1 commit · Oct 20, 2022
12 changes: 6 additions & 6 deletions nexusutils/dataconverter/readers/hall/reader.py
@@ -17,12 +17,12 @@
#
"""Lake Shore Hall file reader implementation for the DataConverter."""
import re
-from typing import Any, List, TextIO, Dict
+from typing import Any, List, TextIO, Dict, Optional
import numpy as np
import pandas as pd

from nexusutils.dataconverter.readers.json_yml.reader import YamlJsonReader
-import nexusutils.dataconverter.readers.hall.helpers as helpers
+from nexusutils.dataconverter.readers.hall import helpers
from nexusutils.dataconverter.readers.utils import parse_json, parse_yml

# Replacement dict for section names
@@ -47,7 +47,7 @@
MEASUREMENT_KEYS = ["Contact Sets"]


-def split_add_key(fobj: TextIO, dic: dict, prefix: str, expr: str) -> None:
+def split_add_key(fobj: Optional[TextIO], dic: dict, prefix: str, expr: str) -> None:
"""Splits a key value pair and adds it to the dictionary.
It also checks for measurement headers and adds the full tabular data as a
pandas array to the dictionary.
@@ -61,7 +61,7 @@ def split_add_key(fobj: TextIO, dic: dict, prefix: str, expr: str) -> None:
key, *val = re.split(r"\s*[:|=]\s*", expr)
jval = "".join(val).strip()

-    if key in MEASUREMENT_KEYS:
+    if fobj is not None and key in MEASUREMENT_KEYS:
data = []
for line in fobj:
if not line.strip():
@@ -150,8 +150,8 @@ class HallReader(YamlJsonReader):
extensions = {
".txt": lambda fname: parse_txt(fname, "iso-8859-1"),
".json": parse_json,
".yml": lambda fname: parse_yml(fname, {}, {}),
".yaml": lambda fname: parse_yml(fname, {}, {}),
".yml": lambda fname: parse_yml(fname, None, None),
".yaml": lambda fname: parse_yml(fname, None, None),
}


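For illustration (not part of the diff): a minimal sketch of the key/value split that split_add_key performs, using the same regex as in the hunk above; the header line is a hypothetical Lake Shore entry. With fobj typed as Optional[TextIO], callers may now pass None (e.g. for YAML input), in which case the tabular-measurement branch is skipped entirely.

import re

expr = "Sample Type: van der Pauw"  # hypothetical header line
key, *val = re.split(r"\s*[:|=]\s*", expr)  # split pattern from split_add_key
print(key, "->", "".join(val).strip())  # Sample Type -> van der Pauw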
8 changes: 6 additions & 2 deletions nexusutils/dataconverter/readers/mpes/reader.py
@@ -28,7 +28,7 @@
import yaml

from nexusutils.dataconverter.readers.base.reader import BaseReader
-from nexusutils.dataconverter.readers.utils import flatten_and_replace
+from nexusutils.dataconverter.readers.utils import flatten_and_replace, FlattenSettings

DEFAULT_UNITS = {
"X": "step",
@@ -225,7 +225,11 @@ def handle_h5_and_json_file(file_paths, objects):
elif file_extension in [".yaml", ".yml"]:
with open(file_path) as feln:
eln_data_dict = flatten_and_replace(
-                yaml.safe_load(feln), CONVERT_DICT, REPLACE_NESTED
+                FlattenSettings(
+                    dic=yaml.safe_load(feln),
+                    convert_dict=CONVERT_DICT,
+                    replace_nested=REPLACE_NESTED
+                )
)

if objects is not None:
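For illustration (not part of the diff): a self-contained sketch of the new call pattern. The nested dict and the conversion mapping are stand-ins, not the reader's actual CONVERT_DICT/REPLACE_NESTED.

from nexusutils.dataconverter.readers.utils import FlattenSettings, flatten_and_replace

nested = {"Instrument": {"pressure": {"value": 1.0, "unit": "bar"}}}
flat = flatten_and_replace(
    FlattenSettings(dic=nested, convert_dict={"unit": "@units"}, replace_nested={})
)
# {'/ENTRY[entry]/Instrument/pressure': 1.0,
#  '/ENTRY[entry]/Instrument/pressure/@units': 'bar'}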
132 changes: 113 additions & 19 deletions nexusutils/dataconverter/readers/utils.py
@@ -16,21 +16,16 @@
# limitations under the License.
#
"""Utility functions for the NeXus reader classes."""
from dataclasses import dataclass, replace
from typing import List, Any, Dict, Optional
from collections.abc import Mapping
import json
import yaml


-def flatten_and_replace(
-        dic: Mapping,
-        convert_dict: dict,
-        replace_nested: dict,
-        parent_key: str = "/ENTRY[entry]",
-        sep: str = "/"
-) -> dict:
-    """Flatten a nested dictionary, and replace the keys with the appropriate
-    paths in the nxs file.
+@dataclass
+class FlattenSettings():
+    """Settings for flattening operations.

Args:
dic (dict): Dictionary to flatten
@@ -39,26 +34,117 @@
parent_key (str, optional):
Parent key of the dictionary. Defaults to "/ENTRY[entry]".
sep (str, optional): Separator for the keys. Defaults to "/".
"""
dic: Mapping
convert_dict: dict
replace_nested: dict
parent_key: str = "/ENTRY[entry]"
sep: str = "/"
is_in_section: bool = False


def is_section(val: Any) -> bool:
"""Checks whether a value is a section.

Args:
val (Any): A list or value.

Returns:
bool: True if val is a section.
"""
return isinstance(val, list) and len(val) > 0 and isinstance(val[0], dict)


def is_key_value_pair(val: Any) -> bool:
    """Checks whether a dict is a {value, unit} key-value pair."""
    if not isinstance(val, dict):
        return False

    if len(val) == 2 and "value" in val and "unit" in val:
        return True
    return False
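For illustration (not part of the diff): how the two predicates above classify typical ELN values, assuming they are imported from nexusutils.dataconverter.readers.utils.

assert is_section([{"step_name": "Post-annealing"}])  # list of dicts -> section
assert not is_section(["Ti", "Au"])  # list of scalars -> not a section
assert is_key_value_pair({"value": 1.0, "unit": "minute"})
assert not is_key_value_pair({"value": 1.0})  # no unit -> not a value-unit pair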


def uniquify_keys(ldic: list) -> List[Any]:
"""Uniquifys keys in a list of tuple lists containing key value pairs.

Args:
ldic (list): List of lists of length two, containing key value pairs.

Returns:
List[Any]: Uniquified list, where duplicate keys are appended with 1, 2, etc.
"""
dic: dict = {}
for key, val in ldic:
suffix = 0
sstr = "" if suffix == 0 else str(suffix)
while f"{key}{sstr}" in dic.keys():
sstr = "" if suffix == 0 else str(suffix)
suffix += 1

if is_key_value_pair(val):
dic[f"{key}{sstr}"] = val["value"]
dic[f"{key}{sstr}/@units"] = val["unit"]
continue
dic[f"{key}{sstr}"] = val

return list(map(list, dic.items()))
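For illustration (not part of the diff): the suffixing and unit handling of uniquify_keys on a hand-made input.

pairs = [
    ["step_name", "Post-annealing"],
    ["step_name", "Pre-treatment"],
    ["elapsed_time", {"value": 1.0, "unit": "minute"}],
]
print(uniquify_keys(pairs))
# [['step_name', 'Post-annealing'], ['step_name1', 'Pre-treatment'],
#  ['elapsed_time', 1.0], ['elapsed_time/@units', 'minute']]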


def parse_section(key: str, val: Any, settings: FlattenSettings) -> List[Any]:
"""Parse a section, i.e. an entry containing a list of entries.

Args:
key (str): The key which is currently being checked.
val (Any): The value at the current key.
settings (FlattenSettings): The flattening settings.

Returns:
List[Any]: A list of list tuples containing key, value pairs.
"""
if not is_section(val):
return [(key, val)]

groups: List[Any] = []
for group in val:
groups.extend(
flatten_and_replace(
replace(settings, dic=group, parent_key=key, is_in_section=True)
).items()
)

return uniquify_keys(groups)
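For illustration (not part of the diff): parse_section applied to a small section list, with default settings and empty conversion dicts.

settings = FlattenSettings(dic={}, convert_dict={}, replace_nested={})
section = [{"step_name": "Post-annealing"}, {"step_name": "Pre-treatment"}]
print(parse_section("/ENTRY[entry]/process_steps", section, settings))
# [['/ENTRY[entry]/process_steps/step_name', 'Post-annealing'],
#  ['/ENTRY[entry]/process_steps/step_name1', 'Pre-treatment']]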


def flatten_and_replace(settings: FlattenSettings) -> dict:
"""Flatten a nested dictionary, and replace the keys with the appropriate
paths in the nxs file.

Args:
settings (FlattenSettings): Settings dataclass for flattening the data.

Returns:
dict: Flattened dictionary
"""
items: List[Any] = []
-    for key, val in dic.items():
-        new_key = parent_key + sep + convert_dict.get(key, key)
+    for key, val in settings.dic.items():
+        new_key = settings.parent_key + settings.sep + settings.convert_dict.get(key, key)
        if isinstance(val, Mapping):
            items.extend(
-                flatten_and_replace(val, convert_dict, replace_nested, new_key, sep=sep)
+                flatten_and_replace(replace(settings, dic=val, parent_key=new_key))
                .items()
+                if not (settings.is_in_section and is_key_value_pair(val))
+                else [[new_key, val]]
            )
+            continue
+
+        for old, new in settings.replace_nested.items():
+            new_key = new_key.replace(old, new)
+
+        if new_key.endswith("/value"):
+            items.append((new_key[:-6], val))
        else:
-            for old, new in replace_nested.items():
-                new_key = new_key.replace(old, new)
-
-            if new_key.endswith("/value"):
-                items.append((new_key[:-6], val))
-            else:
-                items.append((new_key, val))
+            items.extend(parse_section(new_key, val, settings))
return dict(items)
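For illustration (not part of the diff): the pieces combined. A list of dicts is detected as a section, flattened per group, and uniquified, with {value, unit} pairs split into a value plus an @units attribute.

nested = {
    "process_steps": [
        {"elapsed_time": {"value": 1.0, "unit": "minute"}},
        {"elapsed_time": {"value": 5.0, "unit": "minute"}},
    ]
}
print(flatten_and_replace(FlattenSettings(dic=nested, convert_dict={}, replace_nested={})))
# {'/ENTRY[entry]/process_steps/elapsed_time': 1.0,
#  '/ENTRY[entry]/process_steps/elapsed_time/@units': 'minute',
#  '/ENTRY[entry]/process_steps/elapsed_time1': 5.0,
#  '/ENTRY[entry]/process_steps/elapsed_time1/@units': 'minute'}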


@@ -81,8 +167,16 @@ def parse_yml(
if replace_nested is None:
replace_nested = {}

convert_dict["unit"] = "@units"

with open(file_path) as file:
-        return flatten_and_replace(yaml.safe_load(file), convert_dict, replace_nested)
+        return flatten_and_replace(
+            FlattenSettings(
+                dic=yaml.safe_load(file),
+                convert_dict=convert_dict,
+                replace_nested=replace_nested
+            )
+        )


def parse_json(file_path: str) -> Dict[str, Any]:
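For illustration (not part of the diff): a sketch of parse_yml on a throwaway file. The new convert_dict["unit"] = "@units" line is what maps YAML unit keys onto NeXus @units attributes.

import os
import tempfile

from nexusutils.dataconverter.readers.utils import parse_yml

with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False) as tmp:
    tmp.write("sample:\n  temperature:\n    value: 300\n    unit: K\n")

print(parse_yml(tmp.name, None, None))
# {'/ENTRY[entry]/sample/temperature': 300,
#  '/ENTRY[entry]/sample/temperature/@units': 'K'}
os.unlink(tmp.name)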
33 changes: 33 additions & 0 deletions tests/data/dataconverter/readers/json_yml/eln_data_w_subsections.yaml
@@ -0,0 +1,33 @@
instrument:
amperemeter: Keithley 485 Autoranging Picoammeter
current_source: Keithley 220 Programmable current source
model: Lake Shore Hall Measurement System
software: Lake Shore Hall Measurement Software
switch_matrix: Keithley 7001 Switch System
voltmeter: Keithley 2182 Nanovoltmeter
samples:
final_sample:
- contacts_grafting:
- metal_stack:
- empirical_formula: Ti
- empirical_formula: Au
- empirical_formula: Ra
process_steps:
- elapsed_time:
unit: minute
value: 1.0
step_comment: Was there a post-annealing step?
step_name: Post-annealing
- elapsed_time:
unit: minute
value: 5.0
step_comment: "Was there a pre-treatment of the surface? \n"
step_name: Pre-treatment
- step_comment: Was an implantation made for contact formation?
step_name: implantation
- step_comment: Was a regrowth made for contact formation?
step_name: regrowth
layer:
- components:
- ../upload/raw/Substrate1.data.archive.yaml#data
epitaxy_type: Homoepitaxy
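For illustration (not part of the diff): a sketch of the flattened keys the section parsing should produce from the process_steps list above. The parent path is elided since the exact prefixes depend on the hall reader's conversion dictionaries; duplicate keys get numeric suffixes.

# .../process_steps/elapsed_time   -> 1.0   (plus .../elapsed_time/@units -> 'minute')
# .../process_steps/step_name      -> 'Post-annealing'
# .../process_steps/elapsed_time1  -> 5.0   (plus .../elapsed_time1/@units -> 'minute')
# .../process_steps/step_name1     -> 'Pre-treatment'
# .../process_steps/step_name2     -> 'implantation'
# .../process_steps/step_name3     -> 'regrowth'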
65 changes: 65 additions & 0 deletions tests/data/dataconverter/readers/mpes/eln_data.yaml
@@ -0,0 +1,65 @@
Instrument:
Analyzer:
energy_resolution: 110
momentum_resolution: 0.08
slow_axes: delay
spatial_resolution: 10
Beam:
Probe:
extent:
unit: m
value:
- 7.999999999999999e-05
- 7.999999999999999e-05
incident_energy:
unit: J
value: 3.4767232957799996e-18
incident_energy_spread:
unit: J
value: 1.7623942974e-20
incident_polarization:
- 1
- 1
- 0
- 0
pulse_duration:
unit: s
value: 2.0000000000000003e-14
Pump:
extent:
unit: m
value:
- 0.00022999999999999998
- 0.000265
incident_energy:
unit: J
value: 2.4833737826999997e-19
incident_energy_spread:
unit: J
value: 1.2817413071999999e-20
incident_polarization:
- 1
- -1
- 0
- 0
pulse_duration:
unit: s
value: 3.5e-14
Sample:
chemical_formula: WSe2
description: Sample
name: WSe2 Single Crystal
preparation_date: "2019-01-13T09:00:00+00:00"
pressure:
unit: bar
value: 5.0e-14
sample_history: Cleaved
temperature:
unit: °C
value: 26.850000000000023
User:
address: Faradayweg 4-6, 14915 Berlin
affiliation: Fritz Haber Institute of the Max Planck Society
email: maklar@fhi-berlin.mpg.de
name: Julian Maklar
role: Principal Investigator
20 changes: 20 additions & 0 deletions tests/dataconverter/test_convert.py
@@ -198,3 +198,23 @@ def test_mpes_writing(tmp_path):
with open(os.path.join(dirpath, 'Ref_nexus_mpes.log'), 'r') as logfile:
ref_log = logfile.readlines()
assert log == ref_log


def test_eln_data(tmp_path):
"""Check if the subsections in the eln_data.yml file work."""
dirpath = os.path.join(os.path.dirname(__file__), "../data/dataconverter/readers/mpes")
dataconverter.convert((os.path.join(dirpath, "xarray_saved_small_calibration.h5"),
os.path.join(dirpath, "config_file.json"),
os.path.join(dirpath, "eln_data.yaml")),
"mpes", "NXmpes",
os.path.join(tmp_path, "mpes.small_test.nxs"),
False, False)


def test_eln_data_subsections(tmp_path):
"""Check if the subsections in the eln_data.yml file work."""
dirpath = os.path.join(os.path.dirname(__file__), "../data/dataconverter/readers/json_yml")
dataconverter.convert((os.path.join(dirpath, "eln_data_w_subsections.yaml",),),
"hall", "NXroot",
os.path.join(tmp_path, "hall.nxs"),
False, False)