FAIRmat-NFDI · domna · Sep 7, 2022 · Aug 26, 2022 · Aug 26, 2022 · Aug 26, 2022
diff --git a/nexusparser/tools/dataconverter/convert.py b/nexusparser/tools/dataconverter/convert.py
@@ -16,7 +16,6 @@
 # limitations under the License.
 #
 """This script runs the conversion routine using a selected reader and write out a Nexus file."""
-
 import glob
 import importlib.machinery
 import importlib.util
@@ -64,7 +63,8 @@ def get_names_of_all_readers() -> List[str]:
     return all_readers
 
 
-def convert(input_file: Tuple[str],  # pylint: disable=too-many-arguments
+# pylint: disable=too-many-arguments,too-many-branches
+def convert(input_file: Tuple[str],
             reader: str,
             nxdl: str,
             output: str,
@@ -73,10 +73,12 @@ def convert(input_file: Tuple[str],  # pylint: disable=too-many-arguments
             objects: Tuple[Any] = None):
     """The conversion routine that takes the input parameters and calls the necessary functions."""
     # Reading in the NXDL and generating a template
+    definitions_path = nexus.get_nexus_definitions_path()
     if nxdl == "NXtest":
         nxdl_path = os.path.join("tests", "data", "tools", "dataconverter", "NXtest.nxdl.xml")
+    elif nxdl == "NXroot":
+        nxdl_path = os.path.join(definitions_path, "base_classes", "NXroot.nxdl.xml")
     else:
-        definitions_path = nexus.get_nexus_definitions_path()
         nxdl_path = os.path.join(definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml")
         if not os.path.exists(nxdl_path):
             nxdl_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml")

diff --git a/nexusparser/tools/dataconverter/readers/hall/__init__.py b/nexusparser/tools/dataconverter/readers/hall/__init__.py
diff --git a/nexusparser/tools/dataconverter/readers/hall/helpers.py b/nexusparser/tools/dataconverter/readers/hall/helpers.py
@@ -0,0 +1,231 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Helper functions for reading sections from Lake Shore files"""
+from typing import Tuple, Union, Dict, Any
+import re
+from datetime import datetime
+import numpy as np
+import pandas as pd
+
+
+def is_section(expr: str) -> bool:
+    """Checks whether an expression follows the form of a section
+    i.e. is of the form [section]
+
+    Args:
+        expr (str): The current expression to check
+
+    Returns:
+        bool: Returns true if the expr is of the form of a section
+    """
+    return bool(re.search(r"^\[.+\]$", expr))
+
+
+def is_measurement(expr):
+    """Checks whether an expression follows the form of a measurement indicator
+    i.e. is of the form <measurement>
+
+    Args:
+        expr (str): The current expression to check
+
+    Returns:
+        bool: Returns true if the expr is of the form of a measurement indicator
+    """
+    return bool(re.search(r"^\<.+\>$", expr))
+
+
+def is_key(expr: str) -> bool:
+    """Checks whether an expression follows the form of a key value pair
+    i.e. is of the form key: value or key = value
+
+    Args:
+        expr (str): The current expression to check
+
+    Returns:
+        bool: Returns true if the expr is of the form of a key value pair
+    """
+    return bool(re.search(r"^.+\s*[:|=]\s*.+$", expr))
+
+
+def is_meas_header(expr: str) -> bool:
+    """Checks whether an expression follows the form of a measurement header,
+    i.e. starts with: Word [Unit]
+
+    Args:
+        expr (str): The current expression to check
+
+    Returns:
+        bool: Returns true if the expr is of the form of a measurement header
+    """
+    return bool(re.search(r"^[^\]]+\[[^\]]+\]", expr))
+
+
+def is_value_with_unit(expr: str) -> bool:
+    """Checks whether an expression is a value with a unit,
+    i.e. is of the form value [unit].
+
+    Args:
+        expr (str): The expression to check
+
+    Returns:
+        bool: Returns true if the expr is a value with unit
+    """
+    return bool(re.search(r"^.+\s\[.+\]$", expr))
+
+
+def is_number(expr: str) -> bool:
+    """Checks whether an expression is a number,
+    i.e. is of the form 0.3, 3, 1e-3, 1E5 etc.
+
+    Args:
+        expr (str): The expression to check
+
+    Returns:
+        bool: Returns true if the expr is a number
+    """
+    return bool(
+        re.search(r"^[+-]?(\d+([.]\d*)?([eE][+-]?\d+)?|[.]\d+([eE][+-]?\d+)?)$", expr)
+    )
+
+
+def split_str_with_unit(expr: str, lower: bool = True) -> Tuple[str, str]:
+    """Splits an expression into a string and a unit.
+    The input expression should be of the form value [unit] as
+    is checked with is_value_with_unit function.
+
+    Args:
+        expr (str): The expression to split
+        lower (bool, optional):
+            If True the value is converted to lower case. Defaults to True.
+
+    Returns:
+        Tuple[str, str]: A tuple of a value unit pair.
+    """
+    value = re.split(r"\s+\[.+\]", expr)[0]
+    unit = re.search(r"(?<=\[).+?(?=\])", expr)[0]
+
+    if lower:
+        return value.lower(), unit
+    return value, unit
+
+
+def split_value_with_unit(expr: str) -> Tuple[Union[float, str], str]:
+    """Splits an expression into a string or float and a unit.
+    The input expression should be of the form value [unit] as
+    is checked with is_value_with_unit function.
+    The value is automatically converted to a float if it is a number.
+
+    Args:
+        expr (str): The expression to split
+
+    Returns:
+        Tuple[Union[float, str], str]: A tuple of a value unit pair.
+    """
+    value, unit = split_str_with_unit(expr, False)
+
+    if is_number(value):
+        return float(value), unit
+
+    return value, unit
+
+
+def get_unique_dkey(dic: dict, dkey: str) -> str:
+    """Checks whether a data key is already contained in a dictionary
+    and returns a variant of the key that is not yet in use.
+
+    Args:
+        dic (dict): The dictionary to check for keys
+        dkey (str): The data key which shall be written.
+
+    Returns:
+        str: The data key with the lowest unused number (starting at 0) appended.
+    """
+    suffix = 0
+    while f"{dkey}{suffix}" in dic:
+        suffix += 1
+
+    return f"{dkey}{suffix}"
+
+
+def pandas_df_to_template(prefix: str, data: pd.DataFrame) -> Dict[str, Any]:
+    """Converts a dataframe to a NXdata entry template.
+
+    Args:
+        prefix (str): The path prefix to write the data into. A trailing slash is removed.
+        data (pd.DataFrame): The dataframe which should be converted.
+
+    Returns:
+        Dict[str, Any]: The dict containing the data and metainfo.
+    """
+    if prefix.endswith('/'):
+        prefix = prefix[:-1]
+
+    template: Dict[str, Any] = {}
+    template[f'{prefix}/@NX_class'] = 'NXdata'
+
+    def write_data(header: str, attr: str, data: np.ndarray) -> None:
+        if header is None:
+            print('Warning: Trying to write dataframe without a header. Skipping.')
+            return
+
+        if is_value_with_unit(header):
+            name, unit = split_str_with_unit(header)
+            template[f'{prefix}/{name}/@units'] = unit
+        else:
+            name = header.lower()
+
+        if attr == '@auxiliary_signals':
+            if f'{prefix}/{attr}' in template:
+                template[f'{prefix}/{attr}'].append(name)
+            else:
+                template[f'{prefix}/{attr}'] = [name]
+        else:
+            template[f'{prefix}/{attr}'] = name
+        template[f'{prefix}/{name}'] = data
+
+    if data.index.name is None:
+        data = data.set_index(data.columns[0])
+
+    # Drop last line if it has an erroneous zero temperature
+    if data.index.values[-1] == 0:
+        data = data.iloc[:-1]
+
+    write_data(data.index.name, '@axes', data.index.values)
+    write_data(data.columns[0], '@signal', data.iloc[:, 0].values)
+
+    for column in data.columns[1:]:
+        write_data(column, '@auxiliary_signals', data[column].values)
+
+    return template
+
+
+def convert_date(datestr: str) -> str:
+    """Converts a hall date formatted string to an isoformat string.
+
+    Args:
+        datestr (str): The hall date string
+
+    Returns:
+        str: The iso formatted string.
+    """
+
+    try:
+        return datetime.strptime(datestr, r'%m/%d/%y %H%M%S').isoformat()
+    except ValueError:
+        print("Warning: datestring does not conform to date format. Skipping.")
+        return datestr