From 52c2c51da636f13b7e458aa2824de6f8de277339 Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Tue, 15 Aug 2023 16:34:57 +0200
Subject: [PATCH 01/84] Starting a refactoring of EM parsers using the
 refactored EM appdef and base class definitions from the base_class_templates
 nexus_definitions branch

---
 pynxtools/definitions | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pynxtools/definitions b/pynxtools/definitions
index ff35ff729..f121449f7 160000
--- a/pynxtools/definitions
+++ b/pynxtools/definitions
@@ -1 +1 @@
-Subproject commit ff35ff729aed3054e59c52e487fce3f54a30f1bb
+Subproject commit f121449f728d3be0971b35243f67d8560cee45cf

From 1638344b13e8b5b5815886b0d25b8e1e8b727213 Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Tue, 15 Aug 2023 16:54:29 +0200
Subject: [PATCH 02/84] Using also the forwarded converter NXDL files from the
 base_class_templates branch

---
 pynxtools/definitions | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pynxtools/definitions b/pynxtools/definitions
index f121449f7..9f09b1efa 160000
--- a/pynxtools/definitions
+++ b/pynxtools/definitions
@@ -1 +1 @@
-Subproject commit f121449f728d3be0971b35243f67d8560cee45cf
+Subproject commit 9f09b1efafd6f897ec54e9824a919bffa1f312d4

From 39d6a1985359e8ac71ddac9e11e3f9fb6f7f4e90 Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Tue, 15 Aug 2023 17:38:45 +0200
Subject: [PATCH 03/84] Workpackage

---
 pynxtools/dataconverter/readers/em/README.md | 57 +++++++++++++++++++
 .../dataconverter/readers/em/__init__.py | 0
 2 files changed, 57 insertions(+)
 create mode 100644 pynxtools/dataconverter/readers/em/README.md
 create mode 100644 pynxtools/dataconverter/readers/em/__init__.py

diff --git a/pynxtools/dataconverter/readers/em/README.md b/pynxtools/dataconverter/readers/em/README.md
new file mode 100644
index 000000000..e9a2cca0b
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/README.md
@@ -0,0 +1,57 @@
+# em reader

## Purpose
Tool for parsing electron microscopy research data from different representations
of technology partners into an instance of a NeXus/HDF5 file which is formatted
according to the NXem application definition.

In the process, this data artifact is verified for the existence of certain
pieces of information and for the formatting of these pieces of information.

Internally, the tool uses different file format readers and mapping tables
whereby pieces of information in the respective formats are mapped onto
corresponding concepts represented by the nexus-fairmat-proposal.

The resulting data artifact is an HDF5 file which can be imported, for
instance, into the NOMAD OASIS research data management system to enable
its functionalities and data exploration in NOMAD.

The parser documents how several non-trivial examples from electron microscopy
research can be read technically, mapped onto semantic concepts using NeXus,
and how the resulting data artifact is verified using an application definition.

The parser does not convert and map every piece of information which the supported
file formats can technically store. This keeps a balance between avoiding a
duplication of data and metadata on the one hand and, on the other hand, adding
value in that pieces of information from different sources are combined and
represented in an already interpreted form, so that research content is readily
consumable by humans in a research data management system. Default plots,
presented in a harmonized form irrespective of the technology partner format in
which they were originally stored, are one such benefit.
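
A minimal invocation sketch of the reader through the dataconverter API,
assuming pynxtools is installed; the file names are placeholders and the exact
call options are defined in pynxtools/dataconverter/convert.py:

```python
from pynxtools.dataconverter.convert import convert

# parse the tech-partner file(s) and write an NXem-formatted NeXus/HDF5 file
convert(input_file=("my_measurement.h5oina",),  # one or more input files
        reader="em",                            # this reader
        nxdl="NXem",                            # appdef used for verification
        output="my_measurement.nxs")            # NeXus/HDF5 file to create
```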
In effect, this is an example of how pynxtools serves as an effective library
for developers and users of research data management systems, whereby they can
offload mapping and harmonization code and, in addition, have the possibility
to verify the content via application definitions.

For the example of the NOMAD OASIS research data management system, pynxtools
is such a library: it enables users to inject domain-specific content using
existing community file format readers and generic information mapping
capabilities.

## Support
The following table shows which use cases and associated technology partner file formats the em dataconverter handles.
| Use case | Technique / community | Supported file formats | Previous parser |
| -------- | -------- | ------- | ------- |
| 1 | Electron backscatter diffraction | MTex/Matlab \*.nxs.mtex, Oxford Instruments \*.h5oina, DREAM3D \*.dream3d, EDAX APEX HDF5, Bruker Nanoscience HDF5, zip-compressed set of Kikuchi patterns | em_om |
| 2 | Nion Co. TEM and nionswift users | zip-compressed nionswift project directory | em_nion |
| 3 | SEM/TEM generic imaging | ThermoFisher TIFF | n/a |
| 4 | Energy-dispersive X-ray spectroscopy (EDS) via SEM/TEM | Bruker \*.bcf, ThermoFisher Velox \*.emd, Gatan Digital Micrograph \*.dm3, EDAX APEX \*.edaxh5 | em_spctrscpy |
| 5 | Electron energy loss spectroscopy (EELS) | DM3, zip-compressed nionswift project directory | em_spctrscpy |
| 6 | In-situ microscopy with Protochips AXON Studio | zip-compressed AXON Studio project | n/a |

## Manual
Please inspect the

## Contact person for this reader
Markus Kühbach
\ No newline at end of file
diff --git a/pynxtools/dataconverter/readers/em/__init__.py b/pynxtools/dataconverter/readers/em/__init__.py
new file mode 100644
index 000000000..e69de29bb

From 79709b385851ed4c0472566054b17f1766dfc404 Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Wed, 16 Aug 2023 17:35:45 +0200
Subject: [PATCH 04/84] Store initial work so that it does not get lost, but
 this is not even a draft

---
 pynxtools/dataconverter/readers/em/reader.py | 148 +++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 pynxtools/dataconverter/readers/em/reader.py

diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
new file mode 100644
index 000000000..52e3002bf
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -0,0 +1,148 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# +"""Parser for loading generic orientation microscopy data based on .""" + +# pylint: disable=no-member + +from typing import Tuple, Any + +from pynxtools.dataconverter.readers.base.reader import BaseReader + +""" +from pynxtools.dataconverter.readers.em_om.utils.use_case_selector \ + import EmOmUseCaseSelector + +from pynxtools.dataconverter.readers.em_om.utils.generic_eln_io \ + import NxEmOmGenericElnSchemaParser + +from pynxtools.dataconverter.readers.em_om.utils.orix_ebsd_parser \ + import NxEmOmOrixEbsdParser + +from pynxtools.dataconverter.readers.em_om.utils.mtex_ebsd_parser \ + import NxEmOmMtexEbsdParser + +from pynxtools.dataconverter.readers.em_om.utils.zip_ebsd_parser \ + import NxEmOmZipEbsdParser + +from pynxtools.dataconverter.readers.em_om.utils.dream3d_ebsd_parser \ + import NxEmOmDreamThreedEbsdParser + +from pynxtools.dataconverter.readers.em_om.utils.em_nexus_plots \ + import em_om_default_plot_generator""" + + +class EmReader(BaseReader): + """Parse content from file formats of the electron microscopy community.""" + + # pylint: disable=too-few-public-methods + + # Whitelist for the NXDLs that the reader supports and can process + supported_nxdls = ["NXem"] + + # pylint: disable=duplicate-code + def read(self, + template: dict = None, + file_paths: Tuple[str] = None, + objects: Tuple[Any] = None) -> dict: + """Read data from given file, return filled template dictionary em.""" + # pylint: disable=duplicate-code + template.clear() + + # this em_om parser combines multiple sub-parsers + # so we need the following input: + # logical analysis which use case + # data input from an ELN (using an ELN-agnostic) YAML representation + # data input from technology partner files + # functionalities for creating default plots + + entry_id = 1 + # if len(file_paths) != 2: + # print("Generation of example data not implemented yet...!") + # return {} + + print("Identify information sources (ELN, RDM config, tech files) to deal with...") + case = EmUseCaseSelector(file_paths) + if case.is_valid is False: + print("Such a combination of input (file) is not supported !") + return {} + + print("Process pieces of information within RDM-specific ELN export file...") + if case.eln_parser_type == "oasis": + # pattern_simulation = False + # if case.dat_parser_type == "zip": + # pattern_simulation = True + eln = OasisCustomSchemaInstanceFileParser(case.eln[0], entry_id) + eln.parse(template) + else: + print("No interpretable ELN input found!") + + print("Process pieces of information in RDM-specific configuration files...") + if case.cfg_parser_type == "oasis": + cfg = OasisSpecificConfigInstanceFileParser(case.cfg[0], entry_id) + cfg.parse(template) + else: + print("No interpretable configuration file offered") + + nxs = NxEmAppDefContent() + nxs.parse(template) + + print("Parse and map pieces of information within files from tech partners...") + # for dat_instance in case.dat_parser_type: + # print(f"Process pieces of information in {dat_instance} tech partner file...") + # continue +""" if case.dat_parser_type == "orix": + orix_parser = NxEmOmOrixEbsdParser(case.dat[0], entry_id) + # h5oina parser evaluating content and plotting with orix on the fly + orix_parser.parse(template) + elif case.dat_parser_type == "mtex": + mtex_parser = NxEmOmMtexEbsdParser(case.dat[0], entry_id) + # ebsd parser because concept suggested for MTex by M. 
Kühbach + # would include different HDF5 dumps for different MTex classes + mtex_parser.parse(template) + elif case.dat_parser_type == "zip": + zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) + zip_parser.parse(template) + elif case.dat_parser_type == "dream3d": + dream_parser = NxEmOmDreamThreedEbsdParser(case.dat[0], entry_id) + dream_parser.parse(template) + # elif case.dat_parser_type == "kikuchipy": + # elif case.dat_parser_type == "pyxem": + # elif case.dat_parser_type == "score": + # elif case.dat_parser_type == "qube": + # elif case.dat_parser_type == "paradis": + # elif case.dat_parser_type == "brinckmann": """ + + # at this point the data for the default plots should already exist + # we only need to decorate the template to point to the mandatory ROI overview + # print("Create NeXus default plottable data...") + # em_default_plot_generator(template, 1) + + debugging = False + if debugging is True: + print("Reporting state of template before passing to HDF5 writing...") + for keyword in template.keys(): + print(keyword) + # print(type(template[keyword])) + # print(template[keyword]) + + print("Forward instantiated template to the NXS writer...") + return template + + +# This has to be set to allow the convert script to use this reader. +READER = EmReader From 71371c77c1dbe485961732fcefaaece78a988af6 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Thu, 17 Aug 2023 14:20:37 +0200 Subject: [PATCH 05/84] dataconverter, added and tested successfully an option to append to an already existent NeXus/HDF5 file --- NXem_refactoring.nxdl.xml | 31 +++++++ NXem_refactoring.yaml | 9 ++ pynxtools/dataconverter/convert.py | 11 ++- pynxtools/dataconverter/readers/em/reader.py | 87 ++++++++++---------- pynxtools/dataconverter/writer.py | 5 +- 5 files changed, 96 insertions(+), 47 deletions(-) create mode 100644 NXem_refactoring.nxdl.xml create mode 100644 NXem_refactoring.yaml diff --git a/NXem_refactoring.nxdl.xml b/NXem_refactoring.nxdl.xml new file mode 100644 index 000000000..172e18654 --- /dev/null +++ b/NXem_refactoring.nxdl.xml @@ -0,0 +1,31 @@ + + + + + + Debugging. + + + + + diff --git a/NXem_refactoring.yaml b/NXem_refactoring.yaml new file mode 100644 index 000000000..b0c857655 --- /dev/null +++ b/NXem_refactoring.yaml @@ -0,0 +1,9 @@ +category: application +doc: | + Debugging. +type: group +NXem_refactoring(NXroot): + (NXentry): + exists: [min, 1, max, infty] + \@version: + exists: optional diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py index ca3cc472d..656a31a95 100644 --- a/pynxtools/dataconverter/convert.py +++ b/pynxtools/dataconverter/convert.py @@ -68,6 +68,7 @@ def convert(input_file: Tuple[str], reader: str, nxdl: str, output: str, + io_mode: str = "w", generate_template: bool = False, fair: bool = False, **kwargs): @@ -124,7 +125,7 @@ def convert(input_file: Tuple[str], continue logger.warning("The path, %s, is being written but has no documentation.", path) - Writer(data=data, nxdl_path=nxdl_path, output_path=output).write() + Writer(data=data, nxdl_path=nxdl_path, output_path=output, io_mode=io_mode).write() logger.info("The output file generated: %s", output) @@ -161,6 +162,11 @@ def parse_params_file(params_file): default='output.nxs', help='The path to the output NeXus file to be generated.' ) +@click.option( + '--io_mode', + default='w', + help='I/O mode on the output NeXus file, see h5py doc for mode details, default="w".' 
+) @click.option( '--generate-template', is_flag=True, @@ -183,6 +189,7 @@ def convert_cli(input_file: Tuple[str], reader: str, nxdl: str, output: str, + io_mode: str, generate_template: bool, fair: bool, params_file: str): @@ -201,7 +208,7 @@ def convert_cli(input_file: Tuple[str], sys.tracebacklimit = 0 raise IOError("\nError: Please supply an NXDL file with the option:" " --nxdl ") - convert(input_file, reader, nxdl, output, generate_template, fair) + convert(input_file, reader, nxdl, output, io_mode, generate_template, fair) if __name__ == '__main__': diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 52e3002bf..a9edf93f7 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -52,7 +52,7 @@ class EmReader(BaseReader): # pylint: disable=too-few-public-methods # Whitelist for the NXDLs that the reader supports and can process - supported_nxdls = ["NXem"] + supported_nxdls = ["NXem_refactoring"] # pylint: disable=duplicate-code def read(self, @@ -63,6 +63,8 @@ def read(self, # pylint: disable=duplicate-code template.clear() + debug_id = 2 + template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}" # this em_om parser combines multiple sub-parsers # so we need the following input: # logical analysis which use case @@ -76,69 +78,68 @@ def read(self, # return {} print("Identify information sources (ELN, RDM config, tech files) to deal with...") - case = EmUseCaseSelector(file_paths) - if case.is_valid is False: - print("Such a combination of input (file) is not supported !") - return {} + # case = EmUseCaseSelector(file_paths) + # if case.is_valid is False: + # print("Such a combination of input (file) is not supported !") + # return {} print("Process pieces of information within RDM-specific ELN export file...") - if case.eln_parser_type == "oasis": - # pattern_simulation = False - # if case.dat_parser_type == "zip": - # pattern_simulation = True - eln = OasisCustomSchemaInstanceFileParser(case.eln[0], entry_id) - eln.parse(template) - else: - print("No interpretable ELN input found!") - - print("Process pieces of information in RDM-specific configuration files...") - if case.cfg_parser_type == "oasis": - cfg = OasisSpecificConfigInstanceFileParser(case.cfg[0], entry_id) - cfg.parse(template) - else: - print("No interpretable configuration file offered") - - nxs = NxEmAppDefContent() - nxs.parse(template) + # if case.eln_parser_type == "oasis": + # # pattern_simulation = False + # # if case.dat_parser_type == "zip": + # # pattern_simulation = True + # eln = OasisCustomSchemaInstanceFileParser(case.eln[0], entry_id) + # eln.parse(template) + # else: + # print("No interpretable ELN input found!") + + # print("Process pieces of information in RDM-specific configuration files...") + # if case.cfg_parser_type == "oasis": + # cfg = OasisSpecificConfigInstanceFileParser(case.cfg[0], entry_id) + # cfg.parse(template) + # else: + # print("No interpretable configuration file offered") + + print("Parse NeXus appdef-specific content...") + # nxs = NxEmAppDefContent() + # nxs.parse(template) print("Parse and map pieces of information within files from tech partners...") # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue -""" if case.dat_parser_type == "orix": - orix_parser = NxEmOmOrixEbsdParser(case.dat[0], entry_id) - # h5oina parser evaluating content and plotting with orix on the fly - 
orix_parser.parse(template) - elif case.dat_parser_type == "mtex": - mtex_parser = NxEmOmMtexEbsdParser(case.dat[0], entry_id) - # ebsd parser because concept suggested for MTex by M. Kühbach - # would include different HDF5 dumps for different MTex classes - mtex_parser.parse(template) - elif case.dat_parser_type == "zip": - zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) - zip_parser.parse(template) - elif case.dat_parser_type == "dream3d": - dream_parser = NxEmOmDreamThreedEbsdParser(case.dat[0], entry_id) - dream_parser.parse(template) + # if case.dat_parser_type == "orix": + # orix_parser = NxEmOmOrixEbsdParser(case.dat[0], entry_id) + # # h5oina parser evaluating content and plotting with orix on the fly + # orix_parser.parse(template) + # elif case.dat_parser_type == "mtex": + # mtex_parser = NxEmOmMtexEbsdParser(case.dat[0], entry_id) + # # ebsd parser because concept suggested for MTex by M. Kühbach + # # would include different HDF5 dumps for different MTex classes + # mtex_parser.parse(template) + # elif case.dat_parser_type == "zip": + # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) + # zip_parser.parse(template) + # elif case.dat_parser_type == "dream3d": + # dream_parser = NxEmOmDreamThreedEbsdParser(case.dat[0], entry_id) + # dream_parser.parse(template) # elif case.dat_parser_type == "kikuchipy": # elif case.dat_parser_type == "pyxem": # elif case.dat_parser_type == "score": # elif case.dat_parser_type == "qube": # elif case.dat_parser_type == "paradis": - # elif case.dat_parser_type == "brinckmann": """ - + # elif case.dat_parser_type == "brinckmann": # at this point the data for the default plots should already exist # we only need to decorate the template to point to the mandatory ROI overview # print("Create NeXus default plottable data...") # em_default_plot_generator(template, 1) - - debugging = False + debugging = True if debugging is True: print("Reporting state of template before passing to HDF5 writing...") for keyword in template.keys(): print(keyword) # print(type(template[keyword])) - # print(template[keyword]) + print(template[keyword]) print("Forward instantiated template to the NXS writer...") return template diff --git a/pynxtools/dataconverter/writer.py b/pynxtools/dataconverter/writer.py index d40e6a90b..486d48ace 100644 --- a/pynxtools/dataconverter/writer.py +++ b/pynxtools/dataconverter/writer.py @@ -183,12 +183,13 @@ class Writer: nxs_namespace (str): The namespace used in the NXDL tags. Helps search for XML children. """ - def __init__(self, data: dict = None, nxdl_path: str = None, output_path: str = None): + def __init__(self, data: dict = None, nxdl_path: str = None, + output_path: str = None, io_mode: str = "w"): """Constructs the necessary objects required by the Writer class.""" self.data = data self.nxdl_path = nxdl_path self.output_path = output_path - self.output_nexus = h5py.File(self.output_path, "w") + self.output_nexus = h5py.File(self.output_path, io_mode) self.nxdl_data = ET.parse(self.nxdl_path).getroot() self.nxs_namespace = get_namespace(self.nxdl_data) From e1bb68b766de70ef0823046e1d7e51262a67b497 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Thu, 17 Aug 2023 16:38:39 +0200 Subject: [PATCH 06/84] Added control logic to instruct the dataconverter to create the output file based on an existent input-file to which then the template data are appended. 
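
In practice, this enables repeatable calls along the lines of the following
sketch (file names are placeholders; io_mode is the h5py file mode option
introduced with the previous commit, and NXem_refactoring is the debugging
appdef added earlier in this series):

    from pynxtools.dataconverter.convert import convert

    # the *.nxs.mtex input is copied to processed.nxs first; the template
    # data are then appended to that copy because it is opened in "r+" mode
    convert(input_file=("processed.nxs.mtex",),
            reader="em",
            nxdl="NXem_refactoring",
            output="processed.nxs",
            io_mode="r+")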
This functionality is useful in cases where a scientific software has already
generated a NeXus file and just some additional pieces of information are
missing for the injection into the RDM. Examples of such missing info could be
users, samples, project information, etc.

This proof-of-concept implementation copies that input file, subsequently opens
the copy as the output file, and appends the template data to it. The current
implementation does not verify the NeXus content of this input file, though.
However, in the future, such a verification could be useful. The question is
when to verify this and how: right after the copy, after the template data
were written, or by loading all input file content first into the template and
verifying it as usual prior to writing to disk? While the latter idea enables
overwriting content from input files, the disadvantage is that the template
data might become too costly (with respect to memory demands, irrespective of
whether on the client or the server side).
---
 processed.nxs.mtex.zip                       | Bin 0 -> 530 bytes
 pynxtools/dataconverter/convert.py           |  40 +++++++++++++++++++
 pynxtools/dataconverter/readers/em/reader.py |   2 +-
 3 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 processed.nxs.mtex.zip

diff --git a/processed.nxs.mtex.zip b/processed.nxs.mtex.zip
new file mode 100644
index 0000000000000000000000000000000000000000..9e0746d107e1f4d98e7bb61c56457b43b3f98d3b
GIT binary patch
literal 530
zcmWIWW@Zs#U|`^25HAu8x6E_=l)=crpkTqkAOsXGD9TSxEiO(?(aWnS*2^tPt$2HD
zqu(J13HA^F!}k;j^}0U_m}9l+(32H{Tbdac^W|jma5fhTh}~znf582Np-e4{`+b(J
z-!m|K3vH*{7#x7w_D?W#j(1TUrn2ZjER>yknx*w!>yub}q}{GnH3*
zd!A3BExc`Z#od>Mn>jh$;vM!%%op3zbLrVT!FweSEYB|IXngne!Lx1Ct|xcx7Zhpw
zRr>2mmf%dey0HE07I8+oCH{T()x?B@XV+gx-_3Sr>)$;n_ltVl=8*7EwY6ZM-`2av
z3p7s^2p`YpUD@|s%PjHYt2wc8v0r`%h)#Ut_ucZDnZ*A^WtsQS$iCUi@2bA+!`?k%
z>azm{4;nkpeI{q_-~s`~UI_~STyBS5)>dfqi9BwwA=0`_VW0W8@0^oPYR~&VBfV~~
zPE%`_;nd$JSb#dX4?me-jkM&B_K+
P#0Z2bKzar+<{20OM$gxi

literal 0
HcmV?d00001

diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py
index 656a31a95..f63e782e2 100644
--- a/pynxtools/dataconverter/convert.py
+++ b/pynxtools/dataconverter/convert.py
@@ -22,12 +22,14 @@
 import logging
 import os
 import sys
+from shutil import copyfile
 from typing import List, Tuple
 import xml.etree.ElementTree as ET

 import click
 import yaml
+
 from pynxtools.dataconverter.readers.base.reader import BaseReader
 from pynxtools.dataconverter import helpers
 from pynxtools.dataconverter.writer import Writer
@@ -63,6 +65,40 @@ def get_names_of_all_readers() -> List[str]:
     return all_readers

+
+def append_template_data_to_a_copy_of_one_inputfile(input_files: Tuple[str], output: str):
+    """Helper function to build the output file from one input file plus template data."""
+    # There are cases in which one of the input files may already contain NeXus content,
+    # typically because the scientific software tool generates such a file
+    # matching a specific application definition, and thus additional pieces of
+    # information inside the template (e.g. from an ELN) should just be added
+    # to that input file

+    # one may or may not demand a verification of that input file before
+    # continuing; currently we ignore this verification
+    for file_name in input_files:
+        if file_name[0:file_name.rfind('.')] == output:
+            print(f"Creating the output {output} based on this input {file_name}\n"
+                  f"NeXus content in {file_name} is currently not verified !!!")
+            copyfile(file_name, output)
+
+    print(f"Template data will be added to the output {output}...\n"
+          f"Only these template data will be verified !!!")
+    # when calling the dataconverter with
+    # --input-file processed.nxs.mtex
+    # --output processed.nxs
+    # --io_mode="r+"
+    # these calls can be executed repetitively: the first step is the copying
+    # operation of *.nxs.mtex to *.nxs, after which the access on the *.nxs
+    # file using h5py is read/write without regeneration
+    # a repeated call thus has factually the same effect as how the dataconverter
+    # used to work, i.e. using h5py with "w", which would regenerate the *.nxs
+    # if already existent
+    # this is required to assure that repetitive calls of the ELN save function
+    # in NOMAD do not end up with write conflicts on the *.nxs, i.e. the output
+    # file, when the dataconverter is called
+    return
+
+
 # pylint: disable=too-many-arguments
 def convert(input_file: Tuple[str],
@@ -125,7 +161,10 @@ def convert(input_file: Tuple[str],
             continue
         logger.warning("The path, %s, is being written but has no documentation.", path)

+    if io_mode == "r+":
+        append_template_data_to_a_copy_of_one_inputfile(
+            input_files=input_file, output=output)
+
     Writer(data=data, nxdl_path=nxdl_path, output_path=output, io_mode=io_mode).write()

     logger.info("The output file generated: %s", output)

diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
index a9edf93f7..7349cee2a 100644
--- a/pynxtools/dataconverter/readers/em/reader.py
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -63,7 +63,7 @@ def read(self,
         # pylint: disable=duplicate-code
         template.clear()

-        debug_id = 2
+        debug_id = 3
         template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}"
         # this em_om parser combines multiple sub-parsers
         # so we need the following input:

From 6ddf56e54ba8ddef517546961246254bb8dcf70e Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Fri, 18 Aug 2023 11:38:08 +0200
Subject: [PATCH 07/84] Implemented nxs_mtex subparser, default plot annotator,
 and NeXus root decorator

---
 .../readers/em/concepts/concept_mapper.py    | 116 ++++++++++++++++++
 .../readers/em/concepts/nexus_concepts.py    |  58 +++++++++
 pynxtools/dataconverter/readers/em/reader.py |  36 ++++--
 .../readers/em/subparsers/nxs_mtex.py        |  54 ++++++++
 .../readers/em/utils/default_plots.py        |  56 +++++++++
 5 files changed, 312 insertions(+), 8 deletions(-)
 create mode 100644 pynxtools/dataconverter/readers/em/concepts/concept_mapper.py
 create mode 100644 pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py
 create mode 100644 pynxtools/dataconverter/readers/em/utils/default_plots.py

diff --git a/pynxtools/dataconverter/readers/em/concepts/concept_mapper.py b/pynxtools/dataconverter/readers/em/concepts/concept_mapper.py
new file mode 100644
index 000000000..6ee855b84
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/concepts/concept_mapper.py
@@ -0,0 +1,116 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utilities for working with NeXus concepts encoded as Python dicts in the concepts dir.""" + +# pylint: disable=no-member + +import pytz + +from datetime import datetime + + +def load_from_modifier(terms, fd_dct): + """Implement modifier which reads values of different type from fd_dct.""" + if isinstance(terms, str): + if terms in fd_dct.keys(): + return fd_dct[terms] + if all(isinstance(entry, str) for entry in terms) is True: + if isinstance(terms, list): + lst = [] + for entry in terms: + lst.append(fd_dct[entry]) + return lst + return None + + +def convert_iso8601_modifier(terms, dct: dict): + """Implement modifier which transforms nionswift time stamps to proper UTC ISO8601.""" + if terms is not None: + if isinstance(terms, str): + if terms in dct.keys(): + return None + elif (isinstance(terms, list)) and (len(terms) == 2) \ + and (all(isinstance(entry, str) for entry in terms) is True): + # assume the first argument is a local time + # assume the second argument is a timezone string + if terms[0] in dct.keys() and terms[1] in dct.keys(): + # handle the case that these times can be arbitrarily formatted + # for now we let ourselves be guided + # by how time stamps are returned in Christoph Koch's + # nionswift instances also formatting-wise + date_time_str = dct[terms[0]].replace("T", " ") + time_zone_str = dct[terms[1]] + if time_zone_str in pytz.all_timezones: + date_time_obj \ + = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S.%f') + utc_time_zone_aware \ + = pytz.timezone(time_zone_str).localize(date_time_obj) + return utc_time_zone_aware + else: + raise ValueError('Invalid timezone string!') + return None + else: + return None + return None + + +def apply_modifier(modifier, dct: dict): + """Interpret a functional mapping using data from dct via calling modifiers.""" + if isinstance(modifier, dict): + # different commands are available + if set(["fun", "terms"]) == set(modifier.keys()): + if modifier["fun"] == "load_from": + return load_from_modifier(modifier["terms"], dct) + if modifier["fun"] == "convert_iso8601": + return convert_iso8601_modifier(modifier["terms"], dct) + elif set(["link"]) == set(modifier.keys()): + # CURRENTLY NOT IMPLEMENTED + # with the jsonmap reader Sherjeel conceptualized "link" + return None + else: + return None + if isinstance(modifier, str): + return modifier + return None + + +# examples/tests how to use modifiers +# modd = "µs" +# modd = {"link": "some_link_to_somewhere"} +# modd = {"fun": "load_from", "terms": "metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 11"} +# modd = {"fun": "load_from", "terms": ["metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 11", +# "metadata/scan/scan_device_properties/mag_boards/MagBoard 1 Relay"]} +# modd = {"fun": "convert_iso8601", "terms": ["data_modified", "timezone"]} +# print(apply_modifier(modd, yml)) + +def variadic_path_to_specific_path(path: str, instance_identifier: list): + """Transforms a variadic path to an actual path 
with instances.""" + if (path is not None) and (path != ""): + narguments = path.count("*") + if narguments == 0: # path is not variadic + return path + if len(instance_identifier) >= narguments: + tmp = path.split("*") + if len(tmp) == narguments + 1: + nx_specific_path = "" + for idx in range(0, narguments): + nx_specific_path += f"{tmp[idx]}{instance_identifier[idx]}" + idx += 1 + nx_specific_path += f"{tmp[-1]}" + return nx_specific_path + return None diff --git a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py new file mode 100644 index 000000000..2f12ab5de --- /dev/null +++ b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py @@ -0,0 +1,58 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Implement NeXus-specific groups and fields to document software and versions used.""" + +# pylint: disable=no-member + +from pynxtools.dataconverter.readers.em.concepts.concept_mapper \ + import variadic_path_to_specific_path, apply_modifier + + +PYNXTOOLS_VERSION = "n/a" +PYNXTOOLS_URL = "https://www.github.com/FAIRmat-NFDI/pynxtools" + +NXEM_NAME = "NXem" +NXEM_VERSION = "n/a" +NXEM_URL = "https://www.github.com/FAIRmat-NFDI/nexus_definitions" + +NxEmRoot = {"/PROGRAM[program1]/program": "pynxtools/dataconverter/readers/em", + "/PROGRAM[program1]/program/@version": PYNXTOOLS_VERSION, + "/PROGRAM[program1]/program/@url": PYNXTOOLS_URL, + "/ENTRY[entry*]/@version": NXEM_VERSION, + "/ENTRY[entry*]/@url": NXEM_URL, + "/ENTRY[entry*]/definition": NXEM_NAME} + + +class NxEmAppDef(): + """Add NeXus NXem appdef specific contextualization. 
+ + """ + def __init__(self): + pass + + def parse(self, template: dict, entry_id: int = 1, cmd_line_args = []) -> dict: + for nx_path, modifier in NxEmRoot.items(): + if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"): + trg = variadic_path_to_specific_path(nx_path, [entry_id]) + res = apply_modifier(modifier, modifier) + if res is not None: + template[trg] = res + if cmd_line_args != [] and all(isinstance(item, str) for item in cmd_line_args): + template["/cs_profiling/@NX_class"] = "NXcs_profiling" + template["/cs_profiling/command_line_call"] = cmd_line_args + return template diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 7349cee2a..e424bbf9d 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -23,10 +23,13 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader -""" -from pynxtools.dataconverter.readers.em_om.utils.use_case_selector \ - import EmOmUseCaseSelector +from pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef + +from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser + +from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver +""" from pynxtools.dataconverter.readers.em_om.utils.generic_eln_io \ import NxEmOmGenericElnSchemaParser @@ -52,7 +55,7 @@ class EmReader(BaseReader): # pylint: disable=too-few-public-methods # Whitelist for the NXDLs that the reader supports and can process - supported_nxdls = ["NXem_refactoring"] + supported_nxdls = ["NXem"] # ["NXem_refactoring"] # pylint: disable=duplicate-code def read(self, @@ -63,8 +66,8 @@ def read(self, # pylint: disable=duplicate-code template.clear() - debug_id = 3 - template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}" + # debug_id = 3 + # template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}" # this em_om parser combines multiple sub-parsers # so we need the following input: # logical analysis which use case @@ -100,11 +103,20 @@ def read(self, # else: # print("No interpretable configuration file offered") + input_file_names = [] + for file_path in file_paths: + if file_path != "": + input_file_names.append(file_path) print("Parse NeXus appdef-specific content...") - # nxs = NxEmAppDefContent() - # nxs.parse(template) + nxs = NxEmAppDef() + nxs.parse(template, entry_id, input_file_names) print("Parse and map pieces of information within files from tech partners...") + sub_parser = "nxs_mtex" + subparser = NxEmNxsMTexSubParser() + subparser.parse(template, entry_id) + # add further with resolving cases + # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue @@ -133,6 +145,14 @@ def read(self, # we only need to decorate the template to point to the mandatory ROI overview # print("Create NeXus default plottable data...") # em_default_plot_generator(template, 1) + nxs_plt = NxEmDefaultPlotResolver() + # if nxs_mtex is the sub-parser + resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot( + entry_id, file_paths[0]) + print(f"DEFAULT PLOT IS {resolved_path}") + if resolved_path != "": + nxs_plt.annotate_default_plot(template, resolved_path) + debugging = True if debugging is True: print("Reporting state of template before passing to HDF5 writing...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py new file mode 
100644 index 000000000..4ebd685ca --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py @@ -0,0 +1,54 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""(Sub-)parser mapping concepts and content from *.nxs.mtex files on NXem.""" + +""" +README.md +*.nxs.mtex is a specific HDF5-based data processing report format for users of +the MTex/Matlab texture toolbox to export results from MTex to other software. + +The format uses several concepts from the NXem appdef. +Instances of *.nxs.mtex files thus contain several but not necessarily +all pieces of information which the NXem application definition demands +as required. + +Therefore, pynxtools can be used to append these missing pieces of information. + +Currently implemented I/O support for this format: +The current implementation of *.nxs.mtex sub-parser in the em reader +is implemented such that an existent *.nxs.mtex file is copied and +this copy annotated with the missing pieces of information. + +The nxs_mtex sub-parser is the only sub-parser of the em parser +with this copying-the-input-file design. For all other file formats +the em parser uses the template to instantiate the complete file +including all numerical data eventually generated by one or several +of the sub-parsers. +""" + + +class NxEmNxsMTexSubParser(): + """Map content from *.nxs.mtex files on an instance of NXem. + + """ + def __init__(self): + pass + + def parse(self, template: dict, entry_id: int = 1) -> dict: + """Pass because for *.nxs.mtex all data are already in the copy of the output.""" + return template diff --git a/pynxtools/dataconverter/readers/em/utils/default_plots.py b/pynxtools/dataconverter/readers/em/utils/default_plots.py new file mode 100644 index 000000000..4b1781c34 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/default_plots.py @@ -0,0 +1,56 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Logics and functionality to identify and annotate a default plot NXem.""" + +import h5py +import numpy as np + + +class NxEmDefaultPlotResolver(): + """Annotate the default plot in an instance of NXem. 
+ + """ + def __init__(self): + pass + + def annotate_default_plot(self, template: dict, plot_nxpath: str = "") -> dict: + """Write path to the default plot from root to plot_nxpath.""" + if plot_nxpath != "": + print(plot_nxpath) + tmp = plot_nxpath.split("/") + print(tmp) + for idx in np.arange(0, len(tmp)): + if tmp[idx] != "": + if idx != 0: + template[f'{"/".join(tmp[0:idx])}/@default'] = tmp[idx] + return template + + def nxs_mtex_get_nxpath_to_default_plot(self, + entry_id: int = 1, + nxs_mtex_file_name: str = "") -> str: + """Find a path to a default plot (i.e. NXdata instance) if any.""" + h5r = h5py.File(nxs_mtex_file_name, "r") + if f"/entry{entry_id}/roi1/ebsd/indexing/roi" in h5r: + h5r.close() + return f"/entry{entry_id}/roi1/ebsd/indexing/roi" + h5r.close() + return "" + + def parse(self, template: dict, entry_id: int = 1) -> dict: + """Pass because for *.nxs.mtex all data are already in the copy of the output.""" + return template From 5dd2c66b3087bb78702e18021f8f548835c96f46 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Fri, 18 Aug 2023 11:39:06 +0200 Subject: [PATCH 08/84] Updated nxdefs to use latest refactored NXem appdef --- pynxtools/definitions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pynxtools/definitions b/pynxtools/definitions index 9f09b1efa..2b18f2cbd 160000 --- a/pynxtools/definitions +++ b/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 9f09b1efafd6f897ec54e9824a919bffa1f312d4 +Subproject commit 2b18f2cbd7efdce201328c71eda7fbe91e06b6c1 From be663d271eaa15a87726241489567f975625542d Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Fri, 18 Aug 2023 13:58:32 +0200 Subject: [PATCH 09/84] Added logic and code for annotating original references and atom_types from phase_names based on a table composed from an EM domain expert --- .../readers/em/examples/ebsd_database.py | 305 ++++++++++++++++++ pynxtools/dataconverter/readers/em/reader.py | 2 + .../readers/em/subparsers/nxs_mtex.py | 68 ++++ 3 files changed, 375 insertions(+) create mode 100644 pynxtools/dataconverter/readers/em/examples/ebsd_database.py diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py new file mode 100644 index 000000000..0196a52f5 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py @@ -0,0 +1,305 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Pieces of information relevant for the parsing the ebsd_database use case.""" + +# for the Summer2023 EBSD database example with NOMAD OASIS: +# we collected several EBSD datasets from colleagues. 
In these research studies, +# names of the phases used for indexing (NXem_ebsd_crystal_structure_candidate) +# instances were very different elements, substances, free-text annotations, +# mineral names, names of mineral groups, or names of rocks +# here is the FOR NOW human expert dictionary composed from these names +# to map used phase names on atom types +# the idea behind this was to identify what is the minimal information that +# is recoverable when there is no common agreement about the phases used and their +# exact atomic configuration + + +FreeTextToUniquePhase = {"Actinolite": "Actinolite", + "al": "Al", + "Al2 O3": "Al2O3", + "Albite_MS_DATA": "Albite", + "Albite-Sodium Calcium Aluminum Silicate": "Albite", + "Almandine": "Almandine", + "Alumina (alpha)": "Al2O3", + "Aluminium": "Aluminum", + "Aluminum": "Aluminum", + "Amphibole": "Amphibole", + "Anorthite": "Anorthite", + "Apatite": "Apatite", + "Aragonit": "Aragonite", + "Aragonite": "Aragonite", + "Augite": "Augite", + "Austenite": "Fe", + "Beta_Ti1023": "Ti", + "Biotite": "Biotite", + "Bytownite": "Bytownite", + "BYTOWNITE An": "Bytownite", + "calcite": "Calcite", + "Calcite": "Calcite", + "Chlorite": "Chlorite", + "Chlorite Mg12(Si,Al)8": "Chlorite", + "Chlorite Mg12(Si.Al)8": "Chlorite", + "Chloritoid 2M": "Chloritoid", + "Chromite": "Chromite", + "Clinochlore 1MIa": "Clinochlore", + "Clinochlore IIb-2": "Clinochlore", + "Clinopyroxene": "Clinopyroxene", + "Clinozoisite": "Clinozoisite", + "Co FCC": "Co", + "Co Hexagonal": "Co", + "Cobalt": "Co", + "Coesite": "Coesite", + "Copper": "Cu", + "Copper": "Cu", + "Corderite": "Cordierite", + "Diopside": "Diopside", + "Diopside CaMgSi2O6": "Diopside", + "Dolomite": "Dolomite", + "Enstatite": "Enstatite", + "Enstatite Opx AV77": "Enstatite", + "Epidote": "Epidote", + "Fe3C": "Fe3C", + "Fe-BCC": "Fe", + "Fe-FCC": "Fe", + "Feldspar": "Feldspar", + "Ferrite": "Fe", + "Ferrite, bcc (New)": "Fe", + "Ferrite, bcc 110 (old)": "Fe", + "Ferrosilite, magnesian": "Ferrosilite", + "Forsterite": "Forsterite", + "Forsterite , 90%Mg": "Forsterite", + "Ga N": "GaN", + "Gallium nitride": "GaN", + "Garnet": "Garnet", + "Glaucophane": "Glaucophane", + "Graphite": "Graphite", + "Halite": "Halite", + "Halite": "Halite", + "Hematite": "Hematite", + "Hornblende": "Hornblende", + "Hornblende C2/m": "Hornblende", + "Hortonolite": "Hortonolite", + "Hydroxylapatite": "Hydroxylapatite", + "Ice 1h": "H2O", + "Ice Ih": "H2O", + "Ilmenite": "Ilmenite", + "Ilmenite - MgSiO3": "Ilmenite", + "Ilmenite FeTiO3 trig": "Ilmenite", + "Iron - Alpha": "Fe", + "Iron (Alpha)": "Fe", + "Iron (Gamma)": "Fe", + "Iron bcc (old)": "Fe", + "Iron bcc (old)": "Fe", + "Iron fcc": "Fe", + "Iron fcc": "Fe", + "Iron Oxide (C)": "Iron oxide", + "Jadeite": "Jadeite", + "K Fsp": "Orthoclase", + "Kyanite": "Kyanite", + "Lawsonite": "Lawsonite", + "Low albite": "Albite", + "Magnesite": "Magnesite", + "Magnesium": "Mg", + "Magnetite": "Magnetite", + "Magnetite low": "Magnetite", + "MagnetiteFe3O4 Fd3m": "Magnetite", + "martensite_Ti1023": "Ti", + "Mg Zn2": "MgZn2", + "Mg2 Zn11": "Mg2Zn11", + "MgSiO3 pv (DY)": "Enstatite", + "Mullite": "Mullite", + "Muscovite": "Muscovite", + "Muscovite - 2M1": "Muscovite", + "Muscovite 2M1": "Muscovite", + "N Ti": "TiN", + "N Zr": "ZrN", + "Nb": "Nb", + "Ni3Al": "Ni3Al", + "Nickel": "Ni", + "Nickel": "Ni", + "Ni-superalloy": "Ni", + "Ni-superalloy": "Ni", + "notIndexed": "notIndexed", + "olivine": "Olivine", + "Olivine": "Olivine", + "ompaciteP2n": "Omphacite", + "Omphacite": "Omphacite", + "omphacite": 
"Omphacite", + "OR X": "n/a", + "Orthoclase": "Orthoclase", + "Orthoclase inverted": "Orthoclase", + "Orthopyroxene": "Orthopyroxene", + "Pargasite": "Pargasite", + "Pargasite C2/m": "Pargasite", + "Periclase": "Periclase", + "Pigeonite": "Pigeonite", + "Plagioclase": "Plagioclase", + "Prehnite": "Prehnite", + "Pumpellyite": "Pumpellyite", + "Pyrite": "Pyrite", + "Pyrope": "Pyrope", + "Quartz": "Quartz", + "Quartz low": "Quartz", + "Quartz_hex": "Quartz", + "Quartz-new": "Quartz", + "Ringwoodite": "Ringwoodite", + "Rutile": "Rutile", + "Sanidine": "Sanidine", + "Siderite": "Siderite", + "Silicon": "Si", + "Silver": "Ag", + "Spessartine, ferroan": "Spessartine", + "Spinel": "Spinel", + "Spinel - (Mg,Fe)2SiO4": "Spinel", + "Stishovite": "Stishovite", + "Sulfoapatite": "Sulfoapatite", + "Superalloy-MC": "n/a", + "Tantalum": "Ta", + "Ti O": "TiO", + "Ti O2": "TiO2", + "TiC": "TiC", + "Ti-Hex": "Ti", + "Tin": "Sn", + "Titanite": "Titanite", + "Titanium": "Ti", + "Titanium cubic": "Ti", + "Titanium-beta": "Ti", + "Tremolite": "Tremolite", + "troilite": "Troilite", + "Wadsleyite": "Wadsleyite", + "Zeolite": "Zeolite", + "Zinc": "Zn", + "Zirc-alloy4": "Zirc-alloy4", + "Zircon": "Zr", + "Zircon": "Zircon", + "Zirconia tetragonal": "Zirconia", + "Zirconium": "Zr", + "Zirconium - alpha": "Zr", + "Zirconium - alpha": "Zr", + "Zoisite": "Zoisite", + "Zr02 monoclinic": "ZrO", + "": "n/a"} + +UniquePhaseToAtomTypes = {"Actinolite": "Ca;Mg;Fe;Si;O;H", + "Ag": "Ag", + "Al": "Al", + "Al2O3": "Al;O", + "Albite": "Na;Al;Si;O", + "Almandine": "Fe;Si;Al;O", + "Aluminum": "Al", + "Amphibole": "Si;O;H", + "Anorthite": "Ca;Al;Si;O", + "Apatite": "P;O", + "Aragonite": "Ca;C;O", + "Augite": "Ca;Mg;Fe;Si;O", + "Biotite": "K;Si;O", + "Bytownite": "Ca;Na;Si;Al;O", + "Calcite": "Ca;C;O", + "Chlorite": "Fe;Mg;Al;Si;Al;O;H", + "Chloritoid": "Fe;Al;O;Si;H", + "Chromite": "Fe;Cr;O", + "Clinochlore": "Mg;Al;Si;O;H", + "Clinopyroxene": "Si;O", + "Clinozoisite": "Ca;Al;Si;O;H", + "Co": "Co", + "Coesite": "Si;O", + "Cordierite": "Mg;Al;Si;O", + "Cu": "Cu", + "Diopside": "Ca;Mg;Si;O", + "Dolomite": "Ca;Mg;C;O", + "Enstatite": "Mg;Si;O", + "Epidote": "Ca;Al;Fe;Si;O;H", + "Fe": "Fe", + "Fe3C": "Fe;C", + "Feldspar": "Si;O", + "Ferrosilite": "Fe;Si;O", + "Forsterite": "Mg;Si;O", + "GaN": "Ga;N", + "GaN": "Ga;N", + "Garnet": "Si;O", + "Glaucophane": "Na;Mg;Al;Si;O;H", + "Graphite": "C", + "H2O": "H;O", + "Halite": "Na;Cl", + "Halite": "Na;Cl", + "Hematite": "Fe;O", + "Hornblende": "Al;Si;O", + "Hortonolite": "Fe;Si;O", + "Hydroxylapatite": "Ca;P;O;H", + "Ilmenite": "Fe;Ti;O", + "Iron oxide": "Fe;O", + "Jadeite": "Na;Al;Si;O", + "Kyanite": "Al;O;Si", + "Lawsonite": "Ca;Al;Si;O;H", + "Magnesite": "Mg;C;O", + "Magnetite": "Fe;O", + "Mg": "Mg", + "Mg2Zn11": "Mg;Zn", + "MgZn2": "Mg;Zn", + "Mullite": "Al;Si;O", + "Muscovite": "K;Al;Si;O;H", + "Nb": "Nb", + "Ni": "Ni", + "Ni": "Ni", + "Ni3Al": "Ni;Al", + "notIndexed": "", + "n/a": "", + "Olivine": "Mg;Fe;Si;O", + "Omphacite": "Ca;Na;Mg;Fe;Al;Si;O", + "Orthoclase": "K;Al;Si;O", + "Orthopyroxene": "Mg;Si;O", + "Pargasite": "Na;Ca;Mg;Al;Si;O;H", + "Periclase": "Mg;O", + "Pigeonite": "Mg;Fe;Ca;Si;O", + "Plagioclase": "Al;Si;O", + "Prehnite": "Ca;Al;Si;O;H", + "Pumpellyite": "Ca;Si;O;H", + "Pyrite": "Fe;S", + "Pyrope": "Mg;Al;Si;O", + "Quartz": "Si;O", + "Ringwoodite": "Si;Mg;O", + "Rutile": "Ti;O", + "Sanidine": "K;Al;Si;O", + "Si": "Si", + "Siderite": "Fe;C;O", + "Sn": "Sn", + "Spessartine": "Mn;Al;Si;O", + "Spinel": "Mg;Al;O", + "Stishovite": "Si;O", + "Sulfoapatite": "S;P;O", + "Ta": "Ta", + 
"Ti": "Ti", + "TiC": "Ti;C", + "TiN": "Ti;N", + "TiO": "Ti;O", + "TiO2": "Ti;O", + "Titanite": "Ca;Ti;Si;O", + "Tremolite": "Ca;Mg;Fe;Si;O;H", + "Troilite": "Fe;S", + "Wadsleyite": "Mg;Si;O", + "Zeolite": "", + "Zirc-alloy4": "Zr;Sn;Fe;Cr", + "Zircon": "Zr;Si;O", + "Zirconia": "Zr;O", + "Zn": "Zn", + "Zoisite": "Ca;Al;Si;O;H", + "Zr": "Zr", + "ZrN": "Zr;N", + "ZrO": "Zr;O"} + +ProjectIdToCitation = {"Forsterite.ctf.nxs.mtex": {"data": "someurl", "paper": "someurl"}} diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index e424bbf9d..a62c6bccf 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -115,6 +115,8 @@ def read(self, sub_parser = "nxs_mtex" subparser = NxEmNxsMTexSubParser() subparser.parse(template, entry_id) + subparser.example_ebsd_database_set_atom_types(template, entry_id, file_paths[0]) + subparser.example_ebsd_database_set_citations(template, entry_id, file_paths[0]) # add further with resolving cases # for dat_instance in case.dat_parser_type: diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py index 4ebd685ca..6d96e9352 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py @@ -41,6 +41,14 @@ of the sub-parsers. """ +import re +import h5py + +from ase.data import chemical_symbols + +from pynxtools.dataconverter.readers.em.examples.ebsd_database \ + import FreeTextToUniquePhase, UniquePhaseToAtomTypes, ProjectIdToCitation + class NxEmNxsMTexSubParser(): """Map content from *.nxs.mtex files on an instance of NXem. @@ -52,3 +60,63 @@ def __init__(self): def parse(self, template: dict, entry_id: int = 1) -> dict: """Pass because for *.nxs.mtex all data are already in the copy of the output.""" return template + + def example_ebsd_database_set_atom_types(self, + template: dict, + entry_id: int = 1, + input_file_name: str = "") -> dict: + """Add phase name surplus other data to the copy of the *.nxs.mtex instance.""" + # for each phase map elements + atom_types = set() + if input_file_name == "": + template[f"/ENTRY[entry{entry_id}]/sample/atom_types"] = "" + return template + h5r = h5py.File(input_file_name, "r") + trg = f"/entry{entry_id}/roi1/ebsd/indexing" + if trg in h5r: + for node_name in h5r[trg].keys(): + if re.match("phase[0-9]+", node_name) is not None: + if f"{trg}/{node_name}/phase_name" in h5r: + obj = h5r[f"{trg}/{node_name}/phase_name"][()].decode('utf-8') + free_text_phase_name = obj.rstrip(" ").lstrip(" ") + if free_text_phase_name in FreeTextToUniquePhase.keys(): + unique_phase_name \ + = FreeTextToUniquePhase[free_text_phase_name] + if unique_phase_name in UniquePhaseToAtomTypes.keys(): + curr_atom_types \ + = UniquePhaseToAtomTypes[unique_phase_name] + symbols = curr_atom_types.split(";") + for symbol in symbols: + if symbol in chemical_symbols[1::]: + atom_types.add(symbol) + h5r.close() + + if len(atom_types) > 0: + template[f"/ENTRY[entry{entry_id}]/sample/atom_types"] \ + = ", ".join(list(atom_types)) + else: + template[f"/ENTRY[entry{entry_id}]/sample/atom_types"] = "" + return template + + def example_ebsd_database_set_citations(self, + template: dict, + entry_id: int = 1, + input_file_name: str = "") -> dict: + """Add doi for location and paper of orig. 
work in the *.nxs.mtex instance.""" + # TODO::add list of all projects + proj_id_key = input_file_name + if proj_id_key in ProjectIdToCitation.keys(): + # data citation + cite_id = 1 + if "data" in ProjectIdToCitation[proj_id_key].keys(): + template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/url"] \ + = ProjectIdToCitation[proj_id_key]["data"] + template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/description"] \ + = "Link to the actual data repository from where these data were collected." + cite_id += 1 + if "paper" in ProjectIdToCitation[proj_id_key].keys(): + template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/url"] \ + = ProjectIdToCitation[proj_id_key]["paper"] + template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/description"] \ + = "Link to (the or a) paper which is evidently associated with these data." + return template From d20e49e5d9f9455d87ae45fef3b1e71326843a3d Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Fri, 18 Aug 2023 14:46:29 +0200 Subject: [PATCH 10/84] Returning an intermediate result for now, but the verification with fairmat is not for some reason completely bypassed, some silent assumptions in nexus and the verification code? --- pynxtools/dataconverter/readers/em/reader.py | 11 ++- .../readers/em/subparsers/nxs_mtex.py | 79 +++++++++++++------ 2 files changed, 62 insertions(+), 28 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index a62c6bccf..3a6dfaf9d 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -55,7 +55,7 @@ class EmReader(BaseReader): # pylint: disable=too-few-public-methods # Whitelist for the NXDLs that the reader supports and can process - supported_nxdls = ["NXem"] # ["NXem_refactoring"] + supported_nxdls = ["NXem", "NXroot"] # pylint: disable=duplicate-code def read(self, @@ -113,10 +113,9 @@ def read(self, print("Parse and map pieces of information within files from tech partners...") sub_parser = "nxs_mtex" - subparser = NxEmNxsMTexSubParser() - subparser.parse(template, entry_id) - subparser.example_ebsd_database_set_atom_types(template, entry_id, file_paths[0]) - subparser.example_ebsd_database_set_citations(template, entry_id, file_paths[0]) + subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0]) + subparser.parse(template) + # add further with resolving cases # for dat_instance in case.dat_parser_type: @@ -151,7 +150,7 @@ def read(self, # if nxs_mtex is the sub-parser resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot( entry_id, file_paths[0]) - print(f"DEFAULT PLOT IS {resolved_path}") + # print(f"DEFAULT PLOT IS {resolved_path}") if resolved_path != "": nxs_plt.annotate_default_plot(template, resolved_path) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py index 6d96e9352..ba96f8977 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py @@ -42,6 +42,8 @@ """ import re +from typing import Any +from typing_extensions import SupportsIndex import h5py from ase.data import chemical_symbols @@ -54,25 +56,33 @@ class NxEmNxsMTexSubParser(): """Map content from *.nxs.mtex files on an instance of NXem. 
""" - def __init__(self): - pass + def __init__(self, entry_id: int = 1, input_file_name: str = ""): + if entry_id > 0: + self.entry_id = entry_id + else: + self.entry_id = 1 + self.input_file_name = input_file_name + # the file written out by MTex/Matlab this file is already preformatted for NeXus - def parse(self, template: dict, entry_id: int = 1) -> dict: + def parse(self, template: dict) -> dict: """Pass because for *.nxs.mtex all data are already in the copy of the output.""" + self.example_ebsd_database_set_atom_types(template) + self.example_ebsd_database_set_citations(template) + self.example_ebsd_database_set_project(template, proj_id="186", map_id="n/a") + self.example_ebsd_database_set_coordinate_system(template) + self.example_ebsd_database_set_user(template) + self.example_ebsd_database_conventions(template) return template - def example_ebsd_database_set_atom_types(self, - template: dict, - entry_id: int = 1, - input_file_name: str = "") -> dict: + def example_ebsd_database_set_atom_types(self, template: dict) -> dict: """Add phase name surplus other data to the copy of the *.nxs.mtex instance.""" # for each phase map elements atom_types = set() - if input_file_name == "": - template[f"/ENTRY[entry{entry_id}]/sample/atom_types"] = "" + if self.input_file_name == "": + template[f"/ENTRY[entry{self.entry_id}]/sample/atom_types"] = "" return template - h5r = h5py.File(input_file_name, "r") - trg = f"/entry{entry_id}/roi1/ebsd/indexing" + h5r = h5py.File(self.input_file_name, "r") + trg = f"/entry{self.entry_id}/roi1/ebsd/indexing" if trg in h5r: for node_name in h5r[trg].keys(): if re.match("phase[0-9]+", node_name) is not None: @@ -92,31 +102,56 @@ def example_ebsd_database_set_atom_types(self, h5r.close() if len(atom_types) > 0: - template[f"/ENTRY[entry{entry_id}]/sample/atom_types"] \ + template[f"/ENTRY[entry{self.entry_id}]/sample/atom_types"] \ = ", ".join(list(atom_types)) else: - template[f"/ENTRY[entry{entry_id}]/sample/atom_types"] = "" + template[f"/ENTRY[entry{self.entry_id}]/sample/atom_types"] = "" return template - def example_ebsd_database_set_citations(self, - template: dict, - entry_id: int = 1, - input_file_name: str = "") -> dict: + def example_ebsd_database_set_citations(self, template: dict) -> dict: """Add doi for location and paper of orig. work in the *.nxs.mtex instance.""" # TODO::add list of all projects - proj_id_key = input_file_name + proj_id_key = self.input_file_name if proj_id_key in ProjectIdToCitation.keys(): # data citation cite_id = 1 if "data" in ProjectIdToCitation[proj_id_key].keys(): - template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/url"] \ + template[f"/ENTRY[entry{self.entry_id}]/CITE[cite{cite_id}]/url"] \ = ProjectIdToCitation[proj_id_key]["data"] - template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/description"] \ + template[f"/ENTRY[entry{self.entry_id}]/CITE[cite{cite_id}]/description"] \ = "Link to the actual data repository from where these data were collected." cite_id += 1 if "paper" in ProjectIdToCitation[proj_id_key].keys(): - template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/url"] \ + template[f"/ENTRY[entry{self.entry_id}]/CITE[cite{cite_id}]/url"] \ = ProjectIdToCitation[proj_id_key]["paper"] - template[f"/ENTRY[entry{entry_id}]/CITE[cite{cite_id}]/description"] \ + template[f"/ENTRY[entry{self.entry_id}]/CITE[cite{cite_id}]/description"] \ = "Link to (the or a) paper which is evidently associated with these data." 
return template + + def example_ebsd_database_set_project(self, + template: dict, + proj_id: str = "", + map_id: str = "") -> dict: + """Add top-level project and map identifier.""" + template[f"/ENTRY[entry{self.entry_id}]/experiment_identifier"] \ + = f"map_id: {map_id}" + template[f"/ENTRY[entry{self.entry_id}]/experiment_description"] \ + = f"project_id: {proj_id}, map_id: {map_id}" + # TODO::start_time and end_time, other missing sample details + return template + + def example_ebsd_database_set_user(self, template: dict) -> dict: + """Add user involved in the ebsd case study.""" + # TODO::add coauthors as parsed out from DOIs of paper and data at least first author + template[f"/ENTRY[entry{self.entry_id}]/USER[user1]/name"] = "M. Kühbach et al." + return template + + def example_ebsd_database_set_coordinate_system(self, template: dict) -> dict: + """Add used conventions and coordinate systems.""" + # TODO::parse these from the project table + return template + + def example_ebsd_database_conventions(self, template: dict) -> dict: + """Add conventions made for EBSD setup and geometry.""" + # TODO::parse these from the project table + return template From d5ae63fd2088ace89c33763bad0e964dad1d3c40 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Thu, 19 Oct 2023 16:09:36 +0200 Subject: [PATCH 11/84] Prepped HDF5 parsing using subparsers --- .../readers/em/concepts/nexus_concepts.py | 7 +- .../readers/em/geometry/convention_mapper.py | 83 ++ .../em/geometry/euler_angle_convention.py | 63 ++ .../readers/em/geometry/geometry.py | 58 ++ .../readers/em/geometry/handed_cartesian.py | 749 ++++++++++++++++++ .../readers/em/geometry/msmse_convention.py | 51 ++ pynxtools/dataconverter/readers/em/reader.py | 6 + .../readers/em/subparsers/hfive.py | 608 ++++++++++++++ .../readers/em/subparsers/hfive_apex.py | 68 ++ .../readers/em/subparsers/hfive_bruker.py | 65 ++ .../readers/em/subparsers/hfive_concept.py | 86 ++ .../readers/em/subparsers/hfive_ebsd.py | 67 ++ .../readers/em/subparsers/hfive_edax.py | 70 ++ .../readers/em/subparsers/hfive_emsoft.py | 50 ++ .../readers/em/subparsers/hfive_oxford.py | 271 +++++++ .../readers/em/subparsers/pyxem_processor.py | 99 +++ .../readers/em/utils/hfive_web_constants.py | 20 + .../readers/em/utils/which_string_encoding.py | 28 + pyproject.toml | 6 +- 19 files changed, 2449 insertions(+), 6 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/geometry/convention_mapper.py create mode 100644 pynxtools/dataconverter/readers/em/geometry/euler_angle_convention.py create mode 100644 pynxtools/dataconverter/readers/em/geometry/geometry.py create mode 100644 pynxtools/dataconverter/readers/em/geometry/handed_cartesian.py create mode 100644 pynxtools/dataconverter/readers/em/geometry/msmse_convention.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py create mode 100644 
pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py create mode 100644 pynxtools/dataconverter/readers/em/utils/which_string_encoding.py diff --git a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py index 2f12ab5de..8617052a6 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py +++ b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py @@ -30,12 +30,13 @@ NXEM_VERSION = "n/a" NXEM_URL = "https://www.github.com/FAIRmat-NFDI/nexus_definitions" -NxEmRoot = {"/PROGRAM[program1]/program": "pynxtools/dataconverter/readers/em", - "/PROGRAM[program1]/program/@version": PYNXTOOLS_VERSION, - "/PROGRAM[program1]/program/@url": PYNXTOOLS_URL, +NxEmRoot = {"/ENTRY[entry*]/PROGRAM[program1]/program": "pynxtools/dataconverter/readers/em", + "/ENTRY[entry*]/PROGRAM[program1]/program/@version": PYNXTOOLS_VERSION, + "/ENTRY[entry*]/PROGRAM[program1]/program/@url": PYNXTOOLS_URL, "/ENTRY[entry*]/@version": NXEM_VERSION, "/ENTRY[entry*]/@url": NXEM_URL, "/ENTRY[entry*]/definition": NXEM_NAME} +# alternatively the above-mentioned program1 entries to place under "/" class NxEmAppDef(): diff --git a/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py b/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py new file mode 100644 index 000000000..7e08427c5 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py @@ -0,0 +1,83 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Parse conventions from an ELN schema instance.""" + +# pylint: disable=no-member + +# ,duplicate-code + +from pynxtools.dataconverter.readers.em.geometry.handed_cartesian \ + import REFERENCE_FRAMES, AXIS_DIRECTIONS, is_cs_well_defined + +from pynxtools.dataconverter.readers.em.concepts.concept_mapper \ + import variadic_path_to_specific_path, apply_modifier + +from pynxtools.dataconverter.readers.em.geometry.geometry \ + import NxEmConventions + +# example how to check against different types of Euler angle conventions +# from pynxtools.dataconverter.readers.em.geometry.euler_angle_convention \ +# import which_euler_convention + +# example how to check if set of conventions matches to some suggestion in the literature +# from pynxtools.dataconverter.readers.em.geometry.msmse_convention \ +# import is_consistent_with_msmse_convention + + +class NxEmConventionMapper: + """TODO:: + + """ + + def __init__(self, file_name: str, entry_id: int = 1): # , pattern_simulation: bool): + """Fill template with ELN pieces of information.""" + if entry_id > 0: + self.entry_id = entry_id + else: + self.entry_id = 1 + # self.pattern_simulation = pattern_simulation + # print(f"Extracting data from ELN file: {file_name}") + # if (file_name.rsplit('/', 1)[-1].startswith("eln_data") + # or file_name.startswith("eln_data")) and entry_id > 0: + # self.entry_id = entry_id + # self.file_name = file_name + # with open(self.file_name, "r", encoding="utf-8") as stream: + # self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter=":") + # else: + # self.entry_id = 1 + # self.file_name = "" + # self.yml = {} + # if "ElectronBackscatterDiffraction" in self.yml: + # self.yml = self.yml["ElectronBackscatterDiffraction"] + + def parse(self, template: dict) -> dict: + """Extract metadata from generic ELN text file to respective NeXus objects.""" + print("Parsing conventions...") + for nx_path, modifier in NxEmConventions.items(): + if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"): + trg = variadic_path_to_specific_path(nx_path, [self.entry_id]) + res = apply_modifier(modifier, modifier) + if res is not None: + template[trg] = res + return template + # self.parse_rotation_convention_section(template) + # self.parse_processing_frame_section(template) + # self.parse_sample_frame_section(template) + # self.parse_detector_frame_section(template) + # self.parse_gnomonic_projection_section(template) + # return template diff --git a/pynxtools/dataconverter/readers/em/geometry/euler_angle_convention.py b/pynxtools/dataconverter/readers/em/geometry/euler_angle_convention.py new file mode 100644 index 000000000..64b2be8a2 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/geometry/euler_angle_convention.py @@ -0,0 +1,63 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Definitions for different conventions associated with Euler angles.""" + +# pylint: disable=no-member + +# Euler angles are a parameterization for orientations which use three +# consecutively rotations (3D) rotations to rotate a configuration +# described by an attached Cartesian CS na(False, "", ""), "", ""),med A into configuration +# described by an attached Cartesian CS named B +# the sequence about which specific axis and new/intermediate axes +# one rotates enables to distinguish different types of Euler angle +# so-called Euler-angle conventions, most commonly used in materials +# science is the convention of H.-J. Bunge aka zxz convention + +# "Bunge" https://doi.org/10.1016/C2013-0-11769-2 +# "Rowenhorst" https://doi.org/10.1088/0965-0393/23/8/083501 +# "Morawiec" https://doi.org/10.1007/978-3-662-09156-2 +# "Britton" https://doi.org/10.1016/j.matchar.2016.04.008 + + +which_euler_convention = {"xxx": (False, "", ""), + "xxy": (True, "", ""), + "xxz": (True, "", ""), + "xyx": (True, "", ""), + "xyy": (True, "", ""), + "xyz": (True, "", ""), + "xzx": (True, "", ""), + "xzy": (True, "", ""), + "xzz": (True, "", ""), + "yxx": (True, "", ""), + "yxy": (True, "", ""), + "yxz": (True, "", ""), + "yyx": (True, "", ""), + "yyy": (False, "", ""), + "yyz": (True, "", ""), + "yzx": (True, "", ""), + "yzy": (True, "", ""), + "yzz": (True, "", ""), + "zxx": (True, "", ""), + "zxy": (True, "", ""), + "zxz": (True, "Bunge", "proper"), + "zyx": (True, "", ""), + "zyy": (True, "", ""), + "zyz": (True, "", ""), + "zzx": (True, "", ""), + "zzy": (True, "", ""), + "zzz": (False, "", "")} diff --git a/pynxtools/dataconverter/readers/em/geometry/geometry.py b/pynxtools/dataconverter/readers/em/geometry/geometry.py new file mode 100644 index 000000000..5df234248 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/geometry/geometry.py @@ -0,0 +1,58 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utilities for defining NXcoordinate_system(_set) and NXtransformation instances.""" + +# pylint: disable=no-member + + +NxEmConventions = {"/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[rotation_conventions]/axis_angle_convention": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[rotation_conventions]/euler_angle_convention": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[rotation_conventions]/sign_convention": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[rotation_conventions]/rotation_convention": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[rotation_conventions]/rotation_handedness": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/type": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/handedness": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/x_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/x_alias": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/y_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/y_alias": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/z_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/z_alias": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[processing_reference_frame]/origin": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[sample_reference_frame]/type": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[sample_reference_frame]/handedness": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[sample_reference_frame]/x_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[sample_reference_frame]/y_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[sample_reference_frame]/z_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[sample_reference_frame]/origin": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[detector_reference_frame1]/type": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[detector_reference_frame1]/handedness": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[detector_reference_frame1]/x_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[detector_reference_frame1]/y_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[detector_reference_frame1]/z_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[detector_reference_frame1]/origin": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[gnomonic_projection_reference_frame]/type": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[gnomonic_projection_reference_frame]/handedness": "undefined", + 
"/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[gnomonic_projection_reference_frame]/x_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[gnomonic_projection_reference_frame]/y_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[gnomonic_projection_reference_frame]/z_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/COORDINATE_SYSTEM[gnomonic_projection_reference_frame]/origin": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[pattern_centre]/[pattern_centre]/x_boundary_convention": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[pattern_centre]/[pattern_centre]/x_normalization_direction": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[pattern_centre]/[pattern_centre]/y_boundary_convention": "undefined", + "/ENTRY[entry*]/EM_CONVENTIONS[em_conventions]/OBJECT[pattern_centre]/[pattern_centre]/y_normalization_direction": "undefined"} diff --git a/pynxtools/dataconverter/readers/em/geometry/handed_cartesian.py b/pynxtools/dataconverter/readers/em/geometry/handed_cartesian.py new file mode 100644 index 000000000..c5f06b5db --- /dev/null +++ b/pynxtools/dataconverter/readers/em/geometry/handed_cartesian.py @@ -0,0 +1,749 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Definitions for right- or left-handed Cartesian coordinate systems.""" + +# pylint: disable=no-member + +# a coordinate system can be defined using an instance of a custom schema +# with a set of sets of RadioEditButtons, each set can be used to define the +# unique orientation of a right-handed Cartesian coordinate system's base vectors when +# viewed by an outer observer looking towards the origin of the coordinate system +# triplet vector, specifically we inspect the line of sight and a compass +# the compass defines four directions, north aka up, east aka right, south aka down, +# and west aka left. The plane of sight defines two additional directions in aka +# into the plane and out aka out-of-the-plane +# there are different strategies what users find convenient to report to specify +# the directions into which the base vectors are assumed pointing + +# in general only two perpendicular directions have to be specified as the third +# follows implicitly i.e. there is no need to overconstrain +# the term "undefined" is the default to use unless one is sure about each statement + +# the is_cs_rh_unambiguous dictionary is a lookup table whereby the input of the user +# can be evaluated to check if the radioeditbutton choices result in a fully constraint +# and thus unique definition. All other choices are reported as a not fully constraint +# i.e. 
eventually problematic coordinate system + +REFERENCE_FRAMES = ["undefined", "right_handed_cartesian", "left_handed_cartesian"] + +AXIS_DIRECTIONS = ["undefined", "north", "east", "south", "west", "in", "out"] + +# is a right-handed (rh) Cartesian coordinate system (cs) sufficiently constrained, when +# at least two base vector directions are chosen, the order is x, y, z +is_cs_rh_unambiguous = {"undefined_undefined_undefined": False, + "undefined_undefined_north": False, + "undefined_undefined_east": False, + "undefined_undefined_south": False, + "undefined_undefined_west": False, + "undefined_undefined_in": False, + "undefined_undefined_out": False, + "undefined_north_undefined": False, + "undefined_north_north": False, + "undefined_north_east": True, + "undefined_north_south": True, + "undefined_north_west": True, + "undefined_north_in": True, + "undefined_north_out": True, + "undefined_east_undefined": False, + "undefined_east_north": True, + "undefined_east_east": False, + "undefined_east_south": True, + "undefined_east_west": False, + "undefined_east_in": True, + "undefined_east_out": True, + "undefined_south_undefined": False, + "undefined_south_north": False, + "undefined_south_east": True, + "undefined_south_south": False, + "undefined_south_west": True, + "undefined_south_in": True, + "undefined_south_out": True, + "undefined_west_undefined": False, + "undefined_west_north": True, + "undefined_west_east": False, + "undefined_west_south": True, + "undefined_west_west": False, + "undefined_west_in": True, + "undefined_west_out": True, + "undefined_in_undefined": False, + "undefined_in_north": True, + "undefined_in_east": True, + "undefined_in_south": True, + "undefined_in_west": True, + "undefined_in_in": False, + "undefined_in_out": False, + "undefined_out_undefined": False, + "undefined_out_north": True, + "undefined_out_east": True, + "undefined_out_south": True, + "undefined_out_west": True, + "undefined_out_in": False, + "undefined_out_out": False, + "north_undefined_undefined": False, + "north_undefined_north": False, + "north_undefined_east": True, + "north_undefined_south": False, + "north_undefined_west": True, + "north_undefined_in": True, + "north_undefined_out": True, + "north_north_undefined": False, + "north_north_north": False, + "north_north_east": False, + "north_north_south": False, + "north_north_west": False, + "north_north_in": False, + "north_north_out": False, + "north_east_undefined": True, + "north_east_north": False, + "north_east_east": False, + "north_east_south": False, + "north_east_west": False, + "north_east_in": True, + "north_east_out": False, + "north_south_undefined": False, + "north_south_north": False, + "north_south_east": False, + "north_south_south": False, + "north_south_west": False, + "north_south_in": False, + "north_south_out": False, + "north_west_undefined": True, + "north_west_north": False, + "north_west_east": False, + "north_west_south": False, + "north_west_west": False, + "north_west_in": False, + "north_west_out": True, + "north_in_undefined": True, + "north_in_north": False, + "north_in_east": False, + "north_in_south": False, + "north_in_west": True, + "north_in_in": False, + "north_in_out": False, + "north_out_undefined": True, + "north_out_north": False, + "north_out_east": True, + "north_out_south": False, + "north_out_west": False, + "north_out_in": False, + "north_out_out": False, + "east_undefined_undefined": False, + "east_undefined_north": True, + "east_undefined_east": False, + "east_undefined_south": True, + 
"east_undefined_west": False, + "east_undefined_in": True, + "east_undefined_out": True, + "east_north_undefined": True, + "east_north_north": False, + "east_north_east": False, + "east_north_south": False, + "east_north_west": False, + "east_north_in": False, + "east_north_out": True, + "east_east_undefined": False, + "east_east_north": False, + "east_east_east": False, + "east_east_south": False, + "east_east_west": False, + "east_east_in": False, + "east_east_out": False, + "east_south_undefined": True, + "east_south_north": False, + "east_south_east": False, + "east_south_south": False, + "east_south_west": False, + "east_south_in": True, + "east_south_out": False, + "east_west_undefined": False, + "east_west_north": False, + "east_west_east": False, + "east_west_south": False, + "east_west_west": False, + "east_west_in": False, + "east_west_out": False, + "east_in_undefined": True, + "east_in_north": True, + "east_in_east": False, + "east_in_south": False, + "east_in_west": False, + "east_in_in": False, + "east_in_out": False, + "east_out_undefined": True, + "east_out_north": False, + "east_out_east": False, + "east_out_south": True, + "east_out_west": False, + "east_out_in": False, + "east_out_out": False, + "south_undefined_undefined": False, + "south_undefined_north": False, + "south_undefined_east": True, + "south_undefined_south": False, + "south_undefined_west": True, + "south_undefined_in": True, + "south_undefined_out": True, + "south_north_undefined": False, + "south_north_north": False, + "south_north_east": False, + "south_north_south": False, + "south_north_west": False, + "south_north_in": False, + "south_north_out": False, + "south_east_undefined": True, + "south_east_north": False, + "south_east_east": False, + "south_east_south": False, + "south_east_west": False, + "south_east_in": False, + "south_east_out": True, + "south_south_undefined": False, + "south_south_north": False, + "south_south_east": False, + "south_south_south": False, + "south_south_west": False, + "south_south_in": False, + "south_south_out": False, + "south_west_undefined": True, + "south_west_north": False, + "south_west_east": False, + "south_west_south": False, + "south_west_west": False, + "south_west_in": True, + "south_west_out": False, + "south_in_undefined": True, + "south_in_north": False, + "south_in_east": True, + "south_in_south": False, + "south_in_west": False, + "south_in_in": False, + "south_in_out": False, + "south_out_undefined": True, + "south_out_north": False, + "south_out_east": False, + "south_out_south": False, + "south_out_west": True, + "south_out_in": False, + "south_out_out": False, + "west_undefined_undefined": False, + "west_undefined_north": True, + "west_undefined_east": False, + "west_undefined_south": True, + "west_undefined_west": False, + "west_undefined_in": True, + "west_undefined_out": True, + "west_north_undefined": True, + "west_north_north": False, + "west_north_east": False, + "west_north_south": False, + "west_north_west": False, + "west_north_in": True, + "west_north_out": False, + "west_east_undefined": False, + "west_east_north": False, + "west_east_east": False, + "west_east_south": False, + "west_east_west": False, + "west_east_in": False, + "west_east_out": False, + "west_south_undefined": True, + "west_south_north": False, + "west_south_east": False, + "west_south_south": False, + "west_south_west": False, + "west_south_in": False, + "west_south_out": True, + "west_west_undefined": False, + "west_west_north": False, + "west_west_east": False, + 
"west_west_south": False, + "west_west_west": False, + "west_west_in": False, + "west_west_out": False, + "west_in_undefined": True, + "west_in_north": False, + "west_in_east": False, + "west_in_south": True, + "west_in_west": False, + "west_in_in": False, + "west_in_out": False, + "west_out_undefined": True, + "west_out_north": True, + "west_out_east": False, + "west_out_south": False, + "west_out_west": False, + "west_out_in": False, + "west_out_out": False, + "in_undefined_undefined": False, + "in_undefined_north": True, + "in_undefined_east": True, + "in_undefined_south": True, + "in_undefined_west": True, + "in_undefined_in": False, + "in_undefined_out": False, + "in_north_undefined": True, + "in_north_north": False, + "in_north_east": True, + "in_north_south": False, + "in_north_west": False, + "in_north_in": False, + "in_north_out": False, + "in_east_undefined": True, + "in_east_north": False, + "in_east_east": False, + "in_east_south": True, + "in_east_west": False, + "in_east_in": False, + "in_east_out": False, + "in_south_undefined": True, + "in_south_north": False, + "in_south_east": False, + "in_south_south": False, + "in_south_west": True, + "in_south_in": False, + "in_south_out": False, + "in_west_undefined": True, + "in_west_north": True, + "in_west_east": False, + "in_west_south": False, + "in_west_west": False, + "in_west_in": False, + "in_west_out": False, + "in_in_undefined": False, + "in_in_north": False, + "in_in_east": False, + "in_in_south": False, + "in_in_west": False, + "in_in_in": False, + "in_in_out": False, + "in_out_undefined": False, + "in_out_north": False, + "in_out_east": False, + "in_out_south": False, + "in_out_west": False, + "in_out_in": False, + "in_out_out": False, + "out_undefined_undefined": False, + "out_undefined_north": True, + "out_undefined_east": True, + "out_undefined_south": True, + "out_undefined_west": True, + "out_undefined_in": False, + "out_undefined_out": False, + "out_north_undefined": True, + "out_north_north": False, + "out_north_east": False, + "out_north_south": False, + "out_north_west": True, + "out_north_in": False, + "out_north_out": False, + "out_east_undefined": True, + "out_east_north": True, + "out_east_east": False, + "out_east_south": False, + "out_east_west": False, + "out_east_in": False, + "out_east_out": False, + "out_south_undefined": True, + "out_south_north": False, + "out_south_east": True, + "out_south_south": False, + "out_south_west": False, + "out_south_in": False, + "out_south_out": False, + "out_west_undefined": True, + "out_west_north": False, + "out_west_east": False, + "out_west_south": True, + "out_west_west": False, + "out_west_in": False, + "out_west_out": False, + "out_in_undefined": False, + "out_in_north": False, + "out_in_east": False, + "out_in_south": False, + "out_in_west": False, + "out_in_in": False, + "out_in_out": False, + "out_out_undefined": False, + "out_out_north": False, + "out_out_east": False, + "out_out_south": False, + "out_out_west": False, + "out_out_in": False, + "out_out_out": False} + +# the same story for a left-handed (lh) Cartesian coordinate system (cs) +is_cs_lh_unambiguous = {"undefined_undefined_undefined": False, + "undefined_undefined_north": False, + "undefined_undefined_east": False, + "undefined_undefined_south": False, + "undefined_undefined_west": False, + "undefined_undefined_in": False, + "undefined_undefined_out": False, + "undefined_north_undefined": False, + "undefined_north_north": False, + "undefined_north_east": True, + "undefined_north_south": False, + 
"undefined_north_west": True, + "undefined_north_in": True, + "undefined_north_out": True, + "undefined_east_undefined": False, + "undefined_east_north": True, + "undefined_east_east": False, + "undefined_east_south": True, + "undefined_east_west": False, + "undefined_east_in": True, + "undefined_east_out": True, + "undefined_south_undefined": False, + "undefined_south_north": False, + "undefined_south_east": True, + "undefined_south_south": False, + "undefined_south_west": True, + "undefined_south_in": True, + "undefined_south_out": True, + "undefined_west_undefined": False, + "undefined_west_north": True, + "undefined_west_east": False, + "undefined_west_south": True, + "undefined_west_west": False, + "undefined_west_in": True, + "undefined_west_out": True, + "undefined_in_undefined": False, + "undefined_in_north": True, + "undefined_in_east": True, + "undefined_in_south": True, + "undefined_in_west": True, + "undefined_in_in": False, + "undefined_in_out": False, + "undefined_out_undefined": False, + "undefined_out_north": True, + "undefined_out_east": True, + "undefined_out_south": True, + "undefined_out_west": True, + "undefined_out_in": False, + "undefined_out_out": False, + "north_undefined_undefined": False, + "north_undefined_north": False, + "north_undefined_east": True, + "north_undefined_south": False, + "north_undefined_west": True, + "north_undefined_in": True, + "north_undefined_out": True, + "north_north_undefined": False, + "north_north_north": False, + "north_north_east": False, + "north_north_south": False, + "north_north_west": False, + "north_north_in": False, + "north_north_out": False, + "north_east_undefined": True, + "north_east_north": False, + "north_east_east": False, + "north_east_south": False, + "north_east_west": False, + "north_east_in": False, + "north_east_out": True, + "north_south_undefined": False, + "north_south_north": False, + "north_south_east": False, + "north_south_south": False, + "north_south_west": False, + "north_south_in": False, + "north_south_out": False, + "north_west_undefined": True, + "north_west_north": False, + "north_west_east": False, + "north_west_south": False, + "north_west_west": False, + "north_west_in": True, + "north_west_out": False, + "north_in_undefined": True, + "north_in_north": False, + "north_in_east": True, + "north_in_south": False, + "north_in_west": False, + "north_in_in": False, + "north_in_out": False, + "north_out_undefined": True, + "north_out_north": False, + "north_out_east": False, + "north_out_south": False, + "north_out_west": True, + "north_out_in": False, + "north_out_out": False, + "east_undefined_undefined": False, + "east_undefined_north": True, + "east_undefined_east": False, + "east_undefined_south": True, + "east_undefined_west": False, + "east_undefined_in": True, + "east_undefined_out": True, + "east_north_undefined": True, + "east_north_north": False, + "east_north_east": False, + "east_north_south": False, + "east_north_west": False, + "east_north_in": True, + "east_north_out": False, + "east_east_undefined": False, + "east_east_north": False, + "east_east_east": False, + "east_east_south": False, + "east_east_west": False, + "east_east_in": False, + "east_east_out": False, + "east_south_undefined": True, + "east_south_north": False, + "east_south_east": False, + "east_south_south": False, + "east_south_west": False, + "east_south_in": False, + "east_south_out": True, + "east_west_undefined": False, + "east_west_north": False, + "east_west_east": False, + "east_west_south": False, + 
"east_west_west": False, + "east_west_in": False, + "east_west_out": False, + "east_in_undefined": True, + "east_in_north": False, + "east_in_east": False, + "east_in_south": True, + "east_in_west": False, + "east_in_in": False, + "east_in_out": False, + "east_out_undefined": True, + "east_out_north": True, + "east_out_east": False, + "east_out_south": False, + "east_out_west": False, + "east_out_in": False, + "east_out_out": False, + "south_undefined_undefined": False, + "south_undefined_north": False, + "south_undefined_east": True, + "south_undefined_south": False, + "south_undefined_west": True, + "south_undefined_in": True, + "south_undefined_out": True, + "south_north_undefined": False, + "south_north_north": False, + "south_north_east": False, + "south_north_south": False, + "south_north_west": False, + "south_north_in": False, + "south_north_out": False, + "south_east_undefined": True, + "south_east_north": False, + "south_east_east": False, + "south_east_south": False, + "south_east_west": False, + "south_east_in": True, + "south_east_out": False, + "south_south_undefined": False, + "south_south_north": False, + "south_south_east": False, + "south_south_south": False, + "south_south_west": False, + "south_south_in": False, + "south_south_out": False, + "south_west_undefined": True, + "south_west_north": False, + "south_west_east": False, + "south_west_south": False, + "south_west_west": False, + "south_west_in": False, + "south_west_out": True, + "south_in_undefined": True, + "south_in_north": False, + "south_in_east": False, + "south_in_south": False, + "south_in_west": True, + "south_in_in": False, + "south_in_out": False, + "south_out_undefined": True, + "south_out_north": False, + "south_out_east": True, + "south_out_south": False, + "south_out_west": False, + "south_out_in": False, + "south_out_out": False, + "west_undefined_undefined": False, + "west_undefined_north": True, + "west_undefined_east": False, + "west_undefined_south": True, + "west_undefined_west": False, + "west_undefined_in": True, + "west_undefined_out": True, + "west_north_undefined": True, + "west_north_north": False, + "west_north_east": False, + "west_north_south": False, + "west_north_west": False, + "west_north_in": False, + "west_north_out": True, + "west_east_undefined": False, + "west_east_north": False, + "west_east_east": False, + "west_east_south": False, + "west_east_west": False, + "west_east_in": False, + "west_east_out": False, + "west_south_undefined": True, + "west_south_north": False, + "west_south_east": False, + "west_south_south": False, + "west_south_west": False, + "west_south_in": True, + "west_south_out": False, + "west_west_undefined": False, + "west_west_north": False, + "west_west_east": False, + "west_west_south": False, + "west_west_west": False, + "west_west_in": False, + "west_west_out": False, + "west_in_undefined": True, + "west_in_north": True, + "west_in_east": False, + "west_in_south": False, + "west_in_west": False, + "west_in_in": False, + "west_in_out": False, + "west_out_undefined": True, + "west_out_north": False, + "west_out_east": False, + "west_out_south": True, + "west_out_west": False, + "west_out_in": False, + "west_out_out": False, + "in_undefined_undefined": False, + "in_undefined_north": True, + "in_undefined_east": True, + "in_undefined_south": True, + "in_undefined_west": True, + "in_undefined_in": False, + "in_undefined_out": False, + "in_north_undefined": True, + "in_north_north": False, + "in_north_east": False, + "in_north_south": False, + 
"in_north_west": True, + "in_north_in": False, + "in_north_out": False, + "in_east_undefined": True, + "in_east_north": True, + "in_east_east": False, + "in_east_south": False, + "in_east_west": False, + "in_east_in": False, + "in_east_out": False, + "in_south_undefined": True, + "in_south_north": False, + "in_south_east": True, + "in_south_south": False, + "in_south_west": False, + "in_south_in": False, + "in_south_out": False, + "in_west_undefined": True, + "in_west_north": False, + "in_west_east": False, + "in_west_south": True, + "in_west_west": False, + "in_west_in": False, + "in_west_out": False, + "in_in_undefined": False, + "in_in_north": False, + "in_in_east": False, + "in_in_south": False, + "in_in_west": False, + "in_in_in": False, + "in_in_out": False, + "in_out_undefined": False, + "in_out_north": False, + "in_out_east": False, + "in_out_south": False, + "in_out_west": False, + "in_out_in": False, + "in_out_out": False, + "out_undefined_undefined": False, + "out_undefined_north": True, + "out_undefined_east": True, + "out_undefined_south": True, + "out_undefined_west": True, + "out_undefined_in": False, + "out_undefined_out": False, + "out_north_undefined": True, + "out_north_north": False, + "out_north_east": True, + "out_north_south": False, + "out_north_west": False, + "out_north_in": False, + "out_north_out": False, + "out_east_undefined": True, + "out_east_north": False, + "out_east_east": False, + "out_east_south": True, + "out_east_west": False, + "out_east_in": False, + "out_east_out": False, + "out_south_undefined": True, + "out_south_north": False, + "out_south_east": False, + "out_south_south": False, + "out_south_west": True, + "out_south_in": False, + "out_south_out": False, + "out_west_undefined": True, + "out_west_north": True, + "out_west_east": False, + "out_west_south": False, + "out_west_west": False, + "out_west_in": False, + "out_west_out": False, + "out_in_undefined": False, + "out_in_north": False, + "out_in_east": False, + "out_in_south": False, + "out_in_west": False, + "out_in_in": False, + "out_in_out": False, + "out_out_undefined": False, + "out_out_north": False, + "out_out_east": False, + "out_out_south": False, + "out_out_west": False, + "out_out_in": False, + "out_out_out": False} + + +def is_cs_well_defined(handedness, directions): + """Check if the axis directions yield an unambiguous definition right handed""" + keyword = f"{directions[0]}_{directions[1]}_{directions[2]}" + if handedness == "right_handed_cartesian": + if keyword in is_cs_rh_unambiguous: + if is_cs_rh_unambiguous[keyword] is True: + return True + if handedness == "left_handed_cartesian": + if keyword in is_cs_lh_unambiguous: + if is_cs_lh_unambiguous[keyword] is True: + return True + return False diff --git a/pynxtools/dataconverter/readers/em/geometry/msmse_convention.py b/pynxtools/dataconverter/readers/em/geometry/msmse_convention.py new file mode 100644 index 000000000..d3304dacc --- /dev/null +++ b/pynxtools/dataconverter/readers/em/geometry/msmse_convention.py @@ -0,0 +1,51 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Conventions used in the EBSD community + +suggested by D. Rowenhorst et al. in DOI: 10.1088/0965-0393/23/8/083501. """ + +# pylint: disable=no-member,duplicate-code + +msmse_convention = { + "three_dimensional_rotation_handedness": "counter_clockwise", + "rotation_convention": "passive", + "euler_angle_convention": "zxz", + "axis_angle_convention": "rotation_angle_on_interval_zero_to_pi" +} +# the sign convention is mentioned in the paper but left as a parameter +# "sign_convention": "p_minus_one" + + +def is_consistent_with_msmse_convention(dct): + """Checks if a set of conventions is consistent with that paper (see above).""" + # ternary result: consistent, inconsistent, or unclear + req_fields = [ + "three_dimensional_rotation_handedness", + "rotation_convention", + "euler_angle_convention", + "axis_angle_convention"] + for field_name in req_fields: + if (field_name not in dct) or (field_name not in msmse_convention): + return "unclear" + # okay, so either matching or not + for field_name in req_fields: + if dct[field_name] == msmse_convention[field_name]: + continue + return "inconsistent" + return "consistent" diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 3a6dfaf9d..1f67c9aad 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -29,6 +29,9 @@ from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver +from pynxtools.dataconverter.readers.em.geometry.convention_mapper \ + import NxEmConventionMapper + """ from pynxtools.dataconverter.readers.em_om.utils.generic_eln_io \ import NxEmOmGenericElnSchemaParser @@ -111,6 +114,9 @@ def read(self, nxs = NxEmAppDef() nxs.parse(template, entry_id, input_file_names) + conventions = NxEmConventionMapper("", entry_id) + conventions.parse(template) + print("Parse and map pieces of information within files from tech partners...") sub_parser = "nxs_mtex" subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0])
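To illustrate what the convention-mapping step wired in above produces, a hedged sketch; the real code imports variadic_path_to_specific_path from concept_mapper, for which a naive stand-in is used here:

    from pynxtools.dataconverter.readers.em.geometry.geometry import NxEmConventions

    def specialize(nx_path: str, entry_id: int) -> str:
        # naive stand-in for variadic_path_to_specific_path: make "entry*" concrete
        return nx_path.replace("entry*", f"entry{entry_id}")

    template: dict = {}
    for nx_path, default in NxEmConventions.items():
        template[specialize(nx_path, 1)] = default

    # every convention field starts out as "undefined" until parsed from an ELN
    print(template["/ENTRY[entry1]/EM_CONVENTIONS[em_conventions]"
                   "/OBJECT[rotation_conventions]/euler_angle_convention"])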
+# +"""HDF5 base parser to inherit from for tech-partner-specific HDF5 subparsers.""" + +import numpy as np +import os, glob, re, sys +import h5py +import yaml +import json +# from jupyterlab_h5web import H5Web +# import jupyter_capture_output + +from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import IS_GROUP, \ + IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, IS_FIELD_IN_COMPOUND_DATASET, \ + Concept + + +def read_strings_from_dataset(self, obj): + # print(f"type {type(obj)}, np.shape {np.shape(obj)}, obj {obj}") + # if hasattr(obj, "dtype"): + # print(obj.dtype) + if isinstance(obj, np.ndarray): + retval = [] + for entry in obj: + if isinstance(entry, bytes): + retval.append(entry.decode("utf-8")) + elif isinstance(entry, str): + retval.append(entry) + else: + continue + # raise ValueError("Neither bytes nor str inside np.ndarray!") + # specific implementation rule that all lists with a single string + # will be returned in paraprobe as a scalar string + if len(retval) > 1: + return retval + elif len(retval) == 1: + return retval[0] + else: + return None + elif isinstance(obj, bytes): + return obj.decode("utf8") + elif isinstance(obj, str): + return obj + else: + return None + # raise ValueError("Neither np.ndarray, nor bytes, nor str !") + + +class HdfFiveGenericReader: + def __init__(self, file_name: str = ""): + # self.supported_version = VERSION_MANAGEMENT + # self.version = VERSION_MANAGEMENT + # tech_partner the company which designed this format + # schema_name the specific name of the family of schemas supported by this reader + # schema_version the specific version(s) supported by this reader + # writer_name the specific name of the tech_partner's (typically proprietary) software + # with which an instance of a file formatted according to schema_name and schema_version + # was written e.g. 
+class HdfFiveGenericReader: + def __init__(self, file_name: str = ""): + # self.supported_version = VERSION_MANAGEMENT + # self.version = VERSION_MANAGEMENT + # tech_partner the company which designed this format + # schema_name the specific name of the family of schemas supported by this reader + # schema_version the specific version(s) supported by this reader + # writer_name the specific name of the tech_partner's (typically proprietary) software + # with which an instance of a file formatted according to schema_name and schema_version + # was written, e.g. the Oxford Instruments AZtec software in some version may generate + # an instance of a file whose schema belongs to the H5OINA family of HDF5 container formats, + # specifically using version 5 + self.source = None + self.file_name = None + # collection of instance path + self.groups = {} + self.datasets = {} + self.attributes = {} + self.instances = {} + # collection of template + self.template_groups = [] + self.template_datasets = [] + self.template_attributes = [] + self.templates = {} + self.h5r = None + if file_name is not None and file_name != "": + self.file_name = file_name + + def open(self): + if self.h5r is None: + self.h5r = h5py.File(self.file_name, "r") + + def close(self): + if self.h5r is not None: + self.h5r.close() + self.h5r = None + + # def find_node(node_name, node_obj): + # if isinstance(node_obj, h5py.Dataset): + # return (node_name, "is_dataset") + # return (node_name, "is_group") + + def __call__(self, node_name, h5obj): + # only h5py datasets have a dtype attribute, so we can search on this + if isinstance(h5obj, h5py.Dataset): + if node_name in self.datasets.keys(): + return + if not hasattr(h5obj, "dtype"): + raise ValueError(f"hasattr(h5obj, dtype) failed, inspect {node_name} !") + if not (hasattr(h5obj.dtype, "fields") and hasattr(h5obj.dtype, "names")): + raise ValueError(f"hasattr(h5obj.dtype, 'fields') and hasattr(" \ f"h5obj.dtype, 'names') failed, inspect {node_name} !") + shp = np.shape(h5obj) + n_dims = len(shp) + if h5obj.dtype.names is not None: + # h5obj.dtype.names is a tuple of struct variable names, i.e. a compound dataset + if n_dims != 1: + raise ValueError(f"Unknown formatting of an h5py.Dataset, inspect {node_name} !") + self.datasets[node_name] \ = ("IS_COMPOUND_DATASET", type(h5obj), shp, h5obj[0]) + self.instances[node_name] \ = Concept(node_name, None, None, type(h5obj), shp, None, + hdf_type="compound_dataset") + for name in h5obj.dtype.names: + fld = h5obj.fields(name)[()] + self.datasets[f"{node_name}/#{name}"] \ = ("IS_FIELD_IN_COMPOUND_DATASET", fld.dtype, np.shape(fld), fld[0]) + self.instances[f"{node_name}/{name}"] \ = Concept(node_name, None, None, fld.dtype, np.shape(fld), None, + hdf_type="compound_dataset_entry") + else: + # a regular dataset, store a representative example value for its dimensionality + if n_dims == 0 or 0 in shp: + example = h5obj[()] + elif n_dims <= 3: + example = h5obj[(0,) * n_dims] + else: + example = "Inspect in HDF5 file directly!" + self.datasets[node_name] \ = ("IS_REGULAR_DATASET", type(h5obj), shp, example) + self.instances[node_name] \ = Concept(node_name, None, None, type(h5obj), shp, None, + hdf_type="regular_dataset") + else: + if not node_name in self.groups.keys(): + self.groups[node_name] = ("IS_GROUP") + self.instances[node_name] \ = Concept(node_name, None, None, type(h5obj), np.shape(h5obj), None, + hdf_type="group") + # if hasattr(h5obj, 'dtype') and not node_name in self.metadata.keys(): + # self.metadata[node_name] = ["dataset"] + + def get_attribute_data_structure(self, prefix, src_dct): + # trg_dct is self.attributes + for key, val in src_dct.items(): + if not f"{prefix}/@{key}" in self.attributes.keys(): + if isinstance(val, str): + self.attributes[f"{prefix}/@{key}"] \ = ("IS_ATTRIBUTE", type(val), np.shape(val), str, val) + self.instances[f"{prefix}/{key}"] \ = Concept(f"{prefix}/@{key}", None, None, type(val), np.shape(val), None, + hdf_type="attribute") + elif hasattr(val, "dtype"): + self.attributes[f"{prefix}/@{key}"] \ = ("IS_ATTRIBUTE", type(val), np.shape(val), val.dtype, val) + self.instances[f"{prefix}/{key}"] \ = Concept(f"{prefix}/@{key}", None, None, type(val), np.shape(val), None, + hdf_type="attribute") + else: + raise ValueError( + f"Unknown formatting of an attribute, inspect {prefix}/@{key} !") + + def get_content(self): + """Walk recursively through the file to get content.""" + if self.h5r is not None: # if self.file_name is not None: + # with h5py.File(self.file_name, "r") as h5r: + # first step visit all groups and datasets recursively + # get their full path within the HDF5 file + self.h5r.visititems(self) + # second step visit all these and get their attributes + for h5path, h5ifo in self.groups.items(): + self.get_attribute_data_structure(h5path, dict(self.h5r[h5path].attrs)) + for h5path, h5ifo in self.datasets.items(): + if h5path.count("#") == 0: # skip resolved fields in compound data types + self.get_attribute_data_structure(h5path, dict(self.h5r[h5path].attrs)) + + def get_file_format(self, rules): + """Identify which versioned file format self is an instance of.""" + # rules is a dictionary of pairs: first, a templatized path, second, an identifier + # what is a templatized path? take this example from a v4 H5OINA file with SEM/EBSD data + # 1/Data Processing/Analyses/IPF1, IS_GROUP + # 1/Data Processing/Analyses/IPF2, IS_GROUP + # both paths are conceptually instances of the same concept + # */Data Processing/Analyses/IPF* + # where the stars in this templatized path serve as placeholders + # masking different instance ids + # Contextualization: + # HDF5 is a container (file) format like TIFF. + # Therefore, neither the mime type nor the file name suffix can substantiate + # with which format, let alone which version of it, an instance comes formatted. + # Therefore, only the specific content and formatting of an instance can decide, + # e.g. do we talk about an HDF5 file whose content matches the rules + # of an e.g. Oxford Instruments v4 H5OINA file?
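The hv/wd discussion in the surrounding comment block in code form; a sketch of the "assumed best practice" encoding it mentions, with illustrative (not tech-partner) paths and units:

    import h5py

    with h5py.File("example.h5", "w") as h5w:
        # scalar dataset with the unit stored as an attribute
        dst = h5w.create_dataset("/em_lab/ebeam_column/hv", data=15.0)
        dst.attrs["unit"] = "kV"
        dst = h5w.create_dataset("/em_lab/stage_lab/wd", data=12.5)
        dst.attrs["unit"] = "mm"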
+ # the versioning is key to understanding and reading the file: + # tech partners can make changes to I/O routines in their software + # this can result in data ending up formatted differently across file + # instances written over time + # therefore, it is necessary to ensure (before interpreting the file) that + # it matches a certain set of expectations (versioned format) so that the + # information content aka the knowledge, the pieces of information, in that file + # can be logically interpreted correctly + # The existence of libraries and only best practices, but no generally accepted + # rules for how content in container files should be formatted, enables a + # potentially large number of possibilities for how the same piece of information + # is encoded + # Consider the following simple example from electron microscopy with two quantities: + # hv (high_voltage) and wd (working_distance) + # these are two numbers each with a unit category or actual unit instance + # (voltage) and (length) respectively + # in hdf5 one could technically store the same information very differently + # as a dataset instance named "hv" with a scalar number and an attribute + # instance with a scalar string for the unit + # (this is assumed somewhat the best practice) + # however, this is neither required nor assured + # in practice one could do much more, e.g. + # as a group named hv_voltage with an attribute value + # as a compound dataset with two values packed as a struct with pairs of value and string + # first the value for hv followed by its unit, thereafter the value of wd followed by its unit + # also nobody is required to name an HDF5 instance using English because nodes in HDF5 + # end up as links and these can have UTF8 encoding, so in principle even group and dataset names + # can use terms from languages other than English, and one can also use special characters + # there can be typos or synonyms used like hv and high_voltage or voltage + # the key point is that all these representations are allowed when we use HDF5 files + # but for each of these combinations a different piece of code has to be implemented to extract + # and verify these pieces of information when one would like to use these pieces + # for further processing; this observation holds for every serialization of information + # into a file and thus one cannot escape the necessity to define + # a clear set of rules based on which one can decide if some instance is interpretable or + # not; in general we therefore see that there is much more work needed than just to acknowledge + # that one cannot infer the potential relevance of a file for an analysis + # based on its file name ending (mime type, magic cookie) etc. + # although interesting, this is exactly what the magic cookie + # (the initial few bytes at the beginning of the byte stream of a file) + # was originally conceptualized for + pass + + def templatize_instance_name(self, instance): + if isinstance(instance, str): + translation_dict = {} + for i in np.arange(0, 10): + translation_dict[str(i)] = "*" + # print(translation_dict) + return re.sub(r'\*\*+', '*', instance.translate(str.maketrans(translation_dict))) + return None + + def is_instance_name_valid(self, instance): + if isinstance(instance, str): + t = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_ ") + # print(t) + tmp = instance.split("/") + if len(tmp) > 0: + for entry in tmp: + if entry != "": + s = set(entry) + # use only a sub-set of the characters offered by UTF8 and ASCII, + # i.e. even a subset of the basic Latin plane of UCS4 + # print(s) + # is every member of the set s also in the set t of valid characters? + if s.difference(t) == set(): + continue + else: + return False + else: + return False + return True + else: + return False + return False + + def verify_instances(self): + retval = True + for key, ifo in self.instances.items(): + if self.is_instance_name_valid(key) is True: + continue + else: + retval = False + # print(f"raise ValueError: {key} is an invalid instance name!") + print(f"Verification result {retval}") + + def templatize_instances(self): # , dct): + # first step: replace instance_names with stars; this is too naive because an + # instance_name filler8 would then become filler* which it must not! + # but this first step of templatization is useful + # for key, ifo in dct.items(): + # print(f"{key}, {self.templatize_instance_name(key)}, + # {self.is_instance_name_valid(key)}") + for instance, concept in self.instances.items(): + template_name = self.templatize_instance_name(instance) + if template_name not in self.templates.keys(): + self.templates[template_name] = concept # add checks here + + # def get_templatized_groups(self): + # for key, ifo in self.groups.items(): + # template_key = self.templatize_instance_name(key) + # if template_key not in self.template_groups: + # self.template_groups.append(template_key) + # # self.templatize(self.groups) + + # def get_templatized_datasets(self): + # for key, ifo in self.datasets.items(): + # template_key = self.templatize_instance_name(key) + # if template_key not in self.template_datasets: + # self.template_datasets.append(template_key) + # # self.templatize(self.datasets) + + # def get_templatized_attributes(self): + # for key, ifo in self.attributes.items(): + # template_key = self.templatize_instance_name(key) + # if template_key not in self.template_attributes: + # self.template_attributes.append(template_key) + # # self.templatize(self.attributes) + + # def get_templatized(self): + # self.get_templatized_groups() + # self.get_templatized_datasets() + # self.get_templatized_attributes() + + def report_groups(self): + print(f"{self.file_name} contains the following groups:") + for key, ifo in self.groups.items(): + print(f"{key}, {ifo}") + + def report_datasets(self): + print(f"{self.file_name} contains the following datasets:") + for key, ifo in self.datasets.items(): + print(f"{key}, {ifo}") + + def report_attributes(self): + print(f"{self.file_name} contains the following attributes:") + for key, ifo in self.attributes.items(): + print(f"{key}, {ifo}") + + def report_content(self): + self.report_groups() + self.report_datasets() + self.report_attributes() + + def store_report(self, store_instances=False, store_instances_templatized=True, store_templates=False): + if store_instances is True: + print(f"Storing analysis results in {self.file_name[self.file_name.rfind('/')+1:]}." \ f"EbsdHdfFileInstanceNames.txt...") + with open(f"{self.file_name}.EbsdHdfFileInstanceNames.txt", "w") as txt: + for instance_name, concept in self.instances.items(): + txt.write(f"/{instance_name}, hdf: {concept.hdf}, " \ f"type: {concept.dtype}, shape: {concept.shape}\n") + + if store_instances_templatized is True: + print(f"Storing analysis results in {self.file_name[self.file_name.rfind('/')+1:]}" \ f".EbsdHdfFileInstanceNamesTemplatized.txt...") + with open(f"{self.file_name}.EbsdHdfFileInstanceNamesTemplatized.txt", "w") as txt: + for instance_name, concept in self.instances.items(): + txt.write(f"/{instance_name}, hdf: {concept.hdf}\n") + + if store_templates is True: + print(f"Storing analysis results in {self.file_name[self.file_name.rfind('/')+1:]}" \ f".EbsdHdfFileTemplateNames.txt...") + with open(f"{self.file_name}.EbsdHdfFileTemplateNames.txt", "w") as txt: + for template_name, concept in self.templates.items(): + txt.write(f"{template_name}, hdf: {concept.hdf}, "\ f"type: {concept.dtype}, shape: {concept.shape}\n") + + def get_attribute_value(self, h5path): + if self.h5r is not None: + if h5path in self.attributes.keys(): + trg, attrnm = h5path.split("@") + # with (self.file_name, "r") as h5r: + obj = self.h5r[trg].attrs[attrnm] + if isinstance(obj, np.bytes_): + return obj[0].decode("utf8") + else: + return obj + return None + + def get_dataset_value(self, h5path): + if self.h5r is not None: + if h5path in self.datasets.keys(): + if self.datasets[h5path][0] == "IS_REGULAR_DATASET": + # with (self.file_name, "r") as h5r: + obj = self.h5r[h5path] + if isinstance(obj[0], np.bytes_): + return obj[0].decode("utf8") + else: + return obj # [()].decode("utf8") + # implement get the entire compound dataset + if h5path.count("#") == 1: + # with (self.file_name, "r") as h5r: + obj = self.h5r[h5path[0:h5path.rfind("#")]] + return obj.fields(h5path[h5path.rfind("#")+1:])[:] + return None + + def get_value(self, h5path): + """Return tuple of normalized regular ndarray for h5path or None.""" + # h5path with exactly one @ after rfind("/") indicating an attribute + # h5path with exactly one # after rfind("/") indicating a field name in compound type + # most likely h5path names a dataset + if h5path.count("@") == 0: + return self.get_dataset_value(h5path) + if h5path.count("@") == 1: + return self.get_attribute_value(h5path) + # no need to check groups as they
have no value
+        return None
+
+    # def get_version(self):
+    #     for key, val in self.version.items():
+    #         print(f"{key}, {val}")
+# https://stackoverflow.com/questions/31146036/how-do-i-traverse-a-hdf5-file-using-h5py


+def identify_hfive_type(fpath):
+    """Identify if HDF5 file referred to by fpath matches a format with a subparser."""
+    # Like TIFF, HDF5 is a container file format.
+    # Therefore, inspecting the mime type alone is insufficient to infer the schema
+    # with which the content in the HDF5 file is formatted.
+    # Therefore, at least some of the content, and how that content is
+    # formatted, is inspected to make an informed decision about which specific hfive
+    # subparser can be expected to deal at all with the content of the HDF5 file
+    # referred to by fpath.

+    # For the example of EBSD there was once a suggestion made by the academic community
+    # to report EBSD results via HDF5, specifically via H5EBSD (Jackson et al.).
+    # Different tech partners and community projects, though, have implemented these
+    # ideas differently. In effect, there are now multiple HDF5 files circulating
+    # in the EBSD community where the same conceptual information is stored
+    # differently, i.e. under different names.

+    # This function shows an example of how this dilemma can be
+    # solved for six examples that are all HDF5 variants used for "storing EBSD data":
+    # oxford - H5OINA format of Oxford Instruments (comes in different versions)
+    # edax - OIM Analysis based reporting of EDAX/AMETEK (comes in different versions)
+    # apex - APEX based reporting of EDAX/AMETEK (can be considered the newer EDAX reporting)
+    # bruker - Bruker Esprit based reporting, which replaces Bruker's bcf format that
+    #  is notoriously difficult to parse as it uses a commercial library SFS from AidAim
+    # emsoft - HDF5-based reporting of parameters used by Marc De Graef's EMsoft
+    #  dynamical electron diffraction simulation software
+    # hebsd - a variant of Jackson et al.'s original H5EBSD proposal; the example here
+    #  explores content from the community as used by e.g. T. B. Britton's group
+    hdf = HdfFiveOinaReader(f"{fpath}")
+    if hdf.supported is True:
+        return "oxford"
+    hdf = HdfFiveEdaxOimAnalysisReader(f"{fpath}")
+    if hdf.supported is True:
+        return "edax"
+    hdf = HdfFiveEdaxApexReader(f"{fpath}")
+    if hdf.supported is True:
+        return "apex"
+    hdf = HdfFiveBrukerEspritReader(f"{fpath}")
+    if hdf.supported is True:
+        return "bruker"
+    hdf = HdfFiveEmSoftReader(f"{fpath}")
+    if hdf.supported is True:
+        return "emsoft"
+    hdf = HdfFiveCommunityReader(f"{fpath}")
+    if hdf.supported is True:
+        return "hebsd"
+    return None
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
new file mode 100644
index 000000000..b4f5abb13
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -0,0 +1,68 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# +"""(Sub-)parser mapping concepts and content from EDAX/AMETEK *.edaxh5 (APEX) files on NXem.""" + +import numpy as np +import h5py +from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader + + +class HdfFiveEdaxApexReader(HdfFiveGenericReader): + """Read APEX edaxh5""" + def __init__(self, file_name: str = ""): + super().__init__(file_name) + # this specialized reader implements reading capabilities for the following formats + self.supported_version = {} + self.version = {} + self.supported_version["tech_partner"] = ["EDAX, LLC"] + self.supported_version["schema_name"] = ["EDAXH5"] + self.supported_version["schema_version"] = ["2.5.1001.0001"] + self.supported_version["writer_name"] = ["APEX"] + self.supported_version["writer_version"] = ["2.5.1001.0001"] + self.supported = True + # check if instance to process matches any of these constraints + h5r = h5py.File(self.file_name, "r") + # parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists + # so much about interoperability + # but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats" + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") + self.supported = False + else: + self.supported = False + if "/Version" in h5r: + self.version["schema_version"] \ + = super().read_strings_from_dataset(h5r["/Version"][()]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + self.supported = False + else: + self.supported = False + h5r.close() + + if self.supported is True: + # print(f"Reading {self.file_name} is supported") + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + self.version["writer_version"] = self.supported_version["writer_version"] + # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") + # else: + # print(f"Reading {self.file_name} is not supported!") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py new file mode 100644 index 000000000..34c05902b --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -0,0 +1,65 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +"""(Sub-)parser mapping concepts and content from Bruker *.h5 files on NXem.""" + +import numpy as np +import h5py +from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader + + +class HdfFiveBrukerEspritReader(HdfFiveGenericReader): + """Read Bruker Esprit H5""" + def __init__(self, file_name: str = ""): + super().__init__(file_name) + # this specialized reader implements reading capabilities for the following formats + self.supported_version = {} + self.version = {} + self.supported_version["tech_partner"] = ["Bruker Nano"] + self.supported_version["schema_name"] = ["H5"] + self.supported_version["schema_version"] = ["Esprit 2.X"] + self.supported_version["writer_name"] = [] + self.supported_version["writer_version"] = ["Esprit 2.X"] + self.supported = True + # check if instance to process matches any of these constraints + h5r = h5py.File(self.file_name, "r") + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") + self.supported = False + else: + self.supported = False + if "/Version" in h5r: + self.version["schema_version"] \ + = super().read_strings_from_dataset(h5r["/Version"][()]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + self.supported = False + else: + self.supported = False + h5r.close() + + if self.supported is True: + # print(f"Reading {self.file_name} is supported") + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + self.version["writer_version"] = self.supported_version["writer_version"] + # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") + # else: + # print(f"Reading {self.file_name} is not supported!") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py new file mode 100644 index 000000000..55e8714c0 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py @@ -0,0 +1,86 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
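The Bruker reader above, like the EDAX and Oxford readers in this series, follows one fingerprinting pattern: read /Manufacturer and /Version and test them against lists of supported values. A minimal standalone sketch of that pattern (file name and value lists are hypothetical, and scalar strings are assumed):

import h5py


def decode_if_bytes(obj):
    # assumes the common scalar cases (bytes or str); array-valued strings
    # need the richer read_strings_from_dataset helper shown later in this series
    return obj.decode("utf-8") if isinstance(obj, bytes) else str(obj)


def is_supported(file_name: str, partners: list, versions: list) -> bool:
    """Fingerprint a candidate file from its /Manufacturer and /Version datasets."""
    with h5py.File(file_name, "r") as h5r:
        if "/Manufacturer" not in h5r or "/Version" not in h5r:
            return False
        partner = decode_if_bytes(h5r["/Manufacturer"][()])
        version = decode_if_bytes(h5r["/Version"][()])
    return partner in partners and version in versions


# hypothetical example mirroring the value lists of HdfFiveBrukerEspritReader above
# print(is_supported("example.h5", ["Bruker Nano"], ["Esprit 2.X"]))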
+# +"""Constants and utilities used when parsing concepts from HDF5 files.""" + +IS_GROUP = 0 +IS_REGULAR_DATASET = 1 +IS_COMPOUND_DATASET = 2 +IS_FIELD_IN_COMPOUND_DATASET = 3 +IS_ATTRIBUTE = 4 +VERSION_MANAGEMENT = {"tech_partner": [], + "schema_name": [], "schema_version": [], + "writer_name": [], "writer_version": []} + + +class Concept(): + def __init__(self, instance_name=None, + concept_name=None, value=None, dtype=None, + shape=None, unit_info=None, **kwargs): + if instance_name is not None: + if isinstance(instance_name, str): + if len(instance_name) > 0: + self.name = instance_name + else: + raise ValueError("instance_name must not be empty!") + else: + raise ValueError("instance_name has to be a string or None!") + else: + self.name = None + if concept_name is not None: + if isinstance(concept_name, str): + if len(concept_name) > 0: + self.concept = concept_name + else: + raise ValueError("concept_name must not be empty!") + else: + raise ValueError("concept_name has to be a string or None!") + else: + self.concept = None + self.value = value + self.dtype = dtype + self.shape = shape + if unit_info is not None: + # unit existence, unit category, or specific unit statement + if isinstance(unit_info, str): + if len(unit_info) > 0: + # testing against pint + self.unit = unit_info + else: + raise ValueError("unit_info must not be empty!") + else: + raise ValueError("unit_info has to be a string or None!") + else: + self.unit = None + if "hdf_type" in kwargs.keys(): + if kwargs["hdf_type"] is not None: + if isinstance(kwargs["hdf_type"], str): + if kwargs["hdf_type"] in ["group", + "regular_dataset", + "compound_dataset", + "compound_dataset_entry", + "attribute"]: + self.hdf = kwargs["hdf_type"] + + def report(self): + members = vars(self) + for key, val in members.items(): + print(f"{key}, type: {type(val)}, value: {val}") + +# test = Concept("1/@Test", "*/@Test", 1, type(1), np.shape(1), +# "UNITLESS", hdf_type="group") +# test.report() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py new file mode 100644 index 000000000..3c82ecc08 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -0,0 +1,67 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
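A usage sketch for the Concept class defined in hfive_concept.py above, mirroring the commented-out self-test at the end of that module (the instance and concept names are hypothetical):

import numpy as np

from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import Concept

value = 1
test = Concept("1/@Test", "*/@Test", value, type(value), np.shape(value),
               "UNITLESS", hdf_type="attribute")
test.report()  # prints one "key, type: ..., value: ..." line per member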
+# +"""(Sub-)parser mapping concepts and content from community *.h5/*.h5ebsd files on NXem.""" + +import numpy as np +import h5py +from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader + + +class HdfFiveCommunityReader(HdfFiveGenericReader): + """Read modified H5EBSD (likely from Britton group)""" + def __init__(self, file_name: str = ""): + super().__init__(file_name) + # this specialized reader implements reading capabilities for the following formats + self.supported_version = {} + self.version = {} + self.supported_version["tech_partner"] = ["xcdskd"] + self.supported_version["schema_name"] = ["H5EBSD"] + self.supported_version["schema_version"] = ["0.1"] + self.supported_version["writer_name"] = ["not standardized"] + self.supported_version["writer_version"] = ["0.1"] + self.supported = True + # check if instance to process matches any of these constraints + h5r = h5py.File(self.file_name, "r") + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) + # print(f"britton {self.version['tech_partner']}") + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") + self.supported = False + else: + self.supported = False + if "/Version" in h5r: + self.version["schema_version"] \ + = super().read_strings_from_dataset(h5r["/Version"][()]) + # print(f"britton {self.version['schema_version']}") + if self.version["schema_version"] not in self.supported_version["schema_version"]: + # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + self.supported = False + else: + self.supported = False + h5r.close() + + if self.supported is True: + # print(f"Reading {self.file_name} is supported") + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + self.version["writer_version"] = self.supported_version["writer_version"] + # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") + # else: + # print(f"Reading {self.file_name} is not supported!") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py new file mode 100644 index 000000000..b4f3443a5 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -0,0 +1,70 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
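A sketch of how a caller could dispatch on the keyword returned by identify_hfive_type (defined earlier in this series and assumed to be in scope here); the dispatch table is hypothetical and, for brevity, wires up only two of the six readers:

from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOinaReader
from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader

# hypothetical dispatch table from format keyword to reader class
READER_CLASSES = {"oxford": HdfFiveOinaReader,
                  "edax": HdfFiveEdaxOimAnalysisReader}


def get_reader_for(fpath: str):
    """Return an instantiated subparser for fpath, or None if no format matched."""
    keyword = identify_hfive_type(fpath)  # e.g. "oxford", "edax", ..., or None
    reader_class = READER_CLASSES.get(keyword)
    return reader_class(fpath) if reader_class is not None else None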
+# +"""(Sub-)parser mapping concepts and content from EDAX/AMETEK *.oh5/*.h5 (OIM Analysis) files on NXem.""" + +import numpy as np +import h5py +from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader + + +class HdfFiveEdaxOimAnalysisReader(HdfFiveGenericReader): + """Read EDAX (O)H5""" + def __init__(self, file_name: str = ""): + super().__init__(file_name) + # this specialized reader implements reading capabilities for the following formats + self.supported_version = {} + self.version = {} + self.supported_version["tech_partner"] = ["EDAX"] + self.supported_version["schema_name"] = ["H5"] + self.supported_version["schema_version"] \ + = ["OIM Analysis 8.6.0050 x64 [18 Oct 2021]", "OIM Analysis 8.5.1002 x64 [07-17-20]"] + self.supported_version["writer_name"] = ["OIM Analysis"] + self.supported_version["writer_version"] \ + = ["OIM Analysis 8.6.0050 x64 [18 Oct 2021]", "OIM Analysis 8.5.1002 x64 [07-17-20]"] + self.supported = True + # check if instance to process matches any of these constraints + h5r = h5py.File(self.file_name, "r") + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) + # print(self.version["tech_partner"]) + # for 8.6.0050 but for 8.5.1002 it is a matrix, this is because how strings end up in HDF5 allowed for so much flexibility! + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") + self.supported = False + else: + self.supported = False + if "/Version" in h5r: + self.version["schema_version"] \ + = super().read_strings_from_dataset(h5r["/Version"][()]) + # print(self.version["schema_version"]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + self.supported = False + else: + self.supported = False + h5r.close() + + if self.supported is True: + # print(f"Reading {self.file_name} is supported") + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + self.version["writer_version"] = self.supported_version["writer_version"] + # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") + # else: + # print(f"Reading {self.file_name} is not supported!") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py new file mode 100644 index 000000000..4ec2b3695 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py @@ -0,0 +1,50 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
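The comment in the EDAX reader above (a scalar string in one OIM Analysis version, a matrix in another) is the reason the shared read_strings_from_dataset helper exists; a minimal standalone variant of that normalization, for illustration only:

import numpy as np


def normalize_string_dataset(obj):
    """Collapse scalar bytes/str and array-shaped string datasets to plain str values."""
    if isinstance(obj, np.ndarray):
        values = [entry.decode("utf-8") if isinstance(entry, bytes) else entry
                  for entry in obj.flatten() if isinstance(entry, (bytes, str))]
        if len(values) == 1:
            return values[0]  # single-entry arrays collapse to a scalar string
        return values if len(values) > 0 else None
    if isinstance(obj, bytes):
        return obj.decode("utf-8")
    return obj if isinstance(obj, str) else None


# print(normalize_string_dataset(np.asarray([[b"EDAX"]])))  # -> "EDAX"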
+#
+"""(Sub-)parser mapping concepts and content from Marc De Graef's EMsoft *.h5 files on NXem."""
+
+import numpy as np
+import h5py
+from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader
+
+
+class HdfFiveEmSoftReader(HdfFiveGenericReader):
+    """Read EMsoft H5 (Marc De Graef, Carnegie Mellon University)"""
+    def __init__(self, file_name: str = ""):
+        super().__init__(file_name)
+        # this specialized reader implements reading capabilities for the following formats
+        self.supported_version = {}
+        self.version = {}
+        self.supported_version["tech_partner"] = ["EMsoft"]
+        self.supported_version["schema_name"] = ["EMsoft"]
+        self.supported_version["schema_version"] = ["EMsoft"]
+        self.supported_version["writer_name"] = ["EMsoft"]
+        self.supported_version["writer_version"] = ["EMsoft"]
+        self.supported = True
+        # check if instance to process matches any of these constraints
+        h5r = h5py.File(self.file_name, "r")
+        required_groups = ["CrystalData", "EMData", "EMheader", "NMLfiles", "NMLparameters"]
+        for required_group in required_groups:
+            if f"/{required_group}" not in h5r:
+                self.supported = False
+        h5r.close()
+        if self.supported is True:
+            # print(f"Reading {self.file_name} is supported")
+            self.version = self.supported_version.copy()
+            # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}")
+        # else:
+        #     print(f"Reading {self.file_name} is not supported!")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py
new file mode 100644
index 000000000..f2080b6e5
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py
@@ -0,0 +1,271 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
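The EMsoft reader above fingerprints files by the presence of required groups rather than by /Manufacturer and /Version values. A minimal standalone sketch of that check (the file name is hypothetical):

import h5py


def has_required_groups(file_name: str, required: list) -> bool:
    """True if every group named in required exists at the file root."""
    with h5py.File(file_name, "r") as h5r:
        return all(f"/{grp}" in h5r for grp in required)


# the EMsoft reader above tests for exactly these five groups
# print(has_required_groups("example.h5",
#                           ["CrystalData", "EMData", "EMheader", "NMLfiles", "NMLparameters"]))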
+# +"""(Sub-)parser mapping concepts and content from Oxford Instruments *.h5oina files on NXem.""" + +import os +from typing import Dict, Any, List + +import numpy as np +import h5py + +# import imageio.v3 as iio +from PIL import Image as pil + +import diffsims +import orix +from diffpy.structure import Lattice, Structure +from orix import plot +from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList +from orix.quaternion import Rotation +from orix.vector import Vector3d + +import matplotlib.pyplot as plt + +from pynxtools.dataconverter.readers.em.subparsers.hfive import \ + HdfFiveGenericReader, read_strings_from_dataset +from pynxtools.dataconverter.readers.em.subparsers.pyxem_processor import PyxemProcessor + + +class HdfFiveOinaReader(HdfFiveGenericReader): + """Read h5oina""" + def __init__(self, file_name: str = ""): + super().__init__(file_name) + # this specialized reader implements reading capabilities for the following formats + self.supported_version = {} + self.version = {} + self.supported_version["tech_partner"] = ["Oxford Instruments"] + self.supported_version["schema_name"] = ["H5OINA"] + self.supported_version["schema_version"] = ["2.0", "3.0", "4.0", "5.0"] + self.supported_version["writer_name"] = ["AZTec"] + self.supported_version["writer_version"] \ + = ["4.4.7495.1", "5.0.7643.1", "5.1.7829.1", "6.0.8014.1", "6.0.8196.1"] + self.supported = True + # check if instance matches all constraints to qualify as that supported h5oina + h5r = h5py.File(self.file_name, "r") + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.version['tech_partner']} !") + self.supported = False + else: + self.supported = False + # only because we know (thanks to Philippe Pinard who wrote the H5OINA writer) that different + # writer versions should implement the different HDF version correctly we can lift the + # constraint on the writer_version for which we had examples available + if "/Software Version" in h5r: + self.version["writer_version"] \ + = super().read_strings_from_dataset(h5r["/Software Version"][()]) + if self.version["writer_version"] not in self.supported_version["writer_version"]: + # print(f"{self.version['writer_version']} is not any of {self.supported_version['writer_version']} !") + self.supported = False + else: + self.supported = False + if "/Format Version" in h5r: + self.version["schema_version"] \ + = super().read_strings_from_dataset(h5r["/Format Version"][()]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + self.supported = False + else: + self.supported = False + h5r.close() + + if self.supported is True: + # print(f"Reading {self.file_name} is supported") + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") + # else: + # print(f"Reading {self.file_name} is not supported!") + + def parse(self, template: dict, entry_id=1) -> dict: + """Parse NeXus-relevant (meta)data from an H5OINA file.""" + print(f"Parsing with sub-parser 
{__class__.__name__}, " \
+              f"file: {self.file_name}, entry_id: {entry_id}")
+        # find how many slices there are
+        with h5py.File(f"{self.file_name}", "r") as h5r:
+            # only groups named by a non-negative integer are slices; filtering first
+            # also keeps sorted(..., key=int) from failing on names like "Manufacturer"
+            entries = sorted([entry for entry in h5r["/"] if entry.isdigit()], key=int)
+            for entry in entries:
+                if entry == "1":
+                    self.slice = {}
+                    self.parse_and_normalize_slice(h5r, int(entry))
+        # at this point all Oxford jargon is ironed out and the
+        # call is the same irrespective of the tech partner
+        # that was used to take the orientation maps
+        pyx = PyxemProcessor(entry_id)
+        pyx.process_roi_overview(self.slice, template)
+        pyx.process_roi_xmap(self.slice, template)
+        pyx.process_roi_phases(template)
+        pyx.process_roi_inverse_pole_figures(template)
+        return template
+
+    def parse_and_normalize_slice(self, fp, slice_id: int):
+        """Read and normalize away Oxford-specific formatting of data in specific slice."""
+        self.parse_and_normalize_slice_ebsd_data(fp, slice_id)
+        self.parse_and_normalize_slice_ebsd_header(fp, slice_id)
+
+    def parse_and_normalize_slice_ebsd_data(self, fp, slice_id: int):
+        # https://github.com/oinanoanalysis/h5oina/blob/master/H5OINAFile.md
+        group_name = f"/{slice_id}/EBSD/Data"
+        self.slice["slice_id"] = slice_id
+        print(f"Parsing {group_name}, {self.slice['slice_id']}")
+        # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1).
+        if f"{group_name}/Euler" in fp:
+            is_degrees = True
+            if read_strings_from_dataset(fp[f"{group_name}/Euler"].attrs["Unit"]) == "rad":
+                is_degrees = False
+            self.slice["rotation"] = Rotation.from_euler(euler=fp[f"{group_name}/Euler"],
+                                                         direction='lab2crystal',
+                                                         degrees=is_degrees)
+        else:
+            raise ValueError("Unable to parse Euler !")
+
+        # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed
+        # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
+        if f"{group_name}/Phase" in fp:
+            self.slice["phase_id"] = np.asarray(fp[f"{group_name}/Phase"], np.int32)
+        else:
+            raise ValueError("Unable to parse Phase !")
+
+        # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner)
+        if f"{group_name}/X" in fp:
+            self.slice["scan_point_x"] = np.asarray(fp[f"{group_name}/X"], np.float32)
+        else:
+            raise ValueError("Unable to parse pixel position X !")
+
+        # Y, no, H5T_NATIVE_FLOAT, (size, 1), Y position of each pixel in micrometers (origin: top left corner)
+        if f"{group_name}/Y" in fp:
+            self.slice["scan_point_y"] = np.asarray(fp[f"{group_name}/Y"], np.float32)
+            # TODO::inconsistent float vs f32
+        else:
+            raise ValueError("Unable to parse pixel position Y !")
+
+        # Band Contrast, no, H5T_NATIVE_INT32, (size, 1)
+        if f"{group_name}/Band Contrast" in fp:
+            self.slice["band_contrast"] = np.asarray(fp[f"{group_name}/Band Contrast"], np.uint8)
+            # TODO::inconsistent int32 vs uint8
+        else:
+            raise ValueError("Unable to parse Band Contrast !")
+
+        # TODO::processed patterns
+
+    def parse_and_normalize_slice_ebsd_header(self, fp, slice_id: int):
+        """Parse EBSD header section for specific slice."""
+        group_name = f"/{slice_id}/EBSD/Header"
+        # Phases, yes, Contains a subgroup for each phase where the name of each subgroup is the index of the phase starting at 1.
+        if f"{group_name}/Phases" in fp:
+            phase_ids = sorted(list(fp[f"{group_name}/Phases"]), key=int)
+            self.slice["phase"] = []
+            self.slice["space_group"] = []
+            self.slice["phases"] = {}
+            for phase_id in phase_ids:
+                if phase_id.isdigit() is True:
+                    self.slice["phases"][int(phase_id)] = {}
+                    sub_group_name = f"{group_name}/Phases/{phase_id}"
+                    # Phase Name, yes, H5T_STRING, (1, 1)
+                    if f"{sub_group_name}/Phase Name" in fp:
+                        phase_name = read_strings_from_dataset(fp[f"{sub_group_name}/Phase Name"][()])
+                        self.slice["phases"][int(phase_id)]["phase_name"] = phase_name
+                    else:
+                        raise ValueError("Unable to parse Phase Name !")
+
+                    # Reference, yes, H5T_STRING, (1, 1), Changed in version 2.0 to mandatory
+                    if f"{sub_group_name}/Reference" in fp:
+                        self.slice["phases"][int(phase_id)]["reference"] \
+                            = read_strings_from_dataset(fp[f"{sub_group_name}/Reference"][()])
+                    else:
+                        raise ValueError("Unable to parse Reference !")
+
+                    # Lattice Angles, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for the alpha, beta and gamma angles in radians
+                    if f"{sub_group_name}/Lattice Angles" in fp:
+                        if read_strings_from_dataset(fp[f"{sub_group_name}/Lattice Angles"].attrs["Unit"]) != "rad":
+                            raise ValueError("Unexpected Lattice Angles Unit attribute !")
+                        # convert from radians (H5OINA) to degrees (diffpy.structure Lattice)
+                        angles = np.asarray(fp[f"{sub_group_name}/Lattice Angles"][:].flatten()) / np.pi * 180.
+                        self.slice["phases"][int(phase_id)]["alpha_beta_gamma"] \
+                            = angles
+                    else:
+                        raise ValueError("Unable to parse Lattice Angles !")
+
+                    # Lattice Dimensions, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for a, b and c dimensions in Angstroms
+                    if f"{sub_group_name}/Lattice Dimensions" in fp:
+                        if read_strings_from_dataset(fp[f"{sub_group_name}/Lattice Dimensions"].attrs["Unit"]) != "angstrom":
+                            raise ValueError("Unexpected Lattice Dimensions Unit attribute !")
+                        # convert from angstrom (H5OINA) to nanometer
+                        a_b_c = np.asarray(fp[f"{sub_group_name}/Lattice Dimensions"][:].flatten()) * 0.1
+                        self.slice["phases"][int(phase_id)]["a_b_c"] = a_b_c
+                    else:
+                        raise ValueError("Unable to parse Lattice Dimensions !")
+
+                    # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index.
+                    # The attribute Symbol contains the string representation, for example P m -3 m.
+                    if f"{sub_group_name}/Space Group" in fp:
+                        space_group = int(fp[f"{sub_group_name}/Space Group"][0])
+                        self.slice["phases"][int(phase_id)]["space_group"] = space_group
+                    else:
+                        raise ValueError("Unable to parse Space Group !")
+                    if len(self.slice["space_group"]) > 0:
+                        self.slice["space_group"].append(space_group)
+                    else:
+                        self.slice["space_group"] = [space_group]
+
+                    if len(self.slice["phase"]) > 0:
+                        self.slice["phase"].append(
+                            Structure(title=phase_name, atoms=None,
+                                      lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
+                                                      angles[0], angles[1], angles[2])))
+                    else:
+                        self.slice["phase"] \
+                            = [Structure(title=phase_name, atoms=None,
+                                         lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
+                                                         angles[0], angles[1], angles[2]))]
+        else:
+            raise ValueError("Unable to parse Phases !")
+
+        # X Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Width in pixels, Line scan: Length in pixels.
+        if f"{group_name}/X Cells" in fp:
+            self.slice["n_x"] = fp[f"{group_name}/X Cells"][0]
+        else:
+            raise ValueError("Unable to parse X Cells !")
+        # Y Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Height in pixels. Line scan: Always set to 1.
+        if f"{group_name}/Y Cells" in fp:
+            self.slice["n_y"] = fp[f"{group_name}/Y Cells"][0]
+        else:
+            raise ValueError("Unable to parse Y Cells !")
+        # X Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along x-axis in micrometers. Line scan: step size along the line scan in micrometers.
+ if f"{group_name}/X Step" in fp: + if read_strings_from_dataset(fp[f"{group_name}/X Step"].attrs["Unit"]) == "um": + self.slice["s_x"] = fp[f"{group_name}/X Step"][0] + self.slice["s_unit"] = "µm" + else: + raise ValueError("Unexpected X Step Unit attribute !") + else: + raise ValueError("Unable to parse X Step !") + # Y Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along y-axis in micrometers. Line scan: Always set to 0. + if f"{group_name}/Y Step" in fp: + if read_strings_from_dataset(fp[f"{group_name}/Y Step"].attrs["Unit"]) == "um": + self.slice["s_y"] = fp[f"{group_name}/Y Step"][0] + else: + raise ValueError("Unexpected Y Step Unit attribute !") + else: + raise ValueError("Unable to parse Y Step !") + # TODO::check that all data in the self.oina are consistent + + for key, val in self.slice.items(): + print(f"{key}, type: {type(val)}, shape: {np.shape(val)}") diff --git a/pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py b/pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py new file mode 100644 index 000000000..e8ee7d8b4 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py @@ -0,0 +1,99 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+"""Process standardized orientation map using pyxem from normalized orientation data."""
+
+import os
+from typing import Dict, Any, List
+
+import numpy as np
+import h5py
+
+# import imageio.v3 as iio
+from PIL import Image as pil
+
+import diffsims
+import orix
+from diffpy.structure import Lattice, Structure
+from orix import plot
+from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
+from orix.quaternion import Rotation
+from orix.vector import Vector3d
+
+import matplotlib.pyplot as plt
+
+from pynxtools.dataconverter.readers.em.utils.hfive_web_constants \
+    import HFIVE_WEB_MAXIMUM_RGB
+
+
+class PyxemProcessor:
+    def __init__(self, entry_id: int):
+        self.entry_id = entry_id
+        self.xmap = None
+        self.xaxis = None
+        self.yaxis = None
+
+    def process_roi_overview(self, inp: dict, template: dict) -> dict:
+        return template
+
+    def process_roi_xmap(self, inp: dict, template: dict) -> dict:
+        """Process standardized IPF orientation map using pyxem from normalized orientation data."""
+        # for NeXus we would like to create a default plot
+        if max(inp["n_x"], inp["n_y"]) < HFIVE_WEB_MAXIMUM_RGB:
+            # can use the map discretization as is
+            coordinates, _ = create_coordinate_arrays(
+                (inp["n_x"], inp["n_y"]), (inp["s_x"], inp["s_y"]))
+            self.xaxis = coordinates["x"]
+            self.yaxis = coordinates["y"]
+            del coordinates
+        # else:
+        #     need to regrid to downsample too large maps
+        #     TODO::implement 1NN-based downsampling approach
+        #     build grid
+        #     tree-based 1NN
+        #     proceed as usual
+
+        pyxem_phase_identifier = inp["phase_id"] \
+            - (np.min(inp["phase_id"]) - (-1))  # for pyxem, non-indexed has to be -1
+        print(np.unique(pyxem_phase_identifier))
+
+        self.xmap = CrystalMap(rotations=inp["rotation"],
+                               x=self.xaxis, y=self.yaxis,
+                               phase_id=pyxem_phase_identifier,
+                               phase_list=PhaseList(space_groups=inp["space_group"],
+                                                    structures=inp["phase"]),
+                               prop={"bc": inp["band_contrast"]},
+                               scan_unit=inp["s_unit"])
+        print(self.xmap)
+        return template
+
+    def process_roi_phases(self, template: dict) -> dict:
+        return template
+
+    def process_roi_inverse_pole_figures(self, template: dict) -> dict:
+        """Parse inverse pole figures (IPF) mappings."""
+        # call process_roi_ipf_map
+        # call process_roi_ipf_color_key
+        return template
+
+    def process_roi_ipf_map(self, identifier, template: dict) -> dict:
+        """Parse and create inverse-pole-figure (IPF) mappings on their color models."""
+        # +1 because for orix not_indexed is -1 and the "first" phase has ID 0 !
+        return template
+
+    def process_roi_ipf_color_key(self, identifier, template: dict) -> dict:
+        """Parse color key renderings of inverse-pole-figure (IPF) mappings."""
+        # +1 because for orix not_indexed is -1 and the "first" phase has ID 0 !
+        return template
diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py
new file mode 100644
index 000000000..8f480dbaa
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py
@@ -0,0 +1,20 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Constants relevant when working with H5Web."""
+
+HFIVE_WEB_MAXIMUM_RGB = 2**14
diff --git a/pynxtools/dataconverter/readers/em/utils/which_string_encoding.py b/pynxtools/dataconverter/readers/em/utils/which_string_encoding.py
new file mode 100644
index 000000000..5767b5749
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/which_string_encoding.py
@@ -0,0 +1,28 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Identify which string encoding is used."""
+
+import sys
+
+
+def get_string_encoding():
+    if sys.maxunicode == 1114111:
+        return "ucs4"
+    if sys.maxunicode == 65535:
+        return "ucs2"
+    return None
diff --git a/pyproject.toml b/pyproject.toml
index ac38b985f..22a7d3899 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,12 +28,12 @@ dependencies = [
     "pandas>=1.3.2",
     "ase>=3.19.0",
     "flatdict>=4.0.1",
-    "hyperspy>=1.7.4",
+    "hyperspy>=1.7.5",
    "ifes_apt_tc_data_modeling>=0.0.9",
    "gitpython>=3.1.24",
    "pytz>=2021.1",
-    "kikuchipy>=0.8.2",
+    "kikuchipy>=0.8.7",
-    "pyxem>=0.14.2",
+    "pyxem>=0.15.1",
    "zipfile37==0.1.3",
    "nionswift==0.16.8",
    "tzlocal<=4.3",

From 9c708dafeb4a9d68d9828bc90a4c357e87f104fc Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Sun, 22 Oct 2023 18:35:41 +0200
Subject: [PATCH 12/84] Refactoring and implementation for normalization
 oxford, bruker, apex and tested

---
 pynxtools/dataconverter/readers/em/reader.py  |  13 +-
 .../readers/em/subparsers/hfive_apex.py       | 237 ++++++++--
 .../em/subparsers/{hfive.py => hfive_base.py} | 444 ++++++------------
 .../readers/em/subparsers/hfive_bruker.py     | 266 +++++++++--
 .../readers/em/subparsers/hfive_ebsd.py       |  64 +--
 .../readers/em/subparsers/hfive_edax.py       |  69 +--
 .../readers/em/subparsers/hfive_emsoft.py     |  40 +-
 .../readers/em/subparsers/hfive_oxford.py     | 344 +++++++-------
 .../readers/em/subparsers/nxs_hfive.py        | 234 +++++++++
 .../readers/em/subparsers/pyxem_processor.py  |  99 ----
 .../readers/em/utils/hfive_utils.py           |  55 +++
 11 files changed, 1152 insertions(+), 713 deletions(-)
 rename pynxtools/dataconverter/readers/em/subparsers/{hfive.py => hfive_base.py} (50%)
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
 delete mode 100644 pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py
 create mode 100644 pynxtools/dataconverter/readers/em/utils/hfive_utils.py

diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
index 1f67c9aad..2d5391417 100644
--- a/pynxtools/dataconverter/readers/em/reader.py
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -27,6 +27,8 @@
 from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser
+from pynxtools.dataconverter.readers.em.subparsers.nxs_hfive import NxEmNxsHfiveSubParser
+
 from pynxtools.dataconverter.readers.em.utils.default_plots
import NxEmDefaultPlotResolver from pynxtools.dataconverter.readers.em.geometry.convention_mapper \ @@ -118,11 +120,16 @@ def read(self, conventions.parse(template) print("Parse and map pieces of information within files from tech partners...") - sub_parser = "nxs_mtex" - subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0]) - subparser.parse(template) + # sub_parser = "nxs_mtex" + # subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0]) + # subparser.parse(template) # add further with resolving cases + # if file_path is an HDF5 will use hfive parser + sub_parser = "nxs_hfive" + subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0]) + subparser.parse(template) + exit(1) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index b4f5abb13..0e15a7eb5 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -19,50 +19,217 @@ import numpy as np import h5py -from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader +from itertools import groupby +# import imageio.v3 as iio +from PIL import Image as pil +import diffsims +import orix +from diffpy.structure import Lattice, Structure +from orix import plot +from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList +from orix.quaternion import Rotation +from orix.vector import Vector3d -class HdfFiveEdaxApexReader(HdfFiveGenericReader): +import matplotlib.pyplot as plt + +from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser +from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset + + +def om_eu(inp): + return inp[0:2] + + +class HdfFiveEdaxApexReader(HdfFiveBaseParser): """Read APEX edaxh5""" - def __init__(self, file_name: str = ""): - super().__init__(file_name) - # this specialized reader implements reading capabilities for the following formats + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp = {} self.supported_version = {} self.version = {} + self.init_support() + self.supported = False + self.check_if_supported() + + def init_support(self): + """Init supported versions.""" self.supported_version["tech_partner"] = ["EDAX, LLC"] self.supported_version["schema_name"] = ["EDAXH5"] self.supported_version["schema_version"] = ["2.5.1001.0001"] self.supported_version["writer_name"] = ["APEX"] self.supported_version["writer_version"] = ["2.5.1001.0001"] - self.supported = True - # check if instance to process matches any of these constraints - h5r = h5py.File(self.file_name, "r") - # parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists - # so much about interoperability - # but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats" - if "/Manufacturer" in h5r: - self.version["tech_partner"] \ - = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) - if self.version["tech_partner"] not in self.supported_version["tech_partner"]: - # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") - self.supported = False + + def check_if_supported(self): + """Check if instance matches all constraints to qualify as 
supported H5OINA""" + self.supported = True # try to falsify + with h5py.File(self.file_path, "r") as h5r: + # parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists + # so much about interoperability + # but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats" + grp_names = list(h5r["/"]) + if len(grp_names) == 1: + if read_strings_from_dataset(h5r[grp_names[0]].attrs["Company"][0]) \ + not in self.supported_version["tech_partner"]: + self.supported = False + if read_strings_from_dataset(h5r[grp_names[0]].attrs["PRODUCT_VERSION"][0]) \ + not in self.supported_version["schema_version"]: + self.supported = False + if self.supported is True: + self.version = self.supported_version.copy() + + def parse_and_normalize(self): + """Read and normalize away EDAX/APEX-specific formatting with an equivalent in NXem.""" + with h5py.File(f"{self.file_path}", "r") as h5r: + cache_id = 0 + grp_nms = list(h5r["/"]) + for grp_nm in grp_nms: + sub_grp_nms = list(h5r[grp_nm]) + for sub_grp_nm in sub_grp_nms: + sub_sub_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}"]) + for sub_sub_grp_nm in sub_sub_grp_nms: + if sub_sub_grp_nm.startswith("Area"): + area_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}"]) + for area_grp_nm in area_grp_nms: + if area_grp_nm.startswith("OIM Map"): + self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" + print(f"Parsing {self.prfx}") + ckey = self.init_named_cache(f"ebsd{cache_id}") + self.parse_and_normalize_group_ebsd_header(h5r, ckey) + self.parse_and_normalize_group_ebsd_phases(h5r, ckey) + self.parse_and_normalize_group_ebsd_data(h5r, ckey) + cache_id += 1 + + def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): + # no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds + if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp: + raise ValueError(f"Unable to parse {self.prfx}/EBSD/ANG/DATA/DATA !") + + grid_type = None + # for a regular tiling of R^2 with perfect hexagons + n_pts = 0 + # their vertical center of mass distance is smaller than the horizontal + # center of mass distance (x cols, y rows) + req_fields = ["Grid Type", + "Step X", "Step Y", + "Number Of Rows", "Number Of Columns"] + for req_field in req_fields: + if f"{self.prfx}/Sample/{req_field}" not in fp: + raise ValueError(f"Unable to parse {self.prfx}/Sample/{req_field} !") + + grid_type = read_strings_from_dataset(fp[f"{self.prfx}/Sample/Grid Type"][()]) + if grid_type != "HexGrid": + raise ValueError(f"Grid Type {grid_type} is currently not supported !") + self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0] + self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron? + self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0] + self.tmp[ckey]["s_y"] = fp[f"{self.prfx}/Sample/Step Y"][0] + self.tmp[ckey]["n_y"] = fp[f"{self.prfx}/Sample/Number Of Rows"][0] + + def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/ANG/HEADER/Phase" + # Phases, contains a subgroup for each phase where the name + # of each subgroup is the index of the phase starting at 1. 
+ if f"{grp_name}" in fp: + phase_ids = sorted(list(fp[f"{grp_name}"]), key=int) + self.tmp[ckey]["phase"] = [] + self.tmp[ckey]["space_group"] = [] + self.tmp[ckey]["phases"] = {} + for phase_id in phase_ids: + if phase_id.isdigit() is True: + self.tmp[ckey]["phases"][int(phase_id)] = {} + sub_grp_name = f"{grp_name}/{phase_id}" + # Name + if f"{sub_grp_name}/Material Name" in fp: + phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Material Name"][0]) + self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name + else: + raise ValueError(f"Unable to parse {sub_grp_name}/Material Name !") + + # Reference not available only Info but this can be empty + self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a" + + req_fields = ["A", "B", "C", "Alpha", "Beta", "Gamma"] + for req_field in req_fields: + if f"{sub_grp_name}/Lattice Constant {req_field}" not in fp: + raise ValueError(f"Unable to parse ../Lattice Constant {req_field} !") + a_b_c = [fp[f"{sub_grp_name}/Lattice Constant A"][0], + fp[f"{sub_grp_name}/Lattice Constant B"][0], + fp[f"{sub_grp_name}/Lattice Constant C"][0]] + angles = [fp[f"{sub_grp_name}/Lattice Constant Alpha"][0], + fp[f"{sub_grp_name}/Lattice Constant Beta"][0], + fp[f"{sub_grp_name}/Lattice Constant Gamma"][0]] + self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ + = np.asarray(a_b_c, np.float32) * 0.1 + self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ + = np.asarray(angles, np.float32) + + # Space Group not stored, only laue group, point group and symmetry + # problematic because mapping is not bijective! + # if you know the space group we know laue and point group and symmetry + # but the opposite direction leaves room for ambiguities + space_group = "n/a" + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + + if len(self.tmp[ckey]["space_group"]) > 0: + self.tmp[ckey]["space_group"].append(space_group) + else: + self.tmp[ckey]["space_group"] = [space_group] + + if len(self.tmp[ckey]["phase"]) > 0: + self.tmp[ckey]["phase"].append( + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) + else: + self.tmp[ckey]["phase"] \ + = [Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] else: - self.supported = False - if "/Version" in h5r: - self.version["schema_version"] \ - = super().read_strings_from_dataset(h5r["/Version"][()]) - if self.version["schema_version"] not in self.supported_version["schema_version"]: - # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") - self.supported = False + raise ValueError(f"Unable to parse {grp_name} !") + + def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/ANG/DATA/DATA" + n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"] + if f"{grp_name}" in fp: + if np.shape(fp[f"{grp_name}"]) != (n_pts,) and n_pts > 0: + raise ValueError(f"Unexpected shape of {grp_name} !") + + dat = fp[f"{grp_name}"] + self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32) + # index of phase, 0 if not indexed + # # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 + self.tmp[ckey]["phase_id"] = np.zeros((n_pts,), np.int32) + self.tmp[ckey]["ci"] = np.zeros((n_pts,), np.float32) + + for i in np.arange(0, n_pts): + # check shape of internal virtual chunked number array + r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 
3))])
+                self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False)
+                self.tmp[ckey]["phase_id"][i] = dat[i][2]
+                self.tmp[ckey]["ci"][i] = dat[i][3]
+
+            # TODO::clarify the conventions for converting the orientation matrix to Euler angles via om_eu !
+            # the orix-based transformation ends up in the positive half space and, with
+            # degrees=False, in radians, but the from_matrix command above might miss one rotation
+
+            # inconsistency: f32 in the file although the specification states float
+            # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
+            #                     direction='lab2crystal',
+            #                     degrees=is_degrees)
+
+            # compute explicit hexagon grid cells center of mass pixel positions
+            # TODO::currently assuming HexGrid
+            self.tmp[ckey]["scan_point_x"] = np.asarray(
+                np.linspace(0, self.tmp[ckey]["n_x"] - 1,
+                            num=self.tmp[ckey]["n_x"],
+                            endpoint=True) * self.tmp[ckey]["s_x"] + 0., np.float32)
+
+            self.tmp[ckey]["scan_point_y"] = np.asarray(
+                np.linspace(0, self.tmp[ckey]["n_y"] - 1,
+                            num=self.tmp[ckey]["n_y"],
+                            endpoint=True) * self.tmp[ckey]["s_y"] + 0., np.float32)
+        else:
+            raise ValueError(f"Unable to parse {grp_name} !")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py
similarity index 50%
rename from pynxtools/dataconverter/readers/em/subparsers/hfive.py
rename to pynxtools/dataconverter/readers/em/subparsers/hfive_base.py
index fceeff135..33a836fb5 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py
@@ -15,54 +15,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 """HDF5 base parser to inherit from for tech-partner-specific HDF5 subparsers."""
-import numpy as np
+# the base parser implements the processing of standardized orientation maps via
+# the pyxem software package from the electron microscopy community
+# specifically, so-called NeXus default plots are generated to add RDMS-relevant
+# information to the NeXus file which supports scientists with judging the potential
+# value of a dataset when working with research data management systems (RDMS)
+# in effect this parser is the partner of the MTex parser for all those file formats
+# which are HDF5-based and which (at the time of working on this example, Q3/Q4 2023)
+# were not supported by MTex
+# by offering this parser we also would like to embrace and acknowledge the efforts
+# of other electron microscopists (like the pyxem team, hyperspy etc.)
and their work +# towards software tools which are complementary to the MTex texture toolbox +# one could have also implemented the HDF5 parsing inside MTex but we leave this as a +# task for the community and instead focus here on showing a more diverse example +# towards more interoperability between the different tools in the community + import os, glob, re, sys +from typing import Dict, Any, List +import numpy as np import h5py -import yaml -import json -# from jupyterlab_h5web import H5Web -# import jupyter_capture_output +import yaml, json +# import imageio.v3 as iio +from PIL import Image as pil -from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import IS_GROUP, \ - IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, IS_FIELD_IN_COMPOUND_DATASET, \ - Concept +from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import \ + IS_GROUP, IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, \ + IS_FIELD_IN_COMPOUND_DATASET, Concept -def read_strings_from_dataset(self, obj): - # print(f"type {type(obj)}, np.shape {np.shape(obj)}, obj {obj}") - # if hasattr(obj, "dtype"): - # print(obj.dtype) - if isinstance(obj, np.ndarray): - retval = [] - for entry in obj: - if isinstance(entry, bytes): - retval.append(entry.decode("utf-8")) - elif isinstance(entry, str): - retval.append(entry) - else: - continue - # raise ValueError("Neither bytes nor str inside np.ndarray!") - # specific implementation rule that all lists with a single string - # will be returned in paraprobe as a scalar string - if len(retval) > 1: - return retval - elif len(retval) == 1: - return retval[0] - else: - return None - elif isinstance(obj, bytes): - return obj.decode("utf8") - elif isinstance(obj, str): - return obj - else: - return None - # raise ValueError("Neither np.ndarray, nor bytes, nor str !") - -class HdfFiveGenericReader: - def __init__(self, file_name: str = ""): +class HdfFiveBaseParser: + def __init__(self, file_path: str = ""): # self.supported_version = VERSION_MANAGEMENT # self.version = VERSION_MANAGEMENT # tech_partner the company which designed this format @@ -73,8 +60,10 @@ def __init__(self, file_name: str = ""): # was written e.g. 
Oxford Instruments AZTec software in some version may generate # an instance of a file whose schema belongs to the H5OINA family of HDF5 container formats # specifically using version 5 + self.prfx = None + self.tmp = {} self.source = None - self.file_name = None + self.file_path = None # collection of instance path self.groups = {} self.datasets = {} @@ -86,23 +75,29 @@ def __init__(self, file_name: str = ""): self.template_attributes = [] self.templates = {} self.h5r = None - if file_name is not None and file_name != "": - self.file_name = file_name + if file_path is not None and file_path != "": + self.file_path = file_path + else: + raise ValueError(f"{__class__.__name__} needs proper instantiation !") + + def init_named_cache(self, ckey: str): + """Init a new cache for normalized EBSD data if not existent.""" + # purpose of the cache is to hold normalized information + if ckey not in self.tmp.keys(): + self.tmp[ckey] = {} + return ckey + else: + raise ValueError(f"Existent named cache {ckey} must not be overwritten !") def open(self): if self.h5r is None: - self.h5r = h5py.File(self.file_name, "r") + self.h5r = h5py.File(self.file_path, "r") def close(self): if self.h5r is not None: self.h5r.close() self.h5r = None - # def find_node(node_name, node_obj): - # if isinstance(node_obj, h5py.Dataset): - # return (node_name, "is_dataset") - # return (node_name, "is_group") - def __call__(self, node_name, h5obj): # only h5py datasets have dtype attribute, so we can search on this if isinstance(h5obj, h5py.Dataset): @@ -139,7 +134,7 @@ def __call__(self, node_name, h5obj): None, hdf_type="compound_dataset_entry") else: - raise LogicError( + raise ValueError( f"Unknown formatting of an h5py.Dataset, inspect {node_name} !") else: # h5obj.dtype.names is a tuple of struct variable names n_dims = len(np.shape(h5obj)) @@ -229,11 +224,11 @@ def __call__(self, node_name, h5obj): None, hdf_type="regular_dataset") else: - raise LogicError( + raise ValueError( f"hasattr(h5obj.dtype, 'fields') and hasattr(" \ f"h5obj.dtype, 'names') failed, inspect {node_name} !") else: - raise LogicError(f"hasattr(h5obj, dtype) failed, inspect {node_name} !") + raise ValueError(f"hasattr(h5obj, dtype) failed, inspect {node_name} !") else: if not node_name in self.groups.keys(): self.groups[node_name] = ("IS_GROUP") @@ -278,13 +273,13 @@ def get_attribute_data_structure(self, prefix, src_dct): None, hdf_type="attribute") else: - raise LogicError( + raise ValueError( f"Unknown formatting of an attribute, inspect {prefix}/@{key} !") def get_content(self): """Walk recursively through the file to get content.""" - if self.h5r is not None: # if self.file_name is not None: - # with h5py.File(self.file_name, "r") as h5r: + if self.h5r is not None: # if self.file_path is not None: + # with h5py.File(self.file_path, "r") as h5r: # first step visit all groups and datasets recursively # get their full path within the HDF5 file self.h5r.visititems(self) @@ -295,170 +290,18 @@ def get_content(self): if h5path.count("#") == 0: # skip resolved fields in compound data types self.get_attribute_data_structure(h5path, dict(self.h5r[h5path].attrs)) - def get_file_format(self, rules): - """Identify which versioned file format self is an instance of.""" - # rules is a dictionary of pairs: first, a templatized path, second, an identifier - # what is a templatized path? 
take this example from an v4 H5OINA file with SEM/ESBD data - # 1/Data Processing/Analyses/IPF1, IS_GROUP - # 1/Data Processing/Analyses/IPF2, IS_GROUP - # both pathes are conceptually instances of the same concept - # */Data Processing/Analyses/IPF* - # where the stars in this templatized path serve as placeholders - # masking different instance ids - # Contextualization: - # HDF5 is a container (file) format lik TIFF. - # Therefore, neither the mime type nor the file name suffix can substantiate - # which not just format but version an instance comes formatted with. - # Therefore, the specific content and formatting of an instance - # e.g. do we talk about an HDF5 file whose content matches the rules - # of an e.g. Oxford Instrument v4 H5OINA file? - # the versioning is key to understand and read - # tech partners can make changes to I/O routines in their software - # this can result in that data end up formatted differently across file - # instances written over time - # therefore, it is necessary to ensure (before interpreting the file) that - # it matches a certain set of expectations (versioned format) so that the - # information content aka the knowledge, the pieces of information, in that file - # can be logically interpreted correctly - # The existence of a libraries and only best practices but not generally accepted - # rules how content in container files should be formatted enables for a - # potentially large number of possibilities how the same piece of information - # is encoded - # Consider the following simple example from electron microscopy with two quantities: - # hv (high_voltage) and wd (working_distance) - # these are two numbers each with a unit category or actual unit instance - # (voltage) and (length) respectively - # in hdf5 one could store the same information very differently technically - # as a dataset instance named "hv" with a scalar number and an attribute - # instance with a scalar string for the unit - # (this is assumed somewhat the best practice) - # however neither this is required nor assured - # in practice one could do much more e.g. 
- # as a group named hv_voltage with an attribute value - # as a compound dataset with two values packed as a struct with pairs of value and string - # first the value for hv followed by its unit, thereafter the value of wd followed by its unit - # also nobody is required to name an HDF5 instance using English because nodes in HDF5 - # end up as links and these can have UTF8 encoding, so in principle even group and dataset names - # can use terms from other languages than English, one can use also special characters - # there can be typos or synonyms used like hv and high_voltage or voltage - # the key point is all these representations are allowed when we use HDF5 files - # but for each of these combinations a different code has to be implemented to extract - # and verify these pieces of information when one would like to use these pieces - # for further processing, this observation holds for every serialization of information - # into a file and thus one cannot escape the necessity that one needs to define - # a clear set of rules based on which one can decide if some instance is interpretable or - # not, in general we therefore see that there is much more work need that just to acknowledge - # that it is clear that one cannot infer the potential relevance of a file for an analysis - # based on its file format ending (mime type, magic cookie) etc - # although interesting this is exactly what the magic cookie - # (the initial few bytes to the beginning of the byte stream of a file) - # were originally conceptualized for - pass - - def templatize_instance_name(self, instance): - if isinstance(instance, str): - translation_dict = {} - for i in np.arange(0, 10): - translation_dict[str(i)] = "*" - # print(translation_dict) - return re.sub('\*\*+', '*', instance.translate(str.maketrans(translation_dict))) - return None - - def is_instance_name_valid(self, instance): - if isinstance(instance, str): - t = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_ ") - # print(t) - tmp = instance.split("/") - if len(tmp) > 0: - for entry in tmp: - if entry != "": - s = set(entry) - # s = set("hallo") # ẟ€ᴩᴪᴪ" - # s = set(instance) - # use only a sub-set of the characters offered by UTF8 and ASCII, - # i.e. even a subset of the Latin basic plane UCS4 - # print(s) - # is every member of the set lng also in the set valid? - if s.difference(t) == set(): - continue - else: - return False - else: - return False - return True - else: - return False - return False - - def verify_instances(self): - retval = True - for key, ifo in self.instances.items(): - if self.is_instance_name_valid(key) is True: - continue - else: - retval = False - # print(f"raise ValueError: {key} is an invalid instance name!") - print(f"Verification result {retval}") - - def templatize_instances(self): # , dct): - # first step replace instance_names with stars, this is too naive because an - # instance_name filler8 would then become filler* which it must not! 
- # but this first step of templatization is useful - # for key, ifo in dct.items(): - # print(f"{key}, {self.templatize_instance_name(key)}, - # {self.is_instance_name_valid(key)}") - for instance, concept in self.instances.items(): - template_name = self.templatize_instance_name(instance) - if template_name not in self.templates.keys(): - self.templates[template_name] = concept # add checks here - - # def get_templatized_groups(self): - # for key, ifo in self.groups.items(): - # template_key = self.templatize_instance_name(key) - # if template_key not in self.template_groups: - # self.template_groups.append(template_key) - # # self.templatize(self.groups) - - # def get_templatized_datasets(self): - # for key, ifo in self.datasets.items(): - # template_key = self.templatize_instance_name(key) - # if template_key not in self.template_datasets: - # self.template_datasets.append(template_key) - # # self.templatize(self.datasets) - - # def get_templatized_attributes(self): - # for key, ifo in self.attributes.items(): - # template_key = self.templatize_instance_name(key) - # if template_key not in self.template_attributes: - # self.template_attributes.append(template_key) - # # self.templatize(self.attributes) - - # def get_templatized(self): - # # print(f"{self.file_name} contains the following template_groups:") - # self.get_templatized_groups() - # # for entry in self.template_groups: - # # print(entry) - # # print(f"{self.file_name} contains the following template_datasets:") - # self.get_templatized_datasets() - # # for entry in self.template_datasets: - # # print(entry) - # # print(f"{self.file_name} contains the following template_attributes:") - # self.get_templatized_attributes() - # # for entry in self.template_attributes: - # # print(entry) - def report_groups(self): - print(f"{self.file_name} contains the following groups:") + print(f"{self.file_path} contains the following groups:") for key, ifo in self.groups.items(): print(f"{key}, {ifo}") def report_datasets(self): - print(f"{self.file_name} contains the following datasets:") + print(f"{self.file_path} contains the following datasets:") for key, ifo in self.datasets.items(): print(f"{key}, {ifo}") def report_attributes(self): - print(f"{self.file_name} contains the following attributes:") + print(f"{self.file_path} contains the following attributes:") for key, ifo in self.attributes.items(): print(f"{key}, {ifo}") @@ -467,47 +310,32 @@ def report_content(self): self.report_datasets() self.report_attributes() - def store_report(self, store_instances=False, store_instances_templatized=True, store_templates=False): + def store_report(self, + store_instances=False, + store_instances_templatized=True, + store_templates=False): if store_instances is True: - print(f"Storing analysis results in {self.file_name[self.file_name.rfind('/')+1:]}." \ + print(f"Storing analysis results in " \ + f"{self.file_path[self.file_path.rfind('/')+1:]}." 
\ f"EbsdHdfFileInstanceNames.txt...") - with open(f"{self.file_name}.EbsdHdfFileInstanceNames.txt", "w") as txt: - # print(f"{self.file_name} contains the following groups:") - # txt.write(f"{self.file_name} was analyzed for the formatting of its content.\n") - # txt.write(f"{self.file_name} contains the following groups:\n") - # for key, ifo in self.groups.items(): - # txt.write(f"{key}, {ifo}\n") - # txt.write(f"{self.file_name} contains the following datasets:\n") - # for key, ifo in self.datasets.items(): - # txt.write(f"{key}, {ifo}\n") - # txt.write(f"{self.file_name} contains the following attributes:\n") - # for key, ifo in self.attributes.items(): - # txt.write(f"{key}, {ifo}\n") + with open(f"{self.file_path}.EbsdHdfFileInstanceNames.txt", "w") as txt: for instance_name, concept in self.instances.items(): txt.write(f"/{instance_name}, hdf: {concept.hdf}, " \ f"type: {concept.dtype}, shape: {concept.shape}\n") if store_instances_templatized is True: - print(f"Storing analysis results in {self.file_name[self.file_name.rfind('/')+1:]}" \ + print(f"Storing analysis results in " \ + f"{self.file_path[self.file_path.rfind('/')+1:]}" \ f".EbsdHdfFileInstanceNamesTemplatized.txt...") - with open(f"{self.file_name}.EbsdHdfFileInstanceNamesTemplatized.txt", "w") as txt: + with open(f"{self.file_path}.EbsdHdfFileInstanceNamesTemplatized.txt", "w") as txt: for instance_name, concept in self.instances.items(): txt.write(f"/{instance_name}, hdf: {concept.hdf}\n") if store_templates is True: - print(f"Storing analysis results in {self.file_name[self.file_name.rfind('/')+1:]}" \ - "f.EbsdHdfFileTemplateNames.txt...") - with open(f"{self.file_name}.EbsdHdfFileTemplateNames.txt", "w") as txt: - # txt.write(f"{self.file_name} was analyzed for the formatting of its content.\n") - # txt.write(f"{self.file_name} contains the following template groups:\n") - # for key in self.template_groups: - # txt.write(f"{key}, IS_GROUP\n") - # txt.write(f"{self.file_name} contains the following template datasets:\n") - # for key in self.template_datasets: - # txt.write(f"{key}, IS_DATASET\n") - # txt.write(f"{self.file_name} contains the following template attributes:\n") - # for key in self.template_attributes: - # txt.write(f"{key}, IS_ATTRIBUTE\n") + print(f"Storing analysis results in "\ + f"{self.file_path[self.file_path.rfind('/')+1:]}" \ + f".EbsdHdfFileTemplateNames.txt...") + with open(f"{self.file_path}.EbsdHdfFileTemplateNames.txt", "w") as txt: for template_name, concept in self.templates.items(): txt.write(f"{template_name}, hdf: {concept.hdf}, "\ f"type: {concept.dtype}, shape: {concept.shape}\n") @@ -516,7 +344,7 @@ def get_attribute_value(self, h5path): if self.h5r is not None: if h5path in self.attributes.keys(): trg, attrnm = h5path.split("@") - # with (self.file_name, "r") as h5r: + # with (self.file_path, "r") as h5r: obj = self.h5r[trg].attrs[attrnm] if isinstance(obj, np.bytes_): return obj[0].decode("utf8") @@ -528,7 +356,7 @@ def get_dataset_value(self, h5path): if self.h5r is not None: if h5path in self.datasets.keys(): if self.datasets[h5path][0] == "IS_REGULAR_DATASET": - # with (self.file_name, "r") as h5r: + # with (self.file_path, "r") as h5r: obj = self.h5r[h5path] if isinstance(obj[0], np.bytes_): return obj[0].decode("utf8") @@ -536,7 +364,7 @@ def get_dataset_value(self, h5path): return obj # [()].decode("utf8") # implement get the entire compound dataset if h5path.count("#") == 1: - # with (self.file_name, "r") as h5r: + # with (self.file_path, "r") as h5r: obj = 
self.h5r[h5path[0:h5path.rfind("#")]]
+                return obj.fields(h5path[h5path.rfind("#")+1:])[:]
         return None
@@ -553,56 +381,86 @@ def get_value(self, h5path):
         # no need to check groups as they have no value
         return None
 
-    # def get_version(self):
-    #     for key, val in self.version.items():
-    #         print(f"{key}, {val}")
+# Like TIFF, HDF5 is a container file format
+# Therefore, inspecting the mime type alone is insufficient to infer the schema
+# with which the content in the HDF5 file is formatted
+# Therefore, at least some of the content and how that content is
+# formatted is inspected to make an informed decision which specific hfive
+# subparser can be expected to deal at all with the content of the HDF5 file
+
+# For the example of EBSD there was once a suggestion made by the academic community
+# to report EBSD results via HDF5, specifically via H5EBSD (Jackson et al.).
+# Different tech partners and community projects though have implemented these
+# ideas differently. In effect, there are now multiple HDF5 files circulating
+# in the EBSD community where the same conceptual information is stored
+# differently, i.e. under different names
+
+# This function shows an example how this dilemma can be
+# solved for six examples that all are HDF5 variants used for "storing EBSD data"
+# oxford - H5OINA format of Oxford Instruments
+# edax - OIM Analysis based reporting of EDAX/AMETEK
+# apex - APEX based reporting of EDAX/AMETEK (can be considered the newer EDAX reporting)
+# bruker - Bruker Esprit based reporting which replaces Bruker's bcf format that
+#     is notoriously difficult to parse as it uses a commercial library SFS from AidAim
+# emsoft - HDF5-based reporting of parameters used by Marc De Graef's EMsoft
+#     dynamic electron diffraction simulation software
+# hebsd - a variant of Jackson's proposal of the original H5EBSD; the example here
+#     explores content from the community as used by e.g. T. B. Britton's group

 # https://stackoverflow.com/questions/31146036/how-do-i-traverse-a-hdf5-file-using-h5py
-
-def identify_hfive_type(fpath):
-    """Identify if HDF5 file referred to by fpath matches a format with a subparser."""
-    # Like TIFF, HDF5 is a container file format
-    # Therefore, inspecting the mime type alone is insufficient to infer the schema
-    # with which the content in the HDF5 file is formatted
-    # Therefore, at least some of the content and how that content is
-    # formatted is inspected to make an informed decision which specific hfive
-    # subparser can be expected to deal at all with the content of the HDF5 file
-    # referred to by fpath
-
-    # For the example of EBSD there was once a suggestion made by the academic community
-    # to report EBSD results via HDF5, specifically via H5EBSD (Jackson et al.).
-    # Different tech partners and community projects though have implemented these
-    # ideas differently. In effect, there are now multiple HDF5 files circulating
-    # in the EBSD community where the same conceptual information is stored
-    # differently i.e. under different names
-
-    # This function shows an example how this dilemna can be
-    # solved for six examples that all are HDF5 variants used for "storing EBSD data"
-    # oxford - H5OINA format of Oxford Instrument (comes in different versions)
-    # edax - OIM Analysis based reporting of EDAX/AMETEK (comes in different versions)
-    # apex - APEX based reporting of EDAX/AMETEK (can be considered the newer EDAX reporting)
-    # bruker - Bruker Esprit based reporting which replaces Bruker's bcf format that
-    #     is notoriously difficult to parse as it uses a commercial library SFS from AidAim
-    # emsort - HDF5-based reporting of parameter used by Marc de Graeff's EMsoft
-    #     dynamic electron diffraction simulation software
-    # hebsd - a variant of Jackson's proposal of the original H5EBSD the example here
-    #     explores from content of the community as used by e.g. T. B. Britton's group
-    hdf = HdfFiveOinaReader(f"{fpath}")
-    if hdf.supported is True:
-        return "oxford"
-    hdf = HdfFiveEdaxOimAnalysisReader(f"{fpath}")
-    if hdf.supported is True:
-        return "edax"
-    hdf = HdfFiveEdaxApexReader(f"{fpath}")
-    if hdf.supported is True:
-        return "apex"
-    hdf = HdfFiveBrukerEspritReader(f"{fpath}")
-    if hdf.supported is True:
-        return "bruker"
-    hdf = HdfFiveEmSoftReader(f"{fpath}")
-    if hdf.supported is True:
-        return "emsoft"
-    hdf = HdfFiveCommunityReader(f"{fpath}")
-    if hdf.supported is True:
-        return "hebsd"
-    return None
+
+# rules is a dictionary of pairs: first, a templatized path, second, an identifier
+# what is a templatized path? take this example from a v4 H5OINA file with SEM/EBSD data
+# 1/Data Processing/Analyses/IPF1, IS_GROUP
+# 1/Data Processing/Analyses/IPF2, IS_GROUP
+# both paths are conceptually instances of the same concept
+# */Data Processing/Analyses/IPF*
+# where the stars in this templatized path serve as placeholders
+# masking different instance ids (see the regex sketch further below)
+# Contextualization:
+# HDF5 is a container (file) format like TIFF.
+# Therefore, neither the mime type nor the file name suffix can substantiate
+# not just which format but also which version an instance comes formatted with.
+# Therefore, the specific content and formatting of an instance have to be inspected:
+# e.g. do we talk about an HDF5 file whose content matches the rules
+# of e.g. an Oxford Instruments v4 H5OINA file?
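+# (a sketch of how such a templatized path can be generated, assuming
+# single-digit instance ids as in the IPF example above:
+# import re
+# re.sub(r"\d", "*", "1/Data Processing/Analyses/IPF1")
+# returns "*/Data Processing/Analyses/IPF*")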
+# the versioning is key to understanding and reading such files
+# tech partners can make changes to the I/O routines in their software
+# this can result in data ending up formatted differently across file
+# instances written over time
+# therefore, it is necessary to ensure (before interpreting the file) that
+# it matches a certain set of expectations (a versioned format) so that the
+# information content, aka the knowledge, the pieces of information, in that file
+# can be logically interpreted correctly
+# The existence of libraries and of best practices only, but of no generally accepted
+# rules for how content in container files should be formatted, allows for a
+# potentially large number of possibilities how the same piece of information
+# is encoded
+# Consider the following simple example from electron microscopy with two quantities:
+# hv (high_voltage) and wd (working_distance)
+# these are two numbers, each with a unit category or actual unit instance,
+# (voltage) and (length) respectively
+# in hdf5 one could store the same information very differently technically:
+# as a dataset instance named "hv" with a scalar number and an attribute
+# instance with a scalar string for the unit
+# (this is assumed somewhat the best practice)
+# however, neither is this required nor assured
+# in practice one could do much more, e.g.
+# as a group named hv_voltage with an attribute value
+# as a compound dataset with two values packed as a struct with pairs of value and string,
+# first the value for hv followed by its unit, thereafter the value of wd followed by its unit
+# also, nobody is required to name an HDF5 instance using English because nodes in HDF5
+# end up as links and these can have UTF8 encoding, so in principle even group and dataset names
+# can use terms from languages other than English, and one can also use special characters
+# there can be typos, or synonyms can be used, like hv and high_voltage or voltage
+# the key point is: all these representations are allowed when we use HDF5 files,
+# but for each of these combinations a different code has to be implemented to extract
+# and verify these pieces of information when one would like to use these pieces
+# for further processing; this observation holds for every serialization of information
+# into a file and thus one cannot escape the necessity to define
+# a clear set of rules based on which one can decide if some instance is interpretable or
+# not; in general we therefore see that much more work is needed than just to acknowledge
+# that one cannot infer the potential relevance of a file for an analysis
+# based on its file format ending (mime type, magic cookie) etc.
+# although, interestingly, this is exactly what the magic cookie
+# (the initial few bytes at the beginning of the byte stream of a file)
+# was originally conceptualized for
\ No newline at end of file
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
index 34c05902b..06d7a1027 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
@@ -17,49 +17,261 @@
 #
 """(Sub-)parser mapping concepts and content from Bruker *.h5 files on NXem."""
 
+import os
+from typing import Dict, Any, List
 import numpy as np
 import h5py
-from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader
+from itertools import groupby
+# import imageio.v3 as iio
+from PIL import Image as pil
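+# diffsims, orix, and diffpy (imported below) provide the crystallographic
+# data structures (lattices, structures, rotations) into which the
+# Bruker-specific content is normalized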
+import diffsims +import orix +from diffpy.structure import Lattice, Structure +from orix import plot +from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList +from orix.quaternion import Rotation +from orix.vector import Vector3d -class HdfFiveBrukerEspritReader(HdfFiveGenericReader): +import matplotlib.pyplot as plt + +from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser +from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset + + +def all_equal(iterable): + g = groupby(iterable) + return next(g, True) and not next(g, False) + +BRUKER_MAP_SPACEGROUP = {"F m#ovl3m": 225} + + +class HdfFiveBrukerEspritReader(HdfFiveBaseParser): """Read Bruker Esprit H5""" - def __init__(self, file_name: str = ""): - super().__init__(file_name) - # this specialized reader implements reading capabilities for the following formats + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp = {} self.supported_version = {} self.version = {} + self.init_support() + self.supported = False + self.check_if_supported() + + def init_support(self): + """Init supported versions.""" self.supported_version["tech_partner"] = ["Bruker Nano"] self.supported_version["schema_name"] = ["H5"] self.supported_version["schema_version"] = ["Esprit 2.X"] self.supported_version["writer_name"] = [] self.supported_version["writer_version"] = ["Esprit 2.X"] - self.supported = True - # check if instance to process matches any of these constraints - h5r = h5py.File(self.file_name, "r") - if "/Manufacturer" in h5r: - self.version["tech_partner"] \ - = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) - if self.version["tech_partner"] not in self.supported_version["tech_partner"]: - # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") + + def check_if_supported(self): + """Check if instance matches all constraints to qualify as supported Bruker H5""" + self.supported = True # try to falsify + with h5py.File(self.file_path, "r") as h5r: + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = read_strings_from_dataset(h5r["/Manufacturer"][()]) + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + self.supported = False + else: self.supported = False - else: - self.supported = False - if "/Version" in h5r: - self.version["schema_version"] \ - = super().read_strings_from_dataset(h5r["/Version"][()]) - if self.version["schema_version"] not in self.supported_version["schema_version"]: - # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + if "/Version" in h5r: + self.version["schema_version"] \ + = read_strings_from_dataset(h5r["/Version"][()]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + self.supported = False + else: self.supported = False - else: - self.supported = False - h5r.close() if self.supported is True: - # print(f"Reading {self.file_name} is supported") self.version["schema_name"] = self.supported_version["schema_name"] self.version["writer_name"] = self.supported_version["writer_name"] self.version["writer_version"] = self.supported_version["writer_version"] - # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") - # else: - # print(f"Reading {self.file_name} is not supported!") + + def 
parse_and_normalize(self): + """Read and normalize away Bruker-specific formatting with an equivalent in NXem.""" + with h5py.File(f"{self.file_path}", "r") as h5r: + cache_id = 0 + grp_names = list(h5r["/"]) + for grp_name in grp_names: + if grp_name not in ["Version", "Manufacturer"]: + self.prfx = f"/{grp_name}" + ckey = self.init_named_cache(f"ebsd{cache_id}") + self.parse_and_normalize_group_ebsd_header(h5r, ckey) + self.parse_and_normalize_group_ebsd_phases(h5r, ckey) + self.parse_and_normalize_group_ebsd_data(h5r, ckey) + # add more information to pass to hfive parser + cache_id += 1 + + def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/Header" + if f"{grp_name}/NCOLS" in fp: # TODO::what is y and x depends on coordinate system + self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()] + else: + raise ValueError(f"Unable to parse {grp_name}/NCOLS !") + + if f"{grp_name}/NROWS" in fp: + self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()] + else: + raise ValueError(f"Unable to parse {grp_name}/NROWS !") + + if f"{grp_name}/SEPixelSizeX" in fp: + self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()] + self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron? + else: + raise ValueError(f"Unable to parse {grp_name}/SEPixelSizeX !") + + if f"{grp_name}/SEPixelSizeY" in fp: + self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()] + else: + raise ValueError(f"Unable to parse {grp_name}/SEPixelSizeY !") + # TODO::check that all data in the self.oina are consistent + + def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/Header" + # Phases, contains a subgroup for each phase where the name + # of each subgroup is the index of the phase starting at 1. + if f"{grp_name}/Phases" in fp: + phase_ids = sorted(list(fp[f"{grp_name}/Phases"]), key=int) + self.tmp[ckey]["phase"] = [] + self.tmp[ckey]["space_group"] = [] + self.tmp[ckey]["phases"] = {} + for phase_id in phase_ids: + if phase_id.isdigit() is True: + self.tmp[ckey]["phases"][int(phase_id)] = {} + sub_grp_name = f"/{grp_name}/Phases/{phase_id}" + # Name + if f"{sub_grp_name}/Name" in fp: + phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Name"][()]) + self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name + else: + raise ValueError(f"Unable to parse {sub_grp_name}/Name !") + + # Reference not available + self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a" + + # LatticeConstants, a, b, c (angstrom) followed by alpha, beta and gamma angles in degree + if f"{sub_grp_name}/LatticeConstants" in fp: + values = np.asarray(fp[f"{sub_grp_name}/LatticeConstants"][:].flatten()) + a_b_c = values[0:3] + angles = values[3:6] + self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ + = a_b_c * 0.1 + self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ + = angles + else: + raise ValueError(f"Unable to parse {sub_grp_name}/LatticeConstants !") + + # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. + # The attribute Symbol contains the string representation, for example P m -3 m. 
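+                    # Bruker writes the symbol in a non-standard way, e.g. "F m#ovl3m"
+                    # for the Hermann-Mauguin symbol F m -3 m (space group 225), which is
+                    # why the BRUKER_MAP_SPACEGROUP lookup table above is used to map
+                    # the symbol on the space group number, e.g.
+                    # BRUKER_MAP_SPACEGROUP["F m#ovl3m"] yields 225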
+ if f"{sub_grp_name}/SpaceGroup" in fp: + spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()]) + if spc_grp in BRUKER_MAP_SPACEGROUP.keys(): + space_group = BRUKER_MAP_SPACEGROUP[spc_grp] + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + else: + raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !") + else: + raise ValueError(f"Unable to parse {sub_grp_name}/SpaceGroup !") + # formatting is a nightmare F m#ovl3m for F m 3bar m... + if len(self.tmp[ckey]["space_group"]) > 0: + self.tmp[ckey]["space_group"].append(space_group) + else: + self.tmp[ckey]["space_group"] = [space_group] + + if len(self.tmp[ckey]["phase"]) > 0: + self.tmp[ckey]["phase"].append( + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) + else: + self.tmp[ckey]["phase"] \ + = [Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] + else: + raise ValueError(f"Unable to parse {grp_name}/Phases !") + + def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): + # no official documentation yet from Bruker but seems inspired by H5EBSD + grp_name = f"{self.prfx}/EBSD/Data" + print(f"Parsing {grp_name}") + # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1). + n_pts = 0 + if f"{grp_name}/phi1" in fp and f"{grp_name}/PHI" in fp and f"{grp_name}/phi2" in fp: + n_pts = (np.shape(fp[f"{grp_name}/phi1"][:])[0], + np.shape(fp[f"{grp_name}/PHI"][:])[0], + np.shape(fp[f"{grp_name}/phi2"][:])[0]) + if all_equal(n_pts) is True and n_pts[0] > 0: + self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32) + column_id = 0 + for angle in ["phi1", "PHI", "phi2"]: + self.tmp[ckey]["euler"][:, column_id] \ + = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) + column_id += 1 + is_degrees = False + is_negative = False + for column_id in [0, 1, 2]: + if np.max(np.abs(self.tmp[ckey]["euler"][:, column_id])) > 2. * np.pi: + is_degrees = True + if np.min(self.tmp[ckey]["euler"][:, column_id]) < 0.: + is_negative = True + if is_degrees is True: + self.tmp[ckey]["euler"] = self.tmp[ckey]["euler"] / 180. * np.pi + if is_negative is True: + symmetrize = [2. * np.pi, np.pi, 2. * np.pi] + # TODO::symmetry in Euler space really at PHI=180deg? 
+                    for column_id in [0, 1, 2]:
+                        self.tmp[ckey]["euler"][:, column_id] \
+                            = self.tmp[ckey]["euler"][:, column_id] + symmetrize[column_id]
+                n_pts = n_pts[0]
+                # inconsistency: f32 in the file although the specification states float
+                # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
+                #                     direction='lab2crystal',
+                #                     degrees=is_degrees)
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/phi1, ../PHI, ../phi2 !")
+
+        # index of phase, 0 if not indexed
+        # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
+        if f"{grp_name}/Phase" in fp:
+            if np.shape(fp[f"{grp_name}/Phase"][:])[0] == n_pts:
+                self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32)
+            else:
+                raise ValueError(f"{grp_name}/Phase has unexpected shape !")
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/Phase !")
+
+        # X
+        if f"{grp_name}/X SAMPLE" in fp:
+            if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts:
+                self.tmp[ckey]["scan_point_x"] \
+                    = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32)
+            else:
+                raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !")
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/X SAMPLE !")
+
+        # Y
+        if f"{grp_name}/Y SAMPLE" in fp:
+            if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts:
+                self.tmp[ckey]["scan_point_y"] \
+                    = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32)
+            else:
+                raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !")
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/Y SAMPLE !")
+
+        # Band Contrast is not stored by Bruker; there is Radon Quality or MAD instead,
+        # but MAD is something different: it is the mean angular deviation between
+        # the pattern simulated for the indexed solution and the measured pattern
+        if f"{grp_name}/MAD" in fp:
+            if np.shape(fp[f"{grp_name}/MAD"][:])[0] == n_pts:
+                self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32)
+            else:
+                raise ValueError(f"{grp_name}/MAD has unexpected shape !")
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/MAD !")
+
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
index 3c82ecc08..8f59f54b6 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
@@ -19,14 +19,24 @@
 import numpy as np
 import h5py
 
-from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader
+from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser as HdfFiveGenericReader
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
 
 
 class HdfFiveCommunityReader(HdfFiveGenericReader):
     """Read modified H5EBSD (likely from Britton group)"""
-    def __init__(self, file_name: str = ""):
-        super().__init__(file_name)
-        # this specialized reader implements reading capabilities for the following formats
+    def __init__(self, file_path: str = ""):
+        super().__init__(file_path)
+        self.prfx = None
+        self.tmp = {}
+        self.supported_version = {}
+        self.version = {}
+        self.init_support()
+        self.supported = False
+        self.check_if_supported()
+
+    def init_support(self):
+        """Init supported versions."""
         self.supported_version = {}
         self.version = {}
         self.supported_version["tech_partner"] = ["xcdskd"]
@@ -34,34 +44,26 @@ def __init__(self, file_name: str = ""):
         self.supported_version["schema_version"] = ["0.1"]
         self.supported_version["writer_name"] = ["not standardized"]
         self.supported_version["writer_version"] = ["0.1"]
-        self.supported = True
+
+    def check_if_supported(self):
+        self.supported = True  # try to falsify
         # check if instance to process matches any of these constraints
-        h5r = h5py.File(self.file_name, "r")
-        if "/Manufacturer" in h5r:
-            self.version["tech_partner"] \
-                = super().read_strings_from_dataset(h5r["/Manufacturer"][()])
-            # print(f"britton {self.version['tech_partner']}")
-            if self.version["tech_partner"] not in self.supported_version["tech_partner"]:
-                # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !")
-                self.supported = False
-        else:
-            self.supported = False
-        if "/Version" in h5r:
-            self.version["schema_version"] \
-                = super().read_strings_from_dataset(h5r["/Version"][()])
-            # print(f"britton {self.version['schema_version']}")
-            if self.version["schema_version"] not in self.supported_version["schema_version"]:
-                # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !")
-                self.supported = False
-        else:
-            self.supported = False
-        h5r.close()
-        if self.supported is True:
-            # print(f"Reading {self.file_name} is supported")
-            self.version["schema_name"] = self.supported_version["schema_name"]
-            self.version["writer_name"] = self.supported_version["writer_name"]
-            self.version["writer_version"] = self.supported_version["writer_version"]
-        # else:
-        #     print(f"Reading {self.file_name} is not supported!")
+        with h5py.File(self.file_path, "r") as h5r:
+            if "/Manufacturer" in h5r:
+                self.version["tech_partner"] \
+                    = read_strings_from_dataset(h5r["/Manufacturer"][()])
+                if self.version["tech_partner"] not in self.supported_version["tech_partner"]:
+                    self.supported = False
+            else:
+                self.supported = False
+            if "/Version" in h5r:
+                self.version["schema_version"] \
+                    = read_strings_from_dataset(h5r["/Version"][()])
+                if self.version["schema_version"] not in self.supported_version["schema_version"]:
+                    self.supported = False
+            else:
+                self.supported = False
+        if self.supported is True:
+            self.version["schema_name"] = self.supported_version["schema_name"]
+            self.version["writer_name"] = self.supported_version["writer_name"]
+            self.version["writer_version"] = self.supported_version["writer_version"]
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
index b4f3443a5..babcbe6af 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
@@ -19,16 +19,24 @@
 import numpy as np
 import h5py
 
-from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader
+from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
 
 
-class HdfFiveEdaxOimAnalysisReader(HdfFiveGenericReader):
+class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser):
     """Read EDAX (O)H5"""
-    def __init__(self, file_name: str = ""):
-        super().__init__(file_name)
-        # this specialized reader implements reading capabilities for the following formats
+    def __init__(self, file_path: str = ""):
+        super().__init__(file_path)
+        self.prfx = None
+        self.tmp = {}
         self.supported_version = {}
         self.version = {}
+        self.init_support()
+        self.supported = False
+        self.check_if_supported()
+
+    def init_support(self):
+        """Init supported versions."""
         self.supported_version["tech_partner"] = ["EDAX"]
self.supported_version["schema_name"] = ["H5"] self.supported_version["schema_version"] \ @@ -36,35 +44,28 @@ def __init__(self, file_name: str = ""): self.supported_version["writer_name"] = ["OIM Analysis"] self.supported_version["writer_version"] \ = ["OIM Analysis 8.6.0050 x64 [18 Oct 2021]", "OIM Analysis 8.5.1002 x64 [07-17-20]"] - self.supported = True - # check if instance to process matches any of these constraints - h5r = h5py.File(self.file_name, "r") - if "/Manufacturer" in h5r: - self.version["tech_partner"] \ - = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) - # print(self.version["tech_partner"]) - # for 8.6.0050 but for 8.5.1002 it is a matrix, this is because how strings end up in HDF5 allowed for so much flexibility! - if self.version["tech_partner"] not in self.supported_version["tech_partner"]: - # print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !") + + def check_if_supported(self): + """Check if instance matches all constraints to qualify as old EDAX""" + self.supported = False + with h5py.File(self.file_path, "r") as h5r: + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) + # for 8.6.0050 but for 8.5.1002 it is a matrix, this is because how strings end up in HDF5 allowed for so much flexibility! + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + self.supported = False + else: self.supported = False - else: - self.supported = False - if "/Version" in h5r: - self.version["schema_version"] \ - = super().read_strings_from_dataset(h5r["/Version"][()]) - # print(self.version["schema_version"]) - if self.version["schema_version"] not in self.supported_version["schema_version"]: - # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + if "/Version" in h5r: + self.version["schema_version"] \ + = super().read_strings_from_dataset(h5r["/Version"][()]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + self.supported = False + else: self.supported = False - else: - self.supported = False - h5r.close() - if self.supported is True: - # print(f"Reading {self.file_name} is supported") - self.version["schema_name"] = self.supported_version["schema_name"] - self.version["writer_name"] = self.supported_version["writer_name"] - self.version["writer_version"] = self.supported_version["writer_version"] - # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") - # else: - # print(f"Reading {self.file_name} is not supported!") + if self.supported is True: + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + self.version["writer_version"] = self.supported_version["writer_version"] diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py index 4ec2b3695..1b96073bb 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py @@ -19,32 +19,38 @@ import numpy as np import h5py -from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader +from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveGenericReader +from 
pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset class HdfFiveEmSoftReader(HdfFiveGenericReader): """Read EMsoft H5 (Marc deGraeff Carnegie Mellon)""" - def __init__(self, file_name: str = ""): - super().__init__(file_name) - # this specialized reader implements reading capabilities for the following formats + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp = {} self.supported_version = {} self.version = {} + self.init_support() + self.supported = False + self.check_if_supported() + + def init_support(self): + """Init supported versions.""" self.supported_version["tech_partner"] = ["EMsoft"] self.supported_version["schema_name"] = ["EMsoft"] self.supported_version["schema_version"] = ["EMsoft"] self.supported_version["writer_name"] = ["EMsoft"] self.supported_version["writer_version"] = ["EMsoft"] + + def check_if_supported(self): + """Check if instance matches all constraints to EMsoft""" self.supported = True - # check if instance to process matches any of these constraints - h5r = h5py.File(self.file_name, "r") - required_groups = ["CrystalData", "EMData", "EMheader", "NMLfiles", "NMLparameters"] - for required_group in required_groups: - if f"/{required_group}" not in h5r: - self.supported = False - h5r.close() - if self.supported is True: - # print(f"Reading {self.file_name} is supported") - self.version = self.supported_version.copy() - # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") - # else: - # print(f"Reading {self.file_name} is not supported!") + with h5py.File(self.file_path, "r") as h5r: + req_groups = ["CrystalData", "EMData", "EMheader", "NMLfiles", "NMLparameters"] + for req_group in req_groups: + if f"/{req_group}" not in h5r: + self.supported = False + + if self.supported is True: + self.version = self.supported_version.copy() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index f2080b6e5..b3bfc101f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -19,10 +19,9 @@ import os from typing import Dict, Any, List - import numpy as np import h5py - +from itertools import groupby # import imageio.v3 as iio from PIL import Image as pil @@ -36,236 +35,233 @@ import matplotlib.pyplot as plt -from pynxtools.dataconverter.readers.em.subparsers.hfive import \ - HdfFiveGenericReader, read_strings_from_dataset -from pynxtools.dataconverter.readers.em.subparsers.pyxem_processor import PyxemProcessor +from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser +from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset -class HdfFiveOinaReader(HdfFiveGenericReader): - """Read h5oina""" - def __init__(self, file_name: str = ""): - super().__init__(file_name) +class HdfFiveOxfordReader(HdfFiveBaseParser): + """Overwrite constructor of hfive_base reader""" + def __init__(self, file_path: str = ""): + super().__init__(file_path) # this specialized reader implements reading capabilities for the following formats + self.prfx = None # path handling + self.tmp = {} # local cache in which normalized data are stored + # that are once fully populated passed to the base class process_roi* functions + # which perform plotting and data processing 
functionalities + # this design effectively avoids that different specialized hfive readers need to + # duplicate the code of the base hfive parser for generating NeXus default plots self.supported_version = {} self.version = {} + self.init_support() + self.supported = False + self.check_if_supported() + + def init_support(self): + """Init supported versions.""" self.supported_version["tech_partner"] = ["Oxford Instruments"] self.supported_version["schema_name"] = ["H5OINA"] self.supported_version["schema_version"] = ["2.0", "3.0", "4.0", "5.0"] self.supported_version["writer_name"] = ["AZTec"] self.supported_version["writer_version"] \ = ["4.4.7495.1", "5.0.7643.1", "5.1.7829.1", "6.0.8014.1", "6.0.8196.1"] - self.supported = True - # check if instance matches all constraints to qualify as that supported h5oina - h5r = h5py.File(self.file_name, "r") - if "/Manufacturer" in h5r: - self.version["tech_partner"] \ - = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) - if self.version["tech_partner"] not in self.supported_version["tech_partner"]: - # print(f"{self.version['tech_partner']} is not {self.version['tech_partner']} !") + + def check_if_supported(self): + """Check if instance matches all constraints to qualify as supported H5OINA""" + self.supported = True # try to falsify + with h5py.File(self.file_path, "r") as h5r: + if "/Manufacturer" in h5r: + self.version["tech_partner"] \ + = read_strings_from_dataset(h5r["/Manufacturer"][()]) + if self.version["tech_partner"] not in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.version['tech_partner']} !") + self.supported = False + else: self.supported = False - else: - self.supported = False - # only because we know (thanks to Philippe Pinard who wrote the H5OINA writer) that different - # writer versions should implement the different HDF version correctly we can lift the - # constraint on the writer_version for which we had examples available - if "/Software Version" in h5r: - self.version["writer_version"] \ - = super().read_strings_from_dataset(h5r["/Software Version"][()]) - if self.version["writer_version"] not in self.supported_version["writer_version"]: - # print(f"{self.version['writer_version']} is not any of {self.supported_version['writer_version']} !") + # only because we know (thanks to Philippe Pinard who wrote the H5OINA writer) that different + # writer versions should implement the different HDF version correctly we can lift the + # constraint on the writer_version for which we had examples available + if "/Software Version" in h5r: + self.version["writer_version"] \ + = read_strings_from_dataset(h5r["/Software Version"][()]) + if self.version["writer_version"] not in self.supported_version["writer_version"]: + self.supported = False + else: self.supported = False - else: - self.supported = False - if "/Format Version" in h5r: - self.version["schema_version"] \ - = super().read_strings_from_dataset(h5r["/Format Version"][()]) - if self.version["schema_version"] not in self.supported_version["schema_version"]: - # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !") + if "/Format Version" in h5r: + self.version["schema_version"] \ + = read_strings_from_dataset(h5r["/Format Version"][()]) + if self.version["schema_version"] not in self.supported_version["schema_version"]: + self.supported = False + else: self.supported = False - else: - self.supported = False - h5r.close() if self.supported is True: - # 
print(f"Reading {self.file_name} is supported") self.version["schema_name"] = self.supported_version["schema_name"] self.version["writer_name"] = self.supported_version["writer_name"] - # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}") - # else: - # print(f"Reading {self.file_name} is not supported!") - - def parse(self, template: dict, entry_id=1) -> dict: - """Parse NeXus-relevant (meta)data from an H5OINA file.""" - print(f"Parsing with sub-parser {__class__.__name__}, " \ - f"file: {self.file_name}, entry_id: {entry_id}") - # find how many slices there are - with h5py.File(f"{self.file_name}", "r") as h5r: - entries = sorted(list(h5r["/"]), key=int) - for entry in entries: - if entry.isdigit() is True: # non-negative integer - if entry == "1": - self.slice = {} - self.parse_and_normalize_slice(h5r, int(entry)) - # at this point all Oxford jargon is ironed out and the - # call is the same irrespective of the tech partner - # that was used to take the orientation maps - pyx = PyxemProcessor(entry_id) - pyx.process_roi_overview(template) - pyx.process_roi_xmap(template) - pyx.process_roi_phases(template) - pyx.process_roi_inverse_pole_figures(template) - return template - def parse_and_normalize_slice(fp, slice_id: int): - """Read and normalize away Oxford-specific formatting of data in specific slice.""" - self.parse_and_normalize_slice_ebsd_data(fp, slice_id) - self.parse_and_normalize_slice_ebsd_header(fp, slice_id) + def parse_and_normalize(self): + """Read and normalize away Oxford-specific formatting with an equivalent in NXem.""" + with h5py.File(f"{self.file_path}", "r") as h5r: + cache_id = 0 + slice_ids = sorted(list(h5r["/"])) + for slice_id in slice_ids: + if slice_id.isdigit() is True and slice_id == "1": + # non-negative int, parse for now only the 1. slice + self.prfx = f"/{slice_id}" + ckey = self.init_named_cache(f"ebsd{cache_id}") # name of the cache to use + self.parse_and_normalize_slice_ebsd_header(h5r, ckey) + self.parse_and_normalize_slice_ebsd_phases(h5r, ckey) + self.parse_and_normalize_slice_ebsd_data(h5r, ckey) + # add more information to pass to hfive parser + cache_id += 1 - def parse_and_normalize_slice_ebsd_data(fp, slice_id: int): - # https://github.com/oinanoanalysis/h5oina/blob/master/H5OINAFile.md - group_name = f"/{slice_id}/EBSD/Data" - self.slice["slice_id"] = slice_id - print(f"Parsing {group_name}, {self.slice['slice_id']}") - # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1). - if f"{group_name}/Euler" in fp: - is_degrees = False - if read_strings_from_dataset(fp[f"{group_name}/Euler"].attrs["Unit"]) == "rad": - is_degrees = False - self.slice["rotation"] = Rotation.from_euler(euler=fp[f"{group_name}/Euler"], - direction='lab2crystal', - degrees=is_degrees) - else: - raise ValueError(f"Unable to parse Euler !") - - # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed - # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 - if f"{group_name}/Phase" in fp: - self.slice["phase_id"] = np.asarray(fp[f"{group_name}/Phase"], np.int32) + def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): + # X Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Width in pixels, Line scan: Length in pixels. 
+        grp_name = f"{self.prfx}/EBSD/Header"
+        if f"{grp_name}/X Cells" in fp:
+            self.tmp[ckey]["n_x"] = fp[f"{grp_name}/X Cells"][0]
         else:
-            raise ValueError(f"Unable to parse Phase !")
-
-        # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner)
-        if f"{group_name}/X" in fp:
-            self.slice["scan_point_x"] = np.asarray(fp[f"{group_name}/X"], np.float32)
+            raise ValueError(f"Unable to parse {grp_name}/X Cells !")
+        # Y Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Height in pixels. Line scan: Always set to 1.
+        if f"{grp_name}/Y Cells" in fp:
+            self.tmp[ckey]["n_y"] = fp[f"{grp_name}/Y Cells"][0]
         else:
-            raise ValueError(f"Unable to parse pixel position X !")
-
-        # Y, no, H5T_NATIVE_FLOAT, (size, 1), Y position of each pixel in micrometers (origin: top left corner)
-        if f"{group_name}/Y" in fp:
-            self.slice["scan_point_y"] = np.asarray(fp[f"{group_name}/Y"], np.float32)
-            # TODO::inconsistent float vs f32
+            raise ValueError(f"Unable to parse {grp_name}/Y Cells !")
+        # X Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along x-axis in micrometers. Line scan: step size along the line scan in micrometers.
+        if f"{grp_name}/X Step" in fp:
+            if read_strings_from_dataset(fp[f"{grp_name}/X Step"].attrs["Unit"]) == "um":
+                self.tmp[ckey]["s_x"] = fp[f"{grp_name}/X Step"][0]
+                self.tmp[ckey]["s_unit"] = "µm"
+            else:
+                raise ValueError(f"Unexpected X Step Unit attribute !")
         else:
-            raise ValueError(f"Unable to parse pixel position Y !")
-
-        # Band Contrast, no, H5T_NATIVE_INT32, (size, 1)
-        if f"{group_name}/Band Contrast" in fp:
-            self.slice["band_contrast"] = np.asarray(fp[f"{group_name}/Band Contrast"], np.uint8)
-            # TODO::inconsistent int32 vs uint8
+            raise ValueError(f"Unable to parse {grp_name}/X Step !")
+        # Y Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along y-axis in micrometers. Line scan: Always set to 0.
+        if f"{grp_name}/Y Step" in fp:
+            if read_strings_from_dataset(fp[f"{grp_name}/Y Step"].attrs["Unit"]) == "um":
+                self.tmp[ckey]["s_y"] = fp[f"{grp_name}/Y Step"][0]
+            else:
+                raise ValueError(f"Unexpected Y Step Unit attribute !")
         else:
-            raise ValueError(f"Unable to parse band contrast !")
-
-        # TODO::processed patterns
+            raise ValueError(f"Unable to parse {grp_name}/Y Step !")
+        # TODO::check that all data in the self.oina are consistent

-    def parse_and_normalize_slice_ebsd_header(fp, slice_id: int):
+    def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str):
         """Parse EBSD header section for specific slice."""
-        group_name = f"/{slice_id}/EBSD/Header"
-        # Phases, yes, Contains a subgroup for each phase where the name of each subgroup is the index of the phase starting at 1.
-        if f"{group_name}/Phases" in fp:
-            phase_ids = sorted(list(fp[f"{group_name}/Phases"]), key=int)
-            self.slice["phase"] = []
-            self.slice["space_group"] = []
-            self.slice["phases"] = {}
+        grp_name = f"{self.prfx}/EBSD/Header"
+        # Phases, yes, Contains a subgroup for each phase where the name
+        # of each subgroup is the index of the phase starting at 1.
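The X Step and Y Step checks above trust a dataset only after validating its Unit attribute. A hedged, standalone sketch of that pattern; file and dataset paths are hypothetical:

import h5py

def read_step_size_um(h5_path: str, dset_path: str) -> float:
    """Return a step size in micrometers, refusing unexpected units."""
    with h5py.File(h5_path, "r") as h5r:
        unit = h5r[dset_path].attrs["Unit"]
        if isinstance(unit, bytes):
            unit = unit.decode("utf-8")  # attributes may arrive as bytes
        if unit != "um":
            raise ValueError(f"Unexpected unit {unit} for {dset_path} !")
        return float(h5r[dset_path][0])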
+        if f"{grp_name}/Phases" in fp:
+            phase_ids = sorted(list(fp[f"{grp_name}/Phases"]), key=int)
+            self.tmp[ckey]["phase"] = []
+            self.tmp[ckey]["space_group"] = []
+            self.tmp[ckey]["phases"] = {}
             for phase_id in phase_ids:
                 if phase_id.isdigit() is True:
-                    self.slice["phases"][int(phase_id)] = {}
-                    sub_group_name = f"/{slice_id}/EBSD/Header/Phases"
+                    self.tmp[ckey]["phases"][int(phase_id)] = {}
+                    sub_grp_name = f"/{grp_name}/Phases/{phase_id}"
                     # Phase Name, yes, H5T_STRING, (1, 1)
-                    if f"{sub_group_name}/Phase Name" in fp:
-                        phase_name = read_strings_from_dataset(fp[f"{sub_group_name}/Phase Name"][()])
-                        self.slice["phases"][int(phase_id)]["phase_name"] = phase_name
+                    if f"{sub_grp_name}/Phase Name" in fp:
+                        phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Phase Name"][()])
+                        self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name
                     else:
-                        raise ValueError("Unable to parse Phase Name !")
+                        raise ValueError(f"Unable to parse {sub_grp_name}/Phase Name !")
                     # Reference, yes, H5T_STRING, (1, 1), Changed in version 2.0 to mandatory
-                    if f"{sub_group_name}/Reference" in fp:
-                        self.slice["phases"][int(phase_id)]["reference"] \
-                            = read_strings_from_dataset(fp[f"{sub_group_name}/Reference"][()])
+                    if f"{sub_grp_name}/Reference" in fp:
+                        self.tmp[ckey]["phases"][int(phase_id)]["reference"] \
+                            = read_strings_from_dataset(fp[f"{sub_grp_name}/Reference"][()])
                     else:
-                        raise ValueError("Unable to parse Reference !")
+                        raise ValueError(f"Unable to parse {sub_grp_name}/Reference !")
                     # Lattice Angles, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for the alpha, beta and gamma angles in radians
-                    if f"{sub_group_name}/Lattice Angles" in fp:
+                    if f"{sub_grp_name}/Lattice Angles" in fp:
                         is_degrees = False
-                        if read_strings_from_dataset(fp[f"{sub_group_name}/Lattice Angles"].attrs["Unit"]) == "rad":
+                        if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Angles"].attrs["Unit"]) == "rad":
                             is_degrees = False
-                        angles = np.asarray(fp[f"{sub_group_name}/Lattice Angles"][:].flatten()) / np.pi * 180.
-                        self.slice["phases"][int(phase_id)]["alpha_beta_gamma"] \
+                        angles = np.asarray(fp[f"{sub_grp_name}/Lattice Angles"][:].flatten()) / np.pi * 180.
+                        self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \
                             = angles
                     else:
-                        raise ValueError("Unable to parse Lattice Angles !")
+                        raise ValueError(f"Unable to parse {sub_grp_name}/Lattice Angles !")
                     # Lattice Dimensions, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for a, b and c dimensions in Angstroms
-                    if f"{sub_group_name}/Lattice Dimensions" in fp:
+                    if f"{sub_grp_name}/Lattice Dimensions" in fp:
                         is_nanometer = False
-                        if read_strings_from_dataset(fp[f"{sub_group_name}/Lattice Dimensions"].attrs["Unit"]) == "angstrom":
+                        if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Dimensions"].attrs["Unit"]) == "angstrom":
                             is_nanometer = False
-                        a_b_c = np.asarray(fp[f"{sub_group_name}/Lattice Dimensions"][:].flatten()) * 0.1
-                        self.slice["phases"][int(phase_id)]["a_b_c"] = a_b_c
+                        a_b_c = np.asarray(fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten()) * 0.1
+                        self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c
                     else:
-                        raise ValueError("Unable to parse Lattice Dimensions !")
+                        raise ValueError(f"Unable to parse {sub_grp_name}/Lattice Dimensions !")
                     # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index.
                     # The attribute Symbol contains the string representation, for example P m -3 m.
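The two lattice normalizations above are plain unit conversions, radians to degrees and angstrom to nanometer. A short numpy sketch with made-up values:

import numpy as np

angles_rad = np.asarray([np.pi / 2., np.pi / 2., np.pi / 2.])  # hypothetical input
a_b_c_angstrom = np.asarray([4.05, 4.05, 4.05])  # illustrative values only
angles_deg = angles_rad / np.pi * 180.   # -> [90. 90. 90.]
a_b_c_nm = a_b_c_angstrom * 0.1          # -> [0.405 0.405 0.405]
print(angles_deg, a_b_c_nm)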
-                    if f"{sub_group_name}/Space Group" in fp:
-                        space_group = int(fp[f"{sub_group_name}/Space Group"][0])
-                        self.slice["phases"][int(phase_id)]["space_group"] = space_group
+                    if f"{sub_grp_name}/Space Group" in fp:
+                        space_group = int(fp[f"{sub_grp_name}/Space Group"][0])
+                        self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group
                     else:
-                        raise ValueError("Unable to parse Space Group !")
-                    if len(self.slice["space_group"]) > 0:
-                        self.slice["space_group"].append(space_group)
+                        raise ValueError(f"Unable to parse {sub_grp_name}/Space Group !")
+                    if len(self.tmp[ckey]["space_group"]) > 0:
+                        self.tmp[ckey]["space_group"].append(space_group)
                     else:
-                        self.slice["space_group"] = [space_group]
+                        self.tmp[ckey]["space_group"] = [space_group]

-                    if len(self.slice["phase"]) > 0:
-                        Structure(title=phase_name, atoms=None,
-                                  lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
-                                                  angles[0], angles[1], angles[2]))
+                    if len(self.tmp[ckey]["phase"]) > 0:
+                        self.tmp[ckey]["phase"].append(
+                            Structure(title=phase_name, atoms=None,
+                                      lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
+                                                      angles[0], angles[1], angles[2])))
                     else:
-                        self.slice["phase"] \
+                        self.tmp[ckey]["phase"] \
                             = [Structure(title=phase_name, atoms=None,
                                          lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
                                                          angles[0], angles[1], angles[2]))]
         else:
-            raise ValueError("Unable to parse Phases !")
+            raise ValueError(f"Unable to parse {grp_name}/Phases !")

-        # X Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Width in pixels, Line scan: Length in pixels.
-        if f"{group_name}/X Cell" in fp:
-            self.slice["n_x"] = fp[f"{group_name}/X Cells"][0]
+    def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str):
+        # https://github.com/oinanoanalysis/h5oina/blob/master/H5OINAFile.md
+        grp_name = f"{self.prfx}/EBSD/Data"
+        print(f"Parsing {grp_name}")
+        # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1).
+        if f"{grp_name}/Euler" in fp:
+            is_degrees = False
+            if read_strings_from_dataset(fp[f"{grp_name}/Euler"].attrs["Unit"]) == "rad":
+                is_degrees = False
+            self.tmp[ckey]["euler"] = np.asarray(fp[f"{grp_name}/Euler"], np.float32)
+            # inconsistency f32 in file although specification states float
+            # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
+            #                     direction='lab2crystal',
+            #                     degrees=is_degrees)
         else:
-            raise ValueError("Unable to parse X Cells !")
-        # Y Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Height in pixels. Line scan: Always set to 1.
-        if f"{group_name}/Y Cell" in fp:
-            self.slice["n_x"] = fp[f"{group_name}/Y Cells"][0]
+            raise ValueError(f"Unable to parse {grp_name}/Euler !")
+
+        # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed
+        # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
+        if f"{grp_name}/Phase" in fp:
+            self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32)
         else:
-            raise ValueError("Unable to parse Y Cells !")
-        # X Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along x-axis in micrometers. Line scan: step size along the line scan in micrometers.
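The phase and space_group lists built above later feed orix's PhaseList. A hedged sketch with one made-up cubic phase, assuming diffpy.structure and orix are installed:

from diffpy.structure import Lattice, Structure
from orix.crystal_map import PhaseList

# hypothetical FCC nickel; lengths in nanometers, angles in degrees
structures = [Structure(title="Ni", atoms=None,
                        lattice=Lattice(0.3524, 0.3524, 0.3524, 90., 90., 90.))]
space_groups = [225]  # Fm-3m by its International Tables number
phase_list = PhaseList(space_groups=space_groups, structures=structures)
print(phase_list)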
-        if f"{group_name}/X Step" in fp:
-            if read_strings_from_dataset(fp[f"{group_name}/X Step"].attrs["Unit"]) == "um":
-                self.slice["s_x"] = fp[f"{group_name}/X Step"][0]
-                self.slice["s_unit"] = "µm"
-            else:
-                raise ValueError("Unexpected X Step Unit attribute !")
+            raise ValueError(f"Unable to parse {grp_name}/Phase !")
+
+        # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner)
+        if f"{grp_name}/X" in fp:
+            self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32)
+            # inconsistency f32 in file although specification states float
         else:
-            raise ValueError("Unable to parse X Step !")
-        # Y Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along y-axis in micrometers. Line scan: Always set to 0.
-        if f"{group_name}/Y Step" in fp:
-            if read_strings_from_dataset(fp[f"{group_name}/Y Step"].attrs["Unit"]) == "um":
-                self.slice["s_y"] = fp[f"{group_name}/Y Step"][0]
-            else:
-                raise ValueError("Unexpected Y Step Unit attribute !")
+            raise ValueError(f"Unable to parse {grp_name}/X !")
+
+        # Y, no, H5T_NATIVE_FLOAT, (size, 1), Y position of each pixel in micrometers (origin: top left corner)
+        if f"{grp_name}/Y" in fp:
+            self.tmp[ckey]["scan_point_y"] = np.asarray(fp[f"{grp_name}/Y"], np.float32)
+            # inconsistency f32 in file although specification states float
         else:
-            raise ValueError("Unable to parse Y Step !")
-        # TODO::check that all data in the self.oina are consistent
+            raise ValueError(f"Unable to parse {grp_name}/Y !")

-        for key, val in self.slice.items():
-            print(f"{key}, type: {type(val)}, shape: {np.shape(val)}")
+        # Band Contrast, no, H5T_NATIVE_INT32, (size, 1)
+        if f"{grp_name}/Band Contrast" in fp:
+            self.tmp[ckey]["band_contrast"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32)
+            # inconsistency uint8 in file although specification states should be int32
+            # promoting uint8 to int32 no problem
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/Band Contrast !")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
new file mode 100644
index 000000000..dde1c10ee
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
@@ -0,0 +1,234 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""HDF5 base parser to inherit from for tech-partner-specific HDF5 subparsers."""
+
+# the base parser implements the processing of standardized orientation maps via
+# the pyxem software package from the electron microscopy community
+# specifically so-called NeXus default plots are generated to add RDMS-relevant
+# information to the NeXus file which supports scientists with judging the potential
+# value of the dataset in the context of them using research data management systems (RDMS)
+# in effect this parser is the partner of the MTex parser for all those file formats
+# which are HDF5 based and which (at the time of working on this example Q3/Q4 2023)
+# were not supported by MTex
+# with offering this parser we also would like to embrace and acknowledge the efforts
+# of other electron microscopists (like the pyxem team, hyperspy etc.) and their work
+# towards software tools which are complementary to the MTex texture toolbox
+# one could have also implemented the HDF5 parsing inside MTex but we leave this as a
+# task for the community and instead focus here on showing a more diverse example
+# towards more interoperability between the different tools in the community
+
+import os, glob, re, sys
+from typing import Dict, Any, List
+import numpy as np
+import h5py
+import yaml, json
+# import imageio.v3 as iio
+from PIL import Image as pil
+
+import diffsims
+import orix
+from diffpy.structure import Lattice, Structure
+from orix import plot
+from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
+from orix.quaternion import Rotation
+from orix.vector import Vector3d
+
+import matplotlib.pyplot as plt
+
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
+from pynxtools.dataconverter.readers.em.utils.hfive_web_constants import HFIVE_WEB_MAXIMUM_RGB
+
+from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader
+from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader
+# from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader
+from pynxtools.dataconverter.readers.em.subparsers.hfive_apex import HdfFiveEdaxApexReader
+# from pynxtools.dataconverter.readers.em.subparsers.hfive_emsoft import HdfFiveEmSoftReader
+# from pynxtools.dataconverter.readers.em.subparsers.hfive_ebsd import HdfFiveCommunityReader
+
+
+class NxEmNxsHfiveSubParser:
+    """Map content from different type of *.h5 files on an instance of NXem."""
+
+    def __init__(self, entry_id: int = 1, input_file_name: str = ""):
+        """Overwrite constructor of the generic reader."""
+        if entry_id > 0:
+            self.entry_id = entry_id
+        else:
+            self.entry_id = 1
+        self.file_path = input_file_name
+        self.cache = {"is_filled": False}
+        self.xmap = None
+
+    def parse(self, template: dict) -> dict:
+        hfive_parser_type = self.identify_hfive_type()
+        if hfive_parser_type is None:
+            print(f"{self.file_path} does not match any of the supported HDF5 formats")
+            return template
+        print(f"Parsing via {hfive_parser_type}...")
+
+        # ##MK::current implementation pulls all entries into the template
+        # before writing them out, this might not fit into main memory
+        # copying over all data and content within tech partner files into NeXus makes
+        # not much sense as the data exists and we would like to motivate that
+        # tech partners and community members write NeXus content directly
+        # therefore currently in this example we carry over the EBSD map and some
+        # metadata to motivate that there is indeed value with respect to interoperability
+        # when such data are harmonized exactly this is the point we would like to
+        # make with this example for NeXus and NOMAD OASIS within the FAIRmat project
+        # it is practically beyond our resources to implement a mapping for all cases
+        # and corner cases of the vendor files
+        # ideally concept mapping would be applied to just point to pieces of information
+        # in the HDF5 file that is written by the tech partners however because of the
+        # fact that currently these pieces of information are formatted very differently
+        # it is non-trivial to establish this mapping and only because of this we
+        # map over manually
+        if hfive_parser_type == "oxford":
+            oina = HdfFiveOxfordReader(self.file_path)
+            oina.parse_and_normalize()
+            self.process_into_template(oina.tmp, template)
+        elif hfive_parser_type == "bruker":
+            bruker = HdfFiveBrukerEspritReader(self.file_path)
+            bruker.parse_and_normalize()
+            self.process_into_template(bruker.tmp, template)
+        elif hfive_parser_type == "apex":
+            apex = HdfFiveEdaxApexReader(self.file_path)
+            apex.parse_and_normalize()
+            self.process_into_template(apex.tmp, template)
+        """
+        elif hfive_parser_type == "edax":
+            with h5py.File(f"{self.file_path}", "r") as h5r:
+                grp_nms = list(h5r["/"])
+                for grp_nm in grp_nms:
+                    if grp_nm not in ["Version", "Manufacturer"]:
+                        edax_oim = HdfFiveEdaxOimAnalysisReader(self.file_path)
+                        edax_oim.parse_and_normalize_group(
+                            h5r,
+                            f"/{grp_nm}",
+                            self.cache_ebsd)
+        elif hfive_parser_type == "hebsd":
+            with h5py.File(f"{self.file_path}", "r") as h5r:
+                grp_nms = list(h5r["/"])
+                for grp_nm in grp_nms:
+                    if grp_nm not in ["Version", "Manufacturer"]:
+                        edax_oim = HdfFiveCommunityReader(self.file_path)
+                        edax_oim.parse_and_normalize_group(
+                            h5r,
+                            f"/{grp_nm}",
+                            self.cache_ebsd)
+        elif hfive_parser_type == "emsoft":
+            return template
+        else:  # none or something unsupported
+            return template
+
+        for key, val in self.cache_ebsd.items():
+            print(f"{key}, type: {type(val)}, shape: {np.shape(val)}")
+
+        if self.cache["is_filled"] is True:
+            self.process_roi_overview(template)
+            self.process_roi_xmap(template)
+            self.process_roi_phases(template)
+            self.process_roi_inverse_pole_figures(template)
+        """
+        return template
+
+    def identify_hfive_type(self):
+        """Identify if HDF5 file matches a known format for which a subparser exists."""
+        hdf = HdfFiveOxfordReader(f"{self.file_path}")
+        if hdf.supported is True:
+            return "oxford"
+        # hdf = HdfFiveEdaxOimAnalysisReader(f"{self.file_path}")
+        # if hdf.supported is True:
+        #     return "edax"
+        hdf = HdfFiveEdaxApexReader(f"{self.file_path}")
+        if hdf.supported is True:
+            return "apex"
+        hdf = HdfFiveBrukerEspritReader(f"{self.file_path}")
+        if hdf.supported is True:
+            return "bruker"
+        # hdf = HdfFiveEmSoftReader(f"{self.file_path}")
+        # if hdf.supported is True:
+        #     return "emsoft"
+        # hdf = HdfFiveCommunityReader(f"{self.file_path}")
+        # if hdf.supported is True:
+        #     return "hebsd"
+        return None
+
+    def process_into_template(self, inp: dict, template: dict) -> dict:
+        for key, val in inp.items():
+            if isinstance(val, dict):
+                for ckey, cval in val.items():
+                    print(f"{ckey}, {cval}")
+            else:
+                print(f"{key}, {val}")
+        return template
+        # super().process_ebsd_cache(self.tmp, template)
+        # return template
+
+    def process_roi_overview(inp: dict, template: dict) -> dict:
+        return template
+
+    def process_roi_xmap(inp: dict) -> dict:
+        """Process standardized IPF orientation map using pyxem from normalized orientation data."""
+        # for NeXus we would like to create a default
+        '''
+        if np.max(inp["n_x"], inp["n_y"]) < HFIVE_WEB_MAXIMUM_RGB:
+            # can use the map discretization as is
+            coordinates, _ = create_coordinate_arrays(
+                (inp["n_x"], inp["n_y"]), (inp["s_x"], inp["s_y"]))
+            xaxis = coordinates["x"]
+            yaxis = coordinates["y"]
+            del coordinates
+        # else:
+        #     need to regrid to downsample too large maps
+        #     TODO::implement 1NN-based downsampling approach
+        #     build grid
+        #     tree-based 1NN
+        #     proceed as usual
+
+        pyxem_phase_identifier = inp["phase_identifier"] \
+            - (np.min(inp["phase_identifier"]) - (-1))  # pyxem, non-indexed has to be -1
+        print(np.unique(pyxem_phase_identifier))
+
+        self.xmap = CrystalMap(rotations=inp["rotation"],
+                               x=self.xaxis, y=self.yaxis,
+                               phase_id=pyxem_phase_identifier,
+                               phase_list=PhaseList(space_groups=inp["space_group"],
+                                                    structures=inp["phase"]),
+                               prop={"bc": inp["band_contrast"]},
+                               scan_unit=inp["s_unit"])
+        print(self.xmap)
+        '''
+
+    def process_roi_phases(self, template: dict) -> dict:
+        return template
+
+    def process_roi_inverse_pole_figures(self, template: dict) -> dict:
+        """Parse inverse pole figures (IPF) mappings."""
+        # call process_roi_ipf_map
+        # call process_roi_ipf_color_key
+        return template
+
+    def process_roi_ipf_map(self, identifier, template: dict) -> dict:
+        """Parse and create inverse-pole-figure (IPF) mappings on their color models."""
+        # +1 because for orix not_indexed -1 and "first" phase has ID 0 !
+        return template
+
+    def process_roi_ipf_color_key(self, identifier, template: dict) -> dict:
+        """Parse color key renderings of inverse-pole-figure (IPF) mappings."""
+        # +1 because for orix not_indexed -1 and "first" phase has ID 0 !
+        return template
diff --git a/pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py b/pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py
deleted file mode 100644
index e8ee7d8b4..000000000
--- a/pynxtools/dataconverter/readers/em/subparsers/pyxem_processor.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#
-# Copyright The NOMAD Authors.
-#
-# This file is part of NOMAD. See https://nomad-lab.eu for further info.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
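The commented-out process_roi_xmap body above (and the deleted PyxemProcessor that follows) centers on building an orix CrystalMap. A hedged, self-contained sketch with a tiny synthetic map; all values are made up for illustration:

import numpy as np
from diffpy.structure import Lattice, Structure
from orix.crystal_map import CrystalMap, PhaseList, create_coordinate_arrays
from orix.quaternion import Rotation

n_y, n_x, s_y, s_x = 2, 2, 0.5, 0.5  # hypothetical 2 x 2 map, 0.5 µm step
coordinates, _ = create_coordinate_arrays((n_y, n_x), (s_y, s_x))
rotations = Rotation.from_euler(np.zeros((n_y * n_x, 3)))  # identity orientations
phase_id = np.zeros((n_y * n_x,), np.int32)  # every point indexed as phase 0
phases = PhaseList(space_groups=[225],
                   structures=[Structure(title="Ni", atoms=None,
                                         lattice=Lattice(0.3524, 0.3524, 0.3524,
                                                         90., 90., 90.))])
xmap = CrystalMap(rotations=rotations, x=coordinates["x"], y=coordinates["y"],
                  phase_id=phase_id, phase_list=phases, scan_unit="um")
print(xmap)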
-#
-"""Process standardized orientation map using pyxem from normalized orientation data."""
-
-import os
-from typing import Dict, Any, List
-
-import numpy as np
-import h5py
-
-# import imageio.v3 as iio
-from PIL import Image as pil
-
-import diffsims
-import orix
-from diffpy.structure import Lattice, Structure
-from orix import plot
-from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
-from orix.quaternion import Rotation
-from orix.vector import Vector3d
-
-import matplotlib.pyplot as plt
-
-from pynxtools.dataconverter.readers.em.utils.hfive_web_constants \
-    import HFIVE_WEB_MAXIMUM_RGB
-
-
-class PyxemProcessor:
-    def __init__(self, entry_id: int):
-        self.entry_id = entry_id
-        self.xmap = None
-        pass
-
-    def process_roi_overview(inp: dict, template: dict) -> dict:
-        pass
-
-    def process_roi_xmap(inp: dict) -> dict:
-        """Process standardized IPF orientation map using pyxem from normalized orientation data."""
-        # for NeXus would like to create a default
-        if np.max(inp["n_x"], inp["n_y"]) < HFIVE_WEB_MAXIMUM_RGB:
-            # can use the map discretization as is
-            coordinates, _ = create_coordinate_arrays(
-                (inp["n_x"], inp["n_y"]), (inp["s_x"], inp["s_y"]))
-            xaxis = coordinates["x"]
-            yaxis = coordinates["y"]
-            del coordinates
-        # else:
-        #     need to regrid to downsample too large maps
-        #     TODO::implement 1NN-based downsampling approach
-        #     build grid
-        #     tree-based 1NN
-        #     proceed as usual
-
-        pyxem_phase_identifier = inp["phase_identifier"] \
-            - (np.min(inp["phase_identifier"]) - (-1))  # pyxem, non-indexed has to be -1
-        print(np.unique(pyxem_phase_identifier))
-
-        self.xmap = CrystalMap(rotations=inp["rotation"],
-                               x=self.xaxis, y=self.yaxis,
-                               phase_id=pyxem_phase_identifier,
-                               phase_list=PhaseList(space_groups=inp["space_group"],
-                                                    structures=inp["phase"]),
-                               prop={"bc": inp["band_contrast"]},
-                               scan_unit=inp["s_unit"])
-        print(self.xmap)
-
-    def process_roi_phases(self, template: dict) -> dict:
-        pass
-
-    def process_roi_inverse_pole_figures(self, template: dict) -> dict:
-        """Parse inverse pole figures (IPF) mappings."""
-        # call process_roi_ipf_map
-        # call process_roi_ipf_color_key
-        return template
-
-    def process_roi_ipf_map(self, identifier, template: dict) -> dict:
-        """Parse and create inverse-pole-figure (IPF) mappings on their color models."""
-        # +1 because for orix not_indexed -1 and "first" phase has ID 0 !
-        return template
-
-    def process_roi_ipf_color_key(self, identifier, template: dict) -> dict:
-        """Parse color key renderings of inverse-pole-figure (IPF) mappings."""
-        # +1 because for orix not_indexed -1 and "first" phase has ID 0 !
-        return template
diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
new file mode 100644
index 000000000..639d73421
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
@@ -0,0 +1,55 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Utility functions when working with parsing HDF5."""
+
+import numpy as np
+import os, glob, re, sys
+import h5py
+import yaml
+import json
+
+
+def read_strings_from_dataset(obj):
+    # print(f"type {type(obj)}, np.shape {np.shape(obj)}, obj {obj}")
+    # if hasattr(obj, "dtype"):
+    #     print(obj.dtype)
+    if isinstance(obj, np.ndarray):
+        retval = []
+        for entry in obj:
+            if isinstance(entry, bytes):
+                retval.append(entry.decode("utf-8"))
+            elif isinstance(entry, str):
+                retval.append(entry)
+            else:
+                continue
+                # raise ValueError("Neither bytes nor str inside np.ndarray!")
+        # specific implementation rule that all lists with a single string
+        # will be returned in paraprobe as a scalar string
+        if len(retval) > 1:
+            return retval
+        elif len(retval) == 1:
+            return retval[0]
+        else:
+            return None
+    elif isinstance(obj, bytes):
+        return obj.decode("utf8")
+    elif isinstance(obj, str):
+        return obj
+    else:
+        return None
+        # raise ValueError("Neither np.ndarray, nor bytes, nor str !")

From 407609644edcd296f7e788fd8a818c29c4fe625c Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Mon, 23 Oct 2023 12:43:12 +0200
Subject: [PATCH 13/84] Bug fixing, implementation for normalization old edax,
 community hebsd, and tested

---
 .../readers/em/subparsers/hfive_apex.py   | 110 +++----
 .../readers/em/subparsers/hfive_bruker.py | 301 ++++++++----------
 .../readers/em/subparsers/hfive_ebsd.py   | 205 +++++++++++-
 .../readers/em/subparsers/hfive_edax.py   | 179 ++++++++++-
 .../readers/em/subparsers/hfive_oxford.py | 271 ++++++++--------
 .../readers/em/subparsers/nxs_hfive.py    |  46 +--
 .../readers/em/utils/hfive_utils.py       |  31 ++
 7 files changed, 709 insertions(+), 434 deletions(-)

diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
index 0e15a7eb5..2c838c14c 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -34,11 +34,8 @@
 import matplotlib.pyplot as plt

 from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
-from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
-
-
-def om_eu(inp):
-    return inp[0:2]
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
+    read_strings_from_dataset, format_euler_parameterization


 class HdfFiveEdaxApexReader(HdfFiveBaseParser):
@@ -63,21 +60,22 @@ def init_support(self):

     def check_if_supported(self):
         """Check if instance matches all constraints to qualify as supported H5OINA"""
-        self.supported = True  # try to falsify
+        self.supported = 0  # voting-based
         with h5py.File(self.file_path, "r") as h5r:
             # parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists
             # so much about interoperability
             # but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats"
             grp_names = list(h5r["/"])
             if len(grp_names) == 1:
-                if read_strings_from_dataset(h5r[grp_names[0]].attrs["Company"][0]) \
-                        not in self.supported_version["tech_partner"]:
-                    self.supported = False
-                if read_strings_from_dataset(h5r[grp_names[0]].attrs["PRODUCT_VERSION"][0]) \
-                        not in self.supported_version["schema_version"]:
-                    self.supported = False
-        if self.supported is True:
+                if read_strings_from_dataset(h5r[grp_names[0]].attrs["Company"][0]) in self.supported_version["tech_partner"]:
+                    self.supported += 1
+                if read_strings_from_dataset(h5r[grp_names[0]].attrs["PRODUCT_VERSION"][0]) in self.supported_version["schema_version"]:
+                    self.supported += 1
+        if self.supported == 2:
             self.version = self.supported_version.copy()
+            self.supported = True
+        else:
+            self.supported = False

     def parse_and_normalize(self):
         """Read and normalize away EDAX/APEX-specific formatting with an equivalent in NXem."""
@@ -119,13 +117,15 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
             raise ValueError(f"Unable to parse {self.prfx}/Sample/{req_field} !")

         grid_type = read_strings_from_dataset(fp[f"{self.prfx}/Sample/Grid Type"][()])
-        if grid_type != "HexGrid":
+        if grid_type not in ["HexGrid", "SqrGrid"]:
             raise ValueError(f"Grid Type {grid_type} is currently not supported !")
+        self.tmp[ckey]["grid_type"] = grid_type
         self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0]
         self.tmp[ckey]["s_unit"] = "µm"  # TODO::always micron?
         self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0]
         self.tmp[ckey]["s_y"] = fp[f"{self.prfx}/Sample/Step Y"][0]
         self.tmp[ckey]["n_y"] = fp[f"{self.prfx}/Sample/Number Of Rows"][0]
+        # TODO::check that all data are consistent

     def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
         grp_name = f"{self.prfx}/EBSD/ANG/HEADER/Phase"
@@ -192,44 +192,46 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):

     def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         grp_name = f"{self.prfx}/EBSD/ANG/DATA/DATA"
-        n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]
-        if f"{grp_name}" in fp:
-            if np.shape(fp[f"{grp_name}"]) != (n_pts,) and n_pts > 0:
-                raise ValueError(f"Unexpected shape of {grp_name} !")
-
-            dat = fp[f"{grp_name}"]
-            self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32)
-            # index of phase, 0 if not indexed
-            # # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
-            self.tmp[ckey]["phase_id"] = np.zeros((n_pts,), np.int32)
-            self.tmp[ckey]["ci"] = np.zeros((n_pts,), np.float32)
-
-            for i in np.arange(0, n_pts):
-                # check shape of internal virtual chunked number array
-                r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))])
-                self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False)
-                self.tmp[ckey]["phase_id"][i] = dat[i][2]
-                self.tmp[ckey]["ci"][i] = dat[i][3]
-
-            # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions !
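An aside on read_strings_from_dataset from hfive_utils.py above: it flattens the many ways strings leave h5py (bytes, str, or numpy arrays thereof) into plain Python strings, collapsing single-entry arrays to a scalar. A hedged usage sketch, assuming the function as defined above is in scope:

import numpy as np

print(read_strings_from_dataset(b"AZtec"))                  # -> "AZtec"
print(read_strings_from_dataset(np.asarray([b"5.1"])))      # -> "5.1", single entry collapses
print(read_strings_from_dataset(np.asarray([b"a", b"b"])))  # -> ["a", "b"]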
-            # orix based transformation ends up in positive half space and with degrees=False
-            # as radians but the from_matrix command above might miss one rotation
-
-            # inconsistency f32 in file although specification states float
-            # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
-            #                     direction='lab2crystal',
-            #                     degrees=is_degrees)
-
-            # compute explicit hexagon grid cells center of mass pixel positions
-            # TODO::currently assuming HexGrid
-            self.tmp[ckey]["scan_point_x"] = np.asarray(
-                np.linspace(0, self.tmp[ckey]["n_x"] - 1,
-                            num=self.tmp[ckey]["n_x"],
-                            endpoint=True) * self.tmp[ckey]["s_x"] + 0., np.float32)
-
-            self.tmp[ckey]["scan_point_y"] = np.asarray(
-                np.linspace(0, self.tmp[ckey]["n_y"] - 1,
-                            num=self.tmp[ckey]["n_y"],
-                            endpoint=True) * self.tmp[ckey]["s_y"] + 0., np.float32)
-        else:
+        if f"{grp_name}" not in fp:
             raise ValueError(f"Unable to parse {grp_name} !")
+
+        n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]
+        if np.shape(fp[f"{grp_name}"]) != (n_pts,) and n_pts > 0:
+            raise ValueError(f"Unexpected shape of {grp_name} !")
+
+        dat = fp[f"{grp_name}"]
+        self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32)
+        # index of phase, 0 if not indexed
+        # # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
+        self.tmp[ckey]["phase_id"] = np.zeros((n_pts,), np.int32)
+        self.tmp[ckey]["ci"] = np.zeros((n_pts,), np.float32)
+
+        for i in np.arange(0, n_pts):
+            # check shape of internal virtual chunked number array
+            r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))])
+            self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False)
+            self.tmp[ckey]["phase_id"][i] = dat[i][2]
+            self.tmp[ckey]["ci"][i] = dat[i][3]
+
+        # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions !
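A sketch of the conversion the TODO above refers to, assuming orix's Rotation API as used in the loop; the orientation matrix is a made-up identity:

import numpy as np
from orix.quaternion import Rotation

om = np.eye(3)  # hypothetical per-scan-point orientation matrix
r = Rotation.from_matrix([om])
print(r.to_euler(degrees=False))  # Bunge Euler triplet in radians, here [[0. 0. 0.]]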
+        # orix based transformation ends up in positive half space and with degrees=False
+        # as radians but the from_matrix command above might miss one rotation
+
+        # inconsistency f32 in file although specification states float
+        # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
+        #                     direction='lab2crystal',
+        #                     degrees=is_degrees)
+
+        # compute explicit hexagon grid cells center of mass pixel positions
+        # TODO::currently assuming s_x and s_y are already the correct center of mass
+        # distances for hexagonal or square tiling of R^2
+        # self.tmp[ckey]["grid_type"] in ["HexGrid", "SqrGrid"]:
+        self.tmp[ckey]["scan_point_x"] = np.asarray(
+            np.linspace(0, self.tmp[ckey]["n_x"] - 1,
+                        num=self.tmp[ckey]["n_x"],
+                        endpoint=True) * self.tmp[ckey]["s_x"] + 0., np.float32)
+
+        self.tmp[ckey]["scan_point_y"] = np.asarray(
+            np.linspace(0, self.tmp[ckey]["n_y"] - 1,
+                        num=self.tmp[ckey]["n_y"],
+                        endpoint=True) * self.tmp[ckey]["s_y"] + 0., np.float32)
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
index 06d7a1027..c1f02a775 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
@@ -36,14 +36,8 @@
 import matplotlib.pyplot as plt

 from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
-from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
-
-
-def all_equal(iterable):
-    g = groupby(iterable)
-    return next(g, True) and not next(g, False)
-
-BRUKER_MAP_SPACEGROUP = {"F m#ovl3m": 225}
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import EBSD_MAP_SPACEGROUP, \
+    read_strings_from_dataset, all_equal, format_euler_parameterization


 class HdfFiveBrukerEspritReader(HdfFiveBaseParser):
@@ -68,27 +62,28 @@ def init_support(self):

     def check_if_supported(self):
         """Check if instance matches all constraints to qualify as supported Bruker H5"""
-        self.supported = True  # try to falsify
+        self.supported = 0  # voting-based
         with h5py.File(self.file_path, "r") as h5r:
-            if "/Manufacturer" in h5r:
-                self.version["tech_partner"] \
-                    = read_strings_from_dataset(h5r["/Manufacturer"][()])
-                if self.version["tech_partner"] not in self.supported_version["tech_partner"]:
+            req_fields = ["Manufacturer", "Version"]
+            for req_field in req_fields:
+                if f"/{req_field}" not in h5r:
                     self.supported = False
+                    return
+
+            self.version["tech_partner"] = read_strings_from_dataset(h5r["/Manufacturer"][()])
+            if self.version["tech_partner"] in self.supported_version["tech_partner"]:
+                self.supported += 1
+            self.version["schema_version"] = read_strings_from_dataset(h5r["/Version"][()])
+            if self.version["schema_version"] in self.supported_version["schema_version"]:
+                self.supported += 1
+
+        if self.supported == 2:
+            self.version["schema_name"] = self.supported_version["schema_name"]
+            self.version["writer_name"] = self.supported_version["writer_name"]
+            self.version["writer_version"] = self.supported_version["writer_version"]
+            self.supported = True
         else:
             self.supported = False
-            if "/Version" in h5r:
-                self.version["schema_version"] \
-                    = read_strings_from_dataset(h5r["/Version"][()])
-                if self.version["schema_version"] not in self.supported_version["schema_version"]:
-                    self.supported = False
-            else:
-                self.supported = False
-
-        if self.supported is True:
-            self.version["schema_name"] = self.supported_version["schema_name"]
-            self.version["writer_name"] = self.supported_version["writer_name"]
-            self.version["writer_version"] = self.supported_version["writer_version"]

     def parse_and_normalize(self):
         """Read and normalize away Bruker-specific formatting with an equivalent in NXem."""
@@ -107,171 +102,135 @@ def parse_and_normalize(self):

     def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
         grp_name = f"{self.prfx}/EBSD/Header"
-        if f"{grp_name}/NCOLS" in fp:  # TODO::what is y and x depends on coordinate system
-            self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()]
-        else:
-            raise ValueError(f"Unable to parse {grp_name}/NCOLS !")
-
-        if f"{grp_name}/NROWS" in fp:
-            self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()]
-        else:
-            raise ValueError(f"Unable to parse {grp_name}/NROWS !")
-
-        if f"{grp_name}/SEPixelSizeX" in fp:
-            self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()]
-            self.tmp[ckey]["s_unit"] = "µm"  # TODO::always micron?
-        else:
-            raise ValueError(f"Unable to parse {grp_name}/SEPixelSizeX !")
-
-        if f"{grp_name}/SEPixelSizeY" in fp:
-            self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()]
-        else:
-            raise ValueError(f"Unable to parse {grp_name}/SEPixelSizeY !")
-        # TODO::check that all data in the self.oina are consistent
+        if f"{grp_name}" not in fp:
+            raise ValueError(f"Unable to parse {grp_name} !")
+
+        req_fields = ["NCOLS", "NROWS", "SEPixelSizeX", "SEPixelSizeY"]
+        for req_field in req_fields:
+            if f"{grp_name}/{req_field}" not in fp:
+                raise ValueError(f"Unable to parse {grp_name}/{req_field} !")
+
+        self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()]
+        self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()]
+        self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()]
+        self.tmp[ckey]["s_unit"] = "µm"  # TODO::always micron?
+        self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()]
+        # TODO::check that all data are consistent
+        # TODO::what is y and x depends on coordinate system

     def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
-        grp_name = f"{self.prfx}/EBSD/Header"
+        grp_name = f"{self.prfx}/EBSD/Header/Phases"
+        if f"{grp_name}" not in fp:
+            raise ValueError(f"Unable to parse {grp_name} !")
+
         # Phases, contains a subgroup for each phase where the name
         # of each subgroup is the index of the phase starting at 1.
-        if f"{grp_name}/Phases" in fp:
-            phase_ids = sorted(list(fp[f"{grp_name}/Phases"]), key=int)
-            self.tmp[ckey]["phase"] = []
-            self.tmp[ckey]["space_group"] = []
-            self.tmp[ckey]["phases"] = {}
-            for phase_id in phase_ids:
-                if phase_id.isdigit() is True:
-                    self.tmp[ckey]["phases"][int(phase_id)] = {}
-                    sub_grp_name = f"/{grp_name}/Phases/{phase_id}"
-                    # Name
-                    if f"{sub_grp_name}/Name" in fp:
-                        phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Name"][()])
-                        self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name
-                    else:
-                        raise ValueError(f"Unable to parse {sub_grp_name}/Name !")
-
-                    # Reference not available
-                    self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a"
-
-                    # LatticeConstants, a, b, c (angstrom) followed by alpha, beta and gamma angles in degree
-                    if f"{sub_grp_name}/LatticeConstants" in fp:
-                        values = np.asarray(fp[f"{sub_grp_name}/LatticeConstants"][:].flatten())
-                        a_b_c = values[0:3]
-                        angles = values[3:6]
-                        self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \
-                            = a_b_c * 0.1
-                        self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \
-                            = angles
-                    else:
-                        raise ValueError(f"Unable to parse {sub_grp_name}/LatticeConstants !")
-
-                    # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index.
-                    # The attribute Symbol contains the string representation, for example P m -3 m.
-                    if f"{sub_grp_name}/SpaceGroup" in fp:
-                        spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()])
-                        if spc_grp in BRUKER_MAP_SPACEGROUP.keys():
-                            space_group = BRUKER_MAP_SPACEGROUP[spc_grp]
-                            self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group
-                        else:
-                            raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !")
-                    else:
-                        raise ValueError(f"Unable to parse {sub_grp_name}/SpaceGroup !")
-                    # formatting is a nightmare F m#ovl3m for F m 3bar m...
-                    if len(self.tmp[ckey]["space_group"]) > 0:
-                        self.tmp[ckey]["space_group"].append(space_group)
-                    else:
-                        self.tmp[ckey]["space_group"] = [space_group]
-
-                    if len(self.tmp[ckey]["phase"]) > 0:
-                        self.tmp[ckey]["phase"].append(
-                            Structure(title=phase_name, atoms=None,
-                                      lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
-                                                      angles[0], angles[1], angles[2])))
-                    else:
-                        self.tmp[ckey]["phase"] \
-                            = [Structure(title=phase_name, atoms=None,
-                                         lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
-                                                         angles[0], angles[1], angles[2]))]
-        else:
-            raise ValueError(f"Unable to parse {grp_name}/Phases !")
+        phase_ids = sorted(list(fp[f"{grp_name}"]), key=int)
+        self.tmp[ckey]["phase"] = []
+        self.tmp[ckey]["space_group"] = []
+        self.tmp[ckey]["phases"] = {}
+        for phase_id in phase_ids:
+            if phase_id.isdigit() is True:
+                self.tmp[ckey]["phases"][int(phase_id)] = {}
+                sub_grp_name = f"/{grp_name}/{phase_id}"
+                req_fields = ["Name", "LatticeConstants", "SpaceGroup"]
+                for req_field in req_fields:
+                    if f"{sub_grp_name}/{req_field}" not in fp:
+                        raise ValueError(f"Unable to parse {sub_grp_name}/{req_field} !")
+                # Name
+                phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Name"][()])
+                self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name
+
+                # Reference not available
+                self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a"
+
+                # LatticeConstants, a, b, c (angstrom) followed by alpha, beta and gamma angles in degree
+                values = np.asarray(fp[f"{sub_grp_name}/LatticeConstants"][:].flatten())
+                a_b_c = values[0:3]
+                angles = values[3:6]
+                self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c * 0.1
+                self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = angles
+
+                # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index.
+                # The attribute Symbol contains the string representation, for example P m -3 m.
+                spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()])
+                if spc_grp in EBSD_MAP_SPACEGROUP.keys():
+                    space_group = EBSD_MAP_SPACEGROUP[spc_grp]
+                    self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group
+                else:
+                    raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !")
+
+                # formatting is a nightmare F m#ovl3m for F m 3bar m...
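The EBSD_MAP_SPACEGROUP lookup exists because vendors spell space-group symbols idiosyncratically; the one entry known from this diff is Bruker's F m#ovl3m for Fm-3m (space group 225), taken from the removed BRUKER_MAP_SPACEGROUP above. A hedged sketch of such a mapping; the dictionary name here is illustrative:

# map vendor-formatted space-group symbols to International Tables numbers
VENDOR_SPACEGROUP_SKETCH = {"F m#ovl3m": 225}  # from the removed BRUKER_MAP_SPACEGROUP

def space_group_number(symbol: str) -> int:
    if symbol not in VENDOR_SPACEGROUP_SKETCH:
        raise ValueError(f"Unable to decode improperly formatted space group {symbol} !")
    return VENDOR_SPACEGROUP_SKETCH[symbol]

print(space_group_number("F m#ovl3m"))  # -> 225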
+                if len(self.tmp[ckey]["space_group"]) > 0:
+                    self.tmp[ckey]["space_group"].append(space_group)
+                else:
+                    self.tmp[ckey]["space_group"] = [space_group]
+
+                if len(self.tmp[ckey]["phase"]) > 0:
+                    self.tmp[ckey]["phase"].append(
+                        Structure(title=phase_name, atoms=None,
+                                  lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
+                                                  angles[0], angles[1], angles[2])))
+                else:
+                    self.tmp[ckey]["phase"] \
+                        = [Structure(title=phase_name, atoms=None,
+                                     lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
+                                                     angles[0], angles[1], angles[2]))]

     def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         # no official documentation yet from Bruker but seems inspired by H5EBSD
         grp_name = f"{self.prfx}/EBSD/Data"
-        print(f"Parsing {grp_name}")
-        # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1).
-        n_pts = 0
-        if f"{grp_name}/phi1" in fp and f"{grp_name}/PHI" in fp and f"{grp_name}/phi2" in fp:
-            n_pts = (np.shape(fp[f"{grp_name}/phi1"][:])[0],
-                     np.shape(fp[f"{grp_name}/PHI"][:])[0],
-                     np.shape(fp[f"{grp_name}/phi2"][:])[0])
-            if all_equal(n_pts) is True and n_pts[0] > 0:
-                self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32)
-                column_id = 0
-                for angle in ["phi1", "PHI", "phi2"]:
-                    self.tmp[ckey]["euler"][:, column_id] \
-                        = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32)
-                    column_id += 1
-                is_degrees = False
-                is_negative = False
-                for column_id in [0, 1, 2]:
-                    if np.max(np.abs(self.tmp[ckey]["euler"][:, column_id])) > 2. * np.pi:
-                        is_degrees = True
-                    if np.min(self.tmp[ckey]["euler"][:, column_id]) < 0.:
-                        is_negative = True
-                if is_degrees is True:
-                    self.tmp[ckey]["euler"] = self.tmp[ckey]["euler"] / 180. * np.pi
-                if is_negative is True:
-                    symmetrize = [2. * np.pi, np.pi, 2. * np.pi]
-                    # TODO::symmetry in Euler space really at PHI=180deg?
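The removed heuristic above is replaced by format_euler_parameterization, whose body is not part of this excerpt. Purely as a hedged sketch of the kind of range folding such a helper might perform (an assumption, not the actual implementation):

import numpy as np

def normalize_euler_sketch(euler_rad: np.ndarray) -> np.ndarray:
    """Fold Bunge Euler triplets into [0, 2pi) x [0, pi] x [0, 2pi)."""
    out = np.copy(euler_rad)
    out[:, 0] = np.mod(out[:, 0], 2. * np.pi)
    out[:, 2] = np.mod(out[:, 2], 2. * np.pi)
    out[:, 1] = np.abs(out[:, 1])  # one possible answer to the PHI TODO above
    return out

print(normalize_euler_sketch(np.asarray([[-0.1, -0.2, 6.5]])))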
-                    for column_id in [0, 1, 2]:
-                        self.tmp[ckey]["euler"][:, column_id] \
-                            = self.tmp[ckey]["euler"][:, column_id] + symmetrize[column_id]
-                n_pts = n_pts[0]
-                # inconsistency f32 in file although specification states float
-                # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
-                #                     direction='lab2crystal',
-                #                     degrees=is_degrees)
-        else:
-            raise ValueError(f"Unable to parse {grp_name}/phi1, ../PHI, ../phi2 !")
+        if f"{grp_name}" not in fp:
+            raise ValueError(f"Unable to parse {grp_name} !")
+
+        req_fields = ["phi1", "PHI", "phi2", "Phase", "X SAMPLE", "Y SAMPLE", "MAD"]
+        for req_field in req_fields:
+            if f"{grp_name}/{req_field}" not in fp:
+                raise ValueError(f"Unable to parse {grp_name}/{req_field} !")
+
+        # Euler
+        n_pts = (np.shape(fp[f"{grp_name}/phi1"][:])[0],
+                 np.shape(fp[f"{grp_name}/PHI"][:])[0],
+                 np.shape(fp[f"{grp_name}/phi2"][:])[0])
+        if all_equal(n_pts) is True and n_pts[0] == (self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]):
+            self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32)
+            column_id = 0
+            for angle in ["phi1", "PHI", "phi2"]:
+                self.tmp[ckey]["euler"][:, column_id] \
+                    = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32)
+                column_id += 1
+            self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"])
+            n_pts = n_pts[0]
+            # inconsistency f32 in file although specification states float
+            # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
+            #                     direction='lab2crystal',
+            #                     degrees=is_degrees)

         # index of phase, 0 if not indexed
-        # # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
-        if f"{grp_name}/Phase" in fp:
-            if np.shape(fp[f"{grp_name}/Phase"][:])[0] == n_pts:
-                self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32)
-            else:
-                raise ValueError(f"{grp_name}/Phase has unexpected shape !")
+        # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
+        if np.shape(fp[f"{grp_name}/Phase"][:])[0] == n_pts:
+            self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32)
         else:
-            raise ValueError(f"Unable to parse {grp_name}/Phase !")
+            raise ValueError(f"{grp_name}/Phase has unexpected shape !")

         # X
-        if f"{grp_name}/X SAMPLE" in fp:
-            if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts:
-                self.tmp[ckey]["scan_point_x"] \
-                    = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32)
-            else:
-                raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !")
+        if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts:
+            self.tmp[ckey]["scan_point_x"] \
+                = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32)
         else:
-            raise ValueError(f"Unable to parse {grp_name}/X SAMPLE !")
+            raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !")

         # Y
-        if f"{grp_name}/Y SAMPLE" in fp:
-            if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts:
-                self.tmp[ckey]["scan_point_y"] \
-                    = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32)
-            else:
-                raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !")
+        if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts:
+            self.tmp[ckey]["scan_point_y"] \
+                = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32)
         else:
-            raise ValueError(f"Unable to parse {grp_name}/Y SAMPLE !")
+            raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !")

         # Band Contrast is not stored in Bruker but Radon Quality or MAD
         # but this is something different as it is the mean angular deviation between
         # indexed with simulated and measured pattern
-        if f"{grp_name}/MAD" in fp:
-            if np.shape(fp[f"{grp_name}/MAD"][:])[0] == n_pts:
-                self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32)
-            else:
-                raise ValueError(f"{grp_name}/MAD has unexpected shape !")
+        if np.shape(fp[f"{grp_name}/MAD"][:])[0] == n_pts:
+            self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32)
         else:
-            raise ValueError(f"Unable to parse {grp_name}/MAD !")
+            raise ValueError(f"{grp_name}/MAD has unexpected shape !")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
index 8f59f54b6..f268037e3 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
@@ -17,13 +17,29 @@
 #
 """(Sub-)parser mapping concepts and content from community *.h5/*.h5ebsd files on NXem."""

+import os
+from typing import Dict, Any, List
 import numpy as np
 import h5py

-from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveGenericReader
-from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
+# import imageio.v3 as iio
+from PIL import Image as pil

+import diffsims
+import orix
+from diffpy.structure import Lattice, Structure
+from orix import plot
+from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
+from orix.quaternion import Rotation
+from orix.vector import Vector3d

-class HdfFiveCommunityReader(HdfFiveGenericReader):
+import matplotlib.pyplot as plt
+
+from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import EBSD_MAP_SPACEGROUP, \
+    read_strings_from_dataset, all_equal, format_euler_parameterization
+
+
+class HdfFiveCommunityReader(HdfFiveBaseParser):
     """Read modified H5EBSD (likely from Britton group)"""
     def __init__(self, file_path: str = ""):
         super().__init__(file_path)
@@ -47,23 +63,178 @@ def init_support(self):

     def check_if_supported(self):
         # check if instance to process matches any of these constraints
+        self.supported = 0  # voting-based
         with h5py.File(self.file_path, "r") as h5r:
-            if "/Manufacturer" in h5r:
-                self.version["tech_partner"] \
-                    = super().read_strings_from_dataset(h5r["/Manufacturer"][()])
-                if self.version["tech_partner"] not in self.supported_version["tech_partner"]:
-                    self.supported = False
-            else:
-                self.supported = False
-            if "/Version" in h5r:
-                self.version["schema_version"] \
-                    = super().read_strings_from_dataset(h5r["/Version"][()])
-                if self.version["schema_version"] not in self.supported_version["schema_version"]:
+            req_fields = ["Manufacturer", "Version"]
+            for req_field in req_fields:
+                if f"/{req_field}" not in h5r:
                     self.supported = False
-            else:
-                self.supported = False
+                    return

-        if self.supported is True:
+            self.version["tech_partner"] = read_strings_from_dataset(h5r["/Manufacturer"][()])
+            if self.version["tech_partner"] in self.supported_version["tech_partner"]:
+                self.supported += 1
+            self.version["schema_version"] = read_strings_from_dataset(h5r["/Version"][()])
+            if self.version["schema_version"] in self.supported_version["schema_version"]:
+                self.supported += 1
+
+        if self.supported == 2:
             self.version["schema_name"] = self.supported_version["schema_name"]
             self.version["writer_name"] = self.supported_version["writer_name"]
             self.version["writer_version"] = self.supported_version["writer_version"]
+            self.supported = True
+        else:
+            self.supported = False
+
+    def parse_and_normalize(self):
+        """Read and normalize away community-specific formatting with an equivalent in NXem."""
+        with h5py.File(f"{self.file_path}", "r") as h5r:
+            cache_id = 0
+            grp_names = list(h5r["/"])
+            for grp_name in grp_names:
+                if grp_name not in ["Version", "Manufacturer"]:
+                    self.prfx = f"/{grp_name}"
+                    ckey = self.init_named_cache(f"ebsd{cache_id}")
+                    self.parse_and_normalize_group_ebsd_header(h5r, ckey)
+                    self.parse_and_normalize_group_ebsd_phases(h5r, ckey)
+                    self.parse_and_normalize_group_ebsd_data(h5r, ckey)
+                    # add more information to pass to hfive parser
+                    cache_id += 1
+
+    def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
+        grp_name = f"{self.prfx}/EBSD/Header"
+        if f"{grp_name}" not in fp:
+            raise ValueError(f"Unable to parse {grp_name} !")
+
+        req_fields = ["NCOLS", "NROWS", "SEPixelSizeX", "SEPixelSizeY"]
+        for req_field in req_fields:
+            if f"{grp_name}/{req_field}" not in fp:
+                raise ValueError(f"Unable to parse {grp_name}/{req_field} !")
+
+        self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()]
+        self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()]
+        self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()]
+        self.tmp[ckey]["s_unit"] = "µm"  # TODO::always micron?
+        self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()]
+        # TODO::check that all data are consistent
+        # TODO::what is y and x depends on coordinate system
+        # TODO::why is SEPixelSize* half the value of *STEP for * X and Y respectively?
+
+    def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
+        grp_name = f"{self.prfx}/EBSD/Header/Phases"
+        if f"{grp_name}" not in fp:
+            raise ValueError(f"Unable to parse {grp_name} !")
+
+        # Phases, contains a subgroup for each phase where the name
+        # of each subgroup is the index of the phase starting at 1.
+        phase_ids = sorted(list(fp[f"{grp_name}"]), key=int)
+        self.tmp[ckey]["phase"] = []
+        self.tmp[ckey]["space_group"] = []
+        self.tmp[ckey]["phases"] = {}
+        for phase_id in phase_ids:
+            if phase_id.isdigit() is True:
+                self.tmp[ckey]["phases"][int(phase_id)] = {}
+                sub_grp_name = f"/{grp_name}/{phase_id}"
+                req_fields = ["Name", "LatticeConstants", "SpaceGroup"]
+                for req_field in req_fields:
+                    if f"{sub_grp_name}/{req_field}" not in fp:
+                        raise ValueError(f"Unable to parse {sub_grp_name}/{req_field} !")
+                # Name
+                phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Name"][()])
+                self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name
+
+                # Reference not available
+                self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a"
+
+                # LatticeConstants, a, b, c (angstrom) followed by alpha, beta and gamma angles in degree
+                values = np.asarray(fp[f"{sub_grp_name}/LatticeConstants"][:].flatten())
+                a_b_c = values[0:3]
+                angles = values[3:6]
+                self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c * 0.1
+                self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = angles
+
+                # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index.
+                # The attribute Symbol contains the string representation, for example P m -3 m.
+                # formatting is a nightmare F m#ovl3m for F m 3bar m... but IT i.e.
+ # international table of crystallography identifier + spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()]) + if spc_grp in EBSD_MAP_SPACEGROUP.keys(): + space_group = EBSD_MAP_SPACEGROUP[spc_grp] + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + else: + raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !") + + + if len(self.tmp[ckey]["space_group"]) > 0: + self.tmp[ckey]["space_group"].append(space_group) + else: + self.tmp[ckey]["space_group"] = [space_group] + + if len(self.tmp[ckey]["phase"]) > 0: + self.tmp[ckey]["phase"].append( + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) + else: + self.tmp[ckey]["phase"] \ + = [Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] + + def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): + # no official documentation yet from Bruker but seems inspired by H5EBSD + grp_name = f"{self.prfx}/EBSD/Data" + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + + req_fields = ["phi1", "PHI", "phi2", "Phase", "X SAMPLE", "Y SAMPLE", "MAD"] + for req_field in req_fields: + if f"{grp_name}/{req_field}" not in fp: + raise ValueError(f"Unable to parse {grp_name}/{req_field} !") + + # Euler + n_pts = (np.shape(fp[f"{grp_name}/phi1"][:])[0], + np.shape(fp[f"{grp_name}/PHI"][:])[0], + np.shape(fp[f"{grp_name}/phi2"][:])[0]) + if all_equal(n_pts) is True and n_pts[0] == (self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]): + self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32) + column_id = 0 + for angle in ["phi1", "PHI", "phi2"]: + self.tmp[ckey]["euler"][:, column_id] \ + = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) + column_id += 1 + self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) + n_pts = n_pts[0] + # inconsistency f32 in file although specification states float + # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"], + # direction='lab2crystal', + # degrees=is_degrees) + + # index of phase, 0 if not indexed + # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 + if np.shape(fp[f"{grp_name}/Phase"][:])[0] == n_pts: + self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32) + else: + raise ValueError(f"{grp_name}/Phase has unexpected shape !") + + # X + if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts: + self.tmp[ckey]["scan_point_x"] \ + = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32) + else: + raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !") + + # Y + if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts: + self.tmp[ckey]["scan_point_y"] \ + = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32) + else: + raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !") + + # Band Contrast is not stored in Bruker but Radon Quality or MAD + # but this is s.th. 
different as it is the mean angular deviation between + # indexed with simulated and measured pattern + if np.shape(fp[f"{grp_name}/MAD"][:])[0] == n_pts: + self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32) + else: + raise ValueError(f"{grp_name}/MAD has unexpected shape !") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index babcbe6af..46925ffaa 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -17,10 +17,27 @@ # """(Sub-)parser mapping concepts and content from EDAX/AMETEK *.oh5/*.h5 (OIM Analysis) files on NXem.""" +import os +from typing import Dict, Any, List import numpy as np import h5py +from itertools import groupby +# import imageio.v3 as iio +from PIL import Image as pil + +import diffsims +import orix +from diffpy.structure import Lattice, Structure +from orix import plot +from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList +from orix.quaternion import Rotation +from orix.vector import Vector3d + +import matplotlib.pyplot as plt + from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser -from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset +from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ + read_strings_from_dataset, format_euler_parameterization class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser): @@ -47,25 +64,153 @@ def init_support(self): def check_if_supported(self): """Check if instance matches all constraints to qualify as old EDAX""" - self.supported = False + self.supported = 0 # voting-based with h5py.File(self.file_path, "r") as h5r: - if "/Manufacturer" in h5r: - self.version["tech_partner"] \ - = super().read_strings_from_dataset(h5r["/Manufacturer"][()]) - # for 8.6.0050 but for 8.5.1002 it is a matrix, this is because how strings end up in HDF5 allowed for so much flexibility! - if self.version["tech_partner"] not in self.supported_version["tech_partner"]: - self.supported = False - else: - self.supported = False - if "/Version" in h5r: - self.version["schema_version"] \ - = super().read_strings_from_dataset(h5r["/Version"][()]) - if self.version["schema_version"] not in self.supported_version["schema_version"]: + req_fields = ["Manufacturer", "Version"] + for req_field in req_fields: + if f"/{req_field}" not in h5r: self.supported = False - else: - self.supported = False + return + + self.version["tech_partner"] = read_strings_from_dataset(h5r["/Manufacturer"][()]) + # for 8.6.0050 but for 8.5.1002 it is a matrix, this is because how strings end up in HDF5 allowed for so much flexibility! 
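+ # a hypothetical illustration: one writer version may store b"EDAX" as a
+ # scalar bytes dataset, another as an array like np.array([b"EDAX"], dtype="S5");
+ # read_strings_from_dataset (utils/hfive_utils.py) normalizes both to the str "EDAX"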
+ if self.version["tech_partner"] in self.supported_version["tech_partner"]: + self.supported += 1 + self.version["schema_version"] = read_strings_from_dataset(h5r["/Version"][()]) + if self.version["schema_version"] in self.supported_version["schema_version"]: + self.supported += 1 - if self.supported is True: + if self.supported == 2: self.version["schema_name"] = self.supported_version["schema_name"] self.version["writer_name"] = self.supported_version["writer_name"] self.version["writer_version"] = self.supported_version["writer_version"] + self.supported = True + else: + self.supported = False + + def parse_and_normalize(self): + """Read and normalize away EDAX-specific formatting with an equivalent in NXem.""" + with h5py.File(f"{self.file_path}", "r") as h5r: + cache_id = 0 + grp_names = list(h5r["/"]) + for grp_name in grp_names: + if grp_name not in ["Version", "Manufacturer"]: + self.prfx = f"/{grp_name}" + ckey = self.init_named_cache(f"ebsd{cache_id}") + self.parse_and_normalize_group_ebsd_header(h5r, ckey) + self.parse_and_normalize_group_ebsd_phases(h5r, ckey) + self.parse_and_normalize_group_ebsd_data(h5r, ckey) + # add more information to pass to hfive parser + cache_id += 1 + + def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/Header" + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + + grid_type = None + n_pts = 0 + req_fields = ["Grid Type", "Step X", "Step Y", "nColumns", "nRows"] + for req_field in req_fields: + if f"{grp_name}/{req_field}" not in fp: + raise ValueError(f"Unable to parse {grp_name}/{req_field} !") + + grid_type = read_strings_from_dataset(fp[f"{grp_name}/Grid Type"][()]) + if grid_type not in ["HexGrid", "SqrGrid"]: + raise ValueError(f"Grid Type {grid_type} is currently not supported !") + self.tmp[ckey]["grid_type"] = grid_type + self.tmp[ckey]["s_x"] = fp[f"{grp_name}/Step X"][()] + self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron? + self.tmp[ckey]["n_x"] = fp[f"{grp_name}/nColumns"][()] + self.tmp[ckey]["s_y"] = fp[f"{grp_name}/Step Y"][()] + self.tmp[ckey]["n_y"] = fp[f"{grp_name}/nRows"][()] + # TODO::check that all data are consistent + + def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/Header/Phase" + # Phases, contains a subgroup for each phase where the name + # of each subgroup is the index of the phase starting at 1. 
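+ # i.e. a layout like the following, with hypothetical phase names:
+ # /<scan>/EBSD/Header/Phase/1/MaterialName = "Nickel"
+ # /<scan>/EBSD/Header/Phase/2/MaterialName = "Copper"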
+ if f"{grp_name}" in fp: + phase_ids = sorted(list(fp[f"{grp_name}"]), key=int) + self.tmp[ckey]["phase"] = [] + self.tmp[ckey]["space_group"] = [] + self.tmp[ckey]["phases"] = {} + for phase_id in phase_ids: + if phase_id.isdigit() is True: + self.tmp[ckey]["phases"][int(phase_id)] = {} + sub_grp_name = f"{grp_name}/{phase_id}" + # Name + if f"{sub_grp_name}/MaterialName" in fp: + phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/MaterialName"][0]) + self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name + else: + raise ValueError(f"Unable to parse {sub_grp_name}/MaterialName !") + + # Reference not available only Info but this can be empty + self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a" + + req_fields = ["a", "b", "c", "alpha", "beta", "gamma"] + for req_field in req_fields: + if f"{sub_grp_name}/Lattice Constant {req_field}" not in fp: + raise ValueError(f"Unable to parse ../Lattice Constant {req_field} !") + a_b_c = [fp[f"{sub_grp_name}/Lattice Constant a"][()], + fp[f"{sub_grp_name}/Lattice Constant b"][()], + fp[f"{sub_grp_name}/Lattice Constant c"][()]] + angles = [fp[f"{sub_grp_name}/Lattice Constant alpha"][()], + fp[f"{sub_grp_name}/Lattice Constant beta"][()], + fp[f"{sub_grp_name}/Lattice Constant gamma"][()]] + self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ + = np.asarray(a_b_c, np.float32) * 0.1 + self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ + = np.asarray(angles, np.float32) + + # Space Group not stored, only laue group, point group and symmetry + # problematic because mapping is not bijective! + # if you know the space group we know laue and point group and symmetry + # but the opposite direction leaves room for ambiguities + space_group = "n/a" + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + + if len(self.tmp[ckey]["space_group"]) > 0: + self.tmp[ckey]["space_group"].append(space_group) + else: + self.tmp[ckey]["space_group"] = [space_group] + + if len(self.tmp[ckey]["phase"]) > 0: + self.tmp[ckey]["phase"].append( + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) + else: + self.tmp[ckey]["phase"] \ + = [Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] + else: + raise ValueError(f"Unable to parse {grp_name} !") + + def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): + grp_name = f"{self.prfx}/EBSD/Data" + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + + req_fields = ["CI", "Phase", "Phi1", "Phi", "Phi2", "X Position", "Y Position"] + for req_field in req_fields: + if f"{grp_name}/{req_field}" not in fp: + raise ValueError(f"Unable to parse {grp_name}/{req_field} !") + + n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"] + self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32) + self.tmp[ckey]["euler"][:, 0] = np.asarray(fp[f"{grp_name}/Phi1"][:], np.float32) + self.tmp[ckey]["euler"][:, 1] = np.asarray(fp[f"{grp_name}/Phi"][:], np.float32) + self.tmp[ckey]["euler"][:, 2] = np.asarray(fp[f"{grp_name}/Phi2"][:], np.float32) + # TODO::seems to be the situation in the example but there is no documentation + self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) + + self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32) + # promoting int8 to int32 no problem + self.tmp[ckey]["ci"] = np.asarray(fp[f"{grp_name}/CI"][:], np.float32) + 
self.tmp[ckey]["scan_point_x"] = np.asarray( + fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"] + 0., np.float32) + self.tmp[ckey]["scan_point_y"] = np.asarray( + fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"] + 0., np.float32) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index b3bfc101f..ecdb28c5d 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -67,38 +67,35 @@ def init_support(self): def check_if_supported(self): """Check if instance matches all constraints to qualify as supported H5OINA""" - self.supported = True # try to falsify + self.supported = 0 # voting-based with h5py.File(self.file_path, "r") as h5r: - if "/Manufacturer" in h5r: - self.version["tech_partner"] \ - = read_strings_from_dataset(h5r["/Manufacturer"][()]) - if self.version["tech_partner"] not in self.supported_version["tech_partner"]: - # print(f"{self.version['tech_partner']} is not {self.version['tech_partner']} !") + req_fields = ["Manufacturer", "Software Version", "Format Version"] + for req_field in req_fields: + if f"/{req_field}" not in h5r: self.supported = False - else: - self.supported = False + return + + self.version["tech_partner"] = read_strings_from_dataset(h5r["/Manufacturer"][()]) + if self.version["tech_partner"] in self.supported_version["tech_partner"]: + # print(f"{self.version['tech_partner']} is not {self.version['tech_partner']} !") + self.supported += 1 # only because we know (thanks to Philippe Pinard who wrote the H5OINA writer) that different # writer versions should implement the different HDF version correctly we can lift the # constraint on the writer_version for which we had examples available - if "/Software Version" in h5r: - self.version["writer_version"] \ - = read_strings_from_dataset(h5r["/Software Version"][()]) - if self.version["writer_version"] not in self.supported_version["writer_version"]: - self.supported = False - else: - self.supported = False - if "/Format Version" in h5r: - self.version["schema_version"] \ - = read_strings_from_dataset(h5r["/Format Version"][()]) - if self.version["schema_version"] not in self.supported_version["schema_version"]: - self.supported = False + self.version["writer_version"] = read_strings_from_dataset(h5r["/Software Version"][()]) + if self.version["writer_version"] in self.supported_version["writer_version"]: + self.supported += 1 + self.version["schema_version"] = read_strings_from_dataset(h5r["/Format Version"][()]) + if self.version["schema_version"] in self.supported_version["schema_version"]: + self.supported += 1 + + if self.supported == 3: + self.version["schema_name"] = self.supported_version["schema_name"] + self.version["writer_name"] = self.supported_version["writer_name"] + self.supported = True else: self.supported = False - if self.supported is True: - self.version["schema_name"] = self.supported_version["schema_name"] - self.version["writer_name"] = self.supported_version["writer_name"] - def parse_and_normalize(self): """Read and normalize away Oxford-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: @@ -116,152 +113,134 @@ def parse_and_normalize(self): cache_id += 1 def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): - # X Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Width in pixels, Line scan: Length in pixels. 
grp_name = f"{self.prfx}/EBSD/Header" - if f"{grp_name}/X Cells" in fp: - self.tmp[ckey]["n_x"] = fp[f"{grp_name}/X Cells"][0] - else: - raise ValueError(f"Unable to parse {grp_name}/X Cells !") + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + + req_fields = ["X Cells", "Y Cells", "X Step", "Y Step"] + for req_field in req_fields: + if f"{grp_name}/{req_field}" not in fp: + raise ValueError(f"Unable to parse {grp_name}/{req_field} !") + + # X Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Width in pixels, Line scan: Length in pixels. + self.tmp[ckey]["n_x"] = fp[f"{grp_name}/X Cells"][0] # Y Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Height in pixels. Line scan: Always set to 1. - if f"{grp_name}/Y Cells" in fp: - self.tmp[ckey]["n_y"] = fp[f"{grp_name}/Y Cells"][0] - else: - raise ValueError(f"Unable to parse{grp_name}/Y Cells !") + self.tmp[ckey]["n_y"] = fp[f"{grp_name}/Y Cells"][0] # X Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along x-axis in micrometers. Line scan: step size along the line scan in micrometers. - if f"{grp_name}/X Step" in fp: - if read_strings_from_dataset(fp[f"{grp_name}/X Step"].attrs["Unit"]) == "um": - self.tmp[ckey]["s_x"] = fp[f"{grp_name}/X Step"][0] - self.tmp[ckey]["s_unit"] = "µm" - else: - raise ValueError(f"Unexpected X Step Unit attribute !") + if read_strings_from_dataset(fp[f"{grp_name}/X Step"].attrs["Unit"]) == "um": + self.tmp[ckey]["s_x"] = fp[f"{grp_name}/X Step"][0] + self.tmp[ckey]["s_unit"] = "µm" else: - raise ValueError(f"Unable to parse {grp_name}/X Step !") + raise ValueError(f"Unexpected X Step Unit attribute !") # Y Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along y-axis in micrometers. Line scan: Always set to 0. - if f"{grp_name}/Y Step" in fp: - if read_strings_from_dataset(fp[f"{grp_name}/Y Step"].attrs["Unit"]) == "um": - self.tmp[ckey]["s_y"] = fp[f"{grp_name}/Y Step"][0] - else: - raise ValueError(f"Unexpected Y Step Unit attribute !") + if read_strings_from_dataset(fp[f"{grp_name}/Y Step"].attrs["Unit"]) == "um": + self.tmp[ckey]["s_y"] = fp[f"{grp_name}/Y Step"][0] else: - raise ValueError(f"Unable to parse {grp_name}/Y Step !") + raise ValueError(f"Unexpected Y Step Unit attribute !") # TODO::check that all data in the self.oina are consistent def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str): """Parse EBSD header section for specific slice.""" - grp_name = f"{self.prfx}/EBSD/Header" + grp_name = f"{self.prfx}/EBSD/Header/Phases" + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + # Phases, yes, Contains a subgroup for each phase where the name # of each subgroup is the index of the phase starting at 1. 
- if f"{grp_name}/Phases" in fp: - phase_ids = sorted(list(fp[f"{grp_name}/Phases"]), key=int) - self.tmp[ckey]["phase"] = [] - self.tmp[ckey]["space_group"] = [] - self.tmp[ckey]["phases"] = {} - for phase_id in phase_ids: - if phase_id.isdigit() is True: - self.tmp[ckey]["phases"][int(phase_id)] = {} - sub_grp_name = f"/{grp_name}/Phases/{phase_id}" - # Phase Name, yes, H5T_STRING, (1, 1) - if f"{sub_grp_name}/Phase Name" in fp: - phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Phase Name"][()]) - self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name - else: - raise ValueError(f"Unable to parse {sub_grp_name}/Phase Name !") - - # Reference, yes, H5T_STRING, (1, 1), Changed in version 2.0 to mandatory - if f"{sub_grp_name}/Reference" in fp: - self.tmp[ckey]["phases"][int(phase_id)]["reference"] \ - = read_strings_from_dataset(fp[f"{sub_grp_name}/Reference"][()]) - else: - raise ValueError(f"Unable to parse {sub_grp_name}/Reference !") - - # Lattice Angles, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for the alpha, beta and gamma angles in radians - if f"{sub_grp_name}/Lattice Angles" in fp: - is_degrees = False - if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Angles"].attrs["Unit"]) == "rad": - is_degrees = False - angles = np.asarray(fp[f"{sub_grp_name}/Lattice Angles"][:].flatten()) / np.pi * 180. - self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ - = angles - else: - raise ValueError(f"Unable to parse {sub_grp_name}/Lattice Angles !") - - # Lattice Dimensions, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for a, b and c dimensions in Angstroms - if f"{sub_grp_name}/Lattice Dimensions" in fp: - is_nanometer = False - if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Dimensions"].attrs["Unit"]) == "angstrom": - is_nanometer = False - a_b_c = np.asarray(fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten()) * 0.1 - self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c - else: - raise ValueError(f"Unable to parse {sub_grp_name}/Lattice Dimensions !") - - # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. - # The attribute Symbol contains the string representation, for example P m -3 m. 
- if f"{sub_grp_name}/Space Group" in fp: - space_group = int(fp[f"{sub_grp_name}/Space Group"][0]) - self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group - else: - raise ValueError(f"Unable to parse {sub_grp_name}/Space Group !") - if len(self.tmp[ckey]["space_group"]) > 0: - self.tmp[ckey]["space_group"].append(space_group) - else: - self.tmp[ckey]["space_group"] = [space_group] - - if len(self.tmp[ckey]["phase"]) > 0: - self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) - else: - self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] - else: - raise ValueError(f"Unable to parse {grp_name}/Phases !") + phase_ids = sorted(list(fp[f"{grp_name}"]), key=int) + self.tmp[ckey]["phase"] = [] + self.tmp[ckey]["space_group"] = [] + self.tmp[ckey]["phases"] = {} + for phase_id in phase_ids: + if phase_id.isdigit() is True: + self.tmp[ckey]["phases"][int(phase_id)] = {} + sub_grp_name = f"/{grp_name}/{phase_id}" + + req_fields = ["Phase Name", "Reference", "Lattice Angles", + "Lattice Dimensions", "Space Group"] + for req_field in req_fields: + if f"{sub_grp_name}/{req_field}" not in fp: + raise ValueError(f"Unable to parse {sub_grp_name}/{req_field} !") + + # Phase Name, yes, H5T_STRING, (1, 1) + phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Phase Name"][()]) + self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name + + # Reference, yes, H5T_STRING, (1, 1), Changed in version 2.0 to mandatory + self.tmp[ckey]["phases"][int(phase_id)]["reference"] \ + = read_strings_from_dataset(fp[f"{sub_grp_name}/Reference"][()]) + + # Lattice Angles, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for the alpha, beta and gamma angles in radians + is_degrees = False + if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Angles"].attrs["Unit"]) == "rad": + is_degrees = False + angles = np.asarray(fp[f"{sub_grp_name}/Lattice Angles"][:].flatten()) / np.pi * 180. + self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = angles + + # Lattice Dimensions, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for a, b and c dimensions in Angstroms + is_nanometer = False + if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Dimensions"].attrs["Unit"]) == "angstrom": + is_nanometer = False + a_b_c = np.asarray(fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten()) * 0.1 + self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c + + # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. + # The attribute Symbol contains the string representation, for example P m -3 m. 
+ space_group = int(fp[f"{sub_grp_name}/Space Group"][0]) + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + if len(self.tmp[ckey]["space_group"]) > 0: + self.tmp[ckey]["space_group"].append(space_group) + else: + self.tmp[ckey]["space_group"] = [space_group] + + if len(self.tmp[ckey]["phase"]) > 0: + self.tmp[ckey]["phase"].append( + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) + else: + self.tmp[ckey]["phase"] \ + = [Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): # https://github.com/oinanoanalysis/h5oina/blob/master/H5OINAFile.md grp_name = f"{self.prfx}/EBSD/Data" - print(f"Parsing {grp_name}") + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + + req_fields = ["Euler", "Phase", "X", "Y", "Band Contrast"] + for req_field in req_fields: + if f"{grp_name}/{req_field}" not in fp: + raise ValueError(f"Unable to parse {grp_name}/{req_field} !") + # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1). - if f"{grp_name}/Euler" in fp: + is_degrees = False + is_negative = False + if read_strings_from_dataset(fp[f"{grp_name}/Euler"].attrs["Unit"]) == "rad": is_degrees = False - if read_strings_from_dataset(fp[f"{grp_name}/Euler"].attrs["Unit"]) == "rad": - is_degrees = False - self.tmp[ckey]["euler"] = np.asarray(fp[f"{grp_name}/Euler"], np.float32) - # inconsistency f32 in file although specification states float - #Rotation.from_euler(euler=fp[f"{grp_name}/Euler"], - # direction='lab2crystal', - # degrees=is_degrees) - else: - raise ValueError(f"Unable to parse {grp_name}/Euler !") + self.tmp[ckey]["euler"] = np.asarray(fp[f"{grp_name}/Euler"], np.float32) + # TODO::handle possible case of negative Euler angles (examples though do not indicate) + # that AZTec reports negative Euler angles... 
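+ # should negative angles ever occur, format_euler_parameterization from
+ # utils/hfive_utils.py would shift them back into [0, 2pi] x [0, pi] x [0, 2pi]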
+ # inconsistency f32 in file although specification states float + # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"], + # direction='lab2crystal', + # degrees=is_degrees) # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 - if f"{grp_name}/Phase" in fp: - self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32) - else: - raise ValueError(f"Unable to parse {grp_name}/Phase !") + self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32) # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner) - if f"{grp_name}/X" in fp: - self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32) - # inconsistency f32 in file although specification states float - else: - raise ValueError(f"Unable to parse {grp_name}/X !") + self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32) + # inconsistency f32 in file although specification states float # Y, no, H5T_NATIVE_FLOAT, (size, 1), Y position of each pixel in micrometers (origin: top left corner) - if f"{grp_name}/Y" in fp: - self.tmp[ckey]["scan_point_y"] = np.asarray(fp[f"{grp_name}/Y"], np.float32) - # inconsistency f32 in file although specification states float - else: - raise ValueError(f"Unable to parse {grp_name}/Y !") + self.tmp[ckey]["scan_point_y"] = np.asarray(fp[f"{grp_name}/Y"], np.float32) + # inconsistency f32 in file although specification states float # Band Contrast, no, H5T_NATIVE_INT32, (size, 1) - if f"{grp_name}/Band Contrast" in fp: - self.tmp[ckey]["band_contrast"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32) - # inconsistency uint8 in file although specification states should be int32 - # promoting uint8 to int32 no problem - else: - raise ValueError(f"Unable to parse {grp_name}/Band Contrast !") + self.tmp[ckey]["band_contrast"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32) + # inconsistency uint8 in file although specification states should be int32 + # promoting uint8 to int32 no problem diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index dde1c10ee..17c0656c3 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -55,10 +55,10 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader -# from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader +from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader from pynxtools.dataconverter.readers.em.subparsers.hfive_apex import HdfFiveEdaxApexReader +from pynxtools.dataconverter.readers.em.subparsers.hfive_ebsd import HdfFiveCommunityReader # from pynxtools.dataconverter.readers.em.subparsers.hfive_emsoft import HdfFiveEmSoftReader -# from pynxtools.dataconverter.readers.em.subparsers.hfive_ebsd import HdfFiveCommunityReader class NxEmNxsHfiveSubParser: @@ -109,33 +109,20 @@ def parse(self, template: dict) -> dict: apex = HdfFiveEdaxApexReader(self.file_path) apex.parse_and_normalize() self.process_into_template(apex.tmp, template) - """ elif hfive_parser_type == "edax": - with h5py.File(f"{self.file_path}", "r") as h5r: - grp_nms = list(h5r["/"]) - for 
grp_nm in grp_nms: - if grp_nm not in ["Version", "Manufacturer"]: - edax_oim = HdfFiveEdaxOimAnalysisReader(self.file_path) - edax_oim.parse_and_normalize_group( - h5r, - f"/{grp_nm}", - self.cache_ebsd) + edax = HdfFiveEdaxOimAnalysisReader(self.file_path) + edax.parse_and_normalize() + self.process_into_template(edax.tmp, template) elif hfive_parser_type == "hebsd": - with h5py.File(f"{self.file_path}", "r") as h5r: - grp_nms = list(h5r["/"]) - for grp_nm in grp_nms: - if grp_nm not in ["Version", "Manufacturer"]: - edax_oim = HdfFiveCommunityReader(self.file_path) - edax_oim.parse_and_normalize_group( - h5r, - f"/{grp_nm}", - self.cache_ebsd) + ebsd = HdfFiveCommunityReader(self.file_path) + ebsd.parse_and_normalize() + self.process_into_template(ebsd.tmp, template) elif hfive_parser_type == "emsoft": return template else: # none or something unsupported return template - for key, val in self.cache_ebsd.items(): + for key, val in self.cache.items(): print(f"{key}, type: {type(val)}, shape: {np.shape(val)}") if self.cache["is_filled"] is True: @@ -143,29 +130,30 @@ def parse(self, template: dict) -> dict: self.process_roi_xmap(template) self.process_roi_phases(template) self.process_roi_inverse_pole_figures(template) - """ return template def identify_hfive_type(self): """Identify if HDF5 file matches a known format for which a subparser exists.""" + # tech partner formats used for measurement hdf = HdfFiveOxfordReader(f"{self.file_path}") if hdf.supported is True: return "oxford" - # hdf = HdfFiveEdaxOimAnalysisReader(f"{self.file_path}") - # if hdf.supported is True: - # return "edax" + hdf = HdfFiveEdaxOimAnalysisReader(f"{self.file_path}") + if hdf.supported is True: + return "edax" hdf = HdfFiveEdaxApexReader(f"{self.file_path}") if hdf.supported is True: return "apex" hdf = HdfFiveBrukerEspritReader(f"{self.file_path}") if hdf.supported is True: return "bruker" + hdf = HdfFiveCommunityReader(f"{self.file_path}") + if hdf.supported is True: + return "hebsd" + # computer simulation tools # hdf = HdfFiveEmSoftReader(f"{self.file_path}") # if hdf.supported is True: # return "emsoft" - # hdf = HdfFiveCommunityReader(f"{self.file_path}") - # if hdf.supported is True: - # return "hebsd" return None def process_into_template(self, inp: dict, template: dict) -> dict: diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py index 639d73421..0f4aacd82 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py @@ -22,8 +22,34 @@ import h5py import yaml import json +from itertools import groupby +EBSD_MAP_SPACEGROUP = {"F m#ovl3m": 225, + "I m#ovl3m": 229} + +def format_euler_parameterization(triplet_set): + """Transform degrees to radiant and apply orientation space symmetry""" + is_degrees = False + for column_id in [0, 1, 2]: + # not robust enough as a single crystal close to the cube orientation + # with a very low orientation spread may also have all Euler angle values + # smaller than 2pi + # TODO::therefore the real specs of each tech partner's format is needed! + if np.max(np.abs(triplet_set[:, column_id])) > 2. * np.pi: + is_degrees = True + if is_degrees is True: + for column_id in [0, 1, 2]: + triplet_set[:, column_id] = triplet_set[:, column_id] / 180. * np.pi + + sothree_shift = [2. * np.pi, np.pi, 2. * np.pi] + for column_id in [0, 1, 2]: + here = np.where(triplet_set[:, column_id] < 0.) 
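+ # np.where with a single condition returns a tuple of index arrays, so
+ # here[0] holds the row indices needing the shift below; a hypothetical
+ # row [-0.1, 0.2, 0.3] rad becomes [2. * np.pi - 0.1, 0.2, 0.3] rad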
+ if len(here[0]) > 0: + triplet_set[here, column_id] \ + = sothree_shift[column_id] + triplet_set[here, column_id] + return triplet_set + def read_strings_from_dataset(obj): # print(f"type {type(obj)}, np.shape {np.shape(obj)}, obj {obj}") # if hasattr(obj, "dtype"): @@ -53,3 +79,8 @@ def read_strings_from_dataset(obj): else: return None # raise ValueError("Neither np.ndarray, nor bytes, nor str !") + + +def all_equal(iterable): + g = groupby(iterable) + return next(g, True) and not next(g, False) From f947f9de7fa1a83e08324153aa75a008d43f4650 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Mon, 23 Oct 2023 17:56:22 +0200 Subject: [PATCH 14/84] Implemented xmap mapping, runthrough tests with all datasets in HDF5 family files, bugfixing --- .../readers/em/concepts/README.md | 3 + .../readers/em/examples/README.md | 4 + .../readers/em/examples/ebsd_database.py | 3 + .../readers/em/geometry/README.md | 4 + pynxtools/dataconverter/readers/em/reader.py | 11 +- .../readers/em/subparsers/README.md | 3 + .../readers/em/subparsers/hfive_apex.py | 8 +- .../readers/em/subparsers/hfive_bruker.py | 5 +- .../readers/em/subparsers/hfive_ebsd.py | 2 +- .../readers/em/subparsers/hfive_edax.py | 26 ++-- .../readers/em/subparsers/hfive_oxford.py | 6 +- .../readers/em/subparsers/nxs_hfive.py | 123 +++++++++++++----- .../dataconverter/readers/em/utils/README.md | 3 + .../readers/em/utils/hfive_utils.py | 23 +++- test.all.sh | 15 +++ 15 files changed, 178 insertions(+), 61 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/concepts/README.md create mode 100644 pynxtools/dataconverter/readers/em/examples/README.md create mode 100644 pynxtools/dataconverter/readers/em/geometry/README.md create mode 100644 pynxtools/dataconverter/readers/em/subparsers/README.md create mode 100644 pynxtools/dataconverter/readers/em/utils/README.md create mode 100755 test.all.sh diff --git a/pynxtools/dataconverter/readers/em/concepts/README.md b/pynxtools/dataconverter/readers/em/concepts/README.md new file mode 100644 index 000000000..e215df41a --- /dev/null +++ b/pynxtools/dataconverter/readers/em/concepts/README.md @@ -0,0 +1,3 @@ +# Context + +Mapping of pieces of information from concepts onto NeXus concepts. diff --git a/pynxtools/dataconverter/readers/em/examples/README.md b/pynxtools/dataconverter/readers/em/examples/README.md new file mode 100644 index 000000000..f916d5e28 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/examples/README.md @@ -0,0 +1,4 @@ +# Context + +Specific code that is relevant only for the implementation of examples for +this em parser and the NOMAD OASIS research data management system. 
diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py index 0196a52f5..3de3ce542 100644 --- a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py +++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py @@ -303,3 +303,6 @@ "ZrO": "Zr;O"} ProjectIdToCitation = {"Forsterite.ctf.nxs.mtex": {"data": "someurl", "paper": "someurl"}} + +AssumePhaseNameToSpaceGroup = {"Silver": 225, + "Copper": 225} diff --git a/pynxtools/dataconverter/readers/em/geometry/README.md b/pynxtools/dataconverter/readers/em/geometry/README.md new file mode 100644 index 000000000..1f442f99f --- /dev/null +++ b/pynxtools/dataconverter/readers/em/geometry/README.md @@ -0,0 +1,4 @@ +# Context + +Utility code relevant to handle coordinate systems and geometrical entities +used or normalized by the em parser or components of the em parser. diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 2d5391417..7e6a94d66 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -134,15 +134,6 @@ def read(self, # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue - # if case.dat_parser_type == "orix": - # orix_parser = NxEmOmOrixEbsdParser(case.dat[0], entry_id) - # # h5oina parser evaluating content and plotting with orix on the fly - # orix_parser.parse(template) - # elif case.dat_parser_type == "mtex": - # mtex_parser = NxEmOmMtexEbsdParser(case.dat[0], entry_id) - # # ebsd parser because concept suggested for MTex by M. Kühbach - # # would include different HDF5 dumps for different MTex classes - # mtex_parser.parse(template) # elif case.dat_parser_type == "zip": # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) # zip_parser.parse(template) @@ -167,7 +158,7 @@ def read(self, if resolved_path != "": nxs_plt.annotate_default_plot(template, resolved_path) - debugging = True + debugging = False if debugging is True: print("Reporting state of template before passing to HDF5 writing...") for keyword in template.keys(): diff --git a/pynxtools/dataconverter/readers/em/subparsers/README.md b/pynxtools/dataconverter/readers/em/subparsers/README.md new file mode 100644 index 000000000..0fdc22fa2 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/README.md @@ -0,0 +1,3 @@ +# Context + +Specific parsers for metadata and data stored in HDF5 files from technology partners. diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 2c838c14c..bd1c53693 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -80,7 +80,7 @@ def check_if_supported(self): def parse_and_normalize(self): """Read and normalize away EDAX/APEX-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 0 + cache_id = 1 grp_nms = list(h5r["/"]) for grp_nm in grp_nms: sub_grp_nms = list(h5r[grp_nm]) @@ -169,7 +169,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): # problematic because mapping is not bijective! 
# if you know the space group we know laue and point group and symmetry # but the opposite direction leaves room for ambiguities - space_group = "n/a" + space_group = None self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group if len(self.tmp[ckey]["space_group"]) > 0: @@ -210,8 +210,8 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # check shape of internal virtual chunked number array r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))]) self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False) - self.tmp[ckey]["phase_id"][i] = dat[i][2] - self.tmp[ckey]["ci"][i] = dat[i][3] + self.tmp[ckey]["ci"][i] = dat[i][2] + self.tmp[ckey]["phase_id"][i] = dat[i][3] # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions ! # orix based transformation ends up in positive half space and with degrees=False diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index c1f02a775..fdadde47d 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -88,7 +88,7 @@ def check_if_supported(self): def parse_and_normalize(self): """Read and normalize away Bruker-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 0 + cache_id = 1 grp_names = list(h5r["/"]) for grp_name in grp_names: if grp_name not in ["Version", "Manufacturer"]: @@ -159,8 +159,9 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group else: raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !") - # formatting is a nightmare F m#ovl3m for F m 3bar m... 
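+ # e.g. EBSD_MAP_SPACEGROUP in utils/hfive_utils.py decodes the Bruker
+ # string "F m#ovl3m" to space group 225 and "I m#ovl3m" to 229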
+ # TODO::in some case instead a group IT (likely International Tables of Crystallography) + # was there so parse this instead of the above used mapping table if len(self.tmp[ckey]["space_group"]) > 0: self.tmp[ckey]["space_group"].append(space_group) else: diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index f268037e3..8c831735f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -89,7 +89,7 @@ def check_if_supported(self): def parse_and_normalize(self): """Read and normalize away community-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 0 + cache_id = 1 grp_names = list(h5r["/"]) for grp_name in grp_names: if grp_name not in ["Version", "Manufacturer"]: diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 46925ffaa..c0a2a31e8 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -37,7 +37,7 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ - read_strings_from_dataset, format_euler_parameterization + read_strings_from_dataset, read_first_scalar, format_euler_parameterization class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser): @@ -91,7 +91,7 @@ def check_if_supported(self): def parse_and_normalize(self): """Read and normalize away EDAX-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 0 + cache_id = 1 grp_names = list(h5r["/"]) for grp_name in grp_names: if grp_name not in ["Version", "Manufacturer"]: @@ -119,11 +119,14 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): if grid_type not in ["HexGrid", "SqrGrid"]: raise ValueError(f"Grid Type {grid_type} is currently not supported !") self.tmp[ckey]["grid_type"] = grid_type - self.tmp[ckey]["s_x"] = fp[f"{grp_name}/Step X"][()] + self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"]) self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron? - self.tmp[ckey]["n_x"] = fp[f"{grp_name}/nColumns"][()] - self.tmp[ckey]["s_y"] = fp[f"{grp_name}/Step Y"][()] - self.tmp[ckey]["n_y"] = fp[f"{grp_name}/nRows"][()] + self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"]) + self.tmp[ckey]["s_y"] = read_first_scalar(fp[f"{grp_name}/Step Y"]) + self.tmp[ckey]["n_y"] = read_first_scalar(fp[f"{grp_name}/nRows"]) + # TODO::different version store the same concept with the same path name with different shape + # the read_first_scalar is not an optimal solution, in the future all reads from + # HDF5 should check for the shape instead # TODO::check that all data are consistent def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): @@ -165,10 +168,11 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): = np.asarray(angles, np.float32) # Space Group not stored, only laue group, point group and symmetry + # https://doi.org/10.1107/S1600576718012724 is a relevant read here # problematic because mapping is not bijective! 
# if you know the space group we know laue and point group and symmetry # but the opposite direction leaves room for ambiguities - space_group = "n/a" + space_group = None self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group if len(self.tmp[ckey]["space_group"]) > 0: @@ -207,7 +211,13 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # TODO::seems to be the situation in the example but there is no documentation self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) - self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32) + # given no official EDAX OimAnalysis spec we cannot define for sure if + # phase_id == 0 means just all was indexed with the first/zeroth phase or nothing + # was indexed, TODO::assuming it means all indexed: + if np.all(fp[f"{grp_name}/Phase"][:] == 0): + self.tmp[ckey]["phase_id"] = np.zeros(n_pts, np.int32) + 1 + else: + self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32) # promoting int8 to int32 no problem self.tmp[ckey]["ci"] = np.asarray(fp[f"{grp_name}/CI"][:], np.float32) self.tmp[ckey]["scan_point_x"] = np.asarray( diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index ecdb28c5d..e3dc77562 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -99,10 +99,10 @@ def check_if_supported(self): def parse_and_normalize(self): """Read and normalize away Oxford-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 0 + cache_id = 1 slice_ids = sorted(list(h5r["/"])) for slice_id in slice_ids: - if slice_id.isdigit() is True and slice_id == "1": + if slice_id.isdigit() is True and slice_id == "1" and f"/{slice_id}/EBSD" in h5r: # non-negative int, parse for now only the 1. 
slice self.prfx = f"/{slice_id}" ckey = self.init_named_cache(f"ebsd{cache_id}") # name of the cache to use @@ -241,6 +241,6 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): # inconsistency f32 in file although specification states float # Band Contrast, no, H5T_NATIVE_INT32, (size, 1) - self.tmp[ckey]["band_contrast"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32) + self.tmp[ckey]["bc"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32) # inconsistency uint8 in file although specification states should be int32 # promoting uint8 to int32 no problem diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 17c0656c3..3f7342482 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -121,15 +121,6 @@ def parse(self, template: dict) -> dict: return template else: # none or something unsupported return template - - for key, val in self.cache.items(): - print(f"{key}, type: {type(val)}, shape: {np.shape(val)}") - - if self.cache["is_filled"] is True: - self.process_roi_overview(template) - self.process_roi_xmap(template) - self.process_roi_phases(template) - self.process_roi_inverse_pole_figures(template) return template def identify_hfive_type(self): @@ -157,50 +148,118 @@ def identify_hfive_type(self): return None def process_into_template(self, inp: dict, template: dict) -> dict: - for key, val in inp.items(): - if isinstance(val, dict): - for ckey, cval in val.items(): - print(f"{ckey}, {cval}") - else: - print(f"{key}, {val}") + debugging = False + if debugging is True: + for key, val in inp.items(): + if isinstance(val, dict): + for ckey, cval in val.items(): + print(f"{ckey}, {cval}") + else: + print(f"{key}, {val}") + + self.process_roi_overview(inp, template) + self.process_roi_ebsd_maps(inp, template) + return template + + def process_roi_overview(self, inp: dict, template: dict) -> dict: + for ckey in inp.keys(): + if ckey.startswith("ebsd"): + self.process_roi_overview_ebsd_based( + inp[ckey], ckey.replace("ebsd", ""), template) + break # only one roi for now return template - # super().process_ebsd_cache(self.tmp, template) - # return template - def process_roi_overview(inp: dict, template: dict) -> dict: + def process_roi_overview_ebsd_based(self, + inp: dict, + roi_id: str, + template: dict) -> dict: + print("Parse ROI default plot...") + # prfx = f"/ENTRY[entry{self.entry_id}]/experiment/indexing/region_of_interest/roi{roi_id}" + prfx = f"/roi{roi_id}" + trg = f"{prfx}" + template[f"{trg}/title"] = str("Region-of-interest overview image") + template[f"{trg}/@signal"] = "data" + template[f"{trg}/@axes"] = ["axis_y", "axis_x"] + template[f"{trg}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) + template[f"{trg}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + trg = f"{prfx}/data" + contrast_modes = [(None, "n/a"), + ("bc", "normalized_band_contrast"), + ("ci", "normalized_confidence_index"), + ("mad", "normalized_mean_angular_deviation")] + success = False + for contrast_mode in contrast_modes: + if contrast_mode[0] in inp.keys() and success is False: + template[f"{trg}"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1} + template[f"{prfx}/descriptor"] = contrast_mode[1] + success = True + if success is False: + raise ValueError(f"{__name__} 
unable to generate plot for {prfx} !") + # 0 is y while 1 is x ! + template[f"{trg}/@long_name"] = "Signal" + template[f"{trg}/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps + template[f"{trg}/@IMAGE_VERSION"] = "1.2" + template[f"{trg}/@SUBCLASS_VERSION"] = np.int64(15) + + trg = f"{prfx}/axis_x" + template[f"{trg}"] = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1} + template[f"{trg}/@long_name"] = f"Coordinate along x-axis ({inp['s_unit']})" + template[f"{trg}/@units"] = f"{inp['s_unit']}" + trg = f"{prfx}/axis_y" + template[f"{trg}"] = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1} + template[f"{trg}/@long_name"] = f"Coordinate along y-axis ({inp['s_unit']})" + template[f"{trg}/@units"] = f"{inp['s_unit']}" + return template + + def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: + for ckey in inp.keys(): + if ckey.startswith("ebsd"): + roi_identifier = ckey.replace("ebsd", "") + self.process_roi_xmap( + inp[ckey], roi_identifier, template) + # self.process_roi_phases( + # inp[ckey], roi_identifier, template) + # self.process_roi_inverse_pole_figures( + # inp[ckey], roi_identifier, template) + break # only one roi for now return template - def process_roi_xmap(inp: dict) -> dict: - """Process standardized IPF orientation map using pyxem from normalized orientation data.""" - # for NeXus would like to create a default - ''' - if np.max(inp["n_x"], inp["n_y"]) < HFIVE_WEB_MAXIMUM_RGB: + def process_roi_xmap(self, inp: dict, roi_id: str, template: dict) -> dict: + """Process crystal orientation map from normalized orientation data.""" + # for NeXus to create a default representation of the EBSD map to explore + if np.max((inp["n_x"], inp["n_y"])) < HFIVE_WEB_MAXIMUM_RGB: # can use the map discretization as is coordinates, _ = create_coordinate_arrays( (inp["n_x"], inp["n_y"]), (inp["s_x"], inp["s_y"])) xaxis = coordinates["x"] yaxis = coordinates["y"] del coordinates - # else: + else: + raise ValueError(f"Downsampling for too large EBSD maps is currently not supported !") # need to regrid to downsample too large maps # TODO::implement 1NN-based downsampling approach # build grid # tree-based 1NN # proceed as usual - pyxem_phase_identifier = inp["phase_identifier"] \ - - (np.min(inp["phase_identifier"]) - (-1)) # pyxem, non-indexed has to be -1 - print(np.unique(pyxem_phase_identifier)) + pyxem_phase_identifier = inp["phase_id"] - 1 + # inp["phase_id"] - (np.min(inp["phase_id"]) - (-1)) + # for pyxem the non-indexed has to be -1 instead of 0 which is what NeXus uses + # -1 always because content of inp["phase_id"] is normalized + # to NeXus NXem_ebsd_crystal_structure concept already! 
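+ # e.g. NeXus phase_id values [0, 1, 2] with 0 = notIndexed become
+ # pyxem/orix phase_id values [-1, 0, 1] with -1 flagging non-indexed points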
+ print(f"Unique pyxem_phase_identifier {np.unique(pyxem_phase_identifier)}") - self.xmap = CrystalMap(rotations=inp["rotation"], - x=self.xaxis, y=self.yaxis, + self.xmap = CrystalMap(rotations=Rotation.from_euler(euler=inp["euler"], + direction='lab2crystal', + degrees=False), + x=xaxis, y=yaxis, phase_id=pyxem_phase_identifier, phase_list=PhaseList(space_groups=inp["space_group"], structures=inp["phase"]), - prop={"bc": inp["band_contrast"]}, - scan_unit=inp["s_unit"]) + prop={}) + # "bc": inp["band_contrast"]}, scan_unit=inp["s_unit"]) print(self.xmap) - ''' + return template def process_roi_phases(self, template: dict) -> dict: return template diff --git a/pynxtools/dataconverter/readers/em/utils/README.md b/pynxtools/dataconverter/readers/em/utils/README.md new file mode 100644 index 000000000..24f160935 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/README.md @@ -0,0 +1,3 @@ +# Context + +Utility code used by eventual multiple (sub)parsers of the em parser. diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py index 0f4aacd82..5ae6a9cae 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py @@ -25,8 +25,17 @@ from itertools import groupby -EBSD_MAP_SPACEGROUP = {"F m#ovl3m": 225, +EBSD_MAP_SPACEGROUP = {"P 6#sub3mc": 186, + "P 6/mmm": 191, + "P 6#sub3/mmc": 194, + "F #ovl43m": 216, + "P m#ovl3m": 221, + "F m#ovl3m": 225, + "Fd#ovl3m(*)": 227, "I m#ovl3m": 229} +# see here for typical examples http://img.chem.ucl.ac.uk/sgp/large/186az1.htm + +DIRTY_FIX_SPACEGROUP = {} def format_euler_parameterization(triplet_set): """Transform degrees to radiant and apply orientation space symmetry""" @@ -81,6 +90,18 @@ def read_strings_from_dataset(obj): # raise ValueError("Neither np.ndarray, nor bytes, nor str !") +def read_first_scalar(obj): + if hasattr(obj, "shape"): + if obj.shape == (): + return obj[()] + elif obj.shape == (1,): + return obj[0] + else: + raise ValueError(f"Unexpected shape found in {__name__} from object {obj} !") + else: + raise ValueError(f"Unexpected input passed to {__name__} with object {obj} !") + + def all_equal(iterable): g = groupby(iterable) return next(g, True) and not next(g, False) diff --git a/test.all.sh b/test.all.sh new file mode 100755 index 000000000..6f5e4d894 --- /dev/null +++ b/test.all.sh @@ -0,0 +1,15 @@ +#!/bin/bash + + +# dataconverter --reader em --nxdl NXroot --input-file 130_0003.h5 --output debug.bruker.nxs 1>stdout.bruker.txt 2>stderr.bruker.txt +# dataconverter --reader em --nxdl NXroot --input-file 207_2081.edaxh5 --output debug.apex.nxs 1>stdout.apex.txt 2>stderr.apex.txt +# dataconverter --reader em --nxdl NXroot --input-file 229_2097.oh5 --output debug.edax.nxs 1>stdout.edax.txt 2>stderr.edax.txt +# dataconverter --reader em --nxdl NXroot --input-file 088_0009.h5 --output debug.britton.nxs 1>stdout.britton.txt 2>stderr.britton.txt + +Examples="026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 
116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" + +# Examples="132_0005.h5" +for example in $Examples; do + echo $example + dataconverter --reader em --nxdl NXroot --input-file $example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt +done From ba45b67ed316b37c0ef34b03257315c63d875f58 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Tue, 24 Oct 2023 16:54:18 +0200 Subject: [PATCH 15/84] Implemented phase-specific plots, bugfixing --- .gitignore | 7 + .../readers/em/examples/ebsd_database.py | 6 +- pynxtools/dataconverter/readers/em/reader.py | 7 +- .../readers/em/subparsers/hfive_apex.py | 130 ++++++----- .../readers/em/subparsers/hfive_bruker.py | 9 +- .../readers/em/subparsers/hfive_ebsd.py | 9 +- .../readers/em/subparsers/hfive_edax.py | 4 + .../readers/em/subparsers/hfive_emsoft.py | 23 +- .../readers/em/subparsers/nxs_hfive.py | 220 ++++++++++++++---- .../readers/em/utils/image_processing.py | 54 +++++ test.all.sh | 8 +- 11 files changed, 350 insertions(+), 127 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/utils/image_processing.py diff --git a/.gitignore b/.gitignore index d15843202..4b1dad0c2 100644 --- a/.gitignore +++ b/.gitignore @@ -202,3 +202,10 @@ cython_debug/ build/ nexusparser.egg-info/PKG-INFO .python-version + +# reader specific custom settings +*.h5 +*.oh5 +*.edaxh5 +*.h5oina +*.nxs diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py index 3de3ce542..620284f23 100644 --- a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py +++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py @@ -304,5 +304,7 @@ ProjectIdToCitation = {"Forsterite.ctf.nxs.mtex": {"data": "someurl", "paper": "someurl"}} -AssumePhaseNameToSpaceGroup = {"Silver": 225, - "Copper": 225} +ASSUME_PHASE_NAME_TO_SPACE_GROUP = {"Silver": 225, + "Copper": 225, + "Ni (Nickel)": 225, + "Face Centered Cubic": 225} diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 7e6a94d66..7f2d15aaa 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -116,8 +116,9 @@ def read(self, nxs = NxEmAppDef() nxs.parse(template, entry_id, input_file_names) - conventions = NxEmConventionMapper(entry_id) - conventions.parse(template) + print("Parse conventions of reference frames...") + # conventions = NxEmConventionMapper(entry_id) + # conventions.parse(template) print("Parse and map pieces of information within files from tech partners...") # sub_parser = "nxs_mtex" @@ -129,7 +130,7 @@ def read(self, sub_parser = "nxs_hfive" subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0]) subparser.parse(template) - exit(1) + # exit(1) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index bd1c53693..3d8d921a8 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ 
b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -35,7 +35,9 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ - read_strings_from_dataset, format_euler_parameterization + read_strings_from_dataset, read_first_scalar, format_euler_parameterization +from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ + ASSUME_PHASE_NAME_TO_SPACE_GROUP class HdfFiveEdaxApexReader(HdfFiveBaseParser): @@ -129,66 +131,68 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): grp_name = f"{self.prfx}/EBSD/ANG/HEADER/Phase" + if f"{grp_name}" not in fp: + raise ValueError(f"Unable to parse {grp_name} !") + # Phases, contains a subgroup for each phase where the name # of each subgroup is the index of the phase starting at 1. - if f"{grp_name}" in fp: - phase_ids = sorted(list(fp[f"{grp_name}"]), key=int) - self.tmp[ckey]["phase"] = [] - self.tmp[ckey]["space_group"] = [] - self.tmp[ckey]["phases"] = {} - for phase_id in phase_ids: - if phase_id.isdigit() is True: - self.tmp[ckey]["phases"][int(phase_id)] = {} - sub_grp_name = f"{grp_name}/{phase_id}" - # Name - if f"{sub_grp_name}/Material Name" in fp: - phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Material Name"][0]) - self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name - else: - raise ValueError(f"Unable to parse {sub_grp_name}/Material Name !") - - # Reference not available only Info but this can be empty - self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a" - - req_fields = ["A", "B", "C", "Alpha", "Beta", "Gamma"] - for req_field in req_fields: - if f"{sub_grp_name}/Lattice Constant {req_field}" not in fp: - raise ValueError(f"Unable to parse ../Lattice Constant {req_field} !") - a_b_c = [fp[f"{sub_grp_name}/Lattice Constant A"][0], - fp[f"{sub_grp_name}/Lattice Constant B"][0], - fp[f"{sub_grp_name}/Lattice Constant C"][0]] - angles = [fp[f"{sub_grp_name}/Lattice Constant Alpha"][0], - fp[f"{sub_grp_name}/Lattice Constant Beta"][0], - fp[f"{sub_grp_name}/Lattice Constant Gamma"][0]] - self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ - = np.asarray(a_b_c, np.float32) * 0.1 - self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ - = np.asarray(angles, np.float32) - - # Space Group not stored, only laue group, point group and symmetry - # problematic because mapping is not bijective! 
- # if you know the space group we know laue and point group and symmetry - # but the opposite direction leaves room for ambiguities - space_group = None - self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group - - if len(self.tmp[ckey]["space_group"]) > 0: - self.tmp[ckey]["space_group"].append(space_group) - else: - self.tmp[ckey]["space_group"] = [space_group] - - if len(self.tmp[ckey]["phase"]) > 0: - self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) - else: - self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] - else: - raise ValueError(f"Unable to parse {grp_name} !") + phase_ids = sorted(list(fp[f"{grp_name}"]), key=int) + self.tmp[ckey]["phase"] = [] + self.tmp[ckey]["space_group"] = [] + self.tmp[ckey]["phases"] = {} + for phase_id in phase_ids: + if phase_id.isdigit() is True: + self.tmp[ckey]["phases"][int(phase_id)] = {} + sub_grp_name = f"{grp_name}/{phase_id}" + # Name + if f"{sub_grp_name}/Material Name" in fp: + phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Material Name"][0]) + self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name + else: + raise ValueError(f"Unable to parse {sub_grp_name}/Material Name !") + + # Reference not available only Info but this can be empty + self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a" + + req_fields = ["A", "B", "C", "Alpha", "Beta", "Gamma"] + for req_field in req_fields: + if f"{sub_grp_name}/Lattice Constant {req_field}" not in fp: + raise ValueError(f"Unable to parse ../Lattice Constant {req_field} !") + a_b_c = [fp[f"{sub_grp_name}/Lattice Constant A"][0], + fp[f"{sub_grp_name}/Lattice Constant B"][0], + fp[f"{sub_grp_name}/Lattice Constant C"][0]] + angles = [fp[f"{sub_grp_name}/Lattice Constant Alpha"][0], + fp[f"{sub_grp_name}/Lattice Constant Beta"][0], + fp[f"{sub_grp_name}/Lattice Constant Gamma"][0]] + self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ + = np.asarray(a_b_c, np.float32) * 0.1 + self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ + = np.asarray(angles, np.float32) + + # Space Group not stored, only laue group, point group and symmetry + # problematic because mapping is not bijective! 
+ # if you know the space group we know laue and point group and symmetry + # but the opposite direction leaves room for ambiguities + space_group = None + if phase_name in ASSUME_PHASE_NAME_TO_SPACE_GROUP.keys(): + space_group = ASSUME_PHASE_NAME_TO_SPACE_GROUP[phase_name] + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + + if len(self.tmp[ckey]["space_group"]) > 0: + self.tmp[ckey]["space_group"].append(space_group) + else: + self.tmp[ckey]["space_group"] = [space_group] + + if len(self.tmp[ckey]["phase"]) > 0: + self.tmp[ckey]["phase"].append( + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) + else: + self.tmp[ckey]["phase"] \ + = [Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): grp_name = f"{self.prfx}/EBSD/ANG/DATA/DATA" @@ -201,17 +205,17 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): dat = fp[f"{grp_name}"] self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32) - # index of phase, 0 if not indexed - # # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 - self.tmp[ckey]["phase_id"] = np.zeros((n_pts,), np.int32) self.tmp[ckey]["ci"] = np.zeros((n_pts,), np.float32) + self.tmp[ckey]["phase_id"] = np.zeros((n_pts,), np.int32) for i in np.arange(0, n_pts): # check shape of internal virtual chunked number array r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))]) self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False) self.tmp[ckey]["ci"][i] = dat[i][2] - self.tmp[ckey]["phase_id"][i] = dat[i][3] + self.tmp[ckey]["phase_id"][i] = dat[i][3] + 1 # APEX seems to define + # notIndexed as -1 and the first valid phase id 0 + # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions ! # orix based transformation ends up in positive half space and with degrees=False diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index fdadde47d..8d6108124 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -36,8 +36,10 @@ import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser -from pynxtools.dataconverter.readers.em.utils.hfive_utils import EBSD_MAP_SPACEGROUP, \ - read_strings_from_dataset, all_equal, format_euler_parameterization +from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ + EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization +from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ + ASSUME_PHASE_NAME_TO_SPACE_GROUP class HdfFiveBrukerEspritReader(HdfFiveBaseParser): @@ -157,6 +159,9 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if spc_grp in EBSD_MAP_SPACEGROUP.keys(): space_group = EBSD_MAP_SPACEGROUP[spc_grp] self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + elif phase_name in ASSUME_PHASE_NAME_TO_SPACE_GROUP.keys(): + space_group = ASSUME_PHASE_NAME_TO_SPACE_GROUP[phase_name] + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group else: raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !") # formatting is a nightmare F m#ovl3m for F m 3bar m... 
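Aside on the phase-identifier bookkeeping in the hunks above: three conventions are in play, and mixing them up silently shifts every phase. A minimal standalone sketch (helper names are illustrative, not part of the patch), assuming the conventions stated in the comments above, namely APEX with -1 for notIndexed and 0 for the first valid phase, NXem with 0 for notIndexed and 1 for the first valid phase, and pyxem/orix with -1 for not_indexed and 0 for the first valid phase:

    import numpy as np

    def apex_to_nxem(phase_id):
        # shift the APEX convention (-1 == notIndexed, first phase 0)
        # to the NXem convention (0 == notIndexed, first phase 1)
        return np.asarray(phase_id, np.int32) + 1

    def nxem_to_pyxem(phase_id):
        # shift the NXem convention (0 == notIndexed) to the pyxem/orix
        # convention (-1 == not_indexed, first phase 0)
        return np.asarray(phase_id, np.int32) - 1

    apex = np.asarray([-1, 0, 1, 1, -1], np.int32)  # illustrative raw APEX values
    nxem = apex_to_nxem(apex)                       # -> [0, 1, 2, 2, 0]
    assert np.array_equal(nxem_to_pyxem(nxem), apex)

Round-tripping through the NXem convention, as the assert checks, is what keeps the template output and the pyxem CrystalMap consistent.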
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 8c831735f..7500ce57a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -35,8 +35,10 @@ import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser -from pynxtools.dataconverter.readers.em.utils.hfive_utils import EBSD_MAP_SPACEGROUP, \ - read_strings_from_dataset, all_equal, format_euler_parameterization +from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ + EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization +from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ + ASSUME_PHASE_NAME_TO_SPACE_GROUP class HdfFiveCommunityReader(HdfFiveBaseParser): @@ -161,6 +163,9 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if spc_grp in EBSD_MAP_SPACEGROUP.keys(): space_group = EBSD_MAP_SPACEGROUP[spc_grp] self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group + elif phase_name in ASSUME_PHASE_NAME_TO_SPACE_GROUP.keys(): + space_group = ASSUME_PHASE_NAME_TO_SPACE_GROUP[phase_name] + self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group else: raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index c0a2a31e8..80f9711e7 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -38,6 +38,8 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ read_strings_from_dataset, read_first_scalar, format_euler_parameterization +from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ + ASSUME_PHASE_NAME_TO_SPACE_GROUP class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser): @@ -173,6 +175,8 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): # if you know the space group we know laue and point group and symmetry # but the opposite direction leaves room for ambiguities space_group = None + if phase_name in ASSUME_PHASE_NAME_TO_SPACE_GROUP.keys(): + space_group = ASSUME_PHASE_NAME_TO_SPACE_GROUP[phase_name] self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group if len(self.tmp[ckey]["space_group"]) > 0: diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py index 1b96073bb..5aa5e8f0c 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py @@ -17,13 +17,29 @@ # """(Sub-)parser mapping concepts and content from Marc deGraeff's EMsoft *.h5 files on NXem.""" +import os +from typing import Dict, Any, List import numpy as np import h5py -from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveGenericReader +from itertools import groupby +# import imageio.v3 as iio +from PIL import Image as pil + +import diffsims +import orix +from diffpy.structure import Lattice, Structure +from orix import plot +from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList +from orix.quaternion import Rotation +from orix.vector import Vector3d + 
+import matplotlib.pyplot as plt + +from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset -class HdfFiveEmSoftReader(HdfFiveGenericReader): +class HdfFiveEmSoftReader(HdfFiveBaseParser): """Read EMsoft H5 (Marc deGraeff Carnegie Mellon)""" def __init__(self, file_path: str = ""): super().__init__(file_path) @@ -54,3 +70,6 @@ def check_if_supported(self): if self.supported is True: self.version = self.supported_version.copy() + + def parse_and_normalize(self): + pass diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 3f7342482..039ec69c0 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -52,13 +52,14 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset from pynxtools.dataconverter.readers.em.utils.hfive_web_constants import HFIVE_WEB_MAXIMUM_RGB +from pynxtools.dataconverter.readers.em.utils.image_processing import thumbnail from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader from pynxtools.dataconverter.readers.em.subparsers.hfive_apex import HdfFiveEdaxApexReader from pynxtools.dataconverter.readers.em.subparsers.hfive_ebsd import HdfFiveCommunityReader -# from pynxtools.dataconverter.readers.em.subparsers.hfive_emsoft import HdfFiveEmSoftReader +from pynxtools.dataconverter.readers.em.subparsers.hfive_emsoft import HdfFiveEmSoftReader class NxEmNxsHfiveSubParser: @@ -118,7 +119,8 @@ def parse(self, template: dict) -> dict: ebsd.parse_and_normalize() self.process_into_template(ebsd.tmp, template) elif hfive_parser_type == "emsoft": - return template + emsoft = HdfFiveEmSoftReader(self.file_path) + emsoft.parse_and_normalize() else: # none or something unsupported return template return template @@ -142,9 +144,9 @@ def identify_hfive_type(self): if hdf.supported is True: return "hebsd" # computer simulation tools - # hdf = HdfFiveEmSoftReader(f"{self.file_path}") - # if hdf.supported is True: - # return "emsoft" + hdf = HdfFiveEmSoftReader(f"{self.file_path}") + if hdf.supported is True: + return "emsoft" return None def process_into_template(self, inp: dict, template: dict) -> dict: @@ -157,13 +159,13 @@ def process_into_template(self, inp: dict, template: dict) -> dict: else: print(f"{key}, {val}") - self.process_roi_overview(inp, template) + # self.process_roi_overview(inp, template) self.process_roi_ebsd_maps(inp, template) return template def process_roi_overview(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): - if ckey.startswith("ebsd"): + if ckey.startswith("ebsd") and inp[ckey] != {}: self.process_roi_overview_ebsd_based( inp[ckey], ckey.replace("ebsd", ""), template) break # only one roi for now @@ -175,14 +177,13 @@ def process_roi_overview_ebsd_based(self, template: dict) -> dict: print("Parse ROI default plot...") # prfx = f"/ENTRY[entry{self.entry_id}]/experiment/indexing/region_of_interest/roi{roi_id}" - prfx = f"/roi{roi_id}" - trg = f"{prfx}" - template[f"{trg}/title"] = str("Region-of-interest overview image") + # prfx = f"/roi{roi_id}" + trg = 
f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/DATA[roi]" + template[f"{trg}/title"] = f"Region-of-interest overview image" template[f"{trg}/@signal"] = "data" template[f"{trg}/@axes"] = ["axis_y", "axis_x"] template[f"{trg}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) template[f"{trg}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) - trg = f"{prfx}/data" contrast_modes = [(None, "n/a"), ("bc", "normalized_band_contrast"), ("ci", "normalized_confidence_index"), @@ -190,47 +191,42 @@ def process_roi_overview_ebsd_based(self, success = False for contrast_mode in contrast_modes: if contrast_mode[0] in inp.keys() and success is False: - template[f"{trg}"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1} - template[f"{prfx}/descriptor"] = contrast_mode[1] + template[f"{trg}/data"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1} + template[f"{trg}/descriptor"] = contrast_mode[1] success = True if success is False: - raise ValueError(f"{__name__} unable to generate plot for {prfx} !") + raise ValueError(f"{__name__} unable to generate plot for {trg} !") # 0 is y while 1 is x ! - template[f"{trg}/@long_name"] = "Signal" - template[f"{trg}/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps - template[f"{trg}/@IMAGE_VERSION"] = "1.2" - template[f"{trg}/@SUBCLASS_VERSION"] = np.int64(15) - - trg = f"{prfx}/axis_x" - template[f"{trg}"] = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1} - template[f"{trg}/@long_name"] = f"Coordinate along x-axis ({inp['s_unit']})" - template[f"{trg}/@units"] = f"{inp['s_unit']}" - trg = f"{prfx}/axis_y" - template[f"{trg}"] = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1} - template[f"{trg}/@long_name"] = f"Coordinate along y-axis ({inp['s_unit']})" - template[f"{trg}/@units"] = f"{inp['s_unit']}" + template[f"{trg}/data/@long_name"] = f"Signal" + template[f"{trg}/data/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps + template[f"{trg}/data/@IMAGE_VERSION"] = f"1.2" + template[f"{trg}/data/@SUBCLASS_VERSION"] = np.int64(15) + + template[f"{trg}/AXISNAME[axis_x]"] = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1} + template[f"{trg}/AXISNAME[axis_x]/@long_name"] = f"Coordinate along x-axis ({inp['s_unit']})" + template[f"{trg}/AXISNAME[axis_x]/@units"] = f"{inp['s_unit']}" + template[f"{trg}/AXISNAME[axis_y]"] = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1} + template[f"{trg}/AXISNAME[axis_y]/@long_name"] = f"Coordinate along y-axis ({inp['s_unit']})" + template[f"{trg}/AXISNAME[axis_y]/@units"] = f"{inp['s_unit']}" return template def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): - if ckey.startswith("ebsd"): - roi_identifier = ckey.replace("ebsd", "") - self.process_roi_xmap( - inp[ckey], roi_identifier, template) - # self.process_roi_phases( - # inp[ckey], roi_identifier, template) - # self.process_roi_inverse_pole_figures( - # inp[ckey], roi_identifier, template) - break # only one roi for now + if ckey.startswith("ebsd") and inp[ckey] != {}: + if ckey.replace("ebsd", "").isdigit(): + roi_id = int(ckey.replace("ebsd", "")) + self.process_roi_xmap(inp[ckey], roi_id, template) + self.process_roi_phases(inp[ckey], 
roi_id, template)
        return template

-    def process_roi_xmap(self, inp: dict, roi_id: str, template: dict) -> dict:
+    def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict:
        """Process crystal orientation map from normalized orientation data."""
        # for NeXus to create a default representation of the EBSD map to explore
+        self.xmap = None
        if np.max((inp["n_x"], inp["n_y"])) < HFIVE_WEB_MAXIMUM_RGB:
            # can use the map discretization as is
            coordinates, _ = create_coordinate_arrays(
-                (inp["n_x"], inp["n_y"]), (inp["s_x"], inp["s_y"]))
+                (inp["n_y"], inp["n_x"]), (inp["s_y"], inp["s_x"]))
            xaxis = coordinates["x"]
            yaxis = coordinates["y"]
            del coordinates
@@ -242,7 +238,16 @@ def process_roi_xmap(self, inp: dict, roi_id: str, template: dict) -> dict:
            # tree-based 1NN

        # proceed as usual
-        pyxem_phase_identifier = inp["phase_id"] - 1
+        # TODO::there was one example 093_0060.h5oina where the HitRate was 75%
+        # but no pixel was left unidentified ??
+        print(f"Unique phase_identifier {np.unique(inp['phase_id'])}")
+        min_phase_id = np.min(np.unique(inp["phase_id"]))
+        if min_phase_id > 0:
+            pyxem_phase_identifier = inp["phase_id"] - min_phase_id
+        elif min_phase_id == 0:
+            pyxem_phase_identifier = inp["phase_id"] - 1
+        else:
+            raise ValueError(f"Unable to deal with unexpected phase_identifier !")
        # inp["phase_id"] - (np.min(inp["phase_id"]) - (-1))
        # for pyxem the non-indexed has to be -1 instead of 0 which is what NeXus uses
        # -1 always because content of inp["phase_id"] is normalized
@@ -256,26 +261,139 @@ def process_roi_xmap(self, inp: dict, roi_id: str, template: dict) -> dict:
            phase_id=pyxem_phase_identifier,
            phase_list=PhaseList(space_groups=inp["space_group"],
                                 structures=inp["phase"]),
-            prop={})
+            prop={},
+            scan_unit=inp["s_unit"])
            # "bc": inp["band_contrast"]},  scan_unit=inp["s_unit"])
        print(self.xmap)
        return template

-    def process_roi_phases(self, template: dict) -> dict:
+    def process_roi_phases(self, inp: dict, roi_id: int, template: dict) -> dict:
+        print("Parse crystal_structure_models aka phases...")
+        phase_id = 0
+        prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing"
+        n_pts = inp["n_x"] * inp["n_y"]
+        n_pts_indexed = np.sum(inp["phase_id"] != 0)
+        print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}")
+        template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts)
+        template[f"{prfx}/indexing_rate"] = np.float64(100. n_pts_indexed / n_pts)
+        template[f"{prfx}/indexing_rate/@units"] = f"%"
+        template[f"{prfx}/phase{phase_id}/number_of_scan_points"] = np.uint32(0)
+        template[f"{prfx}/phase{phase_id}/phase_identifier"] = np.uint32(phase_id)
+        template[f"{prfx}/phase{phase_id}/phase_name"] = f"notIndexed"
+
+        for pyxem_phase_id in np.arange(0, np.max(self.xmap.phase_id) + 1):
+            # this loop is implicitly skipped when xmap is None
+            print(f"inp[phases].keys(): {inp['phases'].keys()}")
+            if (pyxem_phase_id + 1) not in inp["phases"].keys():
+                raise ValueError(f"{pyxem_phase_id + 1} is not a key in inp['phases'] !")
+            # if isinstance(inp["phases"][phase_id], dict) is True:
+            # phase_id of pyxem notIndexed is -1 while for NeXus it is 0 so add + 1 in naming schemes
+            trg = f"{prfx}/phase{pyxem_phase_id + 1}"
+            template[f"{trg}/number_of_scan_points"] \
+                = np.uint32(np.sum(self.xmap.phase_id == pyxem_phase_id))
+            # print(f"{pyxem_phase_id + 1}, " \
+            #       f"{np.uint32(np.sum(self.xmap.phase_id == pyxem_phase_id))}," \
+            #       f" {inp['phases'][pyxem_phase_id + 1]['phase_name']}")
+            template[f"{trg}/phase_identifier"] = np.uint32(pyxem_phase_id + 1)
+            template[f"{trg}/phase_name"] = f"{inp['phases'][pyxem_phase_id + 1]['phase_name']}"
+
+            self.process_roi_phase_inverse_pole_figures(roi_id, pyxem_phase_id, template)
        return template

-    def process_roi_inverse_pole_figures(self, template: dict) -> dict:
+    def process_roi_phase_inverse_pole_figures(self,
+                                               roi_id: int,
+                                               pyxem_phase_id: int,
+                                               template: dict) -> dict:
        """Parse inverse pole figures (IPF) mappings."""
        # call process_roi_ipf_map
-        # call process_roi_ipf_color_key
-        return template
+        phase_name = self.xmap.phases[pyxem_phase_id].name
+        print(f"Generate IPF map for {pyxem_phase_id}, {phase_name}...")

-    def process_roi_ipf_map(self, identifier, template: dict) -> dict:
        """Parse and create inverse-pole-figure (IPF) mappings on their color models."""
        # +1 because for orix not_indexed -1 and "first" phase has ID 0 !
- return template + projection_directions = [("X", [1., 0., 0.]), + ("Y", [0., 1., 0.]), + ("Z", [0., 0., 1.])] + projection_vectors = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] + for idx in [0, 1, 2]: + ipf_key = plot.IPFColorKeyTSL( + self.xmap.phases[pyxem_phase_id].point_group.laue, + direction=projection_vectors[idx]) + + fig = ipf_key.plot(return_figure=True) + fig.savefig("temporary.png", dpi=300, facecolor='w', edgecolor='w', + orientation='landscape', format='png', transparent=False, + bbox_inches='tight', pad_inches=0.1, metadata=None) + img = np.asarray(thumbnail(pil.open("temporary.png", "r", ["png"]), + size=HFIVE_WEB_MAXIMUM_RGB), np.uint8) # no flipping + img = img[:, :, 0:3] # discard alpha channel + if os.path.exists("temporary.png"): + os.remove("temporary.png") + + rgb_px_with_phase_id = np.asarray( + np.asarray(ipf_key.orientation2color( + self.xmap[phase_name].rotations) * 255., np.uint32), np.uint8) + + print(f"idx {idx}, phase_name {phase_name}, shape {self.xmap.shape}") + ipf_rgb_map = np.asarray( + np.uint8(np.zeros((self.xmap.shape[0] * self.xmap.shape[1], 3)) * 255.)) + # background is black instead of white (which would be more pleasing) + # but IPF color maps have a whitepoint which encodes in fact an orientation + # and because of that we may have a single crystal with an orientation + # close to the whitepoint which become a fully white seemingly "empty" image + ipf_rgb_map[self.xmap.phase_id == pyxem_phase_id, :] = rgb_px_with_phase_id + ipf_rgb_map = np.reshape( + ipf_rgb_map, (self.xmap.shape[0], self.xmap.shape[1], 3), order="C") + # 0 is y while 1 is x ! + + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ + f"/phase{pyxem_phase_id + 1}/ipf{idx + 1}" + template[f"{trg}/projection_direction"] = np.asarray([0., 0., 1.], np.float32) - def process_roi_ipf_color_key(self, identifier, template: dict) -> dict: - """Parse color key renderings of inverse-pole-figure (IPF) mappings.""" - # +1 because for orix not_indexed -1 and "first" phase has ID 0 ! 
+ # add the IPF color map + template[f"{trg}/DATA[map]/title"] \ + = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" + template[f"{trg}/DATA[map]/@signal"] = "data" + template[f"{trg}/DATA[map]/@axes"] = ["axis_y", "axis_x"] + template[f"{trg}/DATA[map]/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) + template[f"{trg}/DATA[map]/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + template[f"{trg}/DATA[map]/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} + template[f"{trg}/DATA[map]/DATA[data]/@CLASS"] = "IMAGE" # required, H5Web, RGB + template[f"{trg}/DATA[map]/DATA[data]/@IMAGE_VERSION"] = "1.2" + template[f"{trg}/DATA[map]/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) + + template[f"{trg}/DATA[map]/AXISNAME[axis_x]"] \ + = {"compress": np.asarray(self.xmap.x, np.float32), "strength": 1} + template[f"{trg}/DATA[map]/AXISNAME[axis_x]/@long_name"] \ + = f"Coordinate along x-axis ({self.xmap.scan_unit})" + template[f"{trg}/DATA[map]/AXISNAME[axis_x]/@units"] \ + = f"{self.xmap.scan_unit}" + template[f"{trg}/DATA[map]/AXISNAME[axis_y]"] \ + = {"compress": np.asarray(self.xmap.y, np.float32), "strength": 1} + template[f"{trg}/DATA[map]/AXISNAME[axis_y]/@long_name"] \ + = f"Coordinate along y-axis ({self.xmap.scan_unit})" + template[f"{trg}/DATA[map]/AXISNAME[axis_y]/@units"] \ + = f"{self.xmap.scan_unit}" + + # add the IPF color map legend/key + template[f"{trg}/DATA[legend]/title"] \ + = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" + # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" + template[f"{trg}/DATA[legend]/@signal"] = "data" + template[f"{trg}/DATA[legend]/@axes"] = ["axis_y", "axis_x"] + template[f"{trg}/DATA[legend]/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) + template[f"{trg}/DATA[legend]/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + template[f"{trg}/DATA[legend]/DATA[data]"] = {"compress": img, "strength": 1} + template[f"{trg}/DATA[legend]/DATA[data]/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps + template[f"{trg}/DATA[legend]/DATA[data]/@IMAGE_VERSION"] = "1.2" + template[f"{trg}/DATA[legend]/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) + + template[f"{trg}/DATA[legend]/AXISNAME[axis_x]"] \ + = {"compress": np.asarray(np.linspace(1, np.shape(img)[0], num=np.shape(img)[0], endpoint=True), np.uint32), "strength": 1} + template[f"{trg}/DATA[legend]/AXISNAME[axis_x]/@long_name"] = "Pixel along x-axis" + template[f"{trg}/DATA[legend]/AXISNAME[axis_x]/@units"] = "px" + template[f"{trg}/DATA[legend]/AXISNAME[axis_y]"] \ + = {"compress": np.asarray(np.linspace(1, np.shape(img)[1], num=np.shape(img)[1], endpoint=True), np.uint32), "strength": 1} + template[f"{trg}/DATA[legend]/AXISNAME[axis_y]/@long_name"] = "Pixel along y-axis" + template[f"{trg}/DATA[legend]/AXISNAME[axis_y]/@units"] = "px" + + # call process_roi_ipf_color_key return template diff --git a/pynxtools/dataconverter/readers/em/utils/image_processing.py b/pynxtools/dataconverter/readers/em/utils/image_processing.py new file mode 100644 index 000000000..34f98266f --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/image_processing.py @@ -0,0 +1,54 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Resize images so that they display properly in H5Web.""" + +# pylint: disable=no-member + +# f"https://stackoverflow.com/questions/4321290/" +# f" how-do-i-make-pil-take-into-account-the-shortest-side-when-creating-a-thumbnail" + +import numpy as np +from PIL import Image as pil + + +def thumbnail(img, size=300): + """Create a thumbnail, i.e. resized version of an image.""" + img = img.copy() + + if img.mode not in ('L', 'RGB'): + img = img.convert('RGB') + + old_width, old_height = img.size + + if old_width < size and old_height < size: + return img + + if old_width == old_height: + img.thumbnail((size, size), pil.ANTIALIAS) + + elif old_height > old_width: + ratio = float(old_width) / float(old_height) + new_width = ratio * size + img = img.resize((int(np.floor(new_width)), size), pil.ANTIALIAS) + + elif old_width > old_height: + ratio = float(old_height) / float(old_width) + new_height = ratio * size + img = img.resize((size, int(np.floor(new_height))), pil.ANTIALIAS) + + return img diff --git a/test.all.sh b/test.all.sh index 6f5e4d894..5d1720508 100755 --- a/test.all.sh +++ b/test.all.sh @@ -6,9 +6,13 @@ # dataconverter --reader em --nxdl NXroot --input-file 229_2097.oh5 --output debug.edax.nxs 1>stdout.edax.txt 2>stderr.edax.txt # dataconverter --reader em --nxdl NXroot --input-file 088_0009.h5 --output debug.britton.nxs 1>stdout.britton.txt 2>stderr.britton.txt -Examples="026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" +# 026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 delmic hdf5 have no ebsd data +# 173_0056.h5oina has only eds data -# Examples="132_0005.h5" +Examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 
130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5"
+
+# Examples="207_2081.edaxh5"
+# Examples="173_0057.h5oina"
 for example in $Examples; do
 	echo $example
 	dataconverter --reader em --nxdl NXroot --input-file $example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt
 done

From fea78428cb9b4e50cf6bdf48329fb75a3dd33f06 Mon Sep 17 00:00:00 2001
From: "markus.kuehbach"
Date: Tue, 24 Oct 2023 17:38:04 +0200
Subject: [PATCH 16/84] Runthrough with all HDF5 examples from the community was
 successful; however, the calibration of the ipf and roi axis coordinates
 currently uses pixels but should use pixel times scan unit. Next steps: i)
 implement this and tree-based downsampling, ii) linting and styling

---
 .../readers/em/subparsers/hfive_apex.py      |   2 +-
 .../readers/em/subparsers/hfive_bruker.py    |   2 +-
 .../readers/em/subparsers/hfive_ebsd.py      |   2 +-
 .../readers/em/subparsers/hfive_edax.py      |   2 +-
 .../readers/em/subparsers/hfive_oxford.py    |   2 +-
 .../readers/em/subparsers/nxs_hfive.py       | 121 ++++++++++--------
 test.all.sh                                  |   3 +-
 7 files changed, 75 insertions(+), 59 deletions(-)

diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
index 3d8d921a8..1eb64d936 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -123,7 +123,7 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
             raise ValueError(f"Grid Type {grid_type} is currently not supported !")
         self.tmp[ckey]["grid_type"] = grid_type
         self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0]
-        self.tmp[ckey]["s_unit"] = "µm"  # TODO::always micron?
+        self.tmp[ckey]["s_unit"] = "um"  # "µm"  # TODO::always micron?
         self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0]
         self.tmp[ckey]["s_y"] = fp[f"{self.prfx}/Sample/Step Y"][0]
         self.tmp[ckey]["n_y"] = fp[f"{self.prfx}/Sample/Number Of Rows"][0]
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
index 8d6108124..e9a1e3215 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
@@ -115,7 +115,7 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
         self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()]
         self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()]
         self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()]
-        self.tmp[ckey]["s_unit"] = "µm"  # TODO::always micron?
+        self.tmp[ckey]["s_unit"] = "um"  # "µm"  # TODO::always micron?
self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()] # TODO::check that all data are consistent # TODO::what is y and x depends on coordinate system diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 7500ce57a..022e73b2f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -116,7 +116,7 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()] self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()] self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()] - self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron? + self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()] # TODO::check that all data are consistent # TODO::what is y and x depends on coordinate system diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 80f9711e7..26371535e 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -122,7 +122,7 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): raise ValueError(f"Grid Type {grid_type} is currently not supported !") self.tmp[ckey]["grid_type"] = grid_type self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"]) - self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron? + self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"]) self.tmp[ckey]["s_y"] = read_first_scalar(fp[f"{grp_name}/Step Y"]) self.tmp[ckey]["n_y"] = read_first_scalar(fp[f"{grp_name}/nRows"]) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index e3dc77562..d169c68d6 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -129,7 +129,7 @@ def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): # X Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along x-axis in micrometers. Line scan: step size along the line scan in micrometers. if read_strings_from_dataset(fp[f"{grp_name}/X Step"].attrs["Unit"]) == "um": self.tmp[ckey]["s_x"] = fp[f"{grp_name}/X Step"][0] - self.tmp[ckey]["s_unit"] = "µm" + self.tmp[ckey]["s_unit"] = "um" # "µm" else: raise ValueError(f"Unexpected X Step Unit attribute !") # Y Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along y-axis in micrometers. Line scan: Always set to 0. 
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 039ec69c0..a9b4f842a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -159,7 +159,7 @@ def process_into_template(self, inp: dict, template: dict) -> dict: else: print(f"{key}, {val}") - # self.process_roi_overview(inp, template) + self.process_roi_overview(inp, template) self.process_roi_ebsd_maps(inp, template) return template @@ -202,11 +202,15 @@ def process_roi_overview_ebsd_based(self, template[f"{trg}/data/@IMAGE_VERSION"] = f"1.2" template[f"{trg}/data/@SUBCLASS_VERSION"] = np.int64(15) - template[f"{trg}/AXISNAME[axis_x]"] = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1} - template[f"{trg}/AXISNAME[axis_x]/@long_name"] = f"Coordinate along x-axis ({inp['s_unit']})" + template[f"{trg}/AXISNAME[axis_x]"] \ + = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1} + template[f"{trg}/AXISNAME[axis_x]/@long_name"] \ + = f"Coordinate along x-axis ({inp['s_unit']})" template[f"{trg}/AXISNAME[axis_x]/@units"] = f"{inp['s_unit']}" - template[f"{trg}/AXISNAME[axis_y]"] = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1} - template[f"{trg}/AXISNAME[axis_y]/@long_name"] = f"Coordinate along y-axis ({inp['s_unit']})" + template[f"{trg}/AXISNAME[axis_y]"] \ + = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1} + template[f"{trg}/AXISNAME[axis_y]/@long_name"] \ + = f"Coordinate along y-axis ({inp['s_unit']})" template[f"{trg}/AXISNAME[axis_y]/@units"] = f"{inp['s_unit']}" return template @@ -229,6 +233,7 @@ def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict: (inp["n_y"], inp["n_x"]), (inp["s_y"], inp["s_x"])) xaxis = coordinates["x"] yaxis = coordinates["y"] + print(f"xmi {np.min(xaxis)}, xmx {np.max(xaxis)}, ymi {np.min(yaxis)}, ymx {np.max(yaxis)}") del coordinates else: raise ValueError(f"Downsampling for too large EBSD maps is currently not supported !") @@ -263,6 +268,8 @@ def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict: structures=inp["phase"]), prop={}, scan_unit=inp["s_unit"]) + del xaxis + del yaxis # "bc": inp["band_contrast"]}, scan_unit=inp["s_unit"]) print(self.xmap) return template @@ -275,27 +282,26 @@ def process_roi_phases(self, inp: dict, roi_id: int, template: dict) -> dict: n_pts_indexed = np.sum(inp["phase_id"] != 0) print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}") template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts) - template[f"{prfx}/indexing_rate"] = np.float64(100. n_pts_indexed / n_pts) + template[f"{prfx}/indexing_rate"] = np.float64(100. 
* n_pts_indexed / n_pts) template[f"{prfx}/indexing_rate/@units"] = f"%" - template[f"{prfx}/phase{phase_id}/number_of_scan_points"] = np.uint32(0) - template[f"{prfx}/phase{phase_id}/phase_identifier"] = np.uint32(phase_id) - template[f"{prfx}/phase{phase_id}/phase_name"] = f"notIndexed" + grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" + template[f"{grp_name}/number_of_scan_points"] = np.uint32(0) + template[f"{grp_name}/phase_identifier"] = np.uint32(phase_id) + template[f"{grp_name}/phase_name"] = f"notIndexed" for pyxem_phase_id in np.arange(0, np.max(self.xmap.phase_id) + 1): # this loop is implicitly ignored as when xmap is None print(f"inp[phases].keys(): {inp['phases'].keys()}") if (pyxem_phase_id + 1) not in inp["phases"].keys(): raise ValueError(f"{pyxem_phase_id + 1} is not a key in inp['phases'] !") - # if isinstance(inp["phases"][phase_id], dict) is True: - # phase_id of pyxem notIndexed is -1 while for NeXus it is 0 so add + 1 in naming schemes - trg = f"{prfx}/phase{pyxem_phase_id + 1}" + # phase_id of pyxem notIndexed is -1 while for NeXus + # it is 0 so add + 1 in naming schemes + trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id + 1}]" template[f"{trg}/number_of_scan_points"] \ = np.uint32(np.sum(self.xmap.phase_id == pyxem_phase_id)) - # print(f"{pyxem_phase_id + 1}, " \ - # f"{np.uint32(np.sum(self.xmap.phase_id == pyxem_phase_id))}," \ - # f" {inp['phases'][pyxem_phase_id + 1]['phase_name']}") template[f"{trg}/phase_identifier"] = np.uint32(pyxem_phase_id + 1) - template[f"{trg}/phase_name"] = f"{inp['phases'][pyxem_phase_id + 1]['phase_name']}" + template[f"{trg}/phase_name"] \ + = f"{inp['phases'][pyxem_phase_id + 1]['phase_name']}" self.process_roi_phase_inverse_pole_figures(roi_id, pyxem_phase_id, template) return template @@ -345,55 +351,64 @@ def process_roi_phase_inverse_pole_figures(self, # 0 is y while 1 is x ! 
trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ - f"/phase{pyxem_phase_id + 1}/ipf{idx + 1}" + f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id + 1}]" \ + f"/MS_IPF[ipf{idx + 1}]" template[f"{trg}/projection_direction"] = np.asarray([0., 0., 1.], np.float32) # add the IPF color map - template[f"{trg}/DATA[map]/title"] \ + mpp = f"{trg}/DATA[map]" + template[f"{mpp}/title"] \ = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" - template[f"{trg}/DATA[map]/@signal"] = "data" - template[f"{trg}/DATA[map]/@axes"] = ["axis_y", "axis_x"] - template[f"{trg}/DATA[map]/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) - template[f"{trg}/DATA[map]/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) - template[f"{trg}/DATA[map]/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} - template[f"{trg}/DATA[map]/DATA[data]/@CLASS"] = "IMAGE" # required, H5Web, RGB - template[f"{trg}/DATA[map]/DATA[data]/@IMAGE_VERSION"] = "1.2" - template[f"{trg}/DATA[map]/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) - - template[f"{trg}/DATA[map]/AXISNAME[axis_x]"] \ + template[f"{mpp}/@signal"] = "data" + template[f"{mpp}/@axes"] = ["axis_y", "axis_x"] + template[f"{mpp}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) + template[f"{mpp}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} + template[f"{mpp}/DATA[data]/@CLASS"] = "IMAGE" # required, H5Web, RGB + template[f"{mpp}/DATA[data]/@IMAGE_VERSION"] = "1.2" + template[f"{mpp}/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) + + template[f"{mpp}/AXISNAME[axis_x]"] \ = {"compress": np.asarray(self.xmap.x, np.float32), "strength": 1} - template[f"{trg}/DATA[map]/AXISNAME[axis_x]/@long_name"] \ + template[f"{mpp}/AXISNAME[axis_x]/@long_name"] \ = f"Coordinate along x-axis ({self.xmap.scan_unit})" - template[f"{trg}/DATA[map]/AXISNAME[axis_x]/@units"] \ - = f"{self.xmap.scan_unit}" - template[f"{trg}/DATA[map]/AXISNAME[axis_y]"] \ + template[f"{mpp}/AXISNAME[axis_x]/@units"] = f"{self.xmap.scan_unit}" + template[f"{mpp}/AXISNAME[axis_y]"] \ = {"compress": np.asarray(self.xmap.y, np.float32), "strength": 1} - template[f"{trg}/DATA[map]/AXISNAME[axis_y]/@long_name"] \ + template[f"{mpp}/AXISNAME[axis_y]/@long_name"] \ = f"Coordinate along y-axis ({self.xmap.scan_unit})" - template[f"{trg}/DATA[map]/AXISNAME[axis_y]/@units"] \ - = f"{self.xmap.scan_unit}" + template[f"{mpp}/AXISNAME[axis_y]/@units"] = f"{self.xmap.scan_unit}" # add the IPF color map legend/key - template[f"{trg}/DATA[legend]/title"] \ + lgd = f"{trg}/DATA[legend]" + template[f"{lgd}/title"] \ = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" - template[f"{trg}/DATA[legend]/@signal"] = "data" - template[f"{trg}/DATA[legend]/@axes"] = ["axis_y", "axis_x"] - template[f"{trg}/DATA[legend]/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) - template[f"{trg}/DATA[legend]/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) - template[f"{trg}/DATA[legend]/DATA[data]"] = {"compress": img, "strength": 1} - template[f"{trg}/DATA[legend]/DATA[data]/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps - template[f"{trg}/DATA[legend]/DATA[data]/@IMAGE_VERSION"] = "1.2" - template[f"{trg}/DATA[legend]/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) - - template[f"{trg}/DATA[legend]/AXISNAME[axis_x]"] \ - = {"compress": np.asarray(np.linspace(1, np.shape(img)[0], num=np.shape(img)[0], endpoint=True), 
np.uint32), "strength": 1} - template[f"{trg}/DATA[legend]/AXISNAME[axis_x]/@long_name"] = "Pixel along x-axis" - template[f"{trg}/DATA[legend]/AXISNAME[axis_x]/@units"] = "px" - template[f"{trg}/DATA[legend]/AXISNAME[axis_y]"] \ - = {"compress": np.asarray(np.linspace(1, np.shape(img)[1], num=np.shape(img)[1], endpoint=True), np.uint32), "strength": 1} - template[f"{trg}/DATA[legend]/AXISNAME[axis_y]/@long_name"] = "Pixel along y-axis" - template[f"{trg}/DATA[legend]/AXISNAME[axis_y]/@units"] = "px" + template[f"{lgd}/@signal"] = "data" + template[f"{lgd}/@axes"] = ["axis_y", "axis_x"] + template[f"{lgd}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) + template[f"{lgd}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + template[f"{lgd}/data"] = {"compress": img, "strength": 1} + template[f"{lgd}/data/@CLASS"] = f"IMAGE" # required by H5Web to plot RGB maps + template[f"{lgd}/data/@IMAGE_VERSION"] = f"1.2" + template[f"{lgd}/data/@SUBCLASS_VERSION"] = np.int64(15) + + template[f"{lgd}/AXISNAME[axis_x]"] \ + = {"compress": np.asarray(np.linspace(1, + np.shape(img)[0], + num=np.shape(img)[0], + endpoint=True), np.uint32), + "strength": 1} + template[f"{lgd}/AXISNAME[axis_x]/@long_name"] = "Pixel along x-axis" + template[f"{lgd}/AXISNAME[axis_x]/@units"] = "px" + template[f"{lgd}/AXISNAME[axis_y]"] \ + = {"compress": np.asarray(np.linspace(1, + np.shape(img)[1], + num=np.shape(img)[1], + endpoint=True), np.uint32), + "strength": 1} + template[f"{lgd}/AXISNAME[axis_y]/@long_name"] = "Pixel along y-axis" + template[f"{lgd}/AXISNAME[axis_y]/@units"] = "px" # call process_roi_ipf_color_key return template diff --git a/test.all.sh b/test.all.sh index 5d1720508..e86434c5d 100755 --- a/test.all.sh +++ b/test.all.sh @@ -9,10 +9,11 @@ # 026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 delmic hdf5 have no ebsd data # 173_0056.h5oina has only eds data -Examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" +# Examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 
173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" # Examples="207_2081.edaxh5" # Examples="173_0057.h5oina" +Examples="229_2097.oh5" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt From 0a2307f35f32892b669889272f7a7b0a578f9b2c Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Wed, 25 Oct 2023 13:25:21 +0200 Subject: [PATCH 17/84] Bugfix proper reading and handling of scan positions into calibrated scan positions --- .../readers/em/subparsers/hfive_apex.py | 4 +- .../readers/em/subparsers/hfive_bruker.py | 37 ++++++----- .../readers/em/subparsers/hfive_ebsd.py | 44 ++++++++----- .../readers/em/subparsers/hfive_edax.py | 27 ++++++-- .../readers/em/subparsers/hfive_oxford.py | 4 ++ .../readers/em/subparsers/nxs_hfive.py | 62 ++++++++++++------- test.all.sh | 3 +- 7 files changed, 120 insertions(+), 61 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 1eb64d936..86df97733 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -233,9 +233,9 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): self.tmp[ckey]["scan_point_x"] = np.asarray( np.linspace(0, self.tmp[ckey]["n_x"] - 1, num=self.tmp[ckey]["n_x"], - endpoint=True) * self.tmp[ckey]["s_x"] + 0., np.float32) + endpoint=True) * self.tmp[ckey]["s_x"], np.float32) self.tmp[ckey]["scan_point_y"] = np.asarray( np.linspace(0, self.tmp[ckey]["n_y"] - 1, num=self.tmp[ckey]["n_y"], - endpoint=True) * self.tmp[ckey]["s_y"] + 0., np.float32) + endpoint=True) * self.tmp[ckey]["s_y"], np.float32) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index e9a1e3215..bc37da596 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -107,16 +107,16 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): if f"{grp_name}" not in fp: raise ValueError(f"Unable to parse {grp_name} !") - req_fields = ["NCOLS", "NROWS", "SEPixelSizeX", "SEPixelSizeY"] + req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"] for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: raise ValueError(f"Unable to parse {grp_name}/{req_field} !") self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()] self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()] - self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()] + self.tmp[ckey]["s_x"] = fp[f"{grp_name}/XSTEP"][()] self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? 
- self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()] + self.tmp[ckey]["s_y"] = fp[f"{grp_name}/YSTEP"][()] # TODO::check that all data are consistent # TODO::what is y and x depends on coordinate system @@ -189,7 +189,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): if f"{grp_name}" not in fp: raise ValueError(f"Unable to parse {grp_name} !") - req_fields = ["phi1", "PHI", "phi2", "Phase", "X SAMPLE", "Y SAMPLE", "MAD"] + req_fields = ["phi1", "PHI", "phi2", "Phase", "MAD"] for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: raise ValueError(f"Unable to parse {grp_name}/{req_field} !") @@ -219,19 +219,22 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): else: raise ValueError(f"{grp_name}/Phase has unexpected shape !") - # X - if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts: - self.tmp[ckey]["scan_point_x"] \ - = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32) - else: - raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !") - - # Y - if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts: - self.tmp[ckey]["scan_point_y"] \ - = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32) - else: - raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !") + # X and Y + # there is X SAMPLE and Y SAMPLE but these are not defined somewhere instead + # here adding x and y assuming that we scan first lines along positive x and then + # moving downwards along +y + self.tmp[ckey]["scan_point_x"] \ + = np.asarray(np.tile(np.linspace(0., + self.tmp[ckey]["n_x"] - 1., + num=self.tmp[ckey]["n_x"], + endpoint=True) * self.tmp[ckey]["s_x"], + self.tmp[ckey]["n_y"]), np.float32) + self.tmp[ckey]["scan_point_y"] \ + = np.asarray(np.repeat(np.linspace(0., + self.tmp[ckey]["n_y"] - 1., + num=self.tmp[ckey]["n_y"], + endpoint=True) * self.tmp[ckey]["s_y"], + self.tmp[ckey]["n_x"]), np.float32) # Band Contrast is not stored in Bruker but Radon Quality or MAD # but this is s.th. different as it is the mean angular deviation between diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 022e73b2f..1c85eab4c 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -108,16 +108,16 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): if f"{grp_name}" not in fp: raise ValueError(f"Unable to parse {grp_name} !") - req_fields = ["NCOLS", "NROWS", "SEPixelSizeX", "SEPixelSizeY"] + req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"] for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: raise ValueError(f"Unable to parse {grp_name}/{req_field} !") self.tmp[ckey]["n_x"] = fp[f"{grp_name}/NCOLS"][()] self.tmp[ckey]["n_y"] = fp[f"{grp_name}/NROWS"][()] - self.tmp[ckey]["s_x"] = fp[f"{grp_name}/SEPixelSizeX"][()] + self.tmp[ckey]["s_x"] = fp[f"{grp_name}/XSTEP"][()] self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? - self.tmp[ckey]["s_y"] = fp[f"{grp_name}/SEPixelSizeY"][()] + self.tmp[ckey]["s_y"] = fp[f"{grp_name}/YSTEP"][()] # TODO::check that all data are consistent # TODO::what is y and x depends on coordinate system # TODO::why is SEPixelSize* half the value of *STEP for * X and Y respectively? 
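The tile/repeat construction used in these hunks is compact but easy to misread; a minimal standalone sketch, with illustrative values only, of how it lays out the scan positions for a row-major map in which x varies fastest:

    import numpy as np

    n_x, n_y = 3, 2      # columns, rows (illustrative)
    s_x, s_y = 0.5, 0.5  # step sizes in micrometers (illustrative)
    # x cycles through the full column pattern once per row ...
    scan_point_x = np.tile(np.linspace(0., n_x - 1., num=n_x, endpoint=True) * s_x, n_y)
    # ... while y holds each row value for a whole row of columns
    scan_point_y = np.repeat(np.linspace(0., n_y - 1., num=n_y, endpoint=True) * s_y, n_x)
    # scan_point_x -> [0.0, 0.5, 1.0, 0.0, 0.5, 1.0]
    # scan_point_y -> [0.0, 0.0, 0.0, 0.5, 0.5, 0.5]
    assert scan_point_x.shape == scan_point_y.shape == (n_x * n_y,)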
@@ -222,19 +222,33 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         else:
             raise ValueError(f"{grp_name}/Phase has unexpected shape !")

-        # X
-        if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts:
-            self.tmp[ckey]["scan_point_x"] \
-                = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32)
-        else:
-            raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !")
+        # X and Y
+        # there exist X SAMPLE and Y SAMPLE which indeed give calibrated coordinates
+        # relative to the sample coordinate system, ignore this for now
+        # and TODO::just calibrate on the image dimensions
+        self.tmp[ckey]["scan_point_x"] \
+            = np.asarray(np.tile(np.linspace(0.,
+                                             self.tmp[ckey]["n_x"] - 1.,
+                                             num=self.tmp[ckey]["n_x"],
+                                             endpoint=True) * self.tmp[ckey]["s_x"],
+                                 self.tmp[ckey]["n_y"]), np.float32)
+        self.tmp[ckey]["scan_point_y"] \
+            = np.asarray(np.repeat(np.linspace(0.,
+                                               self.tmp[ckey]["n_y"] - 1.,
+                                               num=self.tmp[ckey]["n_y"],
+                                               endpoint=True) * self.tmp[ckey]["s_y"],
+                                   self.tmp[ckey]["n_x"]), np.float32)

-        # Y
-        if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts:
-            self.tmp[ckey]["scan_point_y"] \
-                = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32)
-        else:
-            raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !")
+        # if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts:
+        #     self.tmp[ckey]["scan_point_x"] \
+        #         = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32)
+        # else:
+        #     raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !")
+        # if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts:
+        #     self.tmp[ckey]["scan_point_y"] \
+        #         = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32)
+        # else:
+        #     raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !")

         # Band Contrast is not stored in Bruker but Radon Quality or MAD
         # but this is s.th. different as it is the mean angular deviation between
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
index 26371535e..eaa459cb2 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
@@ -224,7 +224,26 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32)
         # promoting int8 to int32 no problem
         self.tmp[ckey]["ci"] = np.asarray(fp[f"{grp_name}/CI"][:], np.float32)
-        self.tmp[ckey]["scan_point_x"] = np.asarray(
-            fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"] + 0., np.float32)
-        self.tmp[ckey]["scan_point_y"] = np.asarray(
-            fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"] + 0., np.float32)
+        # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere
+        # the expected order on x is first all possible x values while y == 0,
+        # followed by as many copies of this linear sequence as there are y increments
+        # the tricky situation is that one version reports pixel coordinates while another
+        # reports calibrated, e.g. micron, coordinates; in the first case the pixel values
+        # need multiplication with the step size while in the other one must not multiply
+        # with the step size, as it has already been accounted for by the tech partner when writing!
+ if self.version["schema_version"] in ["OIM Analysis 8.5.1002 x64 [07-17-20]"]: + print(f"{self.version['schema_version']}, tech partner accounted for calibration") + self.tmp[ckey]["scan_point_x"] \ + = np.asarray(fp[f"{grp_name}/X Position"][:], np.float32) + self.tmp[ckey]["scan_point_y"] \ + = np.asarray(fp[f"{grp_name}/Y Position"][:], np.float32) + else: + print(f"{self.version['schema_version']}, parser has to do the calibration") + self.tmp[ckey]["scan_point_x"] = np.asarray( + fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32) + self.tmp[ckey]["scan_point_y"] = np.asarray( + fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32) + print(f"xmin {np.min(self.tmp[ckey]['scan_point_x'])}," \ + f"xmax {np.max(self.tmp[ckey]['scan_point_x'])}," \ + f"ymin {np.min(self.tmp[ckey]['scan_point_y'])}," \ + f"ymax {np.max(self.tmp[ckey]['scan_point_y'])}") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index d169c68d6..75740996f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -232,6 +232,10 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32) + # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere + # expected is order on x is first all possible x values while y == 0 + # followed by as many copies of this linear sequence for each y increment + # no action needed Oxford reports already the pixel coordinate multiplied by step # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner) self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32) # inconsistency f32 in file although specification states float diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index a9b4f842a..4bc3fbcf5 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -163,6 +163,12 @@ def process_into_template(self, inp: dict, template: dict) -> dict: self.process_roi_ebsd_maps(inp, template) return template + def get_named_axis(self, inp: dict, dim_name: str): + return np.asarray(np.linspace(0, + inp[f"n_{dim_name}"] - 1, + num=inp[f"n_{dim_name}"], + endpoint=True) * inp[f"s_{dim_name}"], np.float32) + def process_roi_overview(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): if ckey.startswith("ebsd") and inp[ckey] != {}: @@ -180,6 +186,7 @@ def process_roi_overview_ebsd_based(self, # prfx = f"/roi{roi_id}" trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/DATA[roi]" template[f"{trg}/title"] = f"Region-of-interest overview image" + template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! template[f"{trg}/@signal"] = "data" template[f"{trg}/@axes"] = ["axis_y", "axis_x"] template[f"{trg}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) @@ -198,20 +205,23 @@ def process_roi_overview_ebsd_based(self, raise ValueError(f"{__name__} unable to generate plot for {trg} !") # 0 is y while 1 is x ! 
template[f"{trg}/data/@long_name"] = f"Signal" - template[f"{trg}/data/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps + template[f"{trg}/data/@CLASS"] = "IMAGE" # required H5Web, RGB map template[f"{trg}/data/@IMAGE_VERSION"] = f"1.2" template[f"{trg}/data/@SUBCLASS_VERSION"] = np.int64(15) + scan_unit = inp["s_unit"] + if scan_unit == "um": + scan_unit = "µm" template[f"{trg}/AXISNAME[axis_x]"] \ - = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1} + = {"compress": self.get_named_axis(inp, "x"), "strength": 1} template[f"{trg}/AXISNAME[axis_x]/@long_name"] \ - = f"Coordinate along x-axis ({inp['s_unit']})" - template[f"{trg}/AXISNAME[axis_x]/@units"] = f"{inp['s_unit']}" + = f"Coordinate along x-axis ({scan_unit})" + template[f"{trg}/AXISNAME[axis_x]/@units"] = f"{scan_unit}" template[f"{trg}/AXISNAME[axis_y]"] \ - = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1} + = {"compress": self.get_named_axis(inp, "y"), "strength": 1} template[f"{trg}/AXISNAME[axis_y]/@long_name"] \ - = f"Coordinate along y-axis ({inp['s_unit']})" - template[f"{trg}/AXISNAME[axis_y]/@units"] = f"{inp['s_unit']}" + = f"Coordinate along y-axis ({scan_unit})" + template[f"{trg}/AXISNAME[axis_y]/@units"] = f"{scan_unit}" return template def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: @@ -227,14 +237,19 @@ def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict: """Process crystal orientation map from normalized orientation data.""" # for NeXus to create a default representation of the EBSD map to explore self.xmap = None + self.axis_x = None + self.axis_y = None if np.max((inp["n_x"], inp["n_y"])) < HFIVE_WEB_MAXIMUM_RGB: # can use the map discretization as is coordinates, _ = create_coordinate_arrays( (inp["n_y"], inp["n_x"]), (inp["s_y"], inp["s_x"])) xaxis = coordinates["x"] yaxis = coordinates["y"] - print(f"xmi {np.min(xaxis)}, xmx {np.max(xaxis)}, ymi {np.min(yaxis)}, ymx {np.max(yaxis)}") + print(f"xmi {np.min(xaxis)}, xmx {np.max(xaxis)}, " \ + f"ymi {np.min(yaxis)}, ymx {np.max(yaxis)}") del coordinates + self.axis_x = self.get_named_axis(inp, "x") + self.axis_y = self.get_named_axis(inp, "y") else: raise ValueError(f"Downsampling for too large EBSD maps is currently not supported !") # need to regrid to downsample too large maps @@ -359,6 +374,7 @@ def process_roi_phase_inverse_pole_figures(self, mpp = f"{trg}/DATA[map]" template[f"{mpp}/title"] \ = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" + template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
template[f"{mpp}/@signal"] = "data" template[f"{mpp}/@axes"] = ["axis_y", "axis_x"] template[f"{mpp}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) @@ -368,22 +384,24 @@ def process_roi_phase_inverse_pole_figures(self, template[f"{mpp}/DATA[data]/@IMAGE_VERSION"] = "1.2" template[f"{mpp}/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) - template[f"{mpp}/AXISNAME[axis_x]"] \ - = {"compress": np.asarray(self.xmap.x, np.float32), "strength": 1} + scan_unit = self.xmap.scan_unit + if scan_unit == "um": + scan_unit = "µm" + template[f"{mpp}/AXISNAME[axis_x]"] = {"compress": self.axis_x, "strength": 1} template[f"{mpp}/AXISNAME[axis_x]/@long_name"] \ - = f"Coordinate along x-axis ({self.xmap.scan_unit})" - template[f"{mpp}/AXISNAME[axis_x]/@units"] = f"{self.xmap.scan_unit}" - template[f"{mpp}/AXISNAME[axis_y]"] \ - = {"compress": np.asarray(self.xmap.y, np.float32), "strength": 1} + = f"Coordinate along x-axis ({scan_unit})" + template[f"{mpp}/AXISNAME[axis_x]/@units"] = f"{scan_unit}" + template[f"{mpp}/AXISNAME[axis_y]"] = {"compress": self.axis_y, "strength": 1} template[f"{mpp}/AXISNAME[axis_y]/@long_name"] \ - = f"Coordinate along y-axis ({self.xmap.scan_unit})" - template[f"{mpp}/AXISNAME[axis_y]/@units"] = f"{self.xmap.scan_unit}" + = f"Coordinate along y-axis ({scan_unit})" + template[f"{mpp}/AXISNAME[axis_y]/@units"] = f"{scan_unit}" # add the IPF color map legend/key lgd = f"{trg}/DATA[legend]" template[f"{lgd}/title"] \ = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" + template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! template[f"{lgd}/@signal"] = "data" template[f"{lgd}/@axes"] = ["axis_y", "axis_x"] template[f"{lgd}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) @@ -394,17 +412,17 @@ def process_roi_phase_inverse_pole_figures(self, template[f"{lgd}/data/@SUBCLASS_VERSION"] = np.int64(15) template[f"{lgd}/AXISNAME[axis_x]"] \ - = {"compress": np.asarray(np.linspace(1, - np.shape(img)[0], - num=np.shape(img)[0], + = {"compress": np.asarray(np.linspace(0, + np.shape(img)[1] - 1, + num=np.shape(img)[1], endpoint=True), np.uint32), "strength": 1} template[f"{lgd}/AXISNAME[axis_x]/@long_name"] = "Pixel along x-axis" template[f"{lgd}/AXISNAME[axis_x]/@units"] = "px" template[f"{lgd}/AXISNAME[axis_y]"] \ - = {"compress": np.asarray(np.linspace(1, - np.shape(img)[1], - num=np.shape(img)[1], + = {"compress": np.asarray(np.linspace(0, + np.shape(img)[0] - 1, + num=np.shape(img)[0], endpoint=True), np.uint32), "strength": 1} template[f"{lgd}/AXISNAME[axis_y]/@long_name"] = "Pixel along y-axis" diff --git a/test.all.sh b/test.all.sh index e86434c5d..c93c93f02 100755 --- a/test.all.sh +++ b/test.all.sh @@ -13,7 +13,8 @@ # Examples="207_2081.edaxh5" # Examples="173_0057.h5oina" -Examples="229_2097.oh5" +# oxford, bruker, britton, edax old noncali, edax old calib, apex +Examples="173_0057.h5oina 130_0003.h5 088_0009.h5 116_0014.h5 229_2097.oh5 207_2081.edaxh5" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt From 1459b8c49bc4dbc6e5bc80f881eda54b18708b68 Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Thu, 26 Oct 2023 11:50:12 +0200 Subject: [PATCH 18/84] Added EBSD map downsampling and bugfixing of non working AM example, this completes the implementation of the tech partner HDF5 parsers for EBSD, next step linting and 
adding EDS reader (for IKZ, etc) --- .../readers/em/subparsers/hfive_apex.py | 18 +-- .../readers/em/subparsers/hfive_bruker.py | 8 +- .../readers/em/subparsers/hfive_ebsd.py | 8 +- .../readers/em/subparsers/hfive_edax.py | 19 ++- .../readers/em/subparsers/hfive_oxford.py | 29 ++-- .../readers/em/subparsers/nxs_hfive.py | 137 ++++++++++++++---- .../readers/em/utils/hfive_utils.py | 24 ++- .../readers/em/utils/hfive_web_constants.py | 3 +- .../readers/em/utils/image_processing.py | 7 +- test.all.sh | 7 +- 10 files changed, 172 insertions(+), 88 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 86df97733..26008e21b 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -28,14 +28,14 @@ from diffpy.structure import Lattice, Structure from orix import plot from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation +from orix.quaternion import Rotation, Orientation from orix.vector import Vector3d import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ - read_strings_from_dataset, read_first_scalar, format_euler_parameterization + read_strings_from_dataset from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ ASSUME_PHASE_NAME_TO_SPACE_GROUP @@ -164,6 +164,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): angles = [fp[f"{sub_grp_name}/Lattice Constant Alpha"][0], fp[f"{sub_grp_name}/Lattice Constant Beta"][0], fp[f"{sub_grp_name}/Lattice Constant Gamma"][0]] + # TODO::available examples support reporting in angstroem and degree self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ = np.asarray(a_b_c, np.float32) * 0.1 self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \ @@ -210,22 +211,17 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): for i in np.arange(0, n_pts): # check shape of internal virtual chunked number array - r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))]) - self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False) + oris = Orientation.from_matrix([np.reshape(dat[i][0], (3, 3))]) + self.tmp[ckey]["euler"][i, :] = oris.to_euler(degrees=False) self.tmp[ckey]["ci"][i] = dat[i][2] self.tmp[ckey]["phase_id"][i] = dat[i][3] + 1 # APEX seems to define # notIndexed as -1 and the first valid phase id 0 - - + if np.isnan(self.tmp[ckey]["euler"]).any(): + raise ValueError(f"Conversion of om2eu unexpectedly resulted in NaN !") # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions ! 
# orix based transformation ends up in positive half space and with degrees=False # as radiants but the from_matrix command above might miss one rotation - # inconsistency f32 in file although specification states float - # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"], - # direction='lab2crystal', - # degrees=is_degrees) - # compute explicit hexagon grid cells center of mass pixel positions # TODO::currently assuming s_x and s_y are already the correct center of mass # distances for hexagonal or square tiling of R^2 diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index bc37da596..4af1cd5e0 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -151,6 +151,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): a_b_c = values[0:3] angles = values[3:6] self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c * 0.1 + # TODO::all examples indicate reporting in angstroem self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = angles # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. @@ -202,15 +203,12 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32) column_id = 0 for angle in ["phi1", "PHI", "phi2"]: + # TODO::available examples support that Bruker reports Euler triplets in degree self.tmp[ckey]["euler"][:, column_id] \ - = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) + = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) / 180. * np.pi column_id += 1 self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) n_pts = n_pts[0] - # inconsistency f32 in file although specification states float - # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"], - # direction='lab2crystal', - # degrees=is_degrees) # index of phase, 0 if not indexed # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 1c85eab4c..3a11eddec 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -152,6 +152,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): values = np.asarray(fp[f"{sub_grp_name}/LatticeConstants"][:].flatten()) a_b_c = values[0:3] angles = values[3:6] + # TODO::available examples support that community H5EBSD reports lattice constants in angstroem self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c * 0.1 self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = angles @@ -205,15 +206,12 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32) column_id = 0 for angle in ["phi1", "PHI", "phi2"]: + # TODO::available examples support that community H5EBSD reports Euler triplets in degree self.tmp[ckey]["euler"][:, column_id] \ - = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) + = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) / 180. 
* np.pi
 column_id += 1
 self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"])
 n_pts = n_pts[0]
- # inconsistency f32 in file although specification states float
- # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
- # direction='lab2crystal',
- # degrees=is_degrees)

 # index of phase, 0 if not indexed
 # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
index eaa459cb2..586179a51 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
@@ -36,7 +36,7 @@
 import matplotlib.pyplot as plt

 from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
-from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import EULER_SPACE_SYMMETRY, \
 read_strings_from_dataset, read_first_scalar, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
 ASSUME_PHASE_NAME_TO_SPACE_GROUP
@@ -164,6 +164,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
 angles = [fp[f"{sub_grp_name}/Lattice Constant alpha"][()],
 fp[f"{sub_grp_name}/Lattice Constant beta"][()],
 fp[f"{sub_grp_name}/Lattice Constant gamma"][()]]
+ # TODO::available examples support reporting in angstroem and degree
 self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \
 = np.asarray(a_b_c, np.float32) * 0.1
 self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \
@@ -209,6 +210,12 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
 n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]
 self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32)
+ # TODO::available examples support the rumour that EDAX files sometimes contain
+ # Euler angle triplet values which are larger than mathematically possible
+ # unfortunately there is no confirmation from EDAX what the reported unit and
+ # normalization is for each software version, TODO::here rad is assumed but then
+ # values as large as 12.... should not be possible
+ # TODO::there has to be a mechanism which treats these dirty scan points!
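One conceivable mechanism for such dirty scan points, sketched with assumed toy arrays (an illustration of the idea, not the parser's current behavior): bound-check each Euler column against the extent of Euler space and demote offenders to notIndexed.

import numpy as np

EULER_SPACE_SYMMETRY = [2. * np.pi, np.pi, 2. * np.pi]
euler = np.array([[0.1, 0.2, 0.3], [12.6, 0.2, 0.3]], np.float32)  # second triplet out of bounds
phase_id = np.array([1, 1], np.int32)
is_dirty = np.zeros((euler.shape[0],), bool)
for column_id in [0, 1, 2]:
    is_dirty |= np.abs(euler[:, column_id]) > EULER_SPACE_SYMMETRY[column_id]
phase_id[is_dirty] = 0  # 0 == notIndexed in the NeXus convention used here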
self.tmp[ckey]["euler"][:, 0] = np.asarray(fp[f"{grp_name}/Phi1"][:], np.float32) self.tmp[ckey]["euler"][:, 1] = np.asarray(fp[f"{grp_name}/Phi"][:], np.float32) self.tmp[ckey]["euler"][:, 2] = np.asarray(fp[f"{grp_name}/Phi2"][:], np.float32) @@ -217,11 +224,19 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # given no official EDAX OimAnalysis spec we cannot define for sure if # phase_id == 0 means just all was indexed with the first/zeroth phase or nothing - # was indexed, TODO::assuming it means all indexed: + # was indexed, TODO::assuming it means all indexed with first phase: if np.all(fp[f"{grp_name}/Phase"][:] == 0): self.tmp[ckey]["phase_id"] = np.zeros(n_pts, np.int32) + 1 else: self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32) + # TODO::mark scan points as dirty + # the line below shows an example how this could be achieved + # is_dirty = np.zeros((n_pts,), bool) + # for column_id in [0, 1, 2]: + # is_dirty = is_dirty & np.abs(self.tmp[ckey]["euler"][:, column_id]) > EULER_SPACE_SYMMETRY + # print(f"Found {np.sum(is_dirty)} scan points which are marked now as dirty!") + # self.tmp[ckey]["phase_id"][is_dirty] = 0 + # promoting int8 to int32 no problem self.tmp[ckey]["ci"] = np.asarray(fp[f"{grp_name}/CI"][:], np.float32) # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index 75740996f..e05d7f4d6 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -36,7 +36,8 @@ import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser -from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset +from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ + read_strings_from_dataset, format_euler_parameterization class HdfFiveOxfordReader(HdfFiveBaseParser): @@ -171,17 +172,17 @@ def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str): = read_strings_from_dataset(fp[f"{sub_grp_name}/Reference"][()]) # Lattice Angles, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for the alpha, beta and gamma angles in radians - is_degrees = False if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Angles"].attrs["Unit"]) == "rad": - is_degrees = False - angles = np.asarray(fp[f"{sub_grp_name}/Lattice Angles"][:].flatten()) / np.pi * 180. + angles = np.asarray(fp[f"{sub_grp_name}/Lattice Angles"][:].flatten()) + else: + raise ValueError(f"Unexpected case that Lattice Angles are not reported in rad !") self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = angles # Lattice Dimensions, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for a, b and c dimensions in Angstroms - is_nanometer = False if read_strings_from_dataset(fp[f"{sub_grp_name}/Lattice Dimensions"].attrs["Unit"]) == "angstrom": - is_nanometer = False - a_b_c = np.asarray(fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten()) * 0.1 + a_b_c = np.asarray(fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten()) * 0.1 + else: + raise ValueError(f"Unexpected case that Lattice Dimensions are not reported in angstroem !") self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = a_b_c # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. 
@@ -216,17 +217,11 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str):
 raise ValueError(f"Unable to parse {grp_name}/{req_field} !")

 # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1).
- is_degrees = False
- is_negative = False
 if read_strings_from_dataset(fp[f"{grp_name}/Euler"].attrs["Unit"]) == "rad":
- is_degrees = False
- self.tmp[ckey]["euler"] = np.asarray(fp[f"{grp_name}/Euler"], np.float32)
- # TODO::handle possible case of negative Euler angles (examples though do not indicate)
- # that AZTec reports negative Euler angles...
- # inconsistency f32 in file although specification states float
- # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
- # direction='lab2crystal',
- # degrees=is_degrees)
+ self.tmp[ckey]["euler"] = np.asarray(fp[f"{grp_name}/Euler"], np.float32)
+ else:
+ raise ValueError(f"Unexpected case that Euler angles are not reported in rad !")
+ self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"])

 # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed
 # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
index 4bc3fbcf5..ae2444519 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
@@ -47,11 +47,13 @@
 from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
 from orix.quaternion import Rotation
 from orix.vector import Vector3d
+from scipy.spatial import KDTree

 import matplotlib.pyplot as plt

 from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset
-from pynxtools.dataconverter.readers.em.utils.hfive_web_constants import HFIVE_WEB_MAXIMUM_RGB
+from pynxtools.dataconverter.readers.em.utils.hfive_web_constants \
+ import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB
 from pynxtools.dataconverter.readers.em.utils.image_processing import thumbnail

 from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader
@@ -182,8 +184,10 @@ def process_roi_overview_ebsd_based(self,
 roi_id: str,
 template: dict) -> dict:
 print("Parse ROI default plot...")
- # prfx = f"/ENTRY[entry{self.entry_id}]/experiment/indexing/region_of_interest/roi{roi_id}"
- # prfx = f"/roi{roi_id}"
+ if np.max((inp["n_x"], inp["n_y"])) > HFIVE_WEB_MAXIMUM_ROI:
+ raise ValueError(f"Plotting roi_overviews larger than " \
+ f"{HFIVE_WEB_MAXIMUM_ROI} is not supported !")
+
 trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/DATA[roi]"
 template[f"{trg}/title"] = f"Region-of-interest overview image"
 template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically!
@@ -239,7 +243,78 @@ def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict:
 self.xmap = None
 self.axis_x = None
 self.axis_y = None
- if np.max((inp["n_x"], inp["n_y"])) < HFIVE_WEB_MAXIMUM_RGB:
+
+ print(f"Unique phase_identifier {np.unique(inp['phase_id'])}")
+ min_phase_id = np.min(np.unique(inp["phase_id"]))
+
+ if np.max((inp["n_x"], inp["n_y"])) > HFIVE_WEB_MAXIMUM_RGB:
+ # assume center of mass of the scan points
+ # TODO::check if mapping correct for hexagonal and square grid
+ aabb = [np.min(inp["scan_point_x"]) - 0.5 * inp["s_x"],
+ np.max(inp["scan_point_x"]) + 0.5 * inp["s_x"],
+ np.min(inp["scan_point_y"]) - 0.5 * inp["s_y"],
+ np.max(inp["scan_point_y"]) + 0.5 * inp["s_y"]]
+ print(f"{aabb}")
+ if aabb[1] - aabb[0] >= aabb[3] - aabb[2]:
+ sqr_step_size = (aabb[1] - aabb[0]) / HFIVE_WEB_MAXIMUM_RGB
+ nxy = [HFIVE_WEB_MAXIMUM_RGB,
+ int(np.ceil((aabb[3] - aabb[2]) / sqr_step_size))]
+ else:
+ sqr_step_size = (aabb[3] - aabb[2]) / HFIVE_WEB_MAXIMUM_RGB
+ nxy = [int(np.ceil((aabb[1] - aabb[0]) / sqr_step_size)),
+ HFIVE_WEB_MAXIMUM_RGB]
+ print(f"H5Web default plot generation, scaling nxy0 {[inp['n_x'], inp['n_y']]}, nxy {nxy}")
+ # the above estimate is not exactly correct (it may create a slight real-space shift
+ # of the EBSD map), TODO::regrid the real world axis-aligned bounding box aabb with
+ # a regular tiling of squares or hexagons
+ # https://stackoverflow.com/questions/18982650/differences-between-matlab-and-numpy-and-pythons-round-function
+ # MTex/Matlab round is not exactly the same as numpy round but reasonably close
+
+ # scan point positions were normalized by the tech partner subparsers such that they
+ # always build on pixel coordinates calibrated for the step size, not on absolute positions
+ # in the sample surface frame of reference, as the latter are typically not yet consistently
+ # documented; in addition we assume that the map always starts at the top left corner,
+ # i.e. the zeroth/first coordinate is always 0., 0. !
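The regridding that follows boils down to a nearest-neighbor lookup; a toy sketch with assumed coordinates (not part of the patch):

import numpy as np
from scipy.spatial import KDTree

src = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.]])  # original scan point positions
qry = np.array([[0.1, 0.2], [0.9, 0.8]])                  # nodes of the coarser square grid
dist, nn = KDTree(src).query(qry, k=1)                    # nn == [0, 3]
# per-scan-point payloads (euler, phase_id, ...) are then gathered via fancy indexing, e.g. euler[nn, :]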
+ xy = np.column_stack(
+ (np.tile(np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sqr_step_size, nxy[1]),
+ np.repeat(np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sqr_step_size, nxy[0])))
+ print(f"xy {xy}, shape {np.shape(xy)}")
+ tree = KDTree(np.column_stack((inp["scan_point_x"], inp["scan_point_y"])))
+ d, idx = tree.query(xy, k=1)
+ if np.sum(idx == tree.n) > 0:
+ raise ValueError(f"kdtree query left some query points without a neighbor!")
+ del d
+ del tree
+ pyxem_euler = np.zeros((np.shape(xy)[0], 3), np.float32)
+ pyxem_euler[:, :] = np.nan
+ pyxem_euler[:, :] = inp["euler"][idx, :]
+ if np.isnan(pyxem_euler).any():
+ raise ValueError(f"Downsampling of the EBSD map left pixels without euler!")
+ phase_new = np.zeros((np.shape(xy)[0],), np.int32) - 2
+ phase_new[:] = inp["phase_id"][idx]
+ if np.sum(phase_new == -2) > 0:
+ raise ValueError(f"Downsampling of the EBSD map left pixels without phase!")
+ del xy
+
+ if min_phase_id > 0:
+ pyxem_phase_id = phase_new - min_phase_id
+ elif min_phase_id == 0:
+ pyxem_phase_id = phase_new - 1
+ else:
+ raise ValueError(f"Unable to deal with unexpected phase_identifier!")
+ del phase_new
+
+ coordinates, _ = create_coordinate_arrays(
+ (nxy[1], nxy[0]), (sqr_step_size, sqr_step_size))
+ xaxis = coordinates["x"]
+ yaxis = coordinates["y"]
+ print(f"coordinates " \
+ f"xmi {np.min(xaxis)}, xmx {np.max(xaxis)}, " \
+ f"ymi {np.min(yaxis)}, ymx {np.max(yaxis)}")
+ del coordinates
+ self.axis_x = np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sqr_step_size
+ self.axis_y = np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sqr_step_size
 else:
 # can use the map discretization as is
 coordinates, _ = create_coordinate_arrays(
 (inp["n_y"], inp["n_x"]), (inp["s_y"], inp["s_x"]))
@@ -250,35 +325,27 @@ def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict:
 del coordinates
 self.axis_x = self.get_named_axis(inp, "x")
 self.axis_y = self.get_named_axis(inp, "y")
- else:
- raise ValueError(f"Downsampling for too large EBSD maps is currently not supported !")
- # need to regrid to downsample too large maps
- # TODO::implement 1NN-based downsampling approach
- # build grid
- # tree-based 1NN
- # proceed as usual
-
- # TODO::there was one example 093_0060.h5oina
- # where HitRate was 75% but no pixel left unidentified ??
- print(f"Unique phase_identifier {np.unique(inp['phase_id'])}")
- min_phase_id = np.min(np.unique(inp["phase_id"]))
- if min_phase_id > 0:
- pyxem_phase_identifier = inp["phase_id"] - min_phase_id
- elif min_phase_id == 0:
- pyxem_phase_identifier = inp["phase_id"] - 1
- else:
- raise ValueError(f"Unable how to deal with unexpected phase_identifier!")
+
+ pyxem_euler = inp["euler"]
+ # TODO::there was one example 093_0060.h5oina
+ # where HitRate was 75% but no pixel left unidentified ??
+ if min_phase_id > 0:
+ pyxem_phase_id = inp["phase_id"] - min_phase_id
+ elif min_phase_id == 0:
+ pyxem_phase_id = inp["phase_id"] - 1
+ else:
+ raise ValueError(f"Unable to deal with unexpected phase_identifier!")
+
 # inp["phase_id"] - (np.min(inp["phase_id"]) - (-1))
 # for pyxem the non-indexed has to be -1 instead of 0 which is what NeXus uses
 # -1 always because content of inp["phase_id"] is normalized
 # to NeXus NXem_ebsd_crystal_structure concept already!
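The phase identifier convention shift in compact form, with an assumed array (sketch, not part of the patch):

import numpy as np

nexus_phase_id = np.array([0, 1, 1, 2], np.int32)  # NeXus: 0 == notIndexed, phases start at 1
pyxem_phase_id = nexus_phase_id - 1                # pyxem/orix: -1 == notIndexed, phases start at 0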
- print(f"Unique pyxem_phase_identifier {np.unique(pyxem_phase_identifier)}") - - self.xmap = CrystalMap(rotations=Rotation.from_euler(euler=inp["euler"], + print(f"Unique pyxem_phase_id {np.unique(pyxem_phase_id)}") + self.xmap = CrystalMap(rotations=Rotation.from_euler(euler=pyxem_euler, direction='lab2crystal', degrees=False), x=xaxis, y=yaxis, - phase_id=pyxem_phase_identifier, + phase_id=pyxem_phase_id, phase_list=PhaseList(space_groups=inp["space_group"], structures=inp["phase"]), prop={}, @@ -312,8 +379,24 @@ def process_roi_phases(self, inp: dict, roi_id: int, template: dict) -> dict: # phase_id of pyxem notIndexed is -1 while for NeXus # it is 0 so add + 1 in naming schemes trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id + 1}]" + + min_phase_id = np.min(np.unique(inp["phase_id"])) + if min_phase_id > 0: + pyx_phase_id = inp["phase_id"] - min_phase_id + elif min_phase_id == 0: + pyx_phase_id = inp["phase_id"] - 1 + else: + raise ValueError(f"Unable how to deal with unexpected phase_identifier!") + del min_phase_id + template[f"{trg}/number_of_scan_points"] \ - = np.uint32(np.sum(self.xmap.phase_id == pyxem_phase_id)) + = np.uint32(np.sum(pyx_phase_id == pyxem_phase_id)) + del pyx_phase_id + # not self.xmap.phase_id because in NeXus the number_of_scan_points is always + # accounting for the original map size and not the potentially downscaled version + # of the map as the purpose of the later one is exclusively to show a plot at all + # because of a technical limitation of H5Web if there would be a tool that + # could show larger RGB plots we would not need to downscale the EBSD map resolution! template[f"{trg}/phase_identifier"] = np.uint32(pyxem_phase_id + 1) template[f"{trg}/phase_name"] \ = f"{inp['phases'][pyxem_phase_id + 1]['phase_name']}" diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py index 5ae6a9cae..bf1e7af10 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py @@ -37,26 +37,22 @@ DIRTY_FIX_SPACEGROUP = {} +EULER_SPACE_SYMMETRY = [2. * np.pi, np.pi, 2. * np.pi] + def format_euler_parameterization(triplet_set): """Transform degrees to radiant and apply orientation space symmetry""" - is_degrees = False - for column_id in [0, 1, 2]: - # not robust enough as a single crystal close to the cube orientation - # with a very low orientation spread may also have all Euler angle values - # smaller than 2pi - # TODO::therefore the real specs of each tech partner's format is needed! - if np.max(np.abs(triplet_set[:, column_id])) > 2. * np.pi: - is_degrees = True - if is_degrees is True: - for column_id in [0, 1, 2]: - triplet_set[:, column_id] = triplet_set[:, column_id] / 180. * np.pi - - sothree_shift = [2. * np.pi, np.pi, 2. * np.pi] + # it is not robust in general to judge just from the collection of euler angles + # whether they are reported in radiant or degree + # indeed an EBSD map of a slightly deformed single crystal close to e.g. the cube ori + # can have euler angles for each scan point within pi, 2pi respectively + # similarly there was an example in the data 229_2096.oh5 where 3 out of 20.27 mio + # scan points where not reported in radiant but rather using 4pi as a marker to indicate + # there was a problem with the scan point for column_id in [0, 1, 2]: here = np.where(triplet_set[:, column_id] < 0.) 
if len(here[0]) > 0: triplet_set[here, column_id] \ - = sothree_shift[column_id] + triplet_set[here, column_id] + = EULER_SPACE_SYMMETRY[column_id] + triplet_set[here, column_id] return triplet_set def read_strings_from_dataset(obj): diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py index 8f480dbaa..72b4f2519 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py @@ -17,4 +17,5 @@ # """Constants relevant when working with H5Web.""" -HFIVE_WEB_MAXIMUM_RGB = 2**14 +HFIVE_WEB_MAXIMUM_ROI = 2**14 - 1 +HFIVE_WEB_MAXIMUM_RGB = 2**11 - 1 diff --git a/pynxtools/dataconverter/readers/em/utils/image_processing.py b/pynxtools/dataconverter/readers/em/utils/image_processing.py index 34f98266f..88d2eb5cf 100644 --- a/pynxtools/dataconverter/readers/em/utils/image_processing.py +++ b/pynxtools/dataconverter/readers/em/utils/image_processing.py @@ -39,16 +39,15 @@ def thumbnail(img, size=300): return img if old_width == old_height: - img.thumbnail((size, size), pil.ANTIALIAS) + img.thumbnail((size, size)) elif old_height > old_width: ratio = float(old_width) / float(old_height) new_width = ratio * size - img = img.resize((int(np.floor(new_width)), size), pil.ANTIALIAS) + img = img.resize((int(np.floor(new_width)), size)) elif old_width > old_height: ratio = float(old_height) / float(old_width) new_height = ratio * size - img = img.resize((size, int(np.floor(new_height))), pil.ANTIALIAS) - + img = img.resize((size, int(np.floor(new_height)))) return img diff --git a/test.all.sh b/test.all.sh index c93c93f02..977b7a82f 100755 --- a/test.all.sh +++ b/test.all.sh @@ -9,12 +9,15 @@ # 026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 delmic hdf5 have no ebsd data # 173_0056.h5oina has only eds data -# Examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" +Examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 
173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" # Examples="207_2081.edaxh5" # Examples="173_0057.h5oina" # oxford, bruker, britton, edax old noncali, edax old calib, apex -Examples="173_0057.h5oina 130_0003.h5 088_0009.h5 116_0014.h5 229_2097.oh5 207_2081.edaxh5" +# Examples="173_0057.h5oina 130_0003.h5 088_0009.h5 116_0014.h5 229_2097.oh5 207_2081.edaxh5" + +# Examples="229_2096.oh5" # this is the largest EBSD map, a composite +# Examples="229_2097.oh5" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt From acf860dd8cce3842cc16a361c56c79a36d6e45da Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 7 Nov 2023 12:44:06 +0100 Subject: [PATCH 19/84] Designed logic to probe if DREAM3D instance has relevant data to be mapped on NXem --- dev-requirements.txt | 197 ++---------------- .../em/subparsers/hfive_dreamthreed.py | 195 +++++++++++++++++ .../readers/em/subparsers/nxs_hfive.py | 7 + pyproject.toml | 2 +- test.all.sh => test.ebsd2d_hdf5.sh | 0 test.ebsd3d_hdf5.sh | 9 + 6 files changed, 227 insertions(+), 183 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py rename test.all.sh => test.ebsd2d_hdf5.sh (100%) create mode 100755 test.ebsd3d_hdf5.sh diff --git a/dev-requirements.txt b/dev-requirements.txt index 0620dae14..b8f75ce0b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,19 +2,10 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --extra=dev --output-file=dev-requirements.txt --resolver=backtracking pyproject.toml +# pip-compile --extra=dev --output-file=dev-requirements.txt pyproject.toml # -anyio==3.6.1 - # via jupyter-server appdirs==1.4.4 # via requests-cache -argon2-cffi==21.3.0 - # via - # jupyter-server - # nbclassic - # notebook -argon2-cffi-bindings==21.2.0 - # via argon2-cffi asciitree==0.3.3 # via zarr ase==3.19.0 @@ -28,25 +19,16 @@ astroid==2.12.10 attrs==22.1.0 # via # cattrs - # jsonschema # pytest # requests-cache -babel==2.11.0 - # via jupyterlab-server backcall==0.2.0 # via ipython -beautifulsoup4==4.11.1 - # via nbconvert -bleach==5.0.1 - # via nbconvert build==0.10.0 # via pip-tools cattrs==22.2.0 # via requests-cache certifi==2022.9.24 # via requests -cffi==1.15.1 - # via argon2-cffi-bindings charset-normalizer==2.1.1 # via requests click==8.1.3 @@ -56,11 +38,14 @@ click==8.1.3 cloudpickle==2.2.0 # via dask coverage[toml]==6.5.0 - # via pytest-cov + # via + # coverage + # pytest-cov cycler==0.11.0 # via matplotlib dask[array]==2022.2.0 # via + # dask # hyperspy # kikuchipy # orix @@ -71,8 +56,6 @@ decorator==5.1.1 # via # ipyparallel # ipython -defusedxml==0.7.1 - # via nbconvert diffpy-structure==3.1.0 # via # diffsims @@ -99,8 +82,6 @@ fabio==2023.4.1 # silx fasteners==0.18 # via zarr -fastjsonschema==2.16.2 - # via nbformat flatdict==4.0.1 # via pynxtools (pyproject.toml) fonttools==4.37.4 @@ -115,30 +96,24 @@ gitdb==4.0.10 # via gitpython gitpython==3.1.30 # via pynxtools (pyproject.toml) -h5grove==1.2.0 - # via jupyterlab-h5web h5py==3.7.0 # via - # h5grove # hyperspy # ifes-apt-tc-data-modeling - # jupyterlab-h5web # kikuchipy # nionswift # orix # pyfai # pynxtools (pyproject.toml) # silx -hyperspy==1.7.4 +hyperspy==1.7.5 # via # kikuchipy # pynxtools (pyproject.toml) # pyxem idna==3.4 - # via - # anyio - # requests 
-ifes-apt-tc-data-modeling==0.0.8 + # via requests +ifes-apt-tc-data-modeling==0.0.9 # via pynxtools (pyproject.toml) imageio==2.22.1 # via @@ -156,8 +131,6 @@ ipykernel==6.16.0 # via # ipyparallel # ipywidgets - # nbclassic - # notebook ipyparallel==8.4.1 # via hyperspy ipython==7.34.0 @@ -166,11 +139,6 @@ ipython==7.34.0 # ipykernel # ipyparallel # ipywidgets - # jupyterlab -ipython-genutils==0.2.0 - # via - # nbclassic - # notebook ipywidgets==8.0.6 # via pyxem isort==5.10.1 @@ -178,61 +146,18 @@ isort==5.10.1 jedi==0.18.1 # via ipython jinja2==3.1.2 - # via - # hyperspy - # jupyter-server - # jupyterlab - # jupyterlab-server - # nbclassic - # nbconvert - # notebook + # via hyperspy joblib==1.2.0 # via scikit-learn -json5==0.9.11 - # via jupyterlab-server -jsonschema==4.17.3 - # via - # jupyterlab-server - # nbformat jupyter-client==7.3.5 # via # ipykernel # ipyparallel - # jupyter-server - # nbclassic - # nbclient - # notebook jupyter-core==5.1.5 - # via - # jupyter-client - # jupyter-server - # jupyterlab - # nbclassic - # nbconvert - # nbformat - # notebook -jupyter-server==1.23.5 - # via - # jupyterlab - # jupyterlab-h5web - # jupyterlab-server - # nbclassic - # notebook-shim -jupyterlab==3.5.3 - # via - # ifes-apt-tc-data-modeling - # pynxtools (pyproject.toml) -jupyterlab-h5web==7.0.0 - # via - # ifes-apt-tc-data-modeling - # pynxtools (pyproject.toml) -jupyterlab-pygments==0.2.2 - # via nbconvert -jupyterlab-server==2.19.0 - # via jupyterlab + # via jupyter-client jupyterlab-widgets==3.0.7 # via ipywidgets -kikuchipy==0.8.4 +kikuchipy==0.9.0 # via pynxtools (pyproject.toml) kiwisolver==1.4.4 # via matplotlib @@ -247,9 +172,7 @@ lmfit==1.2.0 locket==1.0.0 # via partd markupsafe==2.1.1 - # via - # jinja2 - # nbconvert + # via jinja2 matplotlib==3.5.3 # via # ase @@ -269,8 +192,6 @@ matplotlib-scalebar==0.8.1 # via orix mccabe==0.7.0 # via pylint -mistune==2.0.4 - # via nbconvert mpmath==1.2.1 # via sympy mypy==0.982 @@ -279,31 +200,10 @@ mypy-extensions==0.4.3 # via mypy natsort==8.2.0 # via hyperspy -nbclassic==0.4.8 - # via - # jupyterlab - # notebook -nbclient==0.6.8 - # via nbconvert -nbconvert==7.1.0 - # via - # jupyter-server - # nbclassic - # notebook -nbformat==5.6.1 - # via - # jupyter-server - # nbclassic - # nbclient - # nbconvert - # notebook nest-asyncio==1.5.6 # via # ipykernel # jupyter-client - # nbclassic - # nbclient - # notebook networkx==2.6.3 # via # radioactivedecay @@ -324,10 +224,6 @@ nionutils==0.4.6 # nionswift # nionswift-io # nionui -notebook==6.5.2 - # via jupyterlab -notebook-shim==0.2.2 - # via nbclassic numba==0.56.2 # via # diffsims @@ -348,7 +244,6 @@ numpy==1.21.6 # dask # diffsims # fabio - # h5grove # h5py # hyperspy # ifes-apt-tc-data-modeling @@ -386,19 +281,13 @@ orix==0.11.1 # diffsims # kikuchipy # pyxem -orjson==3.8.0 - # via h5grove packaging==21.3 # via # build # dask # hyperspy # ipykernel - # jupyter-server - # jupyterlab - # jupyterlab-server # matplotlib - # nbconvert # numexpr # pint # pooch @@ -409,8 +298,6 @@ pandas==1.3.5 # ifes-apt-tc-data-modeling # pynxtools (pyproject.toml) # xarray -pandocfilters==1.5.0 - # via nbconvert parso==0.8.3 # via jedi partd==1.3.0 @@ -442,11 +329,6 @@ pooch==1.7.0 # orix prettytable==3.4.1 # via hyperspy -prometheus-client==0.14.1 - # via - # jupyter-server - # nbclassic - # notebook prompt-toolkit==3.0.31 # via ipython psutil==5.9.2 @@ -456,23 +338,17 @@ psutil==5.9.2 # ipyparallel # pyxem ptyprocess==0.7.0 - # via - # pexpect - # terminado + # via pexpect py==1.11.0 # via pytest pycifrw==4.4.5 # 
via diffpy-structure pycodestyle==2.9.1 # via pynxtools (pyproject.toml) -pycparser==2.21 - # via cffi pyfai==2023.3.0 # via pyxem pygments==2.13.0 - # via - # ipython - # nbconvert + # via ipython pylint==2.15.3 # via pynxtools (pyproject.toml) pyparsing==3.0.9 @@ -481,8 +357,6 @@ pyparsing==3.0.9 # packaging pyproject-hooks==1.0.0 # via build -pyrsistent==0.18.1 - # via jsonschema pytest==7.1.3 # via # pynxtools (pyproject.toml) @@ -501,7 +375,6 @@ python-dateutil==2.8.2 # pandas pytz==2022.4 # via - # babel # nionswift # pandas # pynxtools (pyproject.toml) @@ -509,7 +382,7 @@ pytz-deprecation-shim==0.1.0.post0 # via tzlocal pywavelets==1.3.0 # via scikit-image -pyxem==0.15.0 +pyxem==0.15.1 # via pynxtools (pyproject.toml) pyyaml==6.0 # via @@ -522,15 +395,11 @@ pyzmq==24.0.1 # ipykernel # ipyparallel # jupyter-client - # jupyter-server - # nbclassic - # notebook radioactivedecay==0.4.16 # via ifes-apt-tc-data-modeling requests==2.28.1 # via # hyperspy - # jupyterlab-server # pooch # pynxtools (pyproject.toml) # requests-cache @@ -562,25 +431,15 @@ scipy==1.7.3 # scikit-image # scikit-learn # sparse -send2trash==1.8.0 - # via - # jupyter-server - # nbclassic - # notebook silx==1.1.2 # via pyfai six==1.16.0 # via - # bleach # diffpy-structure # python-dateutil # url-normalize smmap==5.0.0 # via gitdb -sniffio==1.3.0 - # via anyio -soupsieve==2.3.2.post1 - # via beautifulsoup4 sparse==0.13.0 # via hyperspy structlog==22.1.0 @@ -589,25 +448,16 @@ sympy==1.10.1 # via # hyperspy # radioactivedecay -terminado==0.16.0 - # via - # jupyter-server - # nbclassic - # notebook threadpoolctl==3.1.0 # via scikit-learn tifffile==2021.11.2 # via - # h5grove # hyperspy # scikit-image -tinycss2==1.1.1 - # via nbconvert tomli==2.0.1 # via # build # coverage - # jupyterlab # mypy # pylint # pyproject-hooks @@ -624,11 +474,6 @@ tornado==6.2 # ipykernel # ipyparallel # jupyter-client - # jupyter-server - # jupyterlab - # nbclassic - # notebook - # terminado tqdm==4.64.1 # via # diffsims @@ -644,13 +489,7 @@ traitlets==5.4.0 # ipywidgets # jupyter-client # jupyter-core - # jupyter-server # matplotlib-inline - # nbclassic - # nbclient - # nbconvert - # nbformat - # notebook traits==6.4.1 # via hyperspy transforms3d==0.4.1 @@ -687,12 +526,6 @@ wcwidth==0.2.5 # via # prettytable # prompt-toolkit -webencodings==0.5.1 - # via - # bleach - # tinycss2 -websocket-client==1.4.1 - # via jupyter-server wheel==0.40.0 # via pip-tools widgetsnbextension==4.0.7 diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py new file mode 100644 index 000000000..3eda21871 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -0,0 +1,195 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+"""(Sub-)parser mapping concepts and content from community *.dream3d files on NXem."""
+
+import os
+from typing import Dict, Any, List
+import numpy as np
+import h5py
+# import imageio.v3 as iio
+from PIL import Image as pil
+
+import diffsims
+import orix
+from diffpy.structure import Lattice, Structure
+from orix import plot
+from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
+from orix.quaternion import Rotation
+from orix.vector import Vector3d
+
+import matplotlib.pyplot as plt
+
+from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
+from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
+ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
+from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
+ ASSUME_PHASE_NAME_TO_SPACE_GROUP
+
+# DREAM3D implements essentially a data analysis workflow with individual steps
+# in the DREAM3D jargon each step is referred to as a filter, filters have a well-defined
+# name and version, and each filter takes, dependent on its version, specific input and
+# generates predictable output, this is a benefit and signature of the professional
+# design and idea behind DREAM3D
+# in effect, the combination of versioned filters used together with the DREAM3D
+# software version and file version defines how results end up in a DREAM3D file
+
+# TODO::to capture every possible output one would keep a record of the individual
+# schemes for each filter and the differences in these between versions
+# considering the fact that DREAM3D is still in a process of migrating from previous
+# versions to a so-called DREAM3DNX (more professional) version we do not wish to explore
+# for now how this filter-based schema version can be implemented
+# instead we leave it with a few examples, here specifically how to extract, if
+# available, inverse pole figure maps for the reconstructed discretized three-dimensional
+# microstructure which is the key result that DREAM3D enables users to generate from a
+# collection of EBSD mappings obtained via serial-sectioning
+
+# idea behind this implementation:
+# e.g. a materials scientist/engineer working in the field of e.g. ICME
+# generating N microstructure reconstructions from M measurements
+# in general N and M >= 1 and N can be N >> M i.e. one serial-section study with
+# hundreds of different microstructures, a typical case for exploring the phase space
+# of thermo-chemo-mechanical material response, i.e. the effect of structure on properties
+# in this case each DREAM3D run should be supplemented with contextualizing metadata
+# e.g. collected via an ELN e.g. user, material, measurement used, etc. i.e.
all those +# pieces of information which are not documented by or not documentable currently by +# the DREAM3D software within its own realm +# in effect a research may have say N ~= 1000 uploads with one DREAM3D instance each +# benefits: i) for the researcher search across explore, ii) for many researchers explore +# and contextualize + + +class HdfFiveDreamThreedReader(HdfFiveBaseParser): + """Read DREAM3D HDF5 files (from Bluequartz's DREAM3D)""" + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp = {} + self.supported_version = {} + self.version = {} + self.init_support() + self.supported = False + self.check_if_supported() + + def init_support(self): + """Init supported versions.""" + self.supported_version = {} + self.version = {} + self.supported_version["tech_partner"] = ["Bluequartz"] + self.supported_version["schema_name"] = ["DREAM3D"] + self.supported_version["schema_version"] = ["6.0", "7.0"] + # strictly speaking Bluequartz refers the above-mentioned here as File Version + # but content is expected adaptive depends on filters used, their versions, and + # the sequence in which the execution of these filters was instructed + self.supported_version["writer_name"] = ["DREAM3D"] + self.supported_version["writer_version"] = [ + "1.2.812.508bf5f37", + "2.0.170.4eecce207", + "1.0.107.2080f4e", + "2014.03.05", + "2014.03.13", + "2014.03.15", + "2014.03.16", + "4.3.6052.263064d", + "1.2.828.f45085c83", + "2.0.170.4eecce207", + "1.2.826.7c66a0e77"] + + def check_if_supported(self): + # check if instance to process matches any of these constraints + self.supported = 0 # voting-based + with h5py.File(self.file_path, "r") as h5r: + if len(h5r["/"].attrs.keys()) < 2: + self.supported = False + print("Not enough attrs") + return + req_fields = ["DREAM3D Version", "FileVersion"] + for req_field in req_fields: + if f"{req_field}" not in h5r["/"].attrs.keys(): + self.supported = False + print(f"{req_field} not proper!") + return + print(read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"])) + if read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"]) in self.supported_version["writer_version"]: + self.supported += 1 + print(read_strings_from_dataset(h5r["/"].attrs["FileVersion"])) + if read_strings_from_dataset(h5r["/"].attrs["FileVersion"]) in self.supported_version["schema_version"]: + self.supported += 1 + print(f"{self.supported}") + + if self.supported == 2: + self.supported = True + self.version = self.supported_version.copy() + else: + print("Some other!") + self.supported = False + + def search_normalizable_content(self): + """Check if that highly customizable DREAM3D file has here supported content.""" + super().open() + super().get_content() + super().report_content() + super().close() + # the logic to find if there is at all a 3D EBSD reconstruction in it + # search for a node: + # named _SIMPL_GEOMETRY + # which has childs "DIMENSIONS, ORIGIN, SPACING" + # if only one such node found parse only if + # that node has one sibling node called CellData + # which has a group of named EulerAngles shape 4d, (i, j, k, 1) + + # which has a dset named BC or CI or MAD shape 4d (i, j, k, 1) + + # which has a dset named Phases shape 4d (i, j, k, 1) + + # that node has one sibling node called Phase Data + # which has a dset named CrystalStructures, LatticeConstants, MaterialName + # (which should also have specific shape) + # but see if that logic does not also check the shape and numerical content + # there are still possibilities 
where this logic fails to detect a concept + # reliably, this shows clearly that documenting and offering versioned description + # of content is the key barrier to implement more sophisticated conceptual + # normalization and assuring that content from other data providers (like DREAM3D) + # is understood before being normalized so that results in the RDMS are really + # useful and comparable + + def parse_and_normalize(self): + """Read and normalize away community-specific formatting with an equivalent in NXem.""" + self.search_normalizable_content() + + # how to find if at all relevant + # search for a node: + # named _SIMPL_GEOMETRY + # which has childs "DIMENSIONS, ORIGIN, SPACING" + # if only one such node found + # check that this node has one sibling node called CellData + # which has a group of shape 4d, (>=1, >=1, >=1, 3) uint8 surplus + # a group named either BC, CI or MAD, shape 4d (i, j, k, 1), name + """ + with h5py.File(f"{self.file_path}", "r") as h5r: + tmp = HdfFiveBaseParser() + + cache_id = 1 + grp_names = list(h5r["/"]) + for grp_name in grp_names: + if grp_name not in ["Version", "Manufacturer"]: + self.prfx = f"/{grp_name}" + ckey = self.init_named_cache(f"ebsd{cache_id}") + self.parse_and_normalize_group_ebsd_header(h5r, ckey) + self.parse_and_normalize_group_ebsd_phases(h5r, ckey) + self.parse_and_normalize_group_ebsd_data(h5r, ckey) + # add more information to pass to hfive parser + cache_id += 1 + """ + # from hfive_ebsd diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index ae2444519..8cd9aa783 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -62,6 +62,7 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_apex import HdfFiveEdaxApexReader from pynxtools.dataconverter.readers.em.subparsers.hfive_ebsd import HdfFiveCommunityReader from pynxtools.dataconverter.readers.em.subparsers.hfive_emsoft import HdfFiveEmSoftReader +from pynxtools.dataconverter.readers.em.subparsers.hfive_dreamthreed import HdfFiveDreamThreedReader class NxEmNxsHfiveSubParser: @@ -123,6 +124,9 @@ def parse(self, template: dict) -> dict: elif hfive_parser_type == "emsoft": emsoft = HdfFiveEmSoftReader(self.file_path) emsoft.parse_and_normalize() + elif hfive_parser_type == "dreamthreed": + dreamthreed = HdfFiveDreamThreedReader(self.file_path) + dreamthreed.parse_and_normalize() else: # none or something unsupported return template return template @@ -149,6 +153,9 @@ def identify_hfive_type(self): hdf = HdfFiveEmSoftReader(f"{self.file_path}") if hdf.supported is True: return "emsoft" + hdf = HdfFiveDreamThreedReader(f"{self.file_path}") + if hdf.supported is True: + return "dreamthreed" return None def process_into_template(self, inp: dict, template: dict) -> dict: diff --git a/pyproject.toml b/pyproject.toml index 22a7d3899..93917652e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "ifes_apt_tc_data_modeling>=0.0.9", "gitpython>=3.1.24", "pytz>=2021.1", - "kikuchipy>=0.8.7", + "kikuchipy>=0.9.0", "pyxem>=0.15.1", "zipfile37==0.1.3", "nionswift==0.16.8", diff --git a/test.all.sh b/test.ebsd2d_hdf5.sh similarity index 100% rename from test.all.sh rename to test.ebsd2d_hdf5.sh diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh new file mode 100755 index 000000000..9d72ae40d --- /dev/null +++ b/test.ebsd3d_hdf5.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +Examples="067_0003 177_0004 
177_0005 177_0006 177_0007 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final" + +Examples="SmallIN100_Final" +for example in $Examples; do + echo $example + dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt +done From e16d8eae396c1dc6ac28e72f78e093913ec87ffa Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 7 Nov 2023 17:34:04 +0100 Subject: [PATCH 20/84] Tested auto-detection of relevant group to harvest 3D EBSD data from and ran successfully on all datasets, 3/13 contained relevant content --- pynxtools/dataconverter/readers/em/reader.py | 2 +- .../em/subparsers/hfive_dreamthreed.py | 80 +++++++++++++++++-- test.ebsd3d_hdf5.sh | 15 +++- 3 files changed, 87 insertions(+), 10 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 7f2d15aaa..57e5acfc4 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -130,7 +130,7 @@ def read(self, sub_parser = "nxs_hfive" subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0]) subparser.parse(template) - # exit(1) + exit(1) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 3eda21871..b2cc382e1 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -115,47 +115,107 @@ def check_if_supported(self): with h5py.File(self.file_path, "r") as h5r: if len(h5r["/"].attrs.keys()) < 2: self.supported = False - print("Not enough attrs") return req_fields = ["DREAM3D Version", "FileVersion"] for req_field in req_fields: if f"{req_field}" not in h5r["/"].attrs.keys(): self.supported = False - print(f"{req_field} not proper!") return - print(read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"])) if read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"]) in self.supported_version["writer_version"]: self.supported += 1 - print(read_strings_from_dataset(h5r["/"].attrs["FileVersion"])) if read_strings_from_dataset(h5r["/"].attrs["FileVersion"]) in self.supported_version["schema_version"]: self.supported += 1 - print(f"{self.supported}") if self.supported == 2: self.supported = True self.version = self.supported_version.copy() else: - print("Some other!") self.supported = False def search_normalizable_content(self): """Check if that highly customizable DREAM3D file has here supported content.""" super().open() super().get_content() - super().report_content() + # super().report_content() super().close() # the logic to find if there is at all a 3D EBSD reconstruction in it # search for a node: + target_path = [] # named _SIMPL_GEOMETRY + candidate_paths = [] + for hdf_node_path in self.datasets.keys(): + idx = hdf_node_path.find("/_SIMPL_GEOMETRY") + if idx > -1: + candidate_paths.append((hdf_node_path, idx)) # which has childs "DIMENSIONS, ORIGIN, SPACING" + for path_idx in candidate_paths: + head = path_idx[0][0:path_idx[1]] + tail = path_idx[0][path_idx[1]:] + found = 0 + req_fields = ["DIMENSIONS", "ORIGIN", "SPACING"] + for req_field in req_fields: + if f"{head}/_SIMPL_GEOMETRY/{req_field}" in self.datasets.keys(): + found += 1 + if found == 3: + 
target_path.append(head) + break + del candidate_paths # if only one such node found parse only if + if len(target_path) != 1: + return + else: + target_path = target_path[0] # that node has one sibling node called CellData - # which has a group of named EulerAngles shape 4d, (i, j, k, 1) + + found = 0 + i_j_k = (None, None, None) + group_name = None + for entry in self.datasets.keys(): + if entry.startswith(f"{target_path}") is True and entry.endswith(f"EulerAngles") is True: + group_name = entry[0:-12] # removing the trailing fwslash + # which has a dset of named EulerAngles shape 4d, (i, j, k, 1) + + shp = self.datasets[entry][2] + if isinstance(shp, tuple) and len(shp) == 4: + if shp[3] == 3: + i_j_k = (shp[0], shp[1], shp[2]) + found += 1 + break + if group_name is None: + return # which has a dset named BC or CI or MAD shape 4d (i, j, k, 1) + + one_key_required = ["BC", "Band Contrast", "CI", "Confidence Index", "MAD"] + for key in one_key_required: + if f"{group_name}/{key}" in self.datasets.keys(): + shp = self.datasets[f"{group_name}/{key}"][2] + if isinstance(shp, tuple) and len(shp) == 4: + if (shp[0], shp[1], shp[2]) == i_j_k: + found += 1 + break # which has a dset named Phases shape 4d (i, j, k, 1) + + if f"{group_name}/Phases" in self.datasets.keys(): + shp = self.datasets[f"{group_name}/Phases"][2] + if isinstance(shp, tuple) and len(shp) == 4: + if (shp[0], shp[1], shp[2]) == i_j_k: + found += 1 # that node has one sibling node called Phase Data + if found != 3: + return # which has a dset named CrystalStructures, LatticeConstants, MaterialName + req_fields = ["CrystalStructures", "LatticeConstants", "MaterialName"] + found = 0 + possible_locs = ["Phase Data", "CellEnsembleData"] + # TODO::these group names were found in the examples but likely they can be changed depending on how the filters are set + for req_field in req_fields: + for loc in possible_locs: + if f"{target_path}/{loc}/{req_field}" in self.datasets.keys(): # (which should also have specific shape) + found += 1 + if found != 3: + print(f"Relevant 3D EBSD content found") + print(f"{target_path}") + print(f"{group_name}") + return + print(f"No relevant 3D EBSD content found!") + # but see if that logic does not also check the shape and numerical content # there are still possibilities where this logic fails to detect a concept # reliably, this shows clearly that documenting and offering versioned description @@ -164,6 +224,10 @@ def search_normalizable_content(self): # is understood before being normalized so that results in the RDMS are really # useful and comparable + # this is one approach how to find relevant groups + # another would be to interpret really the filters applied and hunt + # for the output within the parameters of a specific filter + def parse_and_normalize(self): """Read and normalize away community-specific formatting with an equivalent in NXem.""" self.search_normalizable_content() diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh index 9d72ae40d..ad55a2290 100755 --- a/test.ebsd3d_hdf5.sh +++ b/test.ebsd3d_hdf5.sh @@ -2,7 +2,20 @@ Examples="067_0003 177_0004 177_0005 177_0006 177_0007 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final" -Examples="SmallIN100_Final" +# skip +# 177_0007 as it is one of the weird examples where the h5py library cannot traverse the content... 
let's not follow-up on this rabbit hole right now +# 177_0004 has only vertices +# 177_0005 has only edges +# 177_0006 has only surface facets +# 177_0008 out because old 6.0 format which does not store DIMENSIONS, ORIGIN, SHAPE under _SIMPL yet +# 177_0009 follows the new structure but has no EulerAngles only Phases thus without following with yet another logic the source for the +# respective filter we have no chance to find the orientation data +# 226_0010 and _0011 are out because they do have only plain images (backscattered electron likely) +# 226_0013 is out because it has only plain optical image data no EBSD +# 244_0014 is out because it does not have any quantity whereby to generate a band contrast, confidence index, or mad on to generate a default plot + + +# Examples="SmallIN100_Final" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt From 08a001a63f0dc231372dc09af4fb937dbe177e6f Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 9 Nov 2023 00:08:10 +0100 Subject: [PATCH 21/84] Parsing of content from three different DREAM3D cases working, ROI code implemented but ROI code does not populate template, implement IPF computation --- pynxtools/dataconverter/readers/em/reader.py | 4 +- .../em/subparsers/hfive_dreamthreed.py | 243 +++++++++++++----- .../readers/em/subparsers/nxs_hfive.py | 52 ++-- test.ebsd3d_hdf5.sh | 8 +- 4 files changed, 223 insertions(+), 84 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 57e5acfc4..f53c3ec83 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -130,7 +130,7 @@ def read(self, sub_parser = "nxs_hfive" subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0]) subparser.parse(template) - exit(1) + # exit(1) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") @@ -159,7 +159,7 @@ def read(self, if resolved_path != "": nxs_plt.annotate_default_plot(template, resolved_path) - debugging = False + debugging = True if debugging is True: print("Reporting state of template before passing to HDF5 writing...") for keyword in template.keys(): diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index b2cc382e1..4705032db 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -72,6 +72,24 @@ # benefits: i) for the researcher search across explore, ii) for many researchers explore # and contextualize +# DREAM3D constants +# http://dream3d.bluequartz.net/Help/Filters/OrientationAnalysisFilters/CreateEnsembleInfo/ +# picking the first in each category here https://en.wikipedia.org/wiki/List_of_space_groups + +DREAM_SPACEGROUPS_TO_REPRESENTATIVE_SPACEGROUP = { + 0: 191, + 1: 221, + 2: 175, + 3: 200, + 4: 2, + 5: 10, + 6: 47, + 7: 83, + 8: 123, + 9: 147, + 10: 162} +# UnknownCrystalStructure, 999, Undefined Crystal Structure + class HdfFiveDreamThreedReader(HdfFiveBaseParser): """Read DREAM3D HDF5 files (from Bluequartz's DREAM3D)""" @@ -79,6 +97,7 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} + self.path_registry = {} self.supported_version = {} 
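For reference, the lookup table introduced just above maps DREAM3D's CrystalStructures enumeration (Laue classes 0 through 10) onto one representative space group per class, which orix can then turn into a point group. A minimal sketch of that round trip, not part of the patch itself, assuming orix is installed and reusing the table verbatim (helper name hypothetical):

```python
# Minimal sketch: DREAM3D crystal-structure id -> representative space group
# -> orix point group; 999 is DREAM3D's UnknownCrystalStructure marker.
from orix.quaternion.symmetry import get_point_group

DREAM_SPACEGROUPS_TO_REPRESENTATIVE_SPACEGROUP = {
    0: 191, 1: 221, 2: 175, 3: 200, 4: 2, 5: 10,
    6: 47, 7: 83, 8: 123, 9: 147, 10: 162}


def crystal_structure_to_point_group(crystal_structure_id: int):
    """Hypothetical helper: map a DREAM3D CrystalStructures entry to a point group."""
    if crystal_structure_id == 999:  # UnknownCrystalStructure, no symmetry known
        return None
    space_group = DREAM_SPACEGROUPS_TO_REPRESENTATIVE_SPACEGROUP[crystal_structure_id]
    return get_point_group(space_group, proper=False)


print(crystal_structure_to_point_group(1))  # cubic, via representative group 221
```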
self.version = {} self.init_support() @@ -132,15 +151,26 @@ def check_if_supported(self): else: self.supported = False - def search_normalizable_content(self): - """Check if that highly customizable DREAM3D file has here supported content.""" + def search_normalizable_ebsd_content(self): + """Check if that highly customizable DREAM3D file has supported content or not.""" super().open() super().get_content() # super().report_content() super().close() + # DREAM3D allows flexible instance names in the HDF5 tree therefore + # first identify the pathes of relevant groups, datasets for EBSD content + self.path_registry = { + "group_geometry": None, + "group_data": None, + "group_phases": None, + "is_simulated": None, + "roi_info": None} + # is_simulated is True when that DREAM3D pipeline generated just a synthetic structure + # roi_info should be pair of absolute path to dataset (HDF5) and BC, CI or MAD + # (like BC, CI, or MAD) to explain from which to render a greyscale image of the ROI # the logic to find if there is at all a 3D EBSD reconstruction in it # search for a node: - target_path = [] + group_geometry = [] # named _SIMPL_GEOMETRY candidate_paths = [] for hdf_node_path in self.datasets.keys(): @@ -157,21 +187,21 @@ def search_normalizable_content(self): if f"{head}/_SIMPL_GEOMETRY/{req_field}" in self.datasets.keys(): found += 1 if found == 3: - target_path.append(head) + group_geometry.append(head) break del candidate_paths # if only one such node found parse only if - if len(target_path) != 1: - return + if len(group_geometry) != 1: + return False else: - target_path = target_path[0] + group_geometry = group_geometry[0] # that node has one sibling node called CellData found = 0 i_j_k = (None, None, None) - group_name = None + group_data = None for entry in self.datasets.keys(): - if entry.startswith(f"{target_path}") is True and entry.endswith(f"EulerAngles") is True: - group_name = entry[0:-12] # removing the trailing fwslash + if entry.startswith(f"{group_geometry}") is True and entry.endswith(f"EulerAngles") is True: + group_data = entry[0:-12] # removing the trailing fwslash # which has a dset of named EulerAngles shape 4d, (i, j, k, 1) + shp = self.datasets[entry][2] if isinstance(shp, tuple) and len(shp) == 4: @@ -179,42 +209,77 @@ def search_normalizable_content(self): i_j_k = (shp[0], shp[1], shp[2]) found += 1 break - if group_name is None: - return + if group_data is None: + return False # which has a dset named BC or CI or MAD shape 4d (i, j, k, 1) + - one_key_required = ["BC", "Band Contrast", "CI", "Confidence Index", "MAD"] - for key in one_key_required: - if f"{group_name}/{key}" in self.datasets.keys(): - shp = self.datasets[f"{group_name}/{key}"][2] + group_roi = None + roi_info = (None, None) + one_key_required = {"BC": "bc", + "Band Contrast": "bc", + "BandContrast": "bc", + "CI": "ci", + "Confidence Index": "ci", + "ConfidenceIndex": "ci", + "MAD": "mad", + "Mean Angular Deviation": "mad", + "MeanAngularDeviation": "mad"} + for key in one_key_required.keys(): + if f"{group_data}/{key}" in self.datasets.keys(): + shp = self.datasets[f"{group_data}/{key}"][2] if isinstance(shp, tuple) and len(shp) == 4: if (shp[0], shp[1], shp[2]) == i_j_k: - found += 1 + roi_info = (f"{group_data}/{key}", one_key_required[key]) break # which has a dset named Phases shape 4d (i, j, k, 1) + - if f"{group_name}/Phases" in self.datasets.keys(): - shp = self.datasets[f"{group_name}/Phases"][2] + if f"{group_data}/Phases" in self.datasets.keys(): + shp = 
self.datasets[f"{group_data}/Phases"][2] if isinstance(shp, tuple) and len(shp) == 4: if (shp[0], shp[1], shp[2]) == i_j_k: found += 1 # that node has one sibling node called Phase Data - if found != 3: - return + if found != 2: + return False # which has a dset named CrystalStructures, LatticeConstants, MaterialName - req_fields = ["CrystalStructures", "LatticeConstants", "MaterialName"] - found = 0 - possible_locs = ["Phase Data", "CellEnsembleData"] - # TODO::these group names were found in the examples but likely they can be changed depending on how the filters are set - for req_field in req_fields: - for loc in possible_locs: - if f"{target_path}/{loc}/{req_field}" in self.datasets.keys(): - # (which should also have specific shape) - found += 1 + + # at this point there are at least to scenarios where the data come from + # a serial-sectioning experiment or a computer simulated + # (RVE instantiation/microstructure synthesis) that generating an input for the + # computer simulation without any real sample necessarily characterized + # if we have that simulated scenario the location AND that is indicated + # by the keyword "SyntheticVolumeDataContainer" we hunt elsewhere + group_phases = None + is_simulated = None + if group_data.find("SyntheticVolumeDataContainer") > -1: + is_simulated = True + # hunt CrystalStructures + for entry in self.datasets.keys(): + if entry.find("CrystalStructures") > -1: + if group_phases is None: + group_phases = entry[0:-18] # remove trailing fwslash + else: + is_simulated = False + possible_locs = ["Phase Data", "CellEnsembleData"] # these locations found in the examples but likely they can be changed depending on how the filters are set + for loc in ["Phase Data", "CellEnsembleData"]: + if f"{group_geometry}/{loc}/CrystalStructures" in self.datasets.keys(): + group_phases = f"{group_geometry}/{loc}" + found = 0 + for req_field in ["CrystalStructures", "LatticeConstants", "MaterialName"]: + if f"{group_phases}/{req_field}" in self.datasets.keys(): + # (which should also have specific shape) + found += 1 if found != 3: - print(f"Relevant 3D EBSD content found") - print(f"{target_path}") - print(f"{group_name}") - return - print(f"No relevant 3D EBSD content found!") + return False + if group_phases is None: + return False + + self.path_registry["group_geometry"] = group_geometry + self.path_registry["group_data"] = group_data + self.path_registry["group_phases"] = group_phases + self.path_registry["is_simulated"] = is_simulated + self.path_registry["roi_info"] = roi_info + print(f"Relevant 3D EBSD content found") + for key, val in self.path_registry.items(): + print(f"{key}: {val}") # but see if that logic does not also check the shape and numerical content # there are still possibilities where this logic fails to detect a concept @@ -227,33 +292,91 @@ def search_normalizable_content(self): # this is one approach how to find relevant groups # another would be to interpret really the filters applied and hunt # for the output within the parameters of a specific filter + return True def parse_and_normalize(self): """Read and normalize away community-specific formatting with an equivalent in NXem.""" - self.search_normalizable_content() + cache_id = 1 + ckey = self.init_named_cache(f"ebsd{cache_id}") + if self.search_normalizable_ebsd_content() is True: + self.parse_and_normalize_ebsd_header(ckey) + self.parse_and_normalize_ebsd_phases(ckey) + self.parse_and_normalize_ebsd_data(ckey) - # how to find if at all relevant - # search for a node: - # named 
_SIMPL_GEOMETRY - # which has childs "DIMENSIONS, ORIGIN, SPACING" - # if only one such node found - # check that this node has one sibling node called CellData - # which has a group of shape 4d, (>=1, >=1, >=1, 3) uint8 surplus - # a group named either BC, CI or MAD, shape 4d (i, j, k, 1), name - """ - with h5py.File(f"{self.file_path}", "r") as h5r: - tmp = HdfFiveBaseParser() + def parse_and_normalize_ebsd_header(self, ckey: str): + with h5py.File(self.file_path, "r") as h5r: + dims = h5r[f"{self.path_registry['group_geometry']}" \ + f"/_SIMPL_GEOMETRY/DIMENSIONS"][:].flatten() + org = h5r[f"{self.path_registry['group_geometry']}" \ + f"/_SIMPL_GEOMETRY/ORIGIN"][:].flatten() + spc = h5r[f"{self.path_registry['group_geometry']}" \ + f"/_SIMPL_GEOMETRY/SPACING"][:].flatten() + idx = 0 + for dim in ["x", "y", "z"]: + self.tmp[ckey][f"n_{dim}"] = dims[idx] + self.tmp[ckey][f"s_{dim}"] = spc[idx] + self.tmp[ckey][f"o_{dim}"] = org[idx] + idx += 1 + self.tmp[ckey]["s_unit"] = "um" # "µm" #TODO::where is this documented + for key, val in self.tmp[ckey].items(): + print(f"{key}, {np.shape(val)}, {val}") + + def parse_and_normalize_ebsd_phases(self, ckey: str): + self.tmp[ckey]["phase"] = [] + self.tmp[ckey]["space_group"] = [] + self.tmp[ckey]["phases"] = {} + with h5py.File(self.file_path, "r") as h5r: + idx = np.asarray(h5r[f"{self.path_registry['group_phases']}/CrystalStructures"][:].flatten(), np.uint32) + print(f"csys {np.shape(idx)}, {idx}") + for entry in idx: + if entry != 999: + self.tmp[ckey]["phases"][int(entry)] = {} + self.tmp[ckey]["phases"][int(entry)]["Space Group"] \ + = DREAM_SPACEGROUPS_TO_REPRESENTATIVE_SPACEGROUP[entry] + # TODO::need to do a reindexing of the phase ids as they + # might not be stored in asc. order! + + # LatticeAngles are implicitly defined for each space group + # LatticeDimensions essentially provides scaling information + # but indeed for simulating a crystal with a computer simulation + # at a length scale larger than atoms (mesoscale and macroscale) + # one can argue the exact spacing is not needed except when + # one wishes to compute the diffraction pattern but as most results + # from DREAM3D implicitly rely on information from a previous workflow + # where these atomistic details have been abstracted away it is + # factually true that there is not really a need for documenting + # the lattice dimensions from a DREAM3D analysis. + for key, dct in self.tmp[ckey]["phases"].items(): + print(f"{key}, {dct}") + + def parse_and_normalize_ebsd_data(self, ckey: str): + with h5py.File(self.file_path, "r") as h5r: + self.tmp[ckey]["euler"] = np.asarray( + h5r[f"{self.path_registry['group_data']}/EulerAngles"], np.float32) + # TODO::DREAM3D uses Rowenhorst et. al. 
conventions + # so we are already in positive halfspace, and radiants + + self.tmp[ckey]["phase_id"] = np.asarray( + h5r[f"{self.path_registry['group_data']}/Phases"], np.int32) + print(np.unique(self.tmp[ckey]["phase_id"])) + # Phases here stores C-style index which Phase of the possible ones + # we are facing, the marker 999 is equivalent to the null-model notIndexed + # in all examples 999 was the first (0th) entry in the list of possible ones + # in effect, the phase_id == 0 rightly so marks position indexed with the null-model - cache_id = 1 - grp_names = list(h5r["/"]) - for grp_name in grp_names: - if grp_name not in ["Version", "Manufacturer"]: - self.prfx = f"/{grp_name}" - ckey = self.init_named_cache(f"ebsd{cache_id}") - self.parse_and_normalize_group_ebsd_header(h5r, ckey) - self.parse_and_normalize_group_ebsd_phases(h5r, ckey) - self.parse_and_normalize_group_ebsd_data(h5r, ckey) - # add more information to pass to hfive parser - cache_id += 1 - """ - # from hfive_ebsd + # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere + for dim in ["x", "y", "z"]: + self.tmp[ckey][f"scan_point_{dim}"] \ + = np.asarray(np.linspace(0, self.tmp[ckey][f"n_{dim}"] - 1, + num=self.tmp[ckey][f"n_{dim}"], + endpoint=True) \ + * self.tmp[ckey][f"s_{dim}"] \ + + 0.5 * self.tmp[ckey][f"s_{dim}"], + np.float32) + # ROI overviewed rendered from either bc, ci, or mad + if isinstance(self.path_registry["roi_info"], tuple) and len(self.path_registry["roi_info"]) == 2: + if isinstance(self.path_registry["roi_info"][0], str) is True and isinstance(self.path_registry["roi_info"][1], str) is True: + self.tmp[ckey][self.path_registry["roi_info"][1]] \ + = np.asarray(h5r[f"{self.path_registry['roi_info'][0]}"], np.float32) + for key, val in self.tmp[ckey].items(): + print(f"{key}, {np.shape(val)}") diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 8cd9aa783..320866110 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -169,7 +169,7 @@ def process_into_template(self, inp: dict, template: dict) -> dict: print(f"{key}, {val}") self.process_roi_overview(inp, template) - self.process_roi_ebsd_maps(inp, template) + # self.process_roi_ebsd_maps(inp, template) return template def get_named_axis(self, inp: dict, dim_name: str): @@ -191,17 +191,31 @@ def process_roi_overview_ebsd_based(self, roi_id: str, template: dict) -> dict: print("Parse ROI default plot...") - if np.max((inp["n_x"], inp["n_y"])) > HFIVE_WEB_MAXIMUM_ROI: - raise ValueError(f"Plotting roi_overviews larger than " \ - f"{HFIVE_WEB_MAXIMUM_ROI} is not supported !") + is_threed = False + if "n_z" in inp.keys(): + is_threed = True + if np.max((inp["n_x"], inp["n_y"], inp["n_z"])) > HFIVE_WEB_MAXIMUM_ROI: + raise ValueError(f"Plotting 3D roi_overviews larger than " \ + f"{HFIVE_WEB_MAXIMUM_ROI} is not supported !") + else: + if np.max((inp["n_x"], inp["n_y"])) > HFIVE_WEB_MAXIMUM_ROI: + raise ValueError(f"Plotting 2D roi_overviews larger than " \ + f"{HFIVE_WEB_MAXIMUM_ROI} is not supported !") trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/DATA[roi]" template[f"{trg}/title"] = f"Region-of-interest overview image" template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
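A note on the coordinate normalization a few hunks above: the linspace expression places scan point i along an axis with step size s at (i + 0.5) * s, i.e. on the centre of its cell rather than on the cell edge, while the origin may still dangle elsewhere. A toy sketch of just that convention, with made-up numbers:

```python
# Toy sketch of the cell-centre convention behind scan_point_{x,y,z} above.
import numpy as np


def cell_centre_axis(n: int, s: float) -> np.ndarray:
    """Coordinates of n cells of size s, each point sitting on its cell centre."""
    return np.asarray(np.linspace(0, n - 1, num=n, endpoint=True) * s + 0.5 * s,
                      np.float32)


print(cell_centre_axis(4, 0.25))  # [0.125 0.375 0.625 0.875]
```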
template[f"{trg}/@signal"] = "data" - template[f"{trg}/@axes"] = ["axis_y", "axis_x"] - template[f"{trg}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) - template[f"{trg}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + dims = ["x", "y"] + if is_threed is True: + dims.append("z") + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) + idx += 1 + dims.reverse() + template[f"{trg}/@axes"] = dims + contrast_modes = [(None, "n/a"), ("bc", "normalized_band_contrast"), ("ci", "normalized_confidence_index"), @@ -209,12 +223,15 @@ def process_roi_overview_ebsd_based(self, success = False for contrast_mode in contrast_modes: if contrast_mode[0] in inp.keys() and success is False: - template[f"{trg}/data"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1} + if is_three_d is True: + template[f"{trg}/data"] = {"compress": np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]], axis=None) * 255.), np.uint32), np.uint8), "strength": 1} + else: + template[f"{trg}/data"] = {"compress": inp[contrast_mode[0]] / np.ma} template[f"{trg}/descriptor"] = contrast_mode[1] success = True if success is False: raise ValueError(f"{__name__} unable to generate plot for {trg} !") - # 0 is y while 1 is x ! + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d template[f"{trg}/data/@long_name"] = f"Signal" template[f"{trg}/data/@CLASS"] = "IMAGE" # required H5Web, RGB map template[f"{trg}/data/@IMAGE_VERSION"] = f"1.2" @@ -223,16 +240,13 @@ def process_roi_overview_ebsd_based(self, scan_unit = inp["s_unit"] if scan_unit == "um": scan_unit = "µm" - template[f"{trg}/AXISNAME[axis_x]"] \ - = {"compress": self.get_named_axis(inp, "x"), "strength": 1} - template[f"{trg}/AXISNAME[axis_x]/@long_name"] \ - = f"Coordinate along x-axis ({scan_unit})" - template[f"{trg}/AXISNAME[axis_x]/@units"] = f"{scan_unit}" - template[f"{trg}/AXISNAME[axis_y]"] \ - = {"compress": self.get_named_axis(inp, "y"), "strength": 1} - template[f"{trg}/AXISNAME[axis_y]/@long_name"] \ - = f"Coordinate along y-axis ({scan_unit})" - template[f"{trg}/AXISNAME[axis_y]/@units"] = f"{scan_unit}" + dims.reverse() + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] \ + = {"compress": self.get_named_axis(inp, dim), "strength": 1} + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit})" + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" return template def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh index ad55a2290..e69223a4a 100755 --- a/test.ebsd3d_hdf5.sh +++ b/test.ebsd3d_hdf5.sh @@ -1,7 +1,8 @@ #!/bin/bash -Examples="067_0003 177_0004 177_0005 177_0006 177_0007 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final" - +Examples="067_0003 177_0004 177_0005 177_0006 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final" +Examples="067_0003 SmallIN100_Final 244_0014" +# 177_0007 # skip # 177_0007 as it is one of the weird examples where the h5py library cannot traverse the content... 
let's not follow-up on this rabbit hole right now # 177_0004 has only vertices @@ -15,7 +16,8 @@ Examples="067_0003 177_0004 177_0005 177_0006 177_0007 177_0008 177_0009 226_001 # 244_0014 is out because it does not have any quantity whereby to generate a band contrast, confidence index, or mad on to generate a default plot -# Examples="SmallIN100_Final" +Examples="SmallIN100_Final" +# Examples="244_0014" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt From 0c118b59d1eac3566e9f861eff2711d8082a69e1 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 9 Nov 2023 11:39:48 +0100 Subject: [PATCH 22/84] Fixed ROI overview for all examples, resolved successfully now, IPF next to complete use case 3D EBSD --- .../readers/em/subparsers/nxs_hfive.py | 44 +++++++++++-------- test.ebsd3d_hdf5.sh | 2 +- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 320866110..87ac68726 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -124,9 +124,11 @@ def parse(self, template: dict) -> dict: elif hfive_parser_type == "emsoft": emsoft = HdfFiveEmSoftReader(self.file_path) emsoft.parse_and_normalize() + # self.process_into_template(emsoft.tmp, template) elif hfive_parser_type == "dreamthreed": dreamthreed = HdfFiveDreamThreedReader(self.file_path) dreamthreed.parse_and_normalize() + self.process_into_template(dreamthreed.tmp, template) else: # none or something unsupported return template return template @@ -191,6 +193,19 @@ def process_roi_overview_ebsd_based(self, roi_id: str, template: dict) -> dict: print("Parse ROI default plot...") + contrast_modes = [(None, "n/a"), + ("bc", "normalized_band_contrast"), + ("ci", "normalized_confidence_index"), + ("mad", "normalized_mean_angular_deviation")] + contrast_mode = None + for mode in contrast_modes: + if mode[0] in inp.keys() and contrast_mode is None: + contrast_mode = mode + break + if contrast_mode is None: + print(f"{__name__} unable to generate plot for entry{self.entry_id}, roi{roi_id} !") + return template + is_threed = False if "n_z" in inp.keys(): is_threed = True @@ -202,6 +217,8 @@ def process_roi_overview_ebsd_based(self, raise ValueError(f"Plotting 2D roi_overviews larger than " \ f"{HFIVE_WEB_MAXIMUM_ROI} is not supported !") + template[f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/@NX_class"] = "NXroi" # TODO::writer should decorate automatically! + template[f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/@NX_class"] = "NXprocess" # TODO::writer should decorate automatically! trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/DATA[roi]" template[f"{trg}/title"] = f"Region-of-interest overview image" template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
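The hunk above moves the descriptor selection to the top of the function: walk a preference-ordered list of (key, long name) pairs, keep the first key present in the normalized cache, and return early if none is available. A condensed sketch of that selection together with the uint8 scaling that the next hunk applies; the (None, "n/a") sentinel from the original list is dropped here for brevity and the cache is a toy dict, not a parsed file:

```python
# Condensed sketch of contrast-mode selection plus uint8 greyscale scaling.
import numpy as np

CONTRAST_MODES = [("bc", "normalized_band_contrast"),
                  ("ci", "normalized_confidence_index"),
                  ("mad", "normalized_mean_angular_deviation")]


def pick_and_scale(cache: dict):
    """Return (descriptor, uint8 map) for the first available signal, else None."""
    for key, descriptor in CONTRAST_MODES:
        if key in cache:
            signal = np.asarray(cache[key], np.float32)
            grey = np.asarray(np.asarray(signal / np.max(signal, axis=None) * 255.,
                                         np.uint32), np.uint8)
            return descriptor, grey
    return None


print(pick_and_scale({"mad": [[120., 30.], [60., 240.]]}))
# ('normalized_mean_angular_deviation', array([[127,  31], [ 63, 255]], dtype=uint8))
```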
@@ -213,24 +230,16 @@ def process_roi_overview_ebsd_based(self, for dim in dims: template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) idx += 1 - dims.reverse() - template[f"{trg}/@axes"] = dims + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + + if is_threed is True: + template[f"{trg}/data"] = {"compress": np.squeeze(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]], axis=None) * 255.), np.uint32), np.uint8), axis=3), "strength": 1} + else: + template[f"{trg}/data"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1} + template[f"{trg}/descriptor"] = contrast_mode[1] - contrast_modes = [(None, "n/a"), - ("bc", "normalized_band_contrast"), - ("ci", "normalized_confidence_index"), - ("mad", "normalized_mean_angular_deviation")] - success = False - for contrast_mode in contrast_modes: - if contrast_mode[0] in inp.keys() and success is False: - if is_three_d is True: - template[f"{trg}/data"] = {"compress": np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]], axis=None) * 255.), np.uint32), np.uint8), "strength": 1} - else: - template[f"{trg}/data"] = {"compress": inp[contrast_mode[0]] / np.ma} - template[f"{trg}/descriptor"] = contrast_mode[1] - success = True - if success is False: - raise ValueError(f"{__name__} unable to generate plot for {trg} !") # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d template[f"{trg}/data/@long_name"] = f"Signal" template[f"{trg}/data/@CLASS"] = "IMAGE" # required H5Web, RGB map @@ -240,7 +249,6 @@ def process_roi_overview_ebsd_based(self, scan_unit = inp["s_unit"] if scan_unit == "um": scan_unit = "µm" - dims.reverse() for dim in dims: template[f"{trg}/AXISNAME[axis_{dim}]"] \ = {"compress": self.get_named_axis(inp, dim), "strength": 1} diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh index e69223a4a..5964514e6 100755 --- a/test.ebsd3d_hdf5.sh +++ b/test.ebsd3d_hdf5.sh @@ -16,7 +16,7 @@ Examples="067_0003 SmallIN100_Final 244_0014" # 244_0014 is out because it does not have any quantity whereby to generate a band contrast, confidence index, or mad on to generate a default plot -Examples="SmallIN100_Final" +# Examples="SmallIN100_Final" # Examples="244_0014" for example in $Examples; do echo $example From 5be172af4dc77e7863f924680d0ceed3128d6856 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 9 Nov 2023 14:40:18 +0100 Subject: [PATCH 23/84] DREAM3D to pyxem phase id management, 4d to 2d array reshaping to bypass xmaps, and H5Web refactoring done and working, 3D IPF maps is the only thing remaining for the DREAM3D example to work on the set of 3D example data --- .../em/subparsers/hfive_dreamthreed.py | 45 ++++-- .../readers/em/subparsers/nxs_hfive.py | 135 ++++++++++++------ .../readers/em/utils/hfive_web_constants.py | 10 ++ 3 files changed, 130 insertions(+), 60 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 4705032db..406173c06 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -328,24 +328,33 @@ def parse_and_normalize_ebsd_phases(self, ckey: str): with h5py.File(self.file_path, "r") as h5r: idx = 
np.asarray(h5r[f"{self.path_registry['group_phases']}/CrystalStructures"][:].flatten(), np.uint32) print(f"csys {np.shape(idx)}, {idx}") + nms = None + if f"{self.path_registry['group_phases']}/MaterialName" in h5r: + nms = read_strings_from_dataset(h5r[f"{self.path_registry['group_phases']}/MaterialName"][:]) + print(f"nms ---------> {nms}") + if len(idx) != len(nms): + raise ValueError(f"{__name__} MaterialName was recoverable but array has different length than for CrystalStructures!") + ijk = 0 for entry in idx: if entry != 999: - self.tmp[ckey]["phases"][int(entry)] = {} - self.tmp[ckey]["phases"][int(entry)]["Space Group"] \ + self.tmp[ckey]["phases"][ijk] = {} + self.tmp[ckey]["phases"][ijk]["space_group"] \ = DREAM_SPACEGROUPS_TO_REPRESENTATIVE_SPACEGROUP[entry] - # TODO::need to do a reindexing of the phase ids as they - # might not be stored in asc. order! + self.tmp[ckey]["phases"][ijk]["phase_name"] = nms[ijk] + ijk += 1 + # TODO::need to do a reindexing of the phase ids as they + # might not be stored in asc. order! - # LatticeAngles are implicitly defined for each space group - # LatticeDimensions essentially provides scaling information - # but indeed for simulating a crystal with a computer simulation - # at a length scale larger than atoms (mesoscale and macroscale) - # one can argue the exact spacing is not needed except when - # one wishes to compute the diffraction pattern but as most results - # from DREAM3D implicitly rely on information from a previous workflow - # where these atomistic details have been abstracted away it is - # factually true that there is not really a need for documenting - # the lattice dimensions from a DREAM3D analysis. + # LatticeAngles are implicitly defined for each space group + # LatticeDimensions essentially provides scaling information + # but indeed for simulating a crystal with a computer simulation + # at a length scale larger than atoms (mesoscale and macroscale) + # one can argue the exact spacing is not needed except when + # one wishes to compute the diffraction pattern but as most results + # from DREAM3D implicitly rely on information from a previous workflow + # where these atomistic details have been abstracted away it is + # factually true that there is not really a need for documenting + # the lattice dimensions from a DREAM3D analysis. for key, dct in self.tmp[ckey]["phases"].items(): print(f"{key}, {dct}") @@ -353,11 +362,19 @@ def parse_and_normalize_ebsd_data(self, ckey: str): with h5py.File(self.file_path, "r") as h5r: self.tmp[ckey]["euler"] = np.asarray( h5r[f"{self.path_registry['group_data']}/EulerAngles"], np.float32) + old_shp = np.shape(self.tmp[ckey]["euler"]) + self.tmp[ckey]["euler"] = np.reshape(self.tmp[ckey]["euler"], + (int(np.prod(old_shp[0:3])), int(old_shp[3])), + order="C") # TODO::DREAM3D uses Rowenhorst et. al. 
conventions # so we are already in positive halfspace, and radiants self.tmp[ckey]["phase_id"] = np.asarray( h5r[f"{self.path_registry['group_data']}/Phases"], np.int32) + old_shp = np.shape(self.tmp[ckey]["phase_id"]) + self.tmp[ckey]["phase_id"] = np.reshape(self.tmp[ckey]["phase_id"], + (int(np.prod(old_shp[0:3])), int(old_shp[3])), + order="C") print(np.unique(self.tmp[ckey]["phase_id"])) # Phases here stores C-style index which Phase of the possible ones # we are facing, the marker 999 is equivalent to the null-model notIndexed diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 87ac68726..5a390d776 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -53,7 +53,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset from pynxtools.dataconverter.readers.em.utils.hfive_web_constants \ - import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB + import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB, hfive_web_decorate_nxdata from pynxtools.dataconverter.readers.em.utils.image_processing import thumbnail from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader @@ -171,7 +171,7 @@ def process_into_template(self, inp: dict, template: dict) -> dict: print(f"{key}, {val}") self.process_roi_overview(inp, template) - # self.process_roi_ebsd_maps(inp, template) + self.process_roi_ebsd_maps(inp, template) return template def get_named_axis(self, inp: dict, dim_name: str): @@ -242,9 +242,7 @@ def process_roi_overview_ebsd_based(self, # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d template[f"{trg}/data/@long_name"] = f"Signal" - template[f"{trg}/data/@CLASS"] = "IMAGE" # required H5Web, RGB map - template[f"{trg}/data/@IMAGE_VERSION"] = f"1.2" - template[f"{trg}/data/@SUBCLASS_VERSION"] = np.int64(15) + hfive_web_decorate_nxdata(f"{trg}/data", template) scan_unit = inp["s_unit"] if scan_unit == "um": @@ -262,13 +260,20 @@ def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: if ckey.startswith("ebsd") and inp[ckey] != {}: if ckey.replace("ebsd", "").isdigit(): roi_id = int(ckey.replace("ebsd", "")) - self.process_roi_xmap(inp[ckey], roi_id, template) - self.process_roi_phases(inp[ckey], roi_id, template) + if "n_z" not in inp[ckey].keys(): + self.prepare_roi_ipfs_phases_twod(inp[ckey], roi_id, template) + self.process_roi_ipfs_phases_twod(inp[ckey], roi_id, template) + else: + self.onthefly_process_roi_ipfs_phases_threed(inp[ckey], roi_id, template) return template - def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict: + def prepare_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: """Process crystal orientation map from normalized orientation data.""" # for NeXus to create a default representation of the EBSD map to explore + # get rid of this xmap at some point it is really not needed in my option + # one can work with passing the set of EulerAngles to the IPF mapper directly + # the order of the individual per scan point results arrays anyway are assumed + # to have the same sequence of scan points and thus the same len along the scan axes self.xmap = None self.axis_x = None self.axis_y = None @@ -385,8 +390,8 @@ def process_roi_xmap(self, inp: dict, roi_id: int, template: dict) -> dict: print(self.xmap) return template - def process_roi_phases(self, inp: dict, roi_id: int, 
template: dict) -> dict: - print("Parse crystal_structure_models aka phases...") + def process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: + print("Parse crystal_structure_models aka phases (use xmap)...") phase_id = 0 prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" n_pts = inp["n_x"] * inp["n_y"] @@ -430,15 +435,11 @@ def process_roi_phases(self, inp: dict, roi_id: int, template: dict) -> dict: template[f"{trg}/phase_name"] \ = f"{inp['phases'][pyxem_phase_id + 1]['phase_name']}" - self.process_roi_phase_inverse_pole_figures(roi_id, pyxem_phase_id, template) + self.process_roi_phase_ipfs_twod(roi_id, pyxem_phase_id, template) return template - def process_roi_phase_inverse_pole_figures(self, - roi_id: int, - pyxem_phase_id: int, - template: dict) -> dict: - """Parse inverse pole figures (IPF) mappings.""" - # call process_roi_ipf_map + def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: + """Parse inverse pole figures (IPF) mappings for a single phase.""" phase_name = self.xmap.phases[pyxem_phase_id].name print(f"Generate IPF map for {pyxem_phase_id}, {phase_name}...") @@ -488,13 +489,16 @@ def process_roi_phase_inverse_pole_figures(self, = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! template[f"{mpp}/@signal"] = "data" - template[f"{mpp}/@axes"] = ["axis_y", "axis_x"] - template[f"{mpp}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) - template[f"{mpp}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + dims = ["x", "y"] + template[f"{mpp}/@axes"] = [] + for dim in dims[::-1]: + template[f"{mpp}/@axes"].append(f"axis_{dim}") + idx = 0 + for dim in dims: + template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) + idx += 1 template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} - template[f"{mpp}/DATA[data]/@CLASS"] = "IMAGE" # required, H5Web, RGB - template[f"{mpp}/DATA[data]/@IMAGE_VERSION"] = "1.2" - template[f"{mpp}/DATA[data]/@SUBCLASS_VERSION"] = np.int64(15) + hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) scan_unit = self.xmap.scan_unit if scan_unit == "um": @@ -515,30 +519,69 @@ def process_roi_phase_inverse_pole_figures(self, # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
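The same axes bookkeeping now repeats for map and legend: one loop over dims emits the axis_<dim>_indices attributes, and the reversed name list becomes @axes so the slowest-varying dimension of the C-ordered array is listed first. A sketch of the pattern in isolation, with a hypothetical helper name and path, mirroring the template assignments in this file:

```python
# Sketch of the NXdata axes wiring pattern used for map and legend above.
import numpy as np


def nxdata_axes_attrs(dims, prefix):
    """Hypothetical helper mirroring the repeated template assignments."""
    attrs = {}
    for idx, dim in enumerate(dims):  # e.g. ["x", "y"] or ["x", "y", "z"]
        attrs[f"{prefix}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx)
    attrs[f"{prefix}/@axes"] = [f"axis_{dim}" for dim in dims[::-1]]
    return attrs


for key, val in nxdata_axes_attrs(["x", "y"], "/entry1/roi1/ipf1/map").items():
    print(key, val)
```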
template[f"{lgd}/@signal"] = "data" - template[f"{lgd}/@axes"] = ["axis_y", "axis_x"] - template[f"{lgd}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0) - template[f"{lgd}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1) + template[f"{lgd}/@axes"] = [] + for dim in dims[::-1]: + template[f"{lgd}/@axes"].append(f"axis_{dim}") + idx = 0 + for dim in dims: + template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) + idx += 1 template[f"{lgd}/data"] = {"compress": img, "strength": 1} - template[f"{lgd}/data/@CLASS"] = f"IMAGE" # required by H5Web to plot RGB maps - template[f"{lgd}/data/@IMAGE_VERSION"] = f"1.2" - template[f"{lgd}/data/@SUBCLASS_VERSION"] = np.int64(15) - - template[f"{lgd}/AXISNAME[axis_x]"] \ - = {"compress": np.asarray(np.linspace(0, - np.shape(img)[1] - 1, - num=np.shape(img)[1], - endpoint=True), np.uint32), - "strength": 1} - template[f"{lgd}/AXISNAME[axis_x]/@long_name"] = "Pixel along x-axis" - template[f"{lgd}/AXISNAME[axis_x]/@units"] = "px" - template[f"{lgd}/AXISNAME[axis_y]"] \ - = {"compress": np.asarray(np.linspace(0, - np.shape(img)[0] - 1, - num=np.shape(img)[0], - endpoint=True), np.uint32), - "strength": 1} - template[f"{lgd}/AXISNAME[axis_y]/@long_name"] = "Pixel along y-axis" - template[f"{lgd}/AXISNAME[axis_y]/@units"] = "px" + hfive_web_decorate_nxdata(f"{lgd}/data", template) + + dims = [("x", 1), ("y", 0)] + for dim in dims: + template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ + = {"compress": np.asarray(np.linspace(0, + np.shape(img)[dim[1]] - 1, + num=np.shape(img)[dim[1]], + endpoint=True), np.uint32), + "strength": 1} + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + = f"Pixel along {dim[0]}-axis" + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" # call process_roi_ipf_color_key return template + + def onthefly_process_roi_ipfs_phases_threed(self, inp: dict, roi_id: int, template: dict) -> dict: + print("Parse crystal_structure_models aka phases (no xmap)...") + phase_id = 0 + prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" + n_pts = inp["n_x"] * inp["n_y"] * inp["n_z"] + n_pts_indexed = np.sum(inp["phase_id"] != 0) + print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}") + template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts) + template[f"{prfx}/indexing_rate"] = np.float64(100. * n_pts_indexed / n_pts) + template[f"{prfx}/indexing_rate/@units"] = f"%" + grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" + template[f"{grp_name}/number_of_scan_points"] = np.uint32(0) + template[f"{grp_name}/phase_identifier"] = np.uint32(phase_id) + template[f"{grp_name}/phase_name"] = f"notIndexed" + + print(f"----unique inp phase_id--->{np.unique(inp['phase_id'])}") + for phase_id in np.arange(1, np.max(np.unique(inp["phase_id"])) + 1): + # starting here at ID 1 because TODO::currently the only supported 3D case + # is from DREAM3D and here phase_ids start at 0 but this marks in DREAM3D jargon + # the 999 i.e. null-model of the notIndexed phase ! 
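Because phase_id 0 is reserved for the notIndexed null-model (DREAM3D's 999), the indexing rate reduces to the share of scan points with a non-zero identifier, which is exactly what the two template fields above encode. In toy numbers:

```python
# Toy illustration of the indexing-rate bookkeeping, phase_id 0 == notIndexed.
import numpy as np

phase_id = np.array([0, 1, 1, 2, 0, 1], np.int32)  # made-up scan points
n_pts = phase_id.size
n_pts_indexed = int(np.sum(phase_id != 0))
print(f"indexing_rate = {100. * n_pts_indexed / n_pts:.1f} %")  # 66.7 %
print(f"notIndexed points = {int(np.sum(phase_id == 0))}")      # 2
```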
+ print(f"inp[phases].keys(): {inp['phases'].keys()}") + if phase_id not in inp["phases"].keys(): + raise ValueError(f"{phase_id} is not a key in inp['phases'] !") + # pyxem_phase_id for notIndexed is -1, while for NeXus it is 0 so add + 1 in naming schemes + trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" + + # TODO::dealing with unexpected phase_identifier should not be an issue + # with DREAM3D because that software is more restrictive on this + template[f"{trg}/number_of_scan_points"] \ + = np.uint32(np.sum(inp["phase_id"] == phase_id)) + template[f"{trg}/phase_identifier"] = np.uint32(phase_id) + template[f"{trg}/phase_name"] \ + = f"{inp['phases'][phase_id]['phase_name']}" + + # mind to pass phase_id - 1 from the perspective of pyxem because + # in that software the id of the null-model is -1 and not 0 like in NeXus or DREAM3D! + # self.process_roi_phase_ipfs_threed(roi_id, phase_id, template) + return template + + def process_roi_phase_ipfs_threed(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: + return template diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py index 72b4f2519..03f6abcea 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py @@ -17,5 +17,15 @@ # """Constants relevant when working with H5Web.""" +import numpy as np + HFIVE_WEB_MAXIMUM_ROI = 2**14 - 1 HFIVE_WEB_MAXIMUM_RGB = 2**11 - 1 + + +def hfive_web_decorate_nxdata(path: str, inp: dict) -> dict: + if f"{path}" in inp.keys(): + inp[f"{path}/@CLASS"] = f"IMAGE" # required by H5Web to plot RGB maps + inp[f"{path}/@IMAGE_VERSION"] = f"1.2" + inp[f"{path}/@SUBCLASS_VERSION"] = np.int64(15) + return inp From 8e1af84e31206a55923bc8d55837173f3cbf53f4 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 9 Nov 2023 16:36:50 +0100 Subject: [PATCH 24/84] Implemented IPF color mapping without using an xmap but there is still a bug with the zeroth dimension of the results array, IPF color map key renders correctly though --- .../em/subparsers/hfive_dreamthreed.py | 6 + .../readers/em/subparsers/nxs_hfive.py | 168 ++++++++++++++---- test.ebsd3d_hdf5.sh | 2 +- 3 files changed, 145 insertions(+), 31 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 406173c06..80f278502 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -334,6 +334,12 @@ def parse_and_normalize_ebsd_phases(self, ckey: str): print(f"nms ---------> {nms}") if len(idx) != len(nms): raise ValueError(f"{__name__} MaterialName was recoverable but array has different length than for CrystalStructures!") + # alternatively + if f"{self.path_registry['group_phases']}/PhaseName" in h5r: + nms = read_strings_from_dataset(h5r[f"{self.path_registry['group_phases']}/PhaseName"][:]) + print(f"nms ---------> {nms}") + if len(idx) != len(nms): + raise ValueError(f"{__name__} PhaseName was recoverable but array has different length than for CrystalStructures!") ijk = 0 for entry in idx: if entry != 999: diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index 5a390d776..d09768fc2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ 
b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -46,6 +46,7 @@ from orix import plot from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList from orix.quaternion import Rotation +from orix.quaternion.symmetry import get_point_group from orix.vector import Vector3d from scipy.spatial import KDTree @@ -56,6 +57,10 @@ import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB, hfive_web_decorate_nxdata from pynxtools.dataconverter.readers.em.utils.image_processing import thumbnail +PROJECTION_DIRECTIONS = [("X", [1., 0., 0.]), ("Y", [0., 1., 0.]), ("Z", [0., 0., 1.])] +# TODO::do not hardcode but use data.flatten() of the following instances +PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] + from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader @@ -65,6 +70,21 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_dreamthreed import HdfFiveDreamThreedReader +def get_ipfdir_legend(ipf_key): + """Generate IPF color map key for a specific ipf_key.""" + img = None + fig = ipf_key.plot(return_figure=True) + fig.savefig("temporary.png", dpi=300, facecolor='w', edgecolor='w', + orientation='landscape', format='png', transparent=False, + bbox_inches='tight', pad_inches=0.1, metadata=None) + img = np.asarray(thumbnail(pil.open("temporary.png", "r", ["png"]), + size=HFIVE_WEB_MAXIMUM_RGB), np.uint8) # no flipping + img = img[:, :, 0:3] # discard alpha channel + if os.path.exists("temporary.png"): + os.remove("temporary.png") + return img + + class NxEmNxsHfiveSubParser: """Map content from different type of *.h5 files on an instance of NXem.""" @@ -439,28 +459,14 @@ def process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) - return template def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: - """Parse inverse pole figures (IPF) mappings for a single phase.""" + """Parse inverse pole figures (IPF) mappings for specific phase.""" phase_name = self.xmap.phases[pyxem_phase_id].name - print(f"Generate IPF map for {pyxem_phase_id}, {phase_name}...") - - projection_directions = [("X", [1., 0., 0.]), - ("Y", [0., 1., 0.]), - ("Z", [0., 0., 1.])] - projection_vectors = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] - for idx in [0, 1, 2]: + print(f"Generate 2D IPF map for {pyxem_phase_id}, {phase_name}...") + for idx in np.arange(0, len(PROJECTION_VECTORS)): ipf_key = plot.IPFColorKeyTSL( self.xmap.phases[pyxem_phase_id].point_group.laue, - direction=projection_vectors[idx]) - - fig = ipf_key.plot(return_figure=True) - fig.savefig("temporary.png", dpi=300, facecolor='w', edgecolor='w', - orientation='landscape', format='png', transparent=False, - bbox_inches='tight', pad_inches=0.1, metadata=None) - img = np.asarray(thumbnail(pil.open("temporary.png", "r", ["png"]), - size=HFIVE_WEB_MAXIMUM_RGB), np.uint8) # no flipping - img = img[:, :, 0:3] # discard alpha channel - if os.path.exists("temporary.png"): - os.remove("temporary.png") + direction=PROJECTION_VECTORS[idx]) + img = get_ipfdir_legend(ipf_key) rgb_px_with_phase_id = np.asarray( np.asarray(ipf_key.orientation2color( @@ -481,22 +487,23 @@ def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template trg = 
f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id + 1}]" \ f"/MS_IPF[ipf{idx + 1}]" - template[f"{trg}/projection_direction"] = np.asarray([0., 0., 1.], np.float32) + template[f"{trg}/projection_direction"] \ + = np.asarray(PROJECTION_VECTORS[idx].data.flatten(), np.float32) # add the IPF color map mpp = f"{trg}/DATA[map]" template[f"{mpp}/title"] \ - = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! template[f"{mpp}/@signal"] = "data" dims = ["x", "y"] template[f"{mpp}/@axes"] = [] for dim in dims[::-1]: template[f"{mpp}/@axes"].append(f"axis_{dim}") - idx = 0 + enum = 0 for dim in dims: - template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) - idx += 1 + template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) @@ -515,17 +522,17 @@ def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template # add the IPF color map legend/key lgd = f"{trg}/DATA[legend]" template[f"{lgd}/title"] \ - = f"Inverse pole figure {projection_directions[idx][0]} {phase_name}" + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! template[f"{lgd}/@signal"] = "data" template[f"{lgd}/@axes"] = [] for dim in dims[::-1]: template[f"{lgd}/@axes"].append(f"axis_{dim}") - idx = 0 + enum = 0 for dim in dims: - template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) - idx += 1 + template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 template[f"{lgd}/data"] = {"compress": img, "strength": 1} hfive_web_decorate_nxdata(f"{lgd}/data", template) @@ -580,8 +587,109 @@ def onthefly_process_roi_ipfs_phases_threed(self, inp: dict, roi_id: int, templa # mind to pass phase_id - 1 from the perspective of pyxem because # in that software the id of the null-model is -1 and not 0 like in NeXus or DREAM3D! 
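The off-by-one gymnastics in the comment above is worth spelling out once: orix/pyxem crystal maps encode not-indexed points as -1, while DREAM3D and the NXem template use 0, so every identifier shifts by one whenever data crosses that boundary. Reduced to toy arrays:

```python
# Toy illustration of the phase-identifier shift between pyxem and NeXus/DREAM3D.
import numpy as np

nexus_phase_id = np.array([0, 1, 2, 1, 0], np.int32)  # 0 == notIndexed
pyxem_phase_id = nexus_phase_id - 1                   # -1 == not indexed
assert np.array_equal(pyxem_phase_id + 1, nexus_phase_id)
print(pyxem_phase_id)  # [-1  0  1  0 -1]
```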
- # self.process_roi_phase_ipfs_threed(roi_id, phase_id, template) + self.process_roi_phase_ipfs_threed(inp, + roi_id, + phase_id, + inp["phases"][phase_id]["phase_name"], + inp["phases"][phase_id]["space_group"], + template) return template - def process_roi_phase_ipfs_threed(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: + def process_roi_phase_ipfs_threed(self, + inp: dict, + roi_id: int, + pyxem_phase_id: int, + phase_name: str, + space_group: int, + template: dict) -> dict: + """Generate inverse pole figures (IPF) for 3D mappings for specific phase.""" + print(f"Generate 3D IPF map for {pyxem_phase_id}, {phase_name}...") + for idx in np.arange(0, len(PROJECTION_VECTORS)): + point_group = get_point_group(space_group, proper=False) + ipf_key = plot.IPFColorKeyTSL( + point_group.laue, + direction=PROJECTION_VECTORS[idx]) + img = get_ipfdir_legend(ipf_key) + + rotations = Rotation.from_euler(euler=inp["euler"][inp["phases"] == pyxem_phase_id], + direction='lab2crystal', + degrees=False) + print(f"shape rotations -----> {np.shape(rotations)}") + rgb_px_with_phase_id = np.asarray(np.asarray( + ipf_key.orientation2color(rotations) * 255., np.uint32), np.uint8) + print(f"shape rgb_px_with_phase_id -----> {np.shape(rgb_px_with_phase_id)}") + + ipf_rgb_map = np.asarray( + np.uint8(np.zeros((inp["n_z"] * inp["n_y"] * inp["n_x"], 3)) * 255.)) + # background is black instead of white (which would be more pleasing) + # but IPF color maps have a whitepoint which encodes in fact an orientation + # and because of that we may have a single crystal with an orientation + # close to the whitepoint which become a fully white seemingly "empty" image + ipf_rgb_map[inp["phases"] == pyxem_phase_id, :] = rgb_px_with_phase_id + ipf_rgb_map = np.reshape( + ipf_rgb_map, (inp["n_z"], inp["n_y"], inp["n_x"], 3), order="C") + # 0 is z, 1 is y, while 2 is x ! + + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ + f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id}]" \ + f"/MS_IPF[ipf{idx + 1}]" + template[f"{trg}/projection_direction"] \ + = np.asarray(PROJECTION_VECTORS[idx].data.flatten(), np.float32) + + # add the IPF color map + mpp = f"{trg}/DATA[map]" + template[f"{mpp}/title"] \ + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" + template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! + template[f"{mpp}/@signal"] = "data" + dims = ["x", "y", "z"] + template[f"{mpp}/@axes"] = [] + for dim in dims[::-1]: + template[f"{mpp}/@axes"].append(f"axis_{dim}") + enum = 0 + for dim in dims: + template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 + template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} + hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) + + scan_unit = inp["s_unit"] # this is not correct necessarily as the + # simulation may be scale-invariant... 
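The colouring step above boils down to three orix calls: build rotations from the Bunge-Euler triplets, build an IPF colour key for the phase's Laue group and a projection direction, and evaluate the key per scan point. A self-contained sketch with two made-up orientations, assuming orix is installed; space group 225 stands in for a real phase and the calls mirror the ones used in this patch:

```python
# Self-contained sketch of per-point IPF colouring; toy Euler angles in
# radians instead of a DREAM3D dataset, space group 225 as stand-in phase.
import numpy as np
from orix import plot
from orix.quaternion import Rotation
from orix.quaternion.symmetry import get_point_group
from orix.vector import Vector3d

euler = np.asarray([[0.0, 0.0, 0.0], [0.5, 0.3, 0.1]], np.float32)
rotations = Rotation.from_euler(euler=euler, direction="lab2crystal", degrees=False)
ipf_key = plot.IPFColorKeyTSL(get_point_group(225, proper=False).laue,
                              direction=Vector3d.zvector())
rgb = np.asarray(np.asarray(ipf_key.orientation2color(rotations) * 255.,
                            np.uint32), np.uint8)
print(np.shape(rgb))  # (2, 3), one RGB triplet per scan point
```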
+ if scan_unit == "um": + scan_unit = "µm" + for dim in dims: + template[f"{mpp}/AXISNAME[axis_{dim}]"] \ + = {"compress": self.get_named_axis(inp, f"{dim}"), "strength": 1} + template[f"{mpp}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit})" + template[f"{mpp}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + + # add the IPF color map legend/key + lgd = f"{trg}/DATA[legend]" + template[f"{lgd}/title"] \ + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" + # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" + template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! + template[f"{lgd}/@signal"] = "data" + template[f"{lgd}/@axes"] = [] + for dim in dims[::-1]: + template[f"{lgd}/@axes"].append(f"axis_{dim}") + enum = 0 + for dim in dims: + template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 + template[f"{lgd}/data"] = {"compress": img, "strength": 1} + hfive_web_decorate_nxdata(f"{lgd}/data", template) + + dims = [("x", 1), ("y", 0)] + for dim in dims: + template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ + = {"compress": np.asarray(np.linspace(0, + np.shape(img)[dim[1]] - 1, + num=np.shape(img)[dim[1]], + endpoint=True), np.uint32), + "strength": 1} + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + = f"Pixel along {dim[0]}-axis" + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" return template diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh index 5964514e6..91e2b2e22 100755 --- a/test.ebsd3d_hdf5.sh +++ b/test.ebsd3d_hdf5.sh @@ -17,7 +17,7 @@ Examples="067_0003 SmallIN100_Final 244_0014" # Examples="SmallIN100_Final" -# Examples="244_0014" +Examples="244_0014" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt From 33688c1c9d0377e938795ddabd37705eb3c66657 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 9 Nov 2023 18:13:41 +0100 Subject: [PATCH 25/84] 3D EBSD working --- .../em/subparsers/hfive_dreamthreed.py | 2 +- .../readers/em/subparsers/nxs_hfive.py | 21 +++++++++++-------- .../readers/em/utils/hfive_web_constants.py | 1 + test.ebsd3d_hdf5.sh | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 80f278502..bdb739ff0 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -379,7 +379,7 @@ def parse_and_normalize_ebsd_data(self, ckey: str): h5r[f"{self.path_registry['group_data']}/Phases"], np.int32) old_shp = np.shape(self.tmp[ckey]["phase_id"]) self.tmp[ckey]["phase_id"] = np.reshape(self.tmp[ckey]["phase_id"], - (int(np.prod(old_shp[0:3])), int(old_shp[3])), + (int(np.prod(old_shp[0:3])),), order="C") print(np.unique(self.tmp[ckey]["phase_id"])) # Phases here stores C-style index which Phase of the possible ones diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index d09768fc2..f2fb711a9 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -562,7 +562,8 @@ def onthefly_process_roi_ipfs_phases_threed(self, inp: dict, roi_id: int, templa 
template[f"{prfx}/indexing_rate"] = np.float64(100. * n_pts_indexed / n_pts) template[f"{prfx}/indexing_rate/@units"] = f"%" grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" - template[f"{grp_name}/number_of_scan_points"] = np.uint32(0) + template[f"{grp_name}/number_of_scan_points"] \ + = np.uint32(np.sum(inp["phase_id"] == 0)) template[f"{grp_name}/phase_identifier"] = np.uint32(phase_id) template[f"{grp_name}/phase_name"] = f"notIndexed" @@ -611,7 +612,7 @@ def process_roi_phase_ipfs_threed(self, direction=PROJECTION_VECTORS[idx]) img = get_ipfdir_legend(ipf_key) - rotations = Rotation.from_euler(euler=inp["euler"][inp["phases"] == pyxem_phase_id], + rotations = Rotation.from_euler(euler=inp["euler"][inp["phase_id"] == pyxem_phase_id], direction='lab2crystal', degrees=False) print(f"shape rotations -----> {np.shape(rotations)}") @@ -625,7 +626,7 @@ def process_roi_phase_ipfs_threed(self, # but IPF color maps have a whitepoint which encodes in fact an orientation # and because of that we may have a single crystal with an orientation # close to the whitepoint which become a fully white seemingly "empty" image - ipf_rgb_map[inp["phases"] == pyxem_phase_id, :] = rgb_px_with_phase_id + ipf_rgb_map[inp["phase_id"] == pyxem_phase_id, :] = rgb_px_with_phase_id ipf_rgb_map = np.reshape( ipf_rgb_map, (inp["n_z"], inp["n_y"], inp["n_x"], 3), order="C") # 0 is z, 1 is y, while 2 is x ! @@ -653,16 +654,17 @@ def process_roi_phase_ipfs_threed(self, template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) - scan_unit = inp["s_unit"] # this is not correct necessarily as the - # simulation may be scale-invariant... + scan_unit = inp["s_unit"] # TODO::this is not necessarily correct + # could be a scale-invariant synthetic microstructure whose simulation + # would work on multiple length-scales as atoms are not resolved directly! if scan_unit == "um": scan_unit = "µm" for dim in dims: template[f"{mpp}/AXISNAME[axis_{dim}]"] \ - = {"compress": self.get_named_axis(inp, f"{dim}"), "strength": 1} - template[f"{mpp}/AXISNAME[axis_{dim}]/@long_name"] \ - = f"Coordinate along {dim}-axis ({scan_unit})" - template[f"{mpp}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + = {"compress": self.get_named_axis(inp, f"{dim}"), "strength": 1} + template[f"{mpp}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit})" + template[f"{mpp}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" # add the IPF color map legend/key lgd = f"{trg}/DATA[legend]" @@ -672,6 +674,7 @@ def process_roi_phase_ipfs_threed(self, template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
template[f"{lgd}/@signal"] = "data" template[f"{lgd}/@axes"] = [] + dims = ["x", "y"] for dim in dims[::-1]: template[f"{lgd}/@axes"].append(f"axis_{dim}") enum = 0 diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py index 03f6abcea..2e31e72a0 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py @@ -28,4 +28,5 @@ def hfive_web_decorate_nxdata(path: str, inp: dict) -> dict: inp[f"{path}/@CLASS"] = f"IMAGE" # required by H5Web to plot RGB maps inp[f"{path}/@IMAGE_VERSION"] = f"1.2" inp[f"{path}/@SUBCLASS_VERSION"] = np.int64(15) + inp[f"{path}/@long_name"] = f"Signal" return inp diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh index 91e2b2e22..5964514e6 100755 --- a/test.ebsd3d_hdf5.sh +++ b/test.ebsd3d_hdf5.sh @@ -17,7 +17,7 @@ Examples="067_0003 SmallIN100_Final 244_0014" # Examples="SmallIN100_Final" -Examples="244_0014" +# Examples="244_0014" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt From dcb0cec615d5a3a58314584cf2bb962479a0ac69 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 4 Dec 2023 09:45:07 +0100 Subject: [PATCH 26/84] Potentially breaking refactoring from the train-ride back from the FAIRmat user/project meeting but preparing the removal of the crystal xmap also for the 2D cases --- .../readers/em/subparsers/nxs_hfive.py | 48 ++++++++++++------- .../readers/em/utils/hfive_web_constants.py | 9 ---- .../readers/em/utils/hfive_web_utils.py | 29 +++++++++++ 3 files changed, 59 insertions(+), 27 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/utils/hfive_web_utils.py diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py index f2fb711a9..7e08bebd4 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py @@ -54,12 +54,16 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset from pynxtools.dataconverter.readers.em.utils.hfive_web_constants \ - import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB, hfive_web_decorate_nxdata + import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB +from pynxtools.dataconverter.readers.em.utils.hfive_web_utils \ + import hfive_web_decorate_nxdata from pynxtools.dataconverter.readers.em.utils.image_processing import thumbnail -PROJECTION_DIRECTIONS = [("X", [1., 0., 0.]), ("Y", [0., 1., 0.]), ("Z", [0., 0., 1.])] -# TODO::do not hardcode but use data.flatten() of the following instances PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] +PROJECTION_DIRECTIONS = [("X", Vector3d.xvector().data.flatten()), + ("Y", Vector3d.yvector().data.flatten()), + ("Z", Vector3d.zvector().data.flatten())] + from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader @@ -283,6 +287,7 @@ def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: if "n_z" not in inp[ckey].keys(): self.prepare_roi_ipfs_phases_twod(inp[ckey], roi_id, template) self.process_roi_ipfs_phases_twod(inp[ckey], roi_id, template) + # 
self.onthefly_process_roi_ipfs_phases_threed(inp[ckey], roi_id, template) else: self.onthefly_process_roi_ipfs_phases_threed(inp[ckey], roi_id, template) return template @@ -458,6 +463,10 @@ def process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) - self.process_roi_phase_ipfs_twod(roi_id, pyxem_phase_id, template) return template + def onthefly_process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: + # TODO: ##### + return template + def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: """Parse inverse pole figures (IPF) mappings for specific phase.""" phase_name = self.xmap.phases[pyxem_phase_id].name @@ -596,32 +605,35 @@ def onthefly_process_roi_ipfs_phases_threed(self, inp: dict, roi_id: int, templa template) return template - def process_roi_phase_ipfs_threed(self, - inp: dict, - roi_id: int, - pyxem_phase_id: int, - phase_name: str, - space_group: int, - template: dict) -> dict: + def process_roi_phase_ipfs_threed(self, inp: dict, roi_id: int, pyxem_phase_id: int, phase_name: str, space_group: int, template: dict) -> dict: """Generate inverse pole figures (IPF) for 3D mappings for specific phase.""" + # equivalent to the twod case, one needs to add, if required, regridding/downsampling + # code here when any of the ROI's number of pixels along an edge > HFIVE_WEB_MAXIMUM_RGB + # TODO: I have not seen any dataset yet where this limit is exhausted, the largest + # dataset is a 3D SEM/FIB study from a UK project; this is likely because to + # get an EBSD map that large one already scans quite long for one section, so + # a compromise is required, and thus such hypothetical large serial-sectioning + # studies would block the microscope for a very long time + # however I have seen examples from Hadi Pirgazi with L.
Kestens from Leuven + # where indeed large but thin 3d slabs were characterized print(f"Generate 3D IPF map for {pyxem_phase_id}, {phase_name}...") + rotations = Rotation.from_euler( + euler=inp["euler"][inp["phase_id"] == pyxem_phase_id], + direction='lab2crystal', degrees=False) + print(f"shape rotations -----> {np.shape(rotations)}") + for idx in np.arange(0, len(PROJECTION_VECTORS)): point_group = get_point_group(space_group, proper=False) ipf_key = plot.IPFColorKeyTSL( - point_group.laue, - direction=PROJECTION_VECTORS[idx]) + point_group.laue, direction=PROJECTION_VECTORS[idx]) img = get_ipfdir_legend(ipf_key) - rotations = Rotation.from_euler(euler=inp["euler"][inp["phase_id"] == pyxem_phase_id], - direction='lab2crystal', - degrees=False) - print(f"shape rotations -----> {np.shape(rotations)}") rgb_px_with_phase_id = np.asarray(np.asarray( ipf_key.orientation2color(rotations) * 255., np.uint32), np.uint8) print(f"shape rgb_px_with_phase_id -----> {np.shape(rgb_px_with_phase_id)}") - ipf_rgb_map = np.asarray( - np.uint8(np.zeros((inp["n_z"] * inp["n_y"] * inp["n_x"], 3)) * 255.)) + ipf_rgb_map = np.asarray(np.asarray( + np.zeros((inp["n_z"] * inp["n_y"] * inp["n_x"], 3)) * 255., np.uint32), np.uint8) # background is black instead of white (which would be more pleasing) # but IPF color maps have a whitepoint which encodes in fact an orientation # and because of that we may have a single crystal with an orientation diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py index 2e31e72a0..3547898c7 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_web_constants.py @@ -21,12 +21,3 @@ HFIVE_WEB_MAXIMUM_ROI = 2**14 - 1 HFIVE_WEB_MAXIMUM_RGB = 2**11 - 1 - - -def hfive_web_decorate_nxdata(path: str, inp: dict) -> dict: - if f"{path}" in inp.keys(): - inp[f"{path}/@CLASS"] = f"IMAGE" # required by H5Web to plot RGB maps - inp[f"{path}/@IMAGE_VERSION"] = f"1.2" - inp[f"{path}/@SUBCLASS_VERSION"] = np.int64(15) - inp[f"{path}/@long_name"] = f"Signal" - return inp diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_web_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_web_utils.py new file mode 100644 index 000000000..1821c40ce --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/hfive_web_utils.py @@ -0,0 +1,29 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utilities relevant when working with H5Web.""" + +import numpy as np + + +def hfive_web_decorate_nxdata(path: str, inp: dict) -> dict: + if f"{path}" in inp.keys(): + inp[f"{path}/@CLASS"] = f"IMAGE" # required by H5Web to plot RGB maps + inp[f"{path}/@IMAGE_VERSION"] = f"1.2" + inp[f"{path}/@SUBCLASS_VERSION"] = np.int64(15) + inp[f"{path}/@long_name"] = f"Signal" + return inp From 96fb1c462dd475c8ec46a31c5b43b1efb01308e9 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 4 Dec 2023 10:31:39 +0100 Subject: [PATCH 27/84] Update definitions to use fairmat 615ff37 which contains the merged in base_class_templates branch --- pynxtools/definitions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pynxtools/definitions b/pynxtools/definitions index 2b18f2cbd..615ff37cb 160000 --- a/pynxtools/definitions +++ b/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 2b18f2cbd7efdce201328c71eda7fbe91e06b6c1 +Subproject commit 615ff37cbafd2ca017fb61c119c0f5c0cf052a34 From 7e1cfd960e4ff1a955f8921529529d557c1d9780 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 4 Dec 2023 13:00:41 +0100 Subject: [PATCH 28/84] Manual merging in of fairmat 8620c4e into the em_refactoring branch to prepare with the resolving of possible merge conflicts --- .github/workflows/publish.yml | 1 + .github/workflows/pylint.yml | 4 +- .github/workflows/pytest.yml | 12 +- .gitignore | 4 +- .vscode/settings.json | 7 + MANIFEST.in | 4 +- README.md | 13 +- dev-requirements.txt | 20 +- examples/apm/Write.NXapm.Example.1.ipynb | 53 +- examples/apm/apm.oasis.specific.yaml | 1 + examples/apm/eln_data_apm.yaml | 142 +- examples/ellipsometry/eln_data.yaml | 5 +- .../em_nion/Write.NXem_nion.Example.1.ipynb | 14 +- .../em_om/Write.NXem_ebsd.Example.1.ipynb | 16 +- .../em_spctrscpy/Write.NXem.Example.1.ipynb | 8 +- examples/json_map/README.md | 36 + examples/json_map/merge_copied.mapping.json | 35 + examples/json_map/merge_linked.mapping.json | 25 + examples/sts/README.md | 32 + pynxtools/__init__.py | 68 + pynxtools/_build_wrapper.py | 71 + pynxtools/dataconverter/README.md | 21 +- pynxtools/dataconverter/convert.py | 247 ++- pynxtools/dataconverter/hdfdict.py | 11 +- pynxtools/dataconverter/helpers.py | 149 +- .../apm_deployment_specifics_to_nx_map.py | 52 + .../apm/map_concepts/apm_eln_to_nx_map.py | 109 + pynxtools/dataconverter/readers/apm/reader.py | 32 +- ...lots.py => apm_create_nx_default_plots.py} | 0 ...ase_selector.py => apm_define_io_cases.py} | 38 +- ...data.py => apm_generate_synthetic_data.py} | 2 +- .../readers/apm/utils/apm_generic_eln_io.py | 409 ---- .../utils/apm_load_deployment_specifics.py | 57 + .../readers/apm/utils/apm_load_generic_eln.py | 175 ++ ...{apm_ranging_io.py => apm_load_ranging.py} | 0 ...ction_io.py => apm_load_reconstruction.py} | 0 .../apm/utils/apm_parse_composition_table.py | 179 ++ .../dataconverter/readers/ellips/reader.py | 34 +- .../{concepts => map_concepts}/README.md | 0 .../swift_display_items_to_nx.py} | 0 .../swift_eln_to_nx_map.py} | 0 .../swift_to_nx_image_ang_space.py} | 0 .../swift_to_nx_image_real_space.py} | 0 .../swift_to_nx_spectrum_eels.py} | 0 .../dataconverter/readers/em_nion/reader.py | 4 +- .../{versioning.py => em_nion_versioning.py} | 0 ...e_selector.py => swift_define_io_cases.py} | 0 ...xes.py => swift_generate_dimscale_axes.py} | 2 +- ...ic_eln_io.py => swift_load_generic_eln.py} | 8 +- .../utils/swift_zipped_project_parser.py | 12 +- .../readers/em_om/utils/image_transform.py | 9 +- .../dataconverter/readers/example/reader.py | 7 +- 
.../dataconverter/readers/json_map/README.md | 53 +- .../dataconverter/readers/json_map/reader.py | 75 +- .../dataconverter/readers/mpes/reader.py | 39 +- .../readers/rii_database/reader.py | 20 +- .../map_concepts/mapping_functors.py} | 0 .../readers/shared/shared_utils.py | 12 +- .../readers/transmission/reader.py | 3 +- pynxtools/dataconverter/readers/utils.py | 22 +- pynxtools/dataconverter/readers/xrd/README.md | 40 + .../dataconverter/readers/xrd/__init__.py | 15 + pynxtools/dataconverter/readers/xrd/config.py | 117 ++ pynxtools/dataconverter/readers/xrd/reader.py | 176 ++ .../dataconverter/readers/xrd/xrd_helper.py | 293 +++ .../dataconverter/readers/xrd/xrd_parser.py | 448 ++++ pynxtools/dataconverter/template.py | 32 +- pynxtools/dataconverter/writer.py | 40 +- pynxtools/eln_mapper/README.md | 19 + .../apm_utils.py => eln_mapper/__init__.py} | 10 - pynxtools/eln_mapper/eln.py | 189 ++ pynxtools/eln_mapper/eln_mapper.py | 75 + pynxtools/eln_mapper/scheme_eln.py | 281 +++ pynxtools/nexus/nexus.py | 44 +- pynxtools/nexus/nxdl_utils.py | 3 + pyproject.toml | 20 +- tests/data/dataconverter/NXtest.nxdl.xml | 3 + .../nxapm.schema.archive.yaml | 422 ++-- .../readers/ellips/eln_data.yaml | 3 - .../dataconverter/readers/json_map/data.json | 3 +- .../readers/json_map/data.mapping.json | 3 +- .../readers/mpes/Ref_nexus_mpes.log | 1869 ++++++++++++----- .../readers/mpes/config_file.json | 15 +- .../readers/xrd/ACZCTS_5-60_181.xrdml | 106 + tests/data/eln_mapper/eln.yaml | 103 + .../data/eln_mapper/mpes.scheme.archive.yaml | 537 +++++ tests/data/nexus/NXtest2.nxdl.xml | 455 ++++ tests/data/nexus/Ref_nexus_test.log | 551 +++-- tests/dataconverter/test_convert.py | 4 +- tests/dataconverter/test_helpers.py | 161 +- tests/dataconverter/test_readers.py | 29 + tests/eln_mapper/__init__.py | 16 + tests/eln_mapper/test_eln_mapper.py | 107 + tests/nexus/test_nexus.py | 19 +- tests/nexus/test_version.py | 16 + 95 files changed, 6832 insertions(+), 1744 deletions(-) create mode 100644 examples/apm/apm.oasis.specific.yaml create mode 100644 examples/json_map/README.md create mode 100644 examples/json_map/merge_copied.mapping.json create mode 100644 examples/json_map/merge_linked.mapping.json create mode 100644 examples/sts/README.md create mode 100644 pynxtools/_build_wrapper.py create mode 100644 pynxtools/dataconverter/readers/apm/map_concepts/apm_deployment_specifics_to_nx_map.py create mode 100644 pynxtools/dataconverter/readers/apm/map_concepts/apm_eln_to_nx_map.py rename pynxtools/dataconverter/readers/apm/utils/{apm_nexus_plots.py => apm_create_nx_default_plots.py} (100%) rename pynxtools/dataconverter/readers/apm/utils/{apm_use_case_selector.py => apm_define_io_cases.py} (65%) rename pynxtools/dataconverter/readers/apm/utils/{apm_example_data.py => apm_generate_synthetic_data.py} (99%) delete mode 100644 pynxtools/dataconverter/readers/apm/utils/apm_generic_eln_io.py create mode 100644 pynxtools/dataconverter/readers/apm/utils/apm_load_deployment_specifics.py create mode 100644 pynxtools/dataconverter/readers/apm/utils/apm_load_generic_eln.py rename pynxtools/dataconverter/readers/apm/utils/{apm_ranging_io.py => apm_load_ranging.py} (100%) rename pynxtools/dataconverter/readers/apm/utils/{apm_reconstruction_io.py => apm_load_reconstruction.py} (100%) create mode 100644 pynxtools/dataconverter/readers/apm/utils/apm_parse_composition_table.py rename pynxtools/dataconverter/readers/em_nion/{concepts => map_concepts}/README.md (100%) rename 
pynxtools/dataconverter/readers/em_nion/{concepts/swift_display_items_to_nx_concepts.py => map_concepts/swift_display_items_to_nx.py} (100%) rename pynxtools/dataconverter/readers/em_nion/{concepts/generic_eln_mapping.py => map_concepts/swift_eln_to_nx_map.py} (100%) rename pynxtools/dataconverter/readers/em_nion/{concepts/nx_image_ang_space.py => map_concepts/swift_to_nx_image_ang_space.py} (100%) rename pynxtools/dataconverter/readers/em_nion/{concepts/nx_image_real_space.py => map_concepts/swift_to_nx_image_real_space.py} (100%) rename pynxtools/dataconverter/readers/em_nion/{concepts/nx_spectrum_eels.py => map_concepts/swift_to_nx_spectrum_eels.py} (100%) rename pynxtools/dataconverter/readers/em_nion/utils/{versioning.py => em_nion_versioning.py} (100%) rename pynxtools/dataconverter/readers/em_nion/utils/{use_case_selector.py => swift_define_io_cases.py} (100%) rename pynxtools/dataconverter/readers/em_nion/utils/{swift_dimscale_axes.py => swift_generate_dimscale_axes.py} (96%) rename pynxtools/dataconverter/readers/em_nion/utils/{em_generic_eln_io.py => swift_load_generic_eln.py} (95%) rename pynxtools/dataconverter/readers/{em_nion/concepts/swift_handle_nx_concepts.py => shared/map_concepts/mapping_functors.py} (100%) create mode 100644 pynxtools/dataconverter/readers/xrd/README.md create mode 100644 pynxtools/dataconverter/readers/xrd/__init__.py create mode 100644 pynxtools/dataconverter/readers/xrd/config.py create mode 100644 pynxtools/dataconverter/readers/xrd/reader.py create mode 100644 pynxtools/dataconverter/readers/xrd/xrd_helper.py create mode 100644 pynxtools/dataconverter/readers/xrd/xrd_parser.py create mode 100644 pynxtools/eln_mapper/README.md rename pynxtools/{dataconverter/readers/apm/utils/apm_utils.py => eln_mapper/__init__.py} (59%) create mode 100644 pynxtools/eln_mapper/eln.py create mode 100644 pynxtools/eln_mapper/eln_mapper.py create mode 100644 pynxtools/eln_mapper/scheme_eln.py create mode 100644 tests/data/dataconverter/readers/xrd/ACZCTS_5-60_181.xrdml create mode 100644 tests/data/eln_mapper/eln.yaml create mode 100644 tests/data/eln_mapper/mpes.scheme.archive.yaml create mode 100644 tests/data/nexus/NXtest2.nxdl.xml create mode 100644 tests/eln_mapper/__init__.py create mode 100644 tests/eln_mapper/test_eln_mapper.py create mode 100644 tests/nexus/test_version.py diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b222fc5cf..24b408529 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -19,6 +19,7 @@ jobs: steps: - uses: actions/checkout@v3 with: + fetch-depth: 0 submodules: recursive - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 8ef2b0e10..f1b418fbe 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,10 +7,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: "3.10" - name: Install dependencies run: | git submodule sync --recursive diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 575788512..ce6cd000e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -13,12 +13,14 @@ jobs: pytest: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python_version: ["3.8", "3.9", "3.10"] + python_version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 with: + 
fetch-depth: 0 submodules: recursive - name: Set up Python ${{ matrix.python_version }} uses: actions/setup-python@v4 @@ -26,20 +28,16 @@ python-version: ${{ matrix.python_version }} - name: Install dependencies run: | - git submodule sync --recursive - git submodule update --init --recursive --jobs=4 python -m pip install --upgrade pip python -m pip install coverage coveralls - name: Install package run: | - python -m pip install --no-deps . - - name: Install dev requirements - run: | - python -m pip install -r dev-requirements.txt + pip install ".[dev]" - name: Test with pytest run: | coverage run -m pytest -sv --show-capture=no tests - name: Submit to coveralls + continue-on-error: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | diff --git a/.gitignore b/.gitignore index 4b1dad0c2..abaa80d10 100644 --- a/.gitignore +++ b/.gitignore @@ -88,6 +88,8 @@ cover/ # Django stuff: *.log +!tests/data/dataconverter/readers/mpes/Ref_nexus_mpes.log +!tests/data/nexus/Ref_nexus_test.log local_settings.py db.sqlite3 db.sqlite3-journal @@ -203,7 +205,7 @@ build/ nexusparser.egg-info/PKG-INFO .python-version -# reader specific custom settings +# em-reader-specific custom settings *.h5 *.oh5 *.edaxh5 diff --git a/.vscode/settings.json b/.vscode/settings.json index 299e6fb07..94dada964 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -23,6 +23,13 @@ "pynxtools", "tests" ], + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": false, + "editor.codeActionsOnSave": { + "source.organizeImports": false + } + }, "python.testing.pytestArgs": ["tests"], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true diff --git a/MANIFEST.in b/MANIFEST.in index 0e91894ff..3c768c9ff 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ recursive-include pynxtools/definitions/base_classes/ *.xml recursive-include pynxtools/definitions/applications/ *.xml recursive-include pynxtools/definitions/contributed_definitions/ *.xml -include pynxtools/definitions/ *.xsd +include pynxtools/definitions/*.xsd +include pynxtools/nexus-version.txt +include pynxtools/definitions/NXDL_VERSION \ No newline at end of file diff --git a/README.md b/README.md index b034fddb7..d121714ce 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ It allows to develop ontologies and to create ontological instances based on the # Installation -It is recommended to use python 3.8 with a dedicated virtual environment for this package. +It is recommended to use python 3.10 with a dedicated virtual environment for this package. Learn how to manage [python versions](https://github.com/pyenv/pyenv) and [virtual environments](https://realpython.com/python-virtual-environments-a-primer/). @@ -71,6 +71,17 @@ Especially relevant for developers, there exists a basic test framework written python -m pytest -sv tests ``` +## Run examples + +A number of examples exist which document how the tools can be used. For standalone +use, convenient Jupyter notebooks are available for each tool. To use them, Jupyter +and related tools have to be installed in the development environment as follows: + +```shell +python -m pip install jupyter +python -m pip install jupyterlab +python -m pip install jupyterlab_h5web +``` # Questions, suggestions? 
To ask further questions, to make suggestions how we can improve these tools, to get advice diff --git a/dev-requirements.txt b/dev-requirements.txt index b8f75ce0b..a4f1ffed2 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -23,6 +23,8 @@ attrs==22.1.0 # requests-cache backcall==0.2.0 # via ipython +blosc2==2.3.2 + # via tables build==0.10.0 # via pip-tools cattrs==22.2.0 @@ -113,7 +115,7 @@ hyperspy==1.7.5 # pyxem idna==3.4 # via requests -ifes-apt-tc-data-modeling==0.0.9 +ifes-apt-tc-data-modeling==0.1 # via pynxtools (pyproject.toml) imageio==2.22.1 # via @@ -192,14 +194,20 @@ matplotlib-scalebar==0.8.1 # via orix mccabe==0.7.0 # via pylint +mergedeep==1.3.4 + # via pynxtools (pyproject.toml) mpmath==1.2.1 # via sympy +msgpack==1.0.7 + # via blosc2 mypy==0.982 # via pynxtools (pyproject.toml) mypy-extensions==0.4.3 # via mypy natsort==8.2.0 # via hyperspy +ndindex==1.7 + # via blosc2 nest-asyncio==1.5.6 # via # ipykernel @@ -238,9 +246,11 @@ numexpr==2.8.3 # via # hyperspy # pyfai + # tables numpy==1.21.6 # via # ase + # blosc2 # dask # diffsims # fabio @@ -271,6 +281,7 @@ numpy==1.21.6 # scipy # silx # sparse + # tables # tifffile # xarray # zarr @@ -293,6 +304,7 @@ packaging==21.3 # pooch # pytest # scikit-image + # tables pandas==1.3.5 # via # ifes-apt-tc-data-modeling @@ -341,6 +353,10 @@ ptyprocess==0.7.0 # via pexpect py==1.11.0 # via pytest +py-cpuinfo==9.0.0 + # via + # blosc2 + # tables pycifrw==4.4.5 # via diffpy-structure pycodestyle==2.9.1 @@ -448,6 +464,8 @@ sympy==1.10.1 # via # hyperspy # radioactivedecay +tables==3.9.2 + # via ifes-apt-tc-data-modeling threadpoolctl==3.1.0 # via scikit-learn tifffile==2021.11.2 diff --git a/examples/apm/Write.NXapm.Example.1.ipynb b/examples/apm/Write.NXapm.Example.1.ipynb index efae762e3..cb11f3c41 100644 --- a/examples/apm/Write.NXapm.Example.1.ipynb +++ b/examples/apm/Write.NXapm.Example.1.ipynb @@ -195,6 +195,20 @@ "### **Step 3c**: Convert the files in the example into an NXapm-compliant NeXus/HDF5 file." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "The deployment_specific YAML file can be used to simplify a specific ELN, e.g. to avoid having to type
\n", + "repetitive information. A typical use case is an OASIS in a laboratory where there is a fixed number of
\n", + "instruments for which many details stay the same, these can be carried over via a *.oasis.specific.yaml file.
\n", + "It is the responsibility of the OASIS maintainer to document and version these specific configurations.
\n", + "We would be happy to learn how this functionality is useful and learn about your feedback to improve
\n", + "this feature to make working with NeXus and ELNs more convenient.
\n", + "
" + ] + }, { "cell_type": "code", "execution_count": null, @@ -204,20 +218,22 @@ "outputs": [], "source": [ "#parser-nexus/tests/data/tools/dataconverter/readers/em_om/\n", + "import numpy as np\n", "eln_data_file_name = [\"eln_data_apm.yaml\"]\n", + "deployment_specific = [\"apm.oasis.specific.yaml\"]\n", "input_recon_file_name = [\"Si.apt\",\n", " \"Si.epos\",\n", " \"Si.pos\",\n", - " \"R31_06365-v02.pos\",\n", - " \"R18_58152-v02.epos\",\n", - " \"70_50_50.apt\"]\n", + "# \"R31_06365-v02.pos\",\n", + "# \"R18_58152-v02.epos\",\n", + "# \"70_50_50.apt\"]\n", "# \"R56_01769-v01.pos\"]\n", "input_range_file_name = [\"Si.RRNG\",\n", " \"Si.RNG\",\n", " \"Si.RNG\",\n", - " \"R31_06365-v02.rrng\",\n", - " \"R31_06365-v02.rrng\",\n", - " \"R31_06365-v02.rrng\"]\n", + "# \"R31_06365-v02.rrng\",\n", + "# \"R31_06365-v02.rrng\",\n", + "# \"R31_06365-v02.rrng\"]\n", "# \"R56_01769.rng.fig.txt\"]\n", "output_file_name = [\"apm.case1.nxs\",\n", " \"apm.case2.nxs\",\n", @@ -225,14 +241,15 @@ " \"apm.case4.nxs\",\n", " \"apm.case5.nxs\",\n", " \"apm.case6.nxs\"]\n", - "for case_id in [0]:\n", + "for case_id in np.arange(0, 3):\n", " ELN = eln_data_file_name[0]\n", + " OASIS = deployment_specific[0]\n", " INPUT_RECON = input_recon_file_name[case_id]\n", " INPUT_RANGE = input_range_file_name[case_id]\n", " OUTPUT = output_file_name[case_id]\n", "\n", - " ! dataconverter --reader apm --nxdl NXapm --input-file $ELN --input-file \\\n", - " $INPUT_RECON --input-file $INPUT_RANGE --output $OUTPUT" + " ! dataconverter --reader apm --nxdl NXapm --input-file $ELN --input-file $OASIS \\\n", + " --input-file $INPUT_RECON --input-file $INPUT_RANGE --output $OUTPUT" ] }, { @@ -258,7 +275,7 @@ "outputs": [], "source": [ "# H5Web(OUTPUT)\n", - "H5Web(\"apm.case1.nxs\")" + "H5Web(\"apm.case3.nxs\")" ] }, { @@ -417,7 +434,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "
\n", + "
\n", "Currently, this functionality requires a Python environment with a newer version of the ase library than the one
\n", "which is used by the installation of pynxtools (which is currently ase==3.19.0). Instead, ase>=3.22.1 should be used.
\n", "The issue with the specific functionalities used in the *create_reconstructed_positions* function is that when using
\n", @@ -448,12 +465,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "
\n", - "This functionality uses recent features of ase which demands an environment that is currently not supported
\n", + "
\n", + "This functionality uses recent features of ase which demands an environment that is not necessarily supported
\n", "by NOMAD OASIS. As the here exemplified settings for this example are configured to represent an environment
\n", - "matching close to NOMAD users who are interested in this developer functionality should do the following:
\n", + "matching one which is close to NOMAD, users who are interested in this dev functionality should do the following:
\n", "Run this example in a standalone environment where ase is upgraded to the latest version and then use
\n", "the generated NeXus files either as is or upload them to NOMAD OASIS.
\n", + "If the above-mentioned cell detects e.g. that a recent version of ase was installed
\n", + "(e.g. >3.22.x) then the code in the following cell can be executed without issues.
\n", "
" ] }, @@ -465,7 +484,7 @@ }, "outputs": [], "source": [ - "# ! dataconverter --reader apm --nxdl NXapm --input-file synthesize1 --output apm.case0.nxs" + "! dataconverter --reader apm --nxdl NXapm --input-file synthesize1 --output apm.case0.nxs" ] }, { @@ -496,7 +515,7 @@ "metadata": {}, "source": [ "### Contact person for the apm reader and related examples in FAIRmat:\n", - "Markus Kühbach, 2023/05
\n", + "Markus Kühbach, 2023/08/31
\n", "\n", "### Funding\n", "FAIRmat is a consortium on research data management which is part of the German NFDI.
\n", @@ -527,7 +546,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/examples/apm/apm.oasis.specific.yaml b/examples/apm/apm.oasis.specific.yaml new file mode 100644 index 000000000..82394f07e --- /dev/null +++ b/examples/apm/apm.oasis.specific.yaml @@ -0,0 +1 @@ +location: Leoben diff --git a/examples/apm/eln_data_apm.yaml b/examples/apm/eln_data_apm.yaml index 11e29ced4..ddd67ebcf 100644 --- a/examples/apm/eln_data_apm.yaml +++ b/examples/apm/eln_data_apm.yaml @@ -1,82 +1,118 @@ atom_probe: analysis_chamber_pressure: unit: torr - value: 1.0e-10 + value: 2.0e-10 control_software_program: IVAS - control_software_program__attr_version: 3.6.4 - fabrication_capabilities: n/a - fabrication_identifier: n/a + control_software_program__attr_version: 3.6.8 + fabrication_identifier: '12' fabrication_model: LEAP3000 - fabrication_vendor: AMETEK/Cameca + fabrication_vendor: Cameca + field_of_view: + unit: nm + value: 20. flight_path_length: unit: m - value: 0.9 - instrument_name: LEAP 3000 - ion_detector_model: cameca - ion_detector_name: none + value: 1.2 + instrument_name: LEAP + ion_detector_model: n/a + ion_detector_name: n/a ion_detector_serial_number: n/a ion_detector_type: mcp_dld - local_electrode_name: electrode 1 + local_electrode_name: L1 + location: Denton pulser: - laser_source_name: laser - laser_source_power: - unit: W - value: 2.0e-08 - laser_source_pulse_energy: - unit: J - value: 1.2e-11 - laser_source_wavelength: - unit: m - value: 4.8e-07 - pulse_fraction: 0.1 + laser_source: + - name: laser1 + power: + unit: nW + value: 24.0 + pulse_energy: + unit: pJ + value: 24.0 + wavelength: + unit: nm + value: 355.0 + - name: laser2 + power: + unit: nW + value: 12.0 + pulse_energy: + unit: pJ + value: 12.0 + wavelength: + unit: nm + value: 254.0 + pulse_fraction: 0.8 pulse_frequency: unit: kHz - value: 250 - pulse_mode: laser + value: 250.0 + pulse_mode: laser_and_voltage reflectron_applied: true - specimen_monitoring_detection_rate: 0.6 + specimen_monitoring_detection_rate: 0.8 specimen_monitoring_initial_radius: unit: nm - value: 30 + value: 12.0 specimen_monitoring_shank_angle: unit: ° - value: 5 + value: 5.0 stage_lab_base_temperature: unit: K - value: 30 + value: 20.0 status: success entry: - attr_version: nexus-fairmat-proposal successor of 9636feecb79bb32b828b1a9804269573256d7696 - definition: NXapm - end_time: '2022-09-22T20:00:00+00:00' - experiment_description: some details for nomad, ODS steel precipitates for testing - a developmental clustering algorithm called OPTICS. - experiment_identifier: R31-06365-v02 + experiment_description: '

Normal

+ +

Bold

+ +

Italics

' + experiment_identifier: Si test + start_time: '2023-06-11T11:20:00+00:00' + end_time: '2023-06-11T11:20:00+00:00' + run_number: '2121' operation_mode: apt - program: IVAS - program__attr_version: 3.6.4 - run_number: '6365' - start_time: '2022-09-20T20:00:00+00:00' ranging: program: IVAS - program__attr_version: 3.6.4 + program__attr_version: 3.6.8 reconstruction: crystallographic_calibration: n/a - parameter: kf = 1.8, ICF = 1.02, Vat = 60 at/nm^3 + parameter: kf = 1.8, icf = 3.3 program: IVAS - program__attr_version: 3.6.4 - protocol_name: cameca + program__attr_version: 3.6.8 + protocol_name: bas +sample: + composition: + - Mo + - Al 12 +- 3 + - B 50 ppm +- 12 + - C 3.6 + grain_diameter: + unit: µm + value: 200.0 + grain_diameter_error: + unit: µm + value: 50.0 + heat_treatment_quenching_rate: + unit: K / s + value: 150.0 + heat_treatment_quenching_rate_error: + unit: K / s + value: 10.0 + heat_treatment_temperature: + unit: K + value: 600.0 + heat_treatment_temperature_error: + unit: K + value: 20.0 specimen: - atom_types: - - Fe - - Cr - - Y - - O - description: ODS steel, i.e. material with Y2O3 dispersoids - name: ODS-Specimen 1 - preparation_date: '2022-09-12T20:01:00+00:00' - sample_history: undocumented - short_title: ODS + alias: Si + description: '

normal

+ +

bold

+ +

italics

' + is_polycrystalline: true + name: usa_denton_smith_si + preparation_date: '2023-06-11T12:51:00+00:00' user: -- name: Jing Wang -- name: Daniel Schreiber +- {} +- {} diff --git a/examples/ellipsometry/eln_data.yaml b/examples/ellipsometry/eln_data.yaml index 70b708ef3..f20f75861 100644 --- a/examples/ellipsometry/eln_data.yaml +++ b/examples/ellipsometry/eln_data.yaml @@ -5,7 +5,7 @@ Data: data_software/version: '3.882' data_type: Psi/Delta spectrum_type: wavelength - spectrum_unit: Angstroms + spectrum_unit: angstrom Instrument: Beam_path: Detector: @@ -58,9 +58,6 @@ colnames: - Delta - err.Psi - err.Delta -definition: NXellipsometry -definition/@url: https://github.com/FAIRmat-NFDI/nexus_definitions/blob/fairmat/contributed_definitions/NXellipsometry.nxdl.xml -definition/@version: 0.0.2 derived_parameter_type: depolarization experiment_description: RC2 scan on 2nm SiO2 on Si in air experiment_identifier: exp-ID diff --git a/examples/em_nion/Write.NXem_nion.Example.1.ipynb b/examples/em_nion/Write.NXem_nion.Example.1.ipynb index af08fdd0e..0d48dea69 100644 --- a/examples/em_nion/Write.NXem_nion.Example.1.ipynb +++ b/examples/em_nion/Write.NXem_nion.Example.1.ipynb @@ -88,7 +88,15 @@ "metadata": {}, "outputs": [], "source": [ - "! wget https://www.zenodo.org/record/7986279/files/ger_berlin_haas_nionswift_multimodal.zip\n", + "! wget https://www.zenodo.org/record/7986279/files/ger_berlin_haas_nionswift_multimodal.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "zp.ZipFile(\"ger_berlin_haas_nionswift_multimodal.zip\").extractall(path=\"\", members=None, pwd=None)" ] }, @@ -240,7 +248,7 @@ "metadata": {}, "source": [ "### Contact person for the em_nion reader and related examples in FAIRmat:\n", - "Markus Kühbach, 2023/05
\n", + "Markus Kühbach, 2023/08/31
\n", "\n", "### Funding\n", "FAIRmat is a consortium on research data management which is part of the German NFDI.
\n", @@ -271,7 +279,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/examples/em_om/Write.NXem_ebsd.Example.1.ipynb b/examples/em_om/Write.NXem_ebsd.Example.1.ipynb index dd62925fb..7f5afeb6e 100644 --- a/examples/em_om/Write.NXem_ebsd.Example.1.ipynb +++ b/examples/em_om/Write.NXem_ebsd.Example.1.ipynb @@ -259,11 +259,13 @@ "cell_type": "code", "execution_count": null, "metadata": { + "scrolled": true, "tags": [] }, "outputs": [], "source": [ "#parser-nexus/tests/data/tools/dataconverter/readers/em_om/\n", + "import numpy as np\n", "eln_data_file_name = [\"eln_data_em_om.yaml\"]\n", "input_data_file_name = [\"PrcShanghaiShi.EBSPs70deg.zip\",\n", " \"H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\",\n", @@ -273,7 +275,7 @@ " \"em_om.case2.nxs\",\n", " \"em_om.case3e.nxs\",\n", " \"em_om.case4.nxs\"]\n", - "for case_id in [4]: # [0, 1, 2, 3]:\n", + "for case_id in np.arange(0, 3 + 1):\n", " ELN = eln_data_file_name[0]\n", " INPUT = input_data_file_name[case_id]\n", " OUTPUT = output_file_name[case_id]\n", @@ -305,10 +307,10 @@ "source": [ "# H5Web(OUTPUT)\n", "H5Web(\"em_om.case0.nxs\")\n", - "H5Web(\"em_om.case1.nxs\")\n", - "H5Web(\"em_om.case2.nxs\")\n", - "H5Web(\"em_om.case3e.nxs\")\n", - "H5Web(\"em_om.case4.nxs\")" + "# H5Web(\"em_om.case1.nxs\")\n", + "# H5Web(\"em_om.case2.nxs\")\n", + "# H5Web(\"em_om.case3e.nxs\")\n", + "# H5Web(\"em_om.case4.nxs\")" ] }, { @@ -338,7 +340,7 @@ "metadata": {}, "source": [ "### Contact person for the apm reader and related examples in FAIRmat:\n", - "Markus Kühbach, 2023/05
\n", + "Markus Kühbach, 2023/08/31
\n", "\n", "### Funding\n", "FAIRmat is a consortium on research data management which is part of the German NFDI.
\n", @@ -362,7 +364,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/examples/em_spctrscpy/Write.NXem.Example.1.ipynb b/examples/em_spctrscpy/Write.NXem.Example.1.ipynb index 61b0f33d3..3b57b7f9f 100644 --- a/examples/em_spctrscpy/Write.NXem.Example.1.ipynb +++ b/examples/em_spctrscpy/Write.NXem.Example.1.ipynb @@ -239,9 +239,9 @@ "outputs": [], "source": [ "# H5Web(OUTPUT)\n", - "# H5Web(\"em_sp.case1.nxs\")\n", + "H5Web(\"em_sp.case1.nxs\")\n", "# H5Web(\"em_sp.case2.nxs\")\n", - "H5Web(\"em_sp.case3.nxs\")" + "# H5Web(\"em_sp.case3.nxs\")" ] }, { @@ -305,7 +305,7 @@ "metadata": {}, "source": [ "### Contact person for the apm reader and related examples in FAIRmat:\n", - "Markus Kühbach, 2023/05
\n", + "Markus Kühbach, 2023/08/31
\n", "\n", "### Funding\n", "FAIRmat is a consortium on research data management which is part of the German NFDI.
\n", @@ -336,7 +336,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/examples/json_map/README.md b/examples/json_map/README.md new file mode 100644 index 000000000..788cb6890 --- /dev/null +++ b/examples/json_map/README.md @@ -0,0 +1,36 @@ +# JSON Map Reader + +## What is this reader? + +This reader is designed to allow users of pynxtools to convert their existing data with the help of a map file. The map file tells the reader what to pick from your data files and convert them to FAIR NeXus files. The following formats are supported as input files: +* HDF5 (any extension works i.e. h5, hdf5, nxs, etc) +* JSON +* Python Dict Objects Pickled with [pickle](https://docs.python.org/3/library/pickle.html). These can contain [xarray.DataArray](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.html) objects as well as regular Python types and Numpy types. + +It accepts any NXDL file that you like as long as your mapping file contains all the fields. +Please use the --generate-template function of the dataconverter to create a .mapping.json file. + +```console +user@box:~$ dataconverter --nxdl NXmynxdl --generate-template > mynxdl.mapping.json +``` +##### Details on the [mapping.json](/pynxtools/dataconverter/readers/json_map/README.md#the-mappingjson-file) file. + +## How to run these examples? + +### Automatically merge partial NeXus files +```console +user@box:~$ dataconverter --nxdl NXiv_temp --input-file voltage_and_temperature.nxs --input-file current.nxs --output auto_merged.nxs +``` + +### Map and copy over data to new NeXus file +```console +user@box:~$ dataconverter --nxdl NXiv_temp --mapping merge_copied.mapping.json --input-file voltage_and_temperature.nxs --input-file current.nxs --output merged_copied.nxs +``` + +### Map and link over data to new NeXus file +```console +user@box:~$ dataconverter --nxdl NXiv_temp --mapping merge_linked.mapping.json --input-file voltage_and_temperature.nxs --input-file current.nxs --output merged_linked.nxs +``` + +## Contact person in FAIRmat for this reader +Sherjeel Shabih diff --git a/examples/json_map/merge_copied.mapping.json b/examples/json_map/merge_copied.mapping.json new file mode 100644 index 000000000..bba897874 --- /dev/null +++ b/examples/json_map/merge_copied.mapping.json @@ -0,0 +1,35 @@ +{ + "/@default": "entry", + "/ENTRY[entry]/DATA[data]/current": "/entry/data/current", + "/ENTRY[entry]/DATA[data]/current_295C": "/entry/data/current_295C", + "/ENTRY[entry]/DATA[data]/current_300C": "/entry/data/current_300C", + "/ENTRY[entry]/DATA[data]/current_305C": "/entry/data/current_305C", + "/ENTRY[entry]/DATA[data]/current_310C": "/entry/data/current_310C", + "/ENTRY[entry]/DATA[data]/temperature": "/entry/data/temperature", + "/ENTRY[entry]/DATA[data]/voltage": "/entry/data/voltage", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/voltage_controller/calibration_time": "/entry/instrument/environment/voltage_controller/calibration_time", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/voltage_controller/run_control": "/entry/instrument/environment/voltage_controller/run_control", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/voltage_controller/value": "/entry/instrument/environment/voltage_controller/value", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/temperature_controller/calibration_time": 
"/entry/instrument/environment/temperature_controller/calibration_time", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/temperature_controller/run_control": "/entry/instrument/environment/temperature_controller/run_control", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/temperature_controller/value": "/entry/instrument/environment/temperature_controller/value", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/current_sensor/calibration_time": "/entry/instrument/environment/current_sensor/calibration_time", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/current_sensor/run_control": "/entry/instrument/environment/current_sensor/run_control", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/current_sensor/value": "/entry/instrument/environment/current_sensor/value", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/independent_controllers": ["voltage_controller", "temperature_control"], + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/measurement_sensors": ["current_sensor"], + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/NXpid[heating_pid]/description": "/entry/instrument/environment/heating_pid/description", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/NXpid[heating_pid]/setpoint": "/entry/instrument/environment/heating_pid/setpoint", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/NXpid[heating_pid]/K_p_value": "/entry/instrument/environment/heating_pid/K_p_value", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/NXpid[heating_pid]/K_i_value": "/entry/instrument/environment/heating_pid/K_i_value", + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/NXpid[heating_pid]/K_d_value": "/entry/instrument/environment/heating_pid/K_d_value", + "/ENTRY[entry]/PROCESS[process]/program": "Bluesky", + "/ENTRY[entry]/PROCESS[process]/program/@version": "1.6.7", + "/ENTRY[entry]/SAMPLE[sample]/name": "super", + "/ENTRY[entry]/SAMPLE[sample]/atom_types": "Si, C", + "/ENTRY[entry]/definition": "NXiv_temp", + "/ENTRY[entry]/definition/@version": "1", + "/ENTRY[entry]/experiment_identifier": "dbdfed37-35ed-4aee-a465-aaa0577205b1", + "/ENTRY[entry]/experiment_description": "A simple IV temperature experiment.", + "/ENTRY[entry]/start_time": "2022-05-30T16:37:03.909201+02:00" +} \ No newline at end of file diff --git a/examples/json_map/merge_linked.mapping.json b/examples/json_map/merge_linked.mapping.json new file mode 100644 index 000000000..47ede8b92 --- /dev/null +++ b/examples/json_map/merge_linked.mapping.json @@ -0,0 +1,25 @@ +{ + "/@default": "entry", + "/ENTRY[entry]/DATA[data]/current": {"link": "current.nxs:/entry/data/current"}, + "/ENTRY[entry]/DATA[data]/current_295C": {"link": "current.nxs:/entry/data/current_295C"}, + "/ENTRY[entry]/DATA[data]/current_300C": {"link": "current.nxs:/entry/data/current_300C"}, + "/ENTRY[entry]/DATA[data]/current_305C": {"link": "current.nxs:/entry/data/current_305C"}, + "/ENTRY[entry]/DATA[data]/current_310C": {"link": "current.nxs:/entry/data/current_310C"}, + "/ENTRY[entry]/DATA[data]/temperature": {"link": "voltage_and_temperature.nxs:/entry/data/temperature"}, + "/ENTRY[entry]/DATA[data]/voltage": {"link": "voltage_and_temperature.nxs:/entry/data/voltage"}, + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/voltage_controller": {"link": "voltage_and_temperature.nxs:/entry/instrument/environment/voltage_controller"}, + 
"/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/temperature_controller": {"link": "voltage_and_temperature.nxs:/entry/instrument/environment/temperature_controller"}, + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/current_sensor": {"link": "current.nxs:/entry/instrument/environment/current_sensor"}, + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/independent_controllers": ["voltage_controller", "temperature_control"], + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/measurement_sensors": ["current_sensor"], + "/ENTRY[entry]/INSTRUMENT[instrument]/ENVIRONMENT[environment]/NXpid[heating_pid]": {"link": "voltage_and_temperature.nxs:/entry/instrument/environment/heating_pid"}, + "/ENTRY[entry]/PROCESS[process]/program": "Bluesky", + "/ENTRY[entry]/PROCESS[process]/program/@version": "1.6.7", + "/ENTRY[entry]/SAMPLE[sample]/name": "super", + "/ENTRY[entry]/SAMPLE[sample]/atom_types": "Si, C", + "/ENTRY[entry]/definition": "NXiv_temp", + "/ENTRY[entry]/definition/@version": "1", + "/ENTRY[entry]/experiment_identifier": "dbdfed37-35ed-4aee-a465-aaa0577205b1", + "/ENTRY[entry]/experiment_description": "A simple IV temperature experiment.", + "/ENTRY[entry]/start_time": "2022-05-30T16:37:03.909201+02:00" +} \ No newline at end of file diff --git a/examples/sts/README.md b/examples/sts/README.md new file mode 100644 index 000000000..eb2c53482 --- /dev/null +++ b/examples/sts/README.md @@ -0,0 +1,32 @@ +# STS Reader +***Note: Though the reader name is STS reader it also supports STM type experiment. This is the first version of the reader according to the NeXus application definition [NXsts](https://github.com/FAIRmat-NFDI/nexus_definitions/blob/fairmat/contributed_definitions/NXsts.nxdl.xml) which is a generic template of concepts' definition for STS and STM experiments. Later on, both application definitions and readers specific to the STM, STS and AFM will be available. To stay upto date keep visiting this page time to time. From now onwards we will mention STS referring both STM and STS.*** + +Main goal of STS Reader is to transform different file formats from diverse STS lab into STS community standard [STS application definition](https://github.com/FAIRmat-NFDI/nexus_definitions/blob/fairmat/contributed_definitions/NXsts.nxdl.xml), community defined template that define indivisual concept associated with STS experiment constructed by SPM community. +## STS Example +It has diverse examples from several versions (Generic 5e and Generic 4.5) of Nanonis software for STS experiments at [https://gitlab.mpcdf.mpg.de](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/-/tree/develop/docker/sts). But, to utilize that examples one must have an account at https://gitlab.mpcdf.mpg.de. If still you want to try the examples from the sts reader out, please reach out to [Rubel Mozumder](mozumder@physik.hu-berlin.de) or the docker container (discussed below). + +To get a detailed overview of the sts reader implementation visit [pynxtools](https://github.com/FAIRmat-NFDI/pynxtools/tree/master/pynxtools/dataconverter/readers/sts). + +## STS deocker image +STS docker image contains all prerequisite tools (e.g. jupyter-notebook) and library to run STS reader. To use the image user needs to [install docker engine](https://docs.docker.com/engine/install/). 
+ +STS Image: `gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/sts-jupyter:latest` + +To run the STS image as a docker container, copy the code below into a file `docker-compose.yaml` + +```docker +# docker-compose.yaml + +version: "3.9" + +services: + sts: + image: gitlab-registry.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/sts-jupyter:latest + ports: + - 8888:8888 + volumes: + - ./example:/home/jovyan/work_dir + working_dir: /home/jovyan/work_dir +``` + +and launch it from the same directory with the `docker compose up` command. diff --git a/pynxtools/__init__.py b/pynxtools/__init__.py index 2290aef3b..12b6f64ba 100644 --- a/pynxtools/__init__.py +++ b/pynxtools/__init__.py @@ -18,3 +18,71 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import os +import re +from datetime import datetime +from glob import glob +from typing import Union + +from pynxtools._build_wrapper import get_vcs_version +from pynxtools.definitions.dev_tools.globals.nxdl import get_nxdl_version + +MAIN_BRANCH_NAME = "fairmat" + + +def _build_version(tag: str, distance: int, node: str, dirty: bool) -> str: + """ + Builds the version string for a given set of git states. + This resembles `no-guess-dev` + `node-and-date` behavior from setuptools_scm. + """ + if distance == 0 and not dirty: + return f"{tag}" + + dirty_appendix = datetime.now().strftime(".d%Y%m%d") if dirty else "" + return f"{tag}.post1.dev{distance}+{node}{dirty_appendix}" + + +def format_version(version: str) -> str: + """ + Formats the git describe version string into the local format. + """ + version_parts = version.split("-") + + return _build_version( + version_parts[0], + int(version_parts[1]), + version_parts[2], + len(version_parts) == 4 and version_parts[3] == "dirty", + ) + + +def get_nexus_version() -> str: + """ + The version of the Nexus standard and the NeXus Definition language + based on git tags and commits + """ + version = get_vcs_version() + + if version is not None: + return format_version(version) + + version_file = os.path.join(os.path.dirname(__file__), "nexus-version.txt") + + if not os.path.exists(version_file): + # We are in the limbo, just get the nxdl version from nexus definitions + return format_version(get_nxdl_version()) + + with open(version_file, encoding="utf-8") as vfile: + return format_version(vfile.read().strip()) + + +def get_nexus_version_hash() -> str: + """ + Gets the git hash from the nexus version string + """ + version = re.search(r"g([a-z0-9]+)", get_nexus_version()) + + if version is None: + return MAIN_BRANCH_NAME + + return version.group(1) diff --git a/pynxtools/_build_wrapper.py b/pynxtools/_build_wrapper.py new file mode 100644 index 000000000..d7788860d --- /dev/null +++ b/pynxtools/_build_wrapper.py @@ -0,0 +1,71 @@ +""" +Build wrapper for setuptools to create a nexus-version.txt file +containing the nexus definitions version. 
+""" +import os +from subprocess import CalledProcessError, run +from typing import Optional + +from setuptools import build_meta as _orig +from setuptools.build_meta import * # pylint: disable=wildcard-import,unused-wildcard-import + + +def get_vcs_version(tag_match="*[0-9]*") -> Optional[str]: + """ + The version of the Nexus standard and the NeXus Definition language + based on git tags and commits + """ + try: + return ( + run( + [ + "git", + "describe", + "--dirty", + "--tags", + "--long", + "--match", + tag_match, + ], + cwd=os.path.join(os.path.dirname(__file__), "../pynxtools/definitions"), + check=True, + capture_output=True, + ) + .stdout.decode("utf-8") + .strip() + ) + except (FileNotFoundError, CalledProcessError): + return None + + +def _write_version_to_metadata(): + version = get_vcs_version() + if version is None or not version: + return + + with open( + os.path.join(os.path.dirname(__file__), "nexus-version.txt"), + "w+", + encoding="utf-8", + ) as file: + file.write(version) + + +# pylint: disable=function-redefined +def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): + """ + PEP 517 compliant build wheel hook. + This is a wrapper for setuptools and adds a nexus version file. + """ + _write_version_to_metadata() + return _orig.build_wheel(wheel_directory, config_settings, metadata_directory) + + +# pylint: disable=function-redefined +def build_sdist(sdist_directory, config_settings=None): + """ + PEP 517 compliant build sdist hook. + This is a wrapper for setuptools and adds a nexus version file. + """ + _write_version_to_metadata() + return _orig.build_sdist(sdist_directory, config_settings) diff --git a/pynxtools/dataconverter/README.md b/pynxtools/dataconverter/README.md index 617c2de1f..f8d600f41 100644 --- a/pynxtools/dataconverter/README.md +++ b/pynxtools/dataconverter/README.md @@ -23,7 +23,7 @@ Usage: dataconverter [OPTIONS] Options: --input-file TEXT The path to the input data file to read. (Repeat for more than one file.) - --reader [apm|ellips|em_nion|em_spctrscpy|example|hall|json_map|json_yml|mpes|rii_database|transmission|xps] + --reader [apm|ellips|em_nion|em_om|em_spctrscpy|example|hall|json_map|json_yml|mpes|rii_database|sts|transmission|xps] The reader to use. default="example" --nxdl TEXT The name of the NXDL file to use without extension. @@ -35,9 +35,28 @@ Options: checking the documentation. --params-file FILENAME Allows to pass a .yaml file with all the parameters the converter supports. + --undocumented Shows a log output for all undocumented + fields + --mapping TEXT Takes a .mapping.json file and + converts data from given input files. --help Show this message and exit. ``` +#### Merge partial NeXus files into one + +```console +user@box:~$ dataconverter --nxdl nxdl --input-file partial1.nxs --input-file partial2.nxs +``` + +#### Map an HDF5/JSON/(Python Dict pickled in a pickle file) + +```console +user@box:~$ dataconverter --nxdl nxdl --input-file any_data.hdf5 --mapping my_custom_map.mapping.json +``` + +#### You can find actual examples with data files at [`examples/json_map`](../../examples/json_map/). 
+ + #### Use with multiple input files ```console diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py index f63e782e2..46c9af7eb 100644 --- a/pynxtools/dataconverter/convert.py +++ b/pynxtools/dataconverter/convert.py @@ -22,22 +22,26 @@ import logging import os import sys -from shutil import copyfile -from typing import List, Tuple +from typing import List, Tuple, Optional import xml.etree.ElementTree as ET import click import yaml - from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter import helpers from pynxtools.dataconverter.writer import Writer from pynxtools.dataconverter.template import Template from pynxtools.nexus import nexus +if sys.version_info >= (3, 10): + from importlib.metadata import entry_points +else: + from importlib_metadata import entry_points + logger = logging.getLogger(__name__) # pylint: disable=C0103 +UNDOCUMENTED = 9 logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler(sys.stdout)) @@ -47,8 +51,18 @@ def get_reader(reader_name) -> BaseReader: path_prefix = f"{os.path.dirname(__file__)}{os.sep}" if os.path.dirname(__file__) else "" path = os.path.join(path_prefix, "readers", reader_name, "reader.py") spec = importlib.util.spec_from_file_location("reader.py", path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) # type: ignore[attr-defined] + try: + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore[attr-defined] + except FileNotFoundError as exc: + # pylint: disable=unexpected-keyword-arg + importlib_module = entry_points(group='pynxtools.reader') + if ( + importlib_module + and reader_name in map(lambda ep: ep.name, entry_points(group='pynxtools.reader')) + ): + return importlib_module[reader_name].load() + raise ValueError(f"The reader, {reader_name}, was not found.") from exc return module.READER # type: ignore[attr-defined] @@ -62,96 +76,150 @@ def get_names_of_all_readers() -> List[str]: index_of_readers_folder_name = file.rindex(f"readers{os.sep}") + len(f"readers{os.sep}") index_of_last_path_sep = file.rindex(os.sep) all_readers.append(file[index_of_readers_folder_name:index_of_last_path_sep]) - return all_readers - - -def append_template_data_to_acopy_of_one_inputfile(input: Tuple[str], output: str): - """Helper function to build outputfile based on one inputfile plus template data.""" - # There are cases in which one of the inputfiles may contain already NeXus content - # typically because the scientific software tool generates such a file - # matching a specific application definition and thus additional pieces of information - # inside the template (e.g. 
from an ELN) should just be added to that inputfile - - # one may or not in this case demand for a verification of that input file - # before continuing, currently we ignore this verification - for file_name in input: - if file_name[0:file_name.rfind('.')] != output: - continue - else: - print(f"Creating the output {output} based the this input {file_name}\n" \ - f"NeXus content in {file_name} is currently not verified !!!") - copyfile(file_name, output) - - print(f"Template data will be added to the output {output}...\n" \ - f"Only these template data will be verified !!!") - # when calling dataconverter with - # --input-file processed.nxs.mtex - # --output processed.nxs - # -- io_mode="r+" - # these calls can be executed repetitively as the first step is - # the copying operation of *.nxs.mtex to *.nxs and then the access on the *.nxs - # file using h5py is then read/write without regeneration - # a repeated call has factually the same effect as the dataconverter - # used to work i.e. using h5py with "w" would regenerate the *.nxs if already existent - # this is a required to assure that repetitive calls of the ELN save function - # in NOMAD do not end up with write conflicts on the *.nxs i.e. the output file - # when the dataconverter is called - return - - -# pylint: disable=too-many-arguments -def convert(input_file: Tuple[str], - reader: str, - nxdl: str, - output: str, - io_mode: str = "w", - generate_template: bool = False, - fair: bool = False, - **kwargs): - """The conversion routine that takes the input parameters and calls the necessary functions.""" + plugins = list(map(lambda ep: ep.name, entry_points(group='pynxtools.reader'))) + return all_readers + plugins + + +def get_nxdl_root_and_path(nxdl: str): + """Get xml root element and file path from nxdl name e.g. NXapm. + + Parameters + ---------- + nxdl: str + Name of nxdl file e.g. NXapm from NXapm.nxdl.xml. + + Returns + ------- + ET.root + Root element of nxdl file. + str + Path of nxdl file. + + Raises + ------ + FileNotFoundError + Error if no file with the given nxdl name is found. 
+ """ # Reading in the NXDL and generating a template definitions_path = nexus.get_nexus_definitions_path() if nxdl == "NXtest": - nxdl_path = os.path.join( + nxdl_f_path = os.path.join( f"{os.path.abspath(os.path.dirname(__file__))}/../../", "tests", "data", "dataconverter", "NXtest.nxdl.xml") elif nxdl == "NXroot": - nxdl_path = os.path.join(definitions_path, "base_classes", "NXroot.nxdl.xml") + nxdl_f_path = os.path.join(definitions_path, "base_classes", "NXroot.nxdl.xml") else: - nxdl_path = os.path.join(definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml") - if not os.path.exists(nxdl_path): - nxdl_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml") - if not os.path.exists(nxdl_path): + nxdl_f_path = os.path.join(definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml") + if not os.path.exists(nxdl_f_path): + nxdl_f_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml") + if not os.path.exists(nxdl_f_path): + nxdl_f_path = os.path.join(definitions_path, "base_classes", f"{nxdl}.nxdl.xml") + if not os.path.exists(nxdl_f_path): raise FileNotFoundError(f"The nxdl file, {nxdl}, was not found.") - nxdl_root = ET.parse(nxdl_path).getroot() + return ET.parse(nxdl_f_path).getroot(), nxdl_f_path + + +def transfer_data_into_template(input_file, + reader, nxdl_name, + nxdl_root: Optional[ET.Element] = None, + **kwargs): + """Transfer parse and merged data from input experimental file, config file and eln. + + Experimental and eln files will be parsed and finally will be merged into template. + Before returning the template validate the template data. + + Parameters + ---------- + input_file : Union[tuple[str], str] + Tuple of files or file + reader: str + Name of reader such as xps + nxdl_name : str + Root name of nxdl file, e.g. NXmpes from NXmpes.nxdl.xml + nxdl_root : ET.element + Root element of nxdl file, otherwise provide nxdl_name + + Returns + ------- + Template + Template filled with data from raw file and eln file. + + """ + if nxdl_root is None: + nxdl_root, _ = get_nxdl_root_and_path(nxdl=nxdl_name) template = Template() helpers.generate_template_from_nxdl(nxdl_root, template) - if generate_template: - logger.info(template) - return - # Setting up all the input data if isinstance(input_file, str): input_file = (input_file,) + bulletpoint = "\n\u2022 " logger.info("Using %s reader to convert the given files: %s ", reader, bulletpoint.join((" ", *input_file))) data_reader = get_reader(reader) - if not (nxdl in data_reader.supported_nxdls or "*" in data_reader.supported_nxdls): + if not (nxdl_name in data_reader.supported_nxdls or "*" in data_reader.supported_nxdls): raise NotImplementedError("The chosen NXDL isn't supported by the selected reader.") data = data_reader().read( # type: ignore[operator] template=Template(template), file_paths=input_file, - **kwargs, + **kwargs ) - helpers.validate_data_dict(template, data, nxdl_root) + return data + + +# pylint: disable=too-many-arguments,too-many-locals +def convert(input_file: Tuple[str, ...], + reader: str, + nxdl: str, + output: str, + generate_template: bool = False, + fair: bool = False, + undocumented: bool = False, + **kwargs): + """The conversion routine that takes the input parameters and calls the necessary functions. + + Parameters + ---------- + input_file : Tuple[str] + Tuple of files or file + reader: str + Name of reader such as xps + nxdl : str + Root name of nxdl file, e.g. NXmpes for NXmpes.nxdl.xml + output : str + Output file name. 
+ generate_template : bool, default False + True if user wants template in logger info. + fair : bool, default False + If True, a warning is given that there are undocumented paths + in the template. + undocumented : bool, default False + If True, an undocumented warning is given. + + Returns + ------- + None. + """ + + nxdl_root, nxdl_f_path = get_nxdl_root_and_path(nxdl) + + if generate_template: + template = Template() + helpers.generate_template_from_nxdl(nxdl_root, template) + logger.info(template) + return + data = transfer_data_into_template(input_file=input_file, reader=reader, + nxdl_name=nxdl, nxdl_root=nxdl_root, + **kwargs) + if undocumented: + logger.setLevel(UNDOCUMENTED) if fair and data.undocumented.keys(): logger.warning("There are undocumented paths in the template. This is not acceptable!") return @@ -159,13 +227,13 @@ def convert(input_file: Tuple[str], for path in data.undocumented.keys(): if "/@default" in path: continue - logger.warning("The path, %s, is being written but has no documentation.", path) - - if io_mode == "r+": - append_template_data_to_acopy_of_one_inputfile( - input=input_file, output=output) - - Writer(data=data, nxdl_path=nxdl_path, output_path=output, io_mode=io_mode).write() + logger.log( + UNDOCUMENTED, + "The path, %s, is being written but has no documentation.", + path + ) + helpers.add_default_root_attributes(data=data, filename=os.path.basename(output)) + Writer(data=data, nxdl_f_path=nxdl_f_path, output_path=output).write() logger.info("The output file generated: %s", output) @@ -187,7 +255,7 @@ def parse_params_file(params_file): ) @click.option( '--reader', - default='example', + default='json_map', type=click.Choice(get_names_of_all_readers(), case_sensitive=False), help='The reader to use. default="example"' ) @@ -202,11 +270,6 @@ def parse_params_file(params_file): default='output.nxs', help='The path to the output NeXus file to be generated.' ) -@click.option( - '--io_mode', - default='w', - help='I/O mode on the output NeXus file, see h5py doc for mode details, default="w".' -) @click.option( '--generate-template', is_flag=True, @@ -218,21 +281,33 @@ def parse_params_file(params_file): is_flag=True, default=False, help='Let the converter know to be stricter in checking the documentation.' -) # pylint: disable=too-many-arguments +) @click.option( '--params-file', type=click.File('r'), default=None, help='Allows to pass a .yaml file with all the parameters the converter supports.' ) -def convert_cli(input_file: Tuple[str], +@click.option( + '--undocumented', + is_flag=True, + default=False, + help='Shows a log output for all undocumented fields' +) +@click.option( + '--mapping', + help='Takes a .mapping.json file and converts data from given input files.' 
+)
+# pylint: disable=too-many-arguments
+def convert_cli(input_file: Tuple[str, ...],
+                reader: str,
+                nxdl: str,
+                output: str,
+                generate_template: bool,
+                fair: bool,
+                params_file: str,
+                undocumented: bool,
+                mapping: str):
     """The CLI entrypoint for the convert function"""
     if params_file:
         try:
@@ -248,7 +323,10 @@ def convert_cli(input_file: Tuple[str],
             sys.tracebacklimit = 0
             raise IOError("\nError: Please supply an NXDL file with the option:"
                           " --nxdl <path to NXDL>")
-    convert(input_file, reader, nxdl, output, io_mode, generate_template, fair)
+    if mapping:
+        reader = "json_map"
+        input_file = input_file + tuple([mapping])
+    convert(input_file, reader, nxdl, output, generate_template, fair, undocumented)
 
 
 if __name__ == '__main__':
diff --git a/pynxtools/dataconverter/hdfdict.py b/pynxtools/dataconverter/hdfdict.py
index 4edb68259..a4bbf87e6 100644
--- a/pynxtools/dataconverter/hdfdict.py
+++ b/pynxtools/dataconverter/hdfdict.py
@@ -123,7 +123,16 @@ def _recurse(hdfobject, datadict):
         elif isinstance(value, h5py.Dataset):
             if not lazy:
                 value = unpacker(value)
-                datadict[key] = value
+                datadict[key] = (
+                    value.asstr()[...]
+                    if h5py.check_string_dtype(value.dtype)
+                    else value
+                )
+
+            if "attrs" in dir(value):
+                datadict[key + "@"] = {}
+                for attr, attrval in value.attrs.items():
+                    datadict[key + "@"][attr] = attrval
     return datadict
diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py
index 75a2bc2b9..57d526f4b 100644
--- a/pynxtools/dataconverter/helpers.py
+++ b/pynxtools/dataconverter/helpers.py
@@ -17,17 +17,24 @@
 #
 """Helper functions commonly used by the convert routine."""
 
-from typing import List
+from typing import List, Optional, Any
 from typing import Tuple, Callable, Union
 import re
 import xml.etree.ElementTree as ET
+from datetime import datetime, timezone
+import logging
+import json
 
 import numpy as np
 from ase.data import chemical_symbols
+import h5py
 
+from pynxtools import get_nexus_version, get_nexus_version_hash
 from pynxtools.nexus import nexus
 from pynxtools.nexus.nexus import NxdlAttributeError
 
+logger = logging.getLogger(__name__)
+
 
 def is_a_lone_group(xml_element) -> bool:
     """Checks whether a given group XML element has no field or attributes mentioned"""
@@ -155,6 +162,20 @@ def generate_template_from_nxdl(root, template, path="", nxdl_root=None, nxdl_na
         path_nxdl = convert_data_converter_dict_to_nxdl_path(path)
         list_of_children_to_add = get_all_defined_required_children(path_nxdl, nxdl_name)
         add_inherited_children(list_of_children_to_add, path, nxdl_root, template)
+    # Handling link: a link has a target attribute that stores the absolute path of the
+    # concept to be linked. The writer reads a link from the template in the
+    # format {'link': '<path>:/<to>/<concept>'}
+    elif tag == "link":
+        # NOTE: The code below can be activated once NeXus brings optionality to
+        # links. Until then, a link is considered optional by default.
+ + # optionality = get_required_string(root) + # optional_parent = check_for_optional_parent(path, nxdl_root) + # optionality = "required" if optional_parent == "<>" else "optional" + # if optionality == "optional": + # template.optional_parents.append(optional_parent) + optionality = "optional" + template[optionality][path] = {'link': root.attrib['target']} for child in root: generate_template_from_nxdl(child, template, path, nxdl_root, nxdl_name) @@ -333,7 +354,7 @@ def path_in_data_dict(nxdl_path: str, data: dict) -> Tuple[bool, str]: for key in data.keys(): if nxdl_path == convert_data_converter_dict_to_nxdl_path(key): return True, key - return False, "" + return False, None def check_for_optional_parent(path: str, nxdl_root: ET.Element) -> str: @@ -366,6 +387,8 @@ def all_required_children_are_set(optional_parent_path, data, nxdl_root): """Walks over optional parent's children and makes sure all required ones are set""" optional_parent_path = convert_data_converter_dict_to_nxdl_path(optional_parent_path) for key in data: + if key in data["lone_groups"]: + continue nxdl_key = convert_data_converter_dict_to_nxdl_path(key) if nxdl_key[0:nxdl_key.rfind("/")] == optional_parent_path \ and is_node_required(nxdl_key, nxdl_root) \ @@ -424,7 +447,7 @@ def does_group_exist(path_to_group, data): return False -def ensure_all_required_fields_exist(template, data): +def ensure_all_required_fields_exist(template, data, nxdl_root): """Checks whether all the required fields are in the returned data object.""" for path in template["required"]: entry_name = get_name_from_data_dict_entry(path[path.rindex('/') + 1:]) @@ -432,9 +455,18 @@ def ensure_all_required_fields_exist(template, data): continue nxdl_path = convert_data_converter_dict_to_nxdl_path(path) is_path_in_data_dict, renamed_path = path_in_data_dict(nxdl_path, data) - if path in template["lone_groups"] and does_group_exist(path, data): - continue + renamed_path = path if renamed_path is None else renamed_path + if path in template["lone_groups"]: + opt_parent = check_for_optional_parent(path, nxdl_root) + if opt_parent != "<>": + if does_group_exist(opt_parent, data) and not does_group_exist(renamed_path, data): + raise ValueError(f"The required group, {path}, hasn't been supplied" + f" while its optional parent, {path}, is supplied.") + continue + if not does_group_exist(renamed_path, data): + raise ValueError(f"The required group, {path}, hasn't been supplied.") + continue if not is_path_in_data_dict or data[renamed_path] is None: raise ValueError(f"The data entry corresponding to {path} is required " f"and hasn't been supplied by the reader.") @@ -475,11 +507,10 @@ def validate_data_dict(template, data, nxdl_root: ET.Element): nxdl_path_to_elm: dict = {} # Make sure all required fields exist. 
- ensure_all_required_fields_exist(template, data) + ensure_all_required_fields_exist(template, data, nxdl_root) try_undocumented(data, nxdl_root) for path in data.get_documented().keys(): - # print(f"{path}") if data[path] is not None: entry_name = get_name_from_data_dict_entry(path[path.rindex('/') + 1:]) nxdl_path = convert_data_converter_dict_to_nxdl_path(path) @@ -559,12 +590,38 @@ def convert_to_hill(atoms_typ): return atom_list + list(atoms_typ) +def add_default_root_attributes(data, filename): + """ + Takes a dict/Template and adds NXroot fields/attributes that are inherently available + """ + def update_and_warn(key: str, value: str): + if key in data and data[key] != value: + logger.warning( + "The NXroot entry '%s' (value: %s) should not be populated by the reader. " + "This is overwritten by the actually used value '%s'", + key, data[key], value + ) + data[key] = value + + update_and_warn("/@NX_class", "NXroot") + update_and_warn("/@file_name", filename) + update_and_warn("/@file_time", str(datetime.now(timezone.utc).astimezone())) + update_and_warn("/@file_update_time", data["/@file_time"]) + update_and_warn( + "/@NeXus_repository", + "https://github.com/FAIRmat-NFDI/nexus_definitions/" + f"blob/{get_nexus_version_hash()}" + ) + update_and_warn("/@NeXus_version", get_nexus_version()) + update_and_warn("/@HDF5_version", '.'.join(map(str, h5py.h5.get_libversion()))) + update_and_warn("/@h5py_version", h5py.__version__) + + def extract_atom_types(formula, mode='hill'): """Extract atom types form chemical formula.""" - atom_types: set = set() element: str = "" - # tested with "(C38H54S4)n(NaO2)5(CH4)NH3B" + for char in formula: if char.isalpha(): if char.isupper() and element == "": @@ -594,3 +651,77 @@ def extract_atom_types(formula, mode='hill'): return convert_to_hill(atom_types) return atom_types + + +# pylint: disable=too-many-branches +def transform_to_intended_dt(str_value: Any) -> Optional[Any]: + """Transform string to the intended data type, if not then return str_value. + + E.g '2.5E-2' will be transfor into 2.5E-2 + tested with: '2.4E-23', '28', '45.98', 'test', ['59', '3.00005', '498E-34'], + '23 34 444 5000', None + with result: 2.4e-23, 28, 45.98, test, [5.90000e+01 3.00005e+00 4.98000e-32], + np.array([23 34 444 5000]), None + NOTE: add another arg in this func for giving 'hint' what kind of data like + numpy array or list + Parameters + ---------- + str_value : str + Data from other format that comes as string e.g. string of list. 
+
+    Returns
+    -------
+    Union[str, int, float, np.ndarray]
+        Converted data type
+    """
+
+    symbol_list_for_data_separation = [';', ' ']
+    transformed: Any = None
+
+    if isinstance(str_value, list):
+        try:
+            transformed = np.array(str_value, dtype=np.float64)
+            return transformed
+        except ValueError:
+            pass
+
+    elif isinstance(str_value, np.ndarray):
+        return str_value
+    elif isinstance(str_value, str):
+        try:
+            transformed = int(str_value)
+        except ValueError:
+            try:
+                transformed = float(str_value)
+            except ValueError:
+                if '[' in str_value and ']' in str_value:
+                    transformed = json.loads(str_value)
+        if transformed is not None:
+            return transformed
+        for sym in symbol_list_for_data_separation:
+            if sym in str_value:
+                parts = str_value.split(sym)
+                modified_parts: List = []
+                for part in parts:
+                    part = transform_to_intended_dt(part)
+                    if isinstance(part, (int, float)):
+                        modified_parts.append(part)
+                    else:
+                        return str_value
+                return transform_to_intended_dt(modified_parts)
+
+    return str_value
+
+
+def nested_dict_to_slash_separated_path(nested_dict: dict,
+                                        flattened_dict: dict,
+                                        parent_path=''):
+    """Convert a nested dict into slash-separated paths up to a certain level."""
+    sep = '/'
+
+    for key, val in nested_dict.items():
+        path = parent_path + sep + key
+        if isinstance(val, dict):
+            nested_dict_to_slash_separated_path(val, flattened_dict, path)
+        else:
+            flattened_dict[path] = val
diff --git a/pynxtools/dataconverter/readers/apm/map_concepts/apm_deployment_specifics_to_nx_map.py b/pynxtools/dataconverter/readers/apm/map_concepts/apm_deployment_specifics_to_nx_map.py
new file mode 100644
index 000000000..d4cdf84f6
--- /dev/null
+++ b/pynxtools/dataconverter/readers/apm/map_concepts/apm_deployment_specifics_to_nx_map.py
@@ -0,0 +1,52 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Dict mapping values for a specific deployed config of NOMAD OASIS + ELN + apm reader."""

+# pylint: disable=no-member,line-too-long

+# By design, the NOMAD OASIS specific examples show how different tools and
+# services can be coupled and implemented so that they work together.
+# We currently assume that the ELN provides all those pieces of information needed
+# to instantiate a NeXus data artifact which technology-partner-specific files or
+# database blobs cannot deliver. Effectively, a reader uses the generic eln_data.yaml
+# ELN output to fill in these missing pieces of information, while the typically
+# heavy data (tensors etc.) are translated and written from the technology-partner files.
+# For large application definitions this can lead to a practical inconvenience:
+# the ELN that has to be exposed to the user is complex and has many fields to fill
+# in, just to assure that all information is included in the ELN output and thus
+# consumable by the dataconverter.
+# From the perspective of a specific lab, where a specific version of an ELN provided
+# by or running in addition to NOMAD OASIS is used, many pieces of information might
+# never change, or administrators may not wish to expose them via the end-user ELN,
+# to reduce the complexity for end users and make entering repetitive information obsolete.

+# This is the scenario for which deployment-specific mapping shines:
+# parsing of deployment-specific details in the apm reader is currently implemented
+# such that it executes after reading the generic ELN data; eventually available
+# entries in the template get overwritten.

+from pynxtools.dataconverter.readers.apm.utils.apm_versioning \
+    import NX_APM_ADEF_NAME, NX_APM_ADEF_VERSION, NX_APM_EXEC_NAME, NX_APM_EXEC_VERSION


+NxApmDeploymentSpecificInput \
+    = {"/ENTRY[entry*]/@version": f"{NX_APM_ADEF_VERSION}",
+       "/ENTRY[entry*]/definition": f"{NX_APM_ADEF_NAME}",
+       "/ENTRY[entry*]/PROGRAM[program1]/program": f"{NX_APM_EXEC_NAME}",
+       "/ENTRY[entry*]/PROGRAM[program1]/program/@version": f"{NX_APM_EXEC_VERSION}",
+       "/ENTRY[entry*]/atom_probe/location": {"fun": "load_from", "terms": "location"}}
diff --git a/pynxtools/dataconverter/readers/apm/map_concepts/apm_eln_to_nx_map.py b/pynxtools/dataconverter/readers/apm/map_concepts/apm_eln_to_nx_map.py
new file mode 100644
index 000000000..76c763f47
--- /dev/null
+++ b/pynxtools/dataconverter/readers/apm/map_concepts/apm_eln_to_nx_map.py
@@ -0,0 +1,109 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# +"""Dict mapping custom schema instances from eln_data.yaml file on concepts in NXapm.""" + +NxApmElnInput = {"IGNORE": {"fun": "load_from_dict_list", "terms": "em_lab/detector"}, + "IGNORE": {"fun": "load_from", "terms": "em_lab/ebeam_column/aberration_correction/applied"}, + "IGNORE": {"fun": "load_from_dict_list", "terms": "em_lab/ebeam_column/aperture_em"}, + "/ENTRY[entry*]/PROGRAM[program2]/program": {"fun": "load_from", "terms": "atom_probe/control_software_program"}, + "/ENTRY[entry*]/PROGRAM[program2]/program/@version": {"fun": "load_from", "terms": "atom_probe/control_software_program__attr_version"}, + "/ENTRY[entry*]/experiment_identifier": {"fun": "load_from", "terms": "entry/experiment_identifier"}, + "/ENTRY[entry*]/start_time": {"fun": "load_from", "terms": "entry/start_time"}, + "/ENTRY[entry*]/end_time": {"fun": "load_from", "terms": "entry/end_time"}, + "/ENTRY[entry*]/run_number": {"fun": "load_from", "terms": "entry/run_number"}, + "/ENTRY[entry*]/operation_mode": {"fun": "load_from", "terms": "entry/operation_mode"}, + "/ENTRY[entry*]/experiment_description": {"fun": "load_from", "terms": "entry/experiment_description"}, + "IGNORE": {"fun": "load_from", "terms": "sample/alias"}, + "/ENTRY[entry*]/sample/grain_diameter": {"fun": "load_from", "terms": "sample/grain_diameter/value"}, + "/ENTRY[entry*]/sample/grain_diameter/@units": {"fun": "load_from", "terms": "sample/grain_diameter/unit"}, + "/ENTRY[entry*]/sample/grain_diameter_error": {"fun": "load_from", "terms": "sample/grain_diameter_error/value"}, + "/ENTRY[entry*]/sample/grain_diameter_error/@units": {"fun": "load_from", "terms": "sample/grain_diameter_error/unit"}, + "/ENTRY[entry*]/sample/heat_treatment_quenching_rate": {"fun": "load_from", "terms": "sample/heat_treatment_quenching_rate/value"}, + "/ENTRY[entry*]/sample/heat_treatment_quenching_rate/@units": {"fun": "load_from", "terms": "sample/heat_treatment_quenching_rate/unit"}, + "/ENTRY[entry*]/sample/heat_treatment_quenching_rate_error": {"fun": "load_from", "terms": "sample/heat_treatment_quenching_rate_error/value"}, + "/ENTRY[entry*]/sample/heat_treatment_quenching_rate_error/@units": {"fun": "load_from", "terms": "sample/heat_treatment_quenching_rate_error/unit"}, + "/ENTRY[entry*]/sample/heat_treatment_temperature": {"fun": "load_from", "terms": "sample/heat_treatment_temperature/value"}, + "/ENTRY[entry*]/sample/heat_treatment_temperature/@units": {"fun": "load_from", "terms": "sample/heat_treatment_temperature/unit"}, + "/ENTRY[entry*]/sample/heat_treatment_temperature_error": {"fun": "load_from", "terms": "sample/heat_treatment_temperature_error/value"}, + "/ENTRY[entry*]/sample/heat_treatment_temperature_error/@units": {"fun": "load_from", "terms": "sample/heat_treatment_temperature_error/unit"}, + "/ENTRY[entry*]/specimen/name": {"fun": "load_from", "terms": "specimen/name"}, + "/ENTRY[entry*]/specimen/preparation_date": {"fun": "load_from", "terms": "specimen/preparation_date"}, + "IGNORE": {"fun": "load_from", "terms": "specimen/sample_history"}, + "/ENTRY[entry*]/specimen/alias": {"fun": "load_from", "terms": "specimen/alias"}, + "/ENTRY[entry*]/specimen/is_polycrystalline": {"fun": "load_from", "terms": "specimen/is_polycrystalline"}, + "/ENTRY[entry*]/specimen/description": {"fun": "load_from", "terms": "specimen/description"}, + "/ENTRY[entry*]/atom_probe/FABRICATION[fabrication]/identifier": {"fun": "load_from", "terms": "atom_probe/fabrication_identifier"}, + "/ENTRY[entry*]/atom_probe/FABRICATION[fabrication]/model": {"fun": 
"load_from", "terms": "atom_probe/fabrication_model"}, + "/ENTRY[entry*]/atom_probe/FABRICATION[fabrication]/vendor": {"fun": "load_from", "terms": "atom_probe/fabrication_vendor"}, + "/ENTRY[entry*]/atom_probe/analysis_chamber/pressure": {"fun": "load_from", "terms": "atom_probe/analysis_chamber_pressure/value"}, + "/ENTRY[entry*]/atom_probe/analysis_chamber/pressure/@units": {"fun": "load_from", "terms": "atom_probe/analysis_chamber_pressure/unit"}, + "/ENTRY[entry*]/atom_probe/control_software/PROGRAM[program1]/program": {"fun": "load_from", "terms": "atom_probe/control_software_program"}, + "/ENTRY[entry*]/atom_probe/control_software/PROGRAM[program1]/program/@version": {"fun": "load_from", "terms": "atom_probe/control_software_program__attr_version"}, + "/ENTRY[entry*]/atom_probe/field_of_view": {"fun": "load_from", "terms": "atom_probe/field_of_view/value"}, + "/ENTRY[entry*]/atom_probe/field_of_view/@units": {"fun": "load_from", "terms": "atom_probe/field_of_view/unit"}, + "/ENTRY[entry*]/atom_probe/flight_path_length": {"fun": "load_from", "terms": "atom_probe/flight_path_length/value"}, + "/ENTRY[entry*]/atom_probe/flight_path_length/@units": {"fun": "load_from", "terms": "atom_probe/flight_path_length/unit"}, + "/ENTRY[entry*]/atom_probe/instrument_name": {"fun": "load_from", "terms": "atom_probe/instrument_name"}, + "/ENTRY[entry*]/atom_probe/ion_detector/model": {"fun": "load_from", "terms": "atom_probe/ion_detector_model"}, + "/ENTRY[entry*]/atom_probe/ion_detector/name": {"fun": "load_from", "terms": "atom_probe/ion_detector_name"}, + "/ENTRY[entry*]/atom_probe/ion_detector/serial_number": {"fun": "load_from", "terms": "atom_probe/ion_detector_serial_number"}, + "/ENTRY[entry*]/atom_probe/ion_detector/type": {"fun": "load_from", "terms": "atom_probe/ion_detector_type"}, + "/ENTRY[entry*]/atom_probe/local_electrode/name": {"fun": "load_from", "terms": "atom_probe/local_electrode_name"}, + "/ENTRY[entry*]/atom_probe/location": {"fun": "load_from", "terms": "atom_probe/location"}, + "/ENTRY[entry*]/atom_probe/REFLECTRON[reflectron]/applied": {"fun": "load_from", "terms": "atom_probe/reflectron_applied"}, + "/ENTRY[entry*]/atom_probe/stage_lab/base_temperature": {"fun": "load_from", "terms": "atom_probe/stage_lab_base_temperature/value"}, + "/ENTRY[entry*]/atom_probe/stage_lab/base_temperature/@units": {"fun": "load_from", "terms": "atom_probe/stage_lab_base_temperature/unit"}, + "/ENTRY[entry*]/atom_probe/specimen_monitoring/detection_rate": {"fun": "load_from", "terms": "atom_probe/specimen_monitoring_detection_rate/value"}, + "/ENTRY[entry*]/atom_probe/specimen_monitoring/detection_rate/@units": {"fun": "load_from", "terms": "atom_probe/specimen_monitoring_detection_rate/unit"}, + "/ENTRY[entry*]/atom_probe/specimen_monitoring/initial_radius": {"fun": "load_from", "terms": "atom_probe/specimen_monitoring_initial_radius/value"}, + "/ENTRY[entry*]/atom_probe/specimen_monitoring/initial_radius/@units": {"fun": "load_from", "terms": "atom_probe/specimen_monitoring_initial_radius/unit"}, + "/ENTRY[entry*]/atom_probe/specimen_monitoring/shank_angle": {"fun": "load_from", "terms": "atom_probe/specimen_monitoring_shank_angle/value"}, + "/ENTRY[entry*]/atom_probe/specimen_monitoring/shank_angle/@units": {"fun": "load_from", "terms": "atom_probe/specimen_monitoring_shank_angle/unit"}, + "/ENTRY[entry*]/atom_probe/status": {"fun": "load_from", "terms": "atom_probe/status"}, + "/ENTRY[entry*]/atom_probe/pulser/pulse_fraction": {"fun": "load_from", "terms": 
"atom_probe/pulser/pulse_fraction"}, + "/ENTRY[entry*]/atom_probe/pulser/pulse_frequency": {"fun": "load_from", "terms": "atom_probe/pulser/pulse_frequency/value"}, + "/ENTRY[entry*]/atom_probe/pulser/pulse_frequency/@units": {"fun": "load_from", "terms": "atom_probe/pulser/pulse_frequency/unit"}, + "/ENTRY[entry*]/atom_probe/pulser/pulse_mode": {"fun": "load_from", "terms": "atom_probe/pulser/pulse_mode"}, + "/ENTRY[entry*]/atom_probe/ranging/PROGRAM[program1]/program": {"fun": "load_from", "terms": "atom_probe/ranging/program"}, + "/ENTRY[entry*]/atom_probe/ranging/PROGRAM[program1]/program/@version": {"fun": "load_from", "terms": "atom_probe/ranging/program__attr_version"}, + "/ENTRY[entry*]/atom_probe/reconstruction/PROGRAM[program1]/program": {"fun": "load_from", "terms": "atom_probe/reconstruction/program"}, + "/ENTRY[entry*]/atom_probe/reconstruction/PROGRAM[program1]/program/@version": {"fun": "load_from", "terms": "atom_probe/reconstruction/program__attr_version"}, + "/ENTRY[entry*]/atom_probe/reconstruction/crystallographic_calibration": {"fun": "load_from", "terms": "atom_probe/reconstruction/crystallographic_calibration"}, + "/ENTRY[entry*]/atom_probe/reconstruction/parameter": {"fun": "load_from", "terms": "atom_probe/reconstruction/parameter"}, + "/ENTRY[entry*]/atom_probe/reconstruction/protocol_name": {"fun": "load_from", "terms": "atom_probe/reconstruction/protocol_name"}} + +# NeXus concept specific mapping tables which require special treatment as the current +# NOMAD OASIS custom schema implementation delivers them as a list of dictionaries instead +# of a directly flattenable list of keyword, value pairs + +NxUserFromListOfDict = {"/ENTRY[entry*]/USER[user*]/name": {"fun": "load_from", "terms": "name"}, + "/ENTRY[entry*]/USER[user*]/affiliation": {"fun": "load_from", "terms": "affiliation"}, + "/ENTRY[entry*]/USER[user*]/address": {"fun": "load_from", "terms": "address"}, + "/ENTRY[entry*]/USER[user*]/email": {"fun": "load_from", "terms": "email"}, + "/ENTRY[entry*]/USER[user*]/orcid": {"fun": "load_from", "terms": "orcid"}, + "/ENTRY[entry*]/USER[user*]/orcid_platform": {"fun": "load_from", "terms": "orcid_platform"}, + "/ENTRY[entry*]/USER[user*]/telephone_number": {"fun": "load_from", "terms": "telephone_number"}, + "/ENTRY[entry*]/USER[user*]/role": {"fun": "load_from", "terms": "role"}, + "/ENTRY[entry*]/USER[user*]/social_media_name": {"fun": "load_from", "terms": "social_media_name"}, + "/ENTRY[entry*]/USER[user*]/social_media_platform": {"fun": "load_from", "terms": "social_media_platform"}} + +# LEAP6000 can use up to two lasers and voltage pulsing (both at the same time?) 
+NxPulserFromListOfDict = {"/ENTRY[entry*]/atom_probe/pulser/SOURCE[source*]/name": {"fun": "load_from", "terms": "name"}, + "/ENTRY[entry*]/atom_probe/pulser/SOURCE[source*]/power": {"fun": "load_from", "terms": "power"}, + "/ENTRY[entry*]/atom_probe/pulser/SOURCE[source*]/pulse_energy": {"fun": "load_from", "terms": "pulse_energy"}, + "/ENTRY[entry*]/atom_probe/pulser/SOURCE[source*]/wavelength": {"fun": "load_from", "terms": "wavelength"}} diff --git a/pynxtools/dataconverter/readers/apm/reader.py b/pynxtools/dataconverter/readers/apm/reader.py index 651100fd1..2e946257f 100644 --- a/pynxtools/dataconverter/readers/apm/reader.py +++ b/pynxtools/dataconverter/readers/apm/reader.py @@ -23,22 +23,25 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader -from pynxtools.dataconverter.readers.apm.utils.apm_use_case_selector \ +from pynxtools.dataconverter.readers.apm.utils.apm_define_io_cases \ import ApmUseCaseSelector -from pynxtools.dataconverter.readers.apm.utils.apm_generic_eln_io \ +from pynxtools.dataconverter.readers.apm.utils.apm_load_deployment_specifics \ + import NxApmNomadOasisConfigurationParser + +from pynxtools.dataconverter.readers.apm.utils.apm_load_generic_eln \ import NxApmNomadOasisElnSchemaParser -from pynxtools.dataconverter.readers.apm.utils.apm_reconstruction_io \ +from pynxtools.dataconverter.readers.apm.utils.apm_load_reconstruction \ import ApmReconstructionParser -from pynxtools.dataconverter.readers.apm.utils.apm_ranging_io \ +from pynxtools.dataconverter.readers.apm.utils.apm_load_ranging \ import ApmRangingDefinitionsParser -from pynxtools.dataconverter.readers.apm.utils.apm_nexus_plots \ +from pynxtools.dataconverter.readers.apm.utils.apm_create_nx_default_plots \ import apm_default_plot_generator -from pynxtools.dataconverter.readers.apm.utils.apm_example_data \ +from pynxtools.dataconverter.readers.apm.utils.apm_generate_synthetic_data \ import ApmCreateExampleData # this apm parser combines multiple sub-parsers @@ -103,6 +106,12 @@ def read(self, print("No input file defined for eln data !") return {} + print("Parse (meta)data coming from a configuration that specific OASIS...") + if len(case.cfg) == 1: + nx_apm_cfg = NxApmNomadOasisConfigurationParser(case.cfg[0], entry_id) + nx_apm_cfg.report(template) + # having and or using a deployment-specific configuration is optional + print("Parse (numerical) data and metadata from ranging definitions file...") if len(case.reconstruction) == 1: nx_apm_recon = ApmReconstructionParser(case.reconstruction[0], entry_id) @@ -120,13 +129,10 @@ def read(self, print("Create NeXus default plottable data...") apm_default_plot_generator(template, n_entries) - debugging = False - if debugging is True: - print("Reporting state of template before passing to HDF5 writing...") - for keyword in template.keys(): - print(keyword) - # print(type(template[keyword])) - # print(template[keyword]) + # print("Reporting state of template before passing to HDF5 writing...") + # for keyword in template.keys(): + # print(keyword) + # print(template[keyword]) print("Forward instantiated template to the NXS writer...") return template diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_nexus_plots.py b/pynxtools/dataconverter/readers/apm/utils/apm_create_nx_default_plots.py similarity index 100% rename from pynxtools/dataconverter/readers/apm/utils/apm_nexus_plots.py rename to pynxtools/dataconverter/readers/apm/utils/apm_create_nx_default_plots.py diff --git 
a/pynxtools/dataconverter/readers/apm/utils/apm_use_case_selector.py b/pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py similarity index 65% rename from pynxtools/dataconverter/readers/apm/utils/apm_use_case_selector.py rename to pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py index 2819281ba..26a73a1e9 100644 --- a/pynxtools/dataconverter/readers/apm/utils/apm_use_case_selector.py +++ b/pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py @@ -36,11 +36,21 @@ def __init__(self, file_paths: Tuple[str] = None): eln injects additional metadata and eventually numerical data. """ self.case: Dict[str, list] = {} + self.eln: List[str] = [] + self.cfg: List[str] = [] + self.reconstruction: List[str] = [] + self.ranging: List[str] = [] self.is_valid = False self.supported_mime_types = [ "pos", "epos", "apt", "rrng", "rng", "txt", "yaml", "yml"] for mime_type in self.supported_mime_types: self.case[mime_type] = [] + + self.sort_files_by_mime_type(file_paths) + self.check_validity_of_file_combinations() + + def sort_files_by_mime_type(self, file_paths: Tuple[str] = None): + """Sort all input-files based on their mimetype to prepare validity check.""" for file_name in file_paths: index = file_name.lower().rfind(".") if index >= 0: @@ -48,15 +58,23 @@ def __init__(self, file_paths: Tuple[str] = None): if suffix in self.supported_mime_types: if file_name not in self.case[suffix]: self.case[suffix].append(file_name) - recon_input = 0 - range_input = 0 + + def check_validity_of_file_combinations(self): + """Check if this combination of types of files is supported.""" + recon_input = 0 # reconstruction relevant file e.g. POS, ePOS, APT + range_input = 0 # ranging definition file, e.g. RNG, RRNG + other_input = 0 # generic ELN or OASIS-specific configurations for mime_type, value in self.case.items(): if mime_type in ["pos", "epos", "apt"]: recon_input += len(value) - if mime_type in ["rrng", "rng", "txt"]: + elif mime_type in ["rrng", "rng", "txt"]: range_input += len(value) - eln_input = len(self.case["yaml"]) + len(self.case["yml"]) - if (recon_input == 1) and (range_input == 1) and (eln_input == 1): + elif mime_type in ["yaml", "yml"]: + other_input += len(value) + else: + continue + + if (recon_input == 1) and (range_input == 1) and (1 <= other_input <= 2): self.is_valid = True self.reconstruction: List[str] = [] self.ranging: List[str] = [] @@ -64,6 +82,12 @@ def __init__(self, file_paths: Tuple[str] = None): self.reconstruction += self.case[mime_type] for mime_type in ["rrng", "rng", "txt"]: self.ranging += self.case[mime_type] - self.eln: List[str] = [] + yml: List[str] = [] for mime_type in ["yaml", "yml"]: - self.eln += self.case[mime_type] + yml += self.case[mime_type] + for entry in yml: + if entry.endswith(".oasis.specific.yaml") \ + or entry.endswith(".oasis.specific.yml"): + self.cfg += [entry] + else: + self.eln += [entry] diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_example_data.py b/pynxtools/dataconverter/readers/apm/utils/apm_generate_synthetic_data.py similarity index 99% rename from pynxtools/dataconverter/readers/apm/utils/apm_example_data.py rename to pynxtools/dataconverter/readers/apm/utils/apm_generate_synthetic_data.py index 47c63f8f3..c34d30f7b 100644 --- a/pynxtools/dataconverter/readers/apm/utils/apm_example_data.py +++ b/pynxtools/dataconverter/readers/apm/utils/apm_generate_synthetic_data.py @@ -45,7 +45,7 @@ from pynxtools.dataconverter.readers.apm.utils.apm_versioning \ import NX_APM_ADEF_NAME, NX_APM_ADEF_VERSION, 
NX_APM_EXEC_NAME, NX_APM_EXEC_VERSION -from pynxtools.dataconverter.readers.apm.utils.apm_ranging_io \ +from pynxtools.dataconverter.readers.apm.utils.apm_load_ranging \ import add_unknown_iontype diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_generic_eln_io.py b/pynxtools/dataconverter/readers/apm/utils/apm_generic_eln_io.py deleted file mode 100644 index 41677a1eb..000000000 --- a/pynxtools/dataconverter/readers/apm/utils/apm_generic_eln_io.py +++ /dev/null @@ -1,409 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Wrapping multiple parsers for vendor files with NOMAD OASIS/ELN/YAML metadata.""" - -# pylint: disable=no-member - -import flatdict as fd - -import numpy as np - -import yaml - -from ase.data import chemical_symbols - -from pynxtools.dataconverter.readers.apm.utils.apm_versioning \ - import NX_APM_ADEF_NAME, NX_APM_ADEF_VERSION, NX_APM_EXEC_NAME, NX_APM_EXEC_VERSION - - -class NxApmNomadOasisElnSchemaParser: # pylint: disable=too-few-public-methods - """Parse eln_data.yaml dump file content generated from a NOMAD OASIS YAML. - - This parser implements a design where an instance of a specific NOMAD - custom schema ELN template is used to fill pieces of information which - are typically not contained in files from technology partners - (e.g. pos, epos, apt, rng, rrng, ...). Until now, this custom schema and - the NXapm application definition do not use a fully harmonized vocabulary. - Therefore, the here hardcoded implementation is needed which maps specifically - named pieces of information from the custom schema instance on named fields - in an instance of NXapm - - The functionalities in this ELN YAML parser do not check if the - instantiated template yields an instance which is compliant NXapm. - Instead, this task is handled by the generic part of the dataconverter - during the verification of the template dictionary. 
- """ - - def __init__(self, file_name: str, entry_id: int): - print(f"Extracting data from ELN file: {file_name}") - if (file_name.rsplit('/', 1)[-1].startswith("eln_data") - or file_name.startswith("eln_data")) and entry_id > 0: - self.entry_id = entry_id - self.file_name = file_name - with open(self.file_name, "r", encoding="utf-8") as stream: - self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter=":") - else: - self.entry_id = 1 - self.file_name = "" - self.yml = {} - - def parse_entry(self, template: dict) -> dict: - """Copy data in entry section.""" - # print("Parsing entry...") - trg = f"/ENTRY[entry{self.entry_id}]/" - src = "entry" - if isinstance(self.yml[src], fd.FlatDict): - if (self.yml[f"{src}:attr_version"] == NX_APM_ADEF_VERSION) \ - and (self.yml[f"{src}:definition"] == NX_APM_ADEF_NAME): - template[f"{trg}@version"] = NX_APM_ADEF_VERSION - template[f"{trg}definition"] = NX_APM_ADEF_NAME - template[f"{trg}PROGRAM[program1]/program"] = NX_APM_EXEC_NAME - template[f"{trg}PROGRAM[program1]/program/@version"] = NX_APM_EXEC_VERSION - if ("program" in self.yml[src].keys()) \ - and ("program__attr_version" in self.yml[src].keys()): - template[f"{trg}PROGRAM[program2]/program"] \ - = self.yml[f"{src}:program"] - template[f"{trg}PROGRAM[program2]/program/@version"] \ - = self.yml[f"{src}:program__attr_version"] - - required_field_names = ["experiment_identifier", "run_number", - "operation_mode"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - - optional_field_names = ["start_time", "end_time", - "experiment_description", "experiment_documentation"] - for field_name in optional_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_user(self, template: dict) -> dict: - """Copy data in user section.""" - # print("Parsing user...") - src = "user" - if "user" in self.yml.keys(): - if len(self.yml[src]) >= 1: - user_id = 1 - for user_list in self.yml[src]: - trg = f"/ENTRY[entry{self.entry_id}]/USER[user{user_id}]/" - - required_field_names = ["name"] - for field_name in required_field_names: - if field_name in user_list.keys(): - template[f"{trg}{field_name}"] = user_list[field_name] - - optional_field_names = ["email", "affiliation", "address", - "orcid", "orcid_platform", - "telephone_number", "role", - "social_media_name", "social_media_platform"] - for field_name in optional_field_names: - if field_name in user_list.keys(): - template[f"{trg}{field_name}"] = user_list[field_name] - user_id += 1 - - return template - - def parse_specimen(self, template: dict) -> dict: - """Copy data in specimen section.""" - # print("Parsing sample...") - src = "specimen" - trg = f"/ENTRY[entry{self.entry_id}]/specimen/" - if isinstance(self.yml[src], fd.FlatDict): - if (isinstance(self.yml[f"{src}:atom_types"], list)) \ - and (len(self.yml[src + ":atom_types"]) >= 1): - atom_types_are_valid = True - for symbol in self.yml[f"{src}:atom_types"]: - valid = isinstance(symbol, str) \ - and (symbol in chemical_symbols) and (symbol != "X") - if valid is False: - atom_types_are_valid = False - break - if atom_types_are_valid is True: - template[f"{trg}atom_types"] \ - = ", ".join(list(self.yml[f"{src}:atom_types"])) - - required_field_names = ["name", "sample_history", "preparation_date"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - 
template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - - optional_field_names = ["short_title", "description"] - for field_name in optional_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_instrument_header(self, template: dict) -> dict: - """Copy data in instrument_header section.""" - # print("Parsing instrument header...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/" - if isinstance(self.yml[src], fd.FlatDict): - required_field_names = ["instrument_name", "status"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - optional_field_names = ["location"] - for field_name in optional_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - - float_field_names = ["flight_path_length", "field_of_view"] - for field_name in float_field_names: - if (f"{field_name}:value" in self.yml[src].keys()) \ - and (f"{field_name}:unit" in self.yml[src].keys()): - template[f"{trg}{field_name}"] \ - = np.float64(self.yml[f"{src}:{field_name}:value"]) - template[f"{trg}{field_name}/@units"] \ - = self.yml[f"{src}:{field_name}:unit"] - - return template - - def parse_fabrication(self, template: dict) -> dict: - """Copy data in fabrication section.""" - # print("Parsing fabrication...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/FABRICATION[fabrication]/" - required_field_names = ["fabrication_vendor", "fabrication_model"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - suffix = field_name.replace("fabrication_", "") - template[f"{trg}{suffix}"] = self.yml[f"{src}:{field_name}"] - - optional_field_names = ["fabrication_identifier", "fabrication_capabilities"] - for field_name in optional_field_names: - if field_name in self.yml[src].keys(): - suffix = field_name.replace("fabrication_", "") - template[f"{trg}{suffix}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_analysis_chamber(self, template: dict) -> dict: - """Copy data in analysis_chamber section.""" - # print("Parsing analysis chamber...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/analysis_chamber/" - float_field_names = ["analysis_chamber_pressure"] - for field_name in float_field_names: - if (f"{field_name}:value" in self.yml[src].keys()) \ - and (f"{field_name}:unit" in self.yml[src].keys()): - suffix = field_name.replace("analysis_chamber_", "") - template[f"{trg}{suffix}"] \ - = np.float64(self.yml[f"{src}:{field_name}:value"]) - template[f"{trg}{suffix}/@units"] = self.yml[f"{src}:{field_name}:unit"] - - return template - - def parse_reflectron(self, template: dict) -> dict: - """Copy data in reflectron section.""" - # print("Parsing reflectron...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/REFLECTRON[reflectron]/" - required_field_names = ["reflectron_applied"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - suffix = field_name.replace("reflectron_", "") - template[f"{trg}{suffix}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_local_electrode(self, template: dict) -> dict: - """Copy data in local_electrode section.""" - # print("Parsing local electrode...") - src = "atom_probe" - trg = 
f"/ENTRY[entry{self.entry_id}]/atom_probe/local_electrode/" - required_field_names = ["local_electrode_name"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - suffix = field_name.replace("local_electrode_", "") - template[f"{trg}{suffix}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_detector(self, template: dict) -> dict: - """Copy data in ion_detector section.""" - # print("Parsing detector...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/ion_detector/" - required_field_names = ["ion_detector_type", "ion_detector_name", - "ion_detector_model", "ion_detector_serial_number"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - suffix = field_name.replace("ion_detector_", "") - template[f"{trg}{suffix}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_stage_lab(self, template: dict) -> dict: - """Copy data in stage lab section.""" - # print("Parsing stage_lab...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/stage_lab/" - if isinstance(self.yml[src], fd.FlatDict): - required_value_fields = ["stage_lab_base_temperature"] - for field_name in required_value_fields: - if (f"{field_name}:value" in self.yml[src].keys()) \ - and (f"{field_name}:unit" in self.yml[src].keys()): - suffix = field_name.replace("stage_lab_", "") - template[f"{trg}{suffix}"] \ - = np.float64(self.yml[f"{src}:{field_name}:value"]) - template[f"{trg}{suffix}/@units"] \ - = self.yml[f"{src}:{field_name}:unit"] - - return template - - def parse_specimen_monitoring(self, template: dict) -> dict: - """Copy data in specimen_monitoring section.""" - # print("Parsing specimen_monitoring...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/specimen_monitoring/" - if isinstance(self.yml[src], fd.FlatDict): - required_field_names = ["specimen_monitoring_detection_rate"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}detection_rate"] \ - = np.float64(self.yml[f"{src}:{field_name}"]) - float_field_names = ["specimen_monitoring_initial_radius", - "specimen_monitoring_shank_angle"] - for float_field_name in float_field_names: - if (f"{float_field_name}:value" in self.yml[src].keys()) \ - and (f"{float_field_name}:unit" in self.yml[src].keys()): - suffix = float_field_name.replace("specimen_monitoring_", "") - template[f"{trg}{suffix}"] \ - = np.float64(self.yml[f"{src}:{float_field_name}:value"]) - template[f"{trg}{suffix}/@units"] \ - = self.yml[f"{src}:{float_field_name}:unit"] - - return template - - def parse_control_software(self, template: dict) -> dict: - """Copy data in control software section.""" - # print("Parsing control software...") - src = "atom_probe" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/control_software/" - if isinstance(self.yml[src], fd.FlatDict): - prefix = "control_software" - if (f"{prefix}_program" in self.yml[src].keys()) \ - and (f"{prefix}_program__attr_version" in self.yml[src].keys()): - template[f"{trg}PROGRAM[program1]/program"] \ - = self.yml[f"{src}:{prefix}_program"] - template[f"{trg}PROGRAM[program1]/program/@version"] \ - = self.yml[f"{src}:{prefix}_program__attr_version"] - - return template - - def parse_pulser(self, template: dict) -> dict: - """Copy data in pulser section.""" - # print("Parsing pulser...") - src = "atom_probe:pulser" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/pulser/" - if isinstance(self.yml[src], fd.FlatDict): - if 
"pulse_mode" in self.yml[src].keys(): - pulse_mode = self.yml[f"{src}:pulse_mode"] - template[f"{trg}pulse_mode"] = pulse_mode - else: # can not parse selectively as pulse_mode was not documented - return template - - if "pulse_fraction" in self.yml[src].keys(): - template[f"{trg}pulse_fraction"] \ - = np.float64(self.yml[f"{src}:pulse_fraction"]) - - float_field_names = ["pulse_frequency"] - for field_name in float_field_names: - if (f"{field_name}:value" in self.yml[src].keys()) \ - and (f"{field_name}:unit" in self.yml[src].keys()): - template[f"{trg}{field_name}"] \ - = np.float64(self.yml[f"{src}:{field_name}:value"]) - template[f"{trg}{field_name}/@units"] \ - = self.yml[f"{src}:{field_name}:unit"] - # additionally required data for laser and laser_and_voltage runs - if pulse_mode != "voltage": - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/" \ - f"pulser/SOURCE[laser_source1]/" - if "laser_source_name" in self.yml[src].keys(): - template[f"{trg}name"] = self.yml[f"{src}:laser_source_name"] - - float_field_names = ["laser_source_wavelength", - "laser_source_power", - "laser_source_pulse_energy"] - for field_name in float_field_names: - if (f"{field_name}:value" in self.yml[src].keys()) \ - and (f"{field_name}:unit" in self.yml[src].keys()): - suffix = field_name.replace("laser_source_", "") - template[f"{trg}{suffix}"] \ - = np.float64(self.yml[f"{src}:{field_name}:value"]) - template[f"{trg}{suffix}/@units"] \ - = self.yml[f"{src}:{field_name}:unit"] - - return template - - def parse_reconstruction(self, template: dict) -> dict: - """Copy data in reconstruction section.""" - # print("Parsing reconstruction...") - src = "reconstruction" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/reconstruction/" - if ("program" in self.yml[src].keys()) \ - and ("program__attr_version" in self.yml[src].keys()): - template[f"{trg}PROGRAM[program1]/program"] \ - = self.yml[f"{src}:program"] - template[f"{trg}PROGRAM[program1]/program/@version"] \ - = self.yml[f"{src}:program__attr_version"] - - required_field_names = ["protocol_name", "parameter", - "crystallographic_calibration"] - for field_name in required_field_names: - if field_name in self.yml[src].keys(): - template[f"{trg}{field_name}"] = self.yml[f"{src}:{field_name}"] - - return template - - def parse_ranging(self, template: dict) -> dict: - """Copy data in ranging section.""" - # print("Parsing ranging...") - src = "ranging" - trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/ranging/" - if ("program" in self.yml[src].keys()) \ - and ("program__attr_version" in self.yml[src].keys()): - template[f"{trg}PROGRAM[program1]/program"] = self.yml[f"{src}:program"] - template[f"{trg}PROGRAM[program1]/program/@version"] \ - = self.yml[f"{src}:program__attr_version"] - - return template - - def report(self, template: dict) -> dict: - """Copy data from self into template the appdef instance.""" - self.parse_entry(template) - self.parse_user(template) - self.parse_specimen(template) - self.parse_instrument_header(template) - self.parse_fabrication(template) - self.parse_analysis_chamber(template) - self.parse_reflectron(template) - self.parse_local_electrode(template) - self.parse_detector(template) - self.parse_stage_lab(template) - self.parse_specimen_monitoring(template) - self.parse_control_software(template) - self.parse_pulser(template) - self.parse_reconstruction(template) - self.parse_ranging(template) - return template diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_load_deployment_specifics.py 
b/pynxtools/dataconverter/readers/apm/utils/apm_load_deployment_specifics.py new file mode 100644 index 000000000..87dc05950 --- /dev/null +++ b/pynxtools/dataconverter/readers/apm/utils/apm_load_deployment_specifics.py @@ -0,0 +1,57 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Load deployment-specific quantities.""" + +# pylint: disable=no-member + +import flatdict as fd + +import yaml + +from pynxtools.dataconverter.readers.apm.map_concepts.apm_deployment_specifics_to_nx_map \ + import NxApmDeploymentSpecificInput + +from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ + import apply_modifier, variadic_path_to_specific_path + + +class NxApmNomadOasisConfigurationParser: # pylint: disable=too-few-public-methods + """Parse deployment specific configuration.""" + + def __init__(self, file_name: str, entry_id: int): + print(f"Extracting data from deployment specific configuration file: {file_name}") + if (file_name.rsplit('/', 1)[-1].endswith(".oasis.specific.yaml") + or file_name.endswith(".oasis.specific.yml")) and entry_id > 0: + self.entry_id = entry_id + self.file_name = file_name + with open(self.file_name, "r", encoding="utf-8") as stream: + self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/") + else: + self.entry_id = 1 + self.file_name = "" + self.yml = {} + + def report(self, template: dict) -> dict: + """Copy data from configuration applying mapping functors.""" + for nx_path, modifier in NxApmDeploymentSpecificInput.items(): + if nx_path not in ("IGNORE", "UNCLEAR"): + trg = variadic_path_to_specific_path(nx_path, [self.entry_id, 1]) + res = apply_modifier(modifier, self.yml) + if res is not None: + template[trg] = res + return template diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_load_generic_eln.py b/pynxtools/dataconverter/readers/apm/utils/apm_load_generic_eln.py new file mode 100644 index 000000000..ed36eec23 --- /dev/null +++ b/pynxtools/dataconverter/readers/apm/utils/apm_load_generic_eln.py @@ -0,0 +1,175 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Wrapping multiple parsers for vendor files with NOMAD OASIS/ELN/YAML metadata.""" + +# pylint: disable=no-member,duplicate-code,too-many-nested-blocks + +import flatdict as fd + +import yaml + +from ase.data import chemical_symbols + +from pynxtools.dataconverter.readers.apm.map_concepts.apm_eln_to_nx_map \ + import NxApmElnInput, NxUserFromListOfDict + +from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ + import variadic_path_to_specific_path, apply_modifier + +from pynxtools.dataconverter.readers.apm.utils.apm_parse_composition_table \ + import parse_composition_table + + +class NxApmNomadOasisElnSchemaParser:  # pylint: disable=too-few-public-methods + """Parse eln_data.yaml dump file content generated from a NOMAD OASIS YAML. + + This parser implements a design where an instance of a specific NOMAD + custom schema ELN template is used to fill pieces of information which + are typically not contained in files from technology partners + (e.g. pos, epos, apt, rng, rrng, ...). Until now, this custom schema and + the NXapm application definition do not use a fully harmonized vocabulary. + Therefore, this hardcoded implementation is needed, which maps specifically + named pieces of information from the custom schema instance onto named + fields in an instance of NXapm. + + The functionalities in this ELN YAML parser do not check if the + instantiated template yields an instance which is compliant with NXapm. + Instead, this task is handled by the generic part of the dataconverter + during the verification of the template dictionary. + """ + + def __init__(self, file_name: str, entry_id: int): + print(f"Extracting data from ELN file: {file_name}") + if (file_name.rsplit('/', 1)[-1].startswith("eln_data") + or file_name.startswith("eln_data")) and entry_id > 0: + self.entry_id = entry_id + self.file_name = file_name + with open(self.file_name, "r", encoding="utf-8") as stream: + self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/") + else: + self.entry_id = 1 + self.file_name = "" + self.yml = {} + + def parse_sample_composition(self, template: dict) -> dict: + """Interpret human-readable ELN input to generate consistent composition table.""" + src = "sample/composition" + if src in self.yml.keys(): + if isinstance(self.yml[src], list): + dct = parse_composition_table(self.yml[src]) + + prfx = f"/ENTRY[entry{self.entry_id}]/sample/" \ + f"CHEMICAL_COMPOSITION[chemical_composition]" + unit = "at.-%"  # the assumed default unit + if "normalization" in dct: + if dct["normalization"] in ["%", "at%", "at-%", "at.-%", "ppm", "ppb"]: + unit = "at.-%" + template[f"{prfx}/normalization"] = "atom_percent" + elif dct["normalization"] in ["wt%", "wt-%", "wt.-%"]: + unit = "wt.-%" + template[f"{prfx}/normalization"] = "weight_percent" + else: + return template + ion_id = 1 + for symbol in chemical_symbols[1::]: + # ase convention, chemical_symbols[0] == "X" + # to use ordinal number for indexing + if symbol in dct: + if isinstance(dct[symbol], tuple) and len(dct[symbol]) == 2: + trg = f"{prfx}/ION[ion{ion_id}]" + template[f"{trg}/name"] = symbol + template[f"{trg}/composition"] = dct[symbol][0] + template[f"{trg}/composition/@units"] = unit + if dct[symbol][1] is not None: + template[f"{trg}/composition_error"] = dct[symbol][1] + template[f"{trg}/composition_error/@units"] = unit + ion_id += 1 + return template + + def parse_user_section(self, template: dict) -> dict: + """Copy data from user section into template.""" + src = "user" + if src in self.yml.keys(): + if
isinstance(self.yml[src], list): + if all(isinstance(entry, dict) for entry in self.yml[src]) is True: + user_id = 1 + # custom schema delivers a list of dictionaries... + for user_dict in self.yml[src]: + # ... for each of them inspect for fields mappable on NeXus + identifier = [self.entry_id, user_id] + # identifier to get instance NeXus path from variadic NeXus path + # try to find all quantities on the left-hand side of the mapping + # table and check if we can find these + for nx_path, modifier in NxUserFromListOfDict.items(): + if nx_path not in ("IGNORE", "UNCLEAR"): + trg = variadic_path_to_specific_path(nx_path, identifier) + res = apply_modifier(modifier, user_dict) + if res is not None: + template[trg] = res + user_id += 1 + return template + + def parse_laser_pulser_details(self, template: dict) -> dict: + """Copy data in pulser section.""" + # additional laser-specific details only relevant when the laser was used + src = "atom_probe/pulser/pulse_mode" + if src in self.yml.keys(): + if self.yml[src] == "voltage": + return template + else: + return template + src = "atom_probe/pulser/laser_source" + if src in self.yml.keys(): + if isinstance(self.yml[src], list): + if all(isinstance(entry, dict) for entry in self.yml[src]) is True: + laser_id = 1 + # custom schema delivers a list of dictionaries... + for laser_dict in self.yml[src]: + # rebuild the target path per entry so that each laser + # gets its own SOURCE group instead of overwriting source1 + trg = f"/ENTRY[entry{self.entry_id}]/atom_probe/pulser" \ + f"/SOURCE[source{laser_id}]" + if "name" in laser_dict.keys(): + template[f"{trg}/name"] = laser_dict["name"] + quantities = ["power", "pulse_energy", "wavelength"] + for quant in quantities: + if (quant in laser_dict.keys()) \ + and isinstance(laser_dict[quant], dict): + if ("value" in laser_dict[quant].keys()) \ + and ("unit" in laser_dict[quant].keys()): + template[f"{trg}/{quant}"] \ + = laser_dict[quant]["value"] + template[f"{trg}/{quant}/@units"] \ + = laser_dict[quant]["unit"] + laser_id += 1 + return template + + def parse_other_sections(self, template: dict) -> dict: + """Copy data from custom schema into template.""" + for nx_path, modifier in NxApmElnInput.items(): + if nx_path not in ("IGNORE", "UNCLEAR"): + trg = variadic_path_to_specific_path(nx_path, [self.entry_id, 1]) + res = apply_modifier(modifier, self.yml) + if res is not None: + template[trg] = res + return template + + def report(self, template: dict) -> dict: + """Copy data from self into the template of the appdef instance.""" + self.parse_sample_composition(template) + self.parse_user_section(template) + self.parse_laser_pulser_details(template) + self.parse_other_sections(template) + return template diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_ranging_io.py b/pynxtools/dataconverter/readers/apm/utils/apm_load_ranging.py similarity index 100% rename from pynxtools/dataconverter/readers/apm/utils/apm_ranging_io.py rename to pynxtools/dataconverter/readers/apm/utils/apm_load_ranging.py diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_reconstruction_io.py b/pynxtools/dataconverter/readers/apm/utils/apm_load_reconstruction.py similarity index 100% rename from pynxtools/dataconverter/readers/apm/utils/apm_reconstruction_io.py rename to pynxtools/dataconverter/readers/apm/utils/apm_load_reconstruction.py diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_parse_composition_table.py b/pynxtools/dataconverter/readers/apm/utils/apm_parse_composition_table.py new file mode 100644 index 000000000..cf8f2bc56 --- /dev/null +++ b/pynxtools/dataconverter/readers/apm/utils/apm_parse_composition_table.py @@ -0,0 +1,179 @@ +# +# Copyright
The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +#     http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Parse human-readable composition info from a set of ELN string text fields.""" + +# pylint: disable=no-member,too-many-branches + +import re + +import numpy as np + +from ase.data import chemical_symbols + + +def parse_human_readable_composition_case_one(symbol): + """Handle specification of matrix or remainder element.""" + return ("define_matrix", symbol, None, None, None) + + +def parse_human_readable_composition_case_two(args, symbol): + """Handle case: element and at.-% composition, no comp. stdev.""" + if args[1] in ["rem", "remainder", "matrix"]: + return ("define_matrix", symbol, None, None, None) + composition = re.match(r"[-+]?(?:\d*\.*\d+)", args[1]) + if composition is not None: + fraction = np.float64(composition[0]) + return ("add_element", symbol, fraction, None, "at.-%") + return (None, None, None, None, None) + + +def parse_human_readable_composition_case_three(human_input, args, symbol): + """Handle case: element with a non-default normalization, no comp. stdev.""" + composition = re.findall(r"[-+]?(?:\d*\.*\d+)", human_input) + if len(composition) == 1: + fraction = np.float64(composition[0]) + normalization = args[2] + if normalization in ["%", "at%", "at-%", "at.-%"]: + return ("add_element", symbol, fraction, None, "at.-%") + if normalization in ["wt%", "wt-%", "wt.-%"]: + return ("add_element", symbol, fraction, None, "wt.-%") + if normalization == "ppm": + return ("add_element", symbol, fraction / 1.0e4, None, "at.-%") + if normalization == "ppb": + return ("add_element", symbol, fraction / 1.0e7, None, "at.-%") + return (None, None, None, None, None) + + +def parse_human_readable_composition_case_four(human_input, symbol): + """Handle case: at.-% normalization with comp. stdev.""" + composition = re.findall(r"[-+]?(?:\d*\.*\d+)", human_input) + composition_error = human_input.count("+-") + if (len(composition) == 2) and (composition_error == 1): + fraction = np.float64(composition[0]) + error = np.float64(composition[1]) + return ("add_element", symbol, fraction, error, "at.-%") + return (None, None, None, None, None) + + +def parse_human_readable_composition_case_five(human_input, args, symbol): + """Handle case: non-default normalization with comp.
stdev.""" + composition = re.findall(r"[-+]?(?:\d*\.*\d+)", human_input) + if (len(composition) == 2) and (human_input.count("+-") == 1): + fraction = np.float64(composition[0]) + error = np.float64(composition[1]) + normalization = args[2] + if normalization in ["%", "at%", "at-%", "at.-%"]: + return ("add_element", symbol, fraction, error, "at.-%") + if normalization in ["wt%", "wt-%", "wt.-%"]: + return ("add_element", symbol, fraction, error, "wt.-%") + if normalization == "ppm": + return ("add_element", symbol, fraction / 1.0e4, error / 1.0e4, "at.-%") + if normalization == "ppb": + return ("add_element", symbol, fraction / 1.0e7, error / 1.0e7, "at.-%") + return (None, None, None, None, None) + + +def parse_human_readable_composition_information(eln_input): + """Identify instruction to parse from eln_input to define composition table.""" + args = eln_input.split(" ") + if len(args) >= 1: + element_symbol = args[0] + # composition value argument fraction is always expected in percent + # i.e. human should have written 98 instead of 0.98! + if (element_symbol != "X") and (element_symbol in chemical_symbols): + # case: "Mo" + if len(args) == 1: + return parse_human_readable_composition_case_one( + element_symbol) + # case: "Mo matrix" or "Mo 98.0", always assuming at.-%! + if len(args) == 2: + return parse_human_readable_composition_case_two( + args, element_symbol) + # case: "Mo 98 wt.-%", selectable at.-%, ppm, ppb, or wt.-%! + if len(args) == 3: + return parse_human_readable_composition_case_three( + eln_input, args, element_symbol) + # case: "Mo 98 +- 2", always assuming at.-%! + if len(args) == 4: + return parse_human_readable_composition_case_four( + eln_input, element_symbol) + # case: "Mo 98 wt.-% +- 2", selectable at.-%, ppm, ppb, or wt.-%!
+ if len(args) == 5: + return parse_human_readable_composition_case_five( + eln_input, args, element_symbol) + return (None, None, None, None, None) + + +def parse_composition_table(composition_list): + """Check if all the entries in the composition list yield a valid composition table.""" + composition_table = {} + # check that there are no contradictions or inconsistencies + for entry in composition_list: + instruction, element, composition, stdev, normalization \ + = parse_human_readable_composition_information(entry) + # print(f"{instruction}, {element}, {composition}, {stdev}, {normalization}") + + if instruction == "add_element": + if "normalization" not in composition_table: + if normalization is not None: + composition_table["normalization"] = normalization + else: + # as the normalization model is already defined, all following statements + # need to comply because we assume we are not allowed to mix atom and weight + # percent normalization in a composition_table + if normalization is not None: + if normalization != composition_table["normalization"]: + raise ValueError("Composition list is contradicting as it \ + mixes atom- with weight-percent normalization!") + + if element not in composition_table: + composition_table[element] = (composition, stdev) + else: + raise ValueError("Composition list is incorrectly formatted as it has \ + multiple lines for the same element!") + continue + if instruction == "define_matrix": + if element not in composition_table: + composition_table[element] = (None, None) + # because the fraction is unclear at this point + else: + raise ValueError("Composition list is contradicting as it includes \ + at least two statements about what the matrix should be!") + + # determine remaining fraction + total_fractions = 0. + remainder_element = None + for keyword, tpl in composition_table.items(): + if keyword != "normalization": + if (tpl is not None) and (tpl != (None, None)): + total_fractions += tpl[0] + else: + remainder_element = keyword + # print(f"Total fractions {total_fractions}, remainder element {remainder_element}") + if remainder_element is None: + raise ValueError("Composition list inconsistent because either the fractions for \ + elements do not add up to 100.
or no symbol for the matrix was defined!") + + if composition_table:  # means != {} + composition_table[remainder_element] = (1.0e2 - total_fractions, None) + # error propagation model required + + # document if reporting as percent or fractional values + composition_table["percent"] = True + + return composition_table diff --git a/pynxtools/dataconverter/readers/ellips/reader.py b/pynxtools/dataconverter/readers/ellips/reader.py index 58a921c2e..bd7c8bf19 100644 --- a/pynxtools/dataconverter/readers/ellips/reader.py +++ b/pynxtools/dataconverter/readers/ellips/reader.py @@ -19,14 +19,15 @@ import os from typing import Tuple, Any import math +from importlib.metadata import version import yaml import pandas as pd import numpy as np -# import h5py from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.readers.ellips.mock import MockEllips from pynxtools.dataconverter.helpers import extract_atom_types from pynxtools.dataconverter.readers.utils import flatten_and_replace, FlattenSettings +from pynxtools import get_nexus_version, get_nexus_version_hash DEFAULT_HEADER = {'sep': '\t', 'skip': 0} @@ -373,7 +374,7 @@ def write_scan_axis(name: str, values: list, units: str): header["Instrument/angle_of_incidence"] = unique_angles for axis in ["detection_angle", "incident_angle"]: - write_scan_axis(axis, unique_angles, "degrees") + write_scan_axis(axis, unique_angles, "degree") # Create mocked ellipsometry data template: if is_mock: @@ -416,7 +417,15 @@ def read(self, template = populate_template_dict(header, template) spectrum_type = header["Data"]["spectrum_type"] - spectrum_unit = header["Data"]["spectrum_unit"] + if header["Data"]["spectrum_unit"] == "Angstroms": + spectrum_unit = "angstrom" + else: + spectrum_unit = header["Data"]["spectrum_unit"] + # MK:: Carola, Ron, Flo, Tamas, Sandor refactor the above-mentioned construct + # there has to be a unit parsing control logic already at the level of this reader + # because test-data.data has improper units like Angstroms or degrees + # the fix above prevents these incorrect units from being blindly carried + # over into the nxs file and thus causing NOMAD to fail template[f"/ENTRY[entry]/plot/AXISNAME[{spectrum_type}]"] = \ {"link": f"/entry/data_collection/{spectrum_type}_spectrum"} template[f"/ENTRY[entry]/data_collection/NAME_spectrum[{spectrum_type}_spectrum]/@units"] \ @@ -432,16 +441,19 @@ def read(self, "link": "/entry/data_collection/measured_data", "shape": np.index_exp[index, dindx, :] } - template[f"/ENTRY[entry]/plot/DATA[{key}]/@units"] = "degrees" + # MK:: Carola, Ron, Flo, Tamas, Sandor refactor the following line + # using a proper unit parsing logic + template[f"/ENTRY[entry]/plot/DATA[{key}]/@units"] = "degree" if dindx == 0 and index == 0: template[f"/ENTRY[entry]/plot/DATA[{key}]/@long_name"] = \ - f"{plot_name} (degrees)" + f"{plot_name} (degree)" template[f"/ENTRY[entry]/plot/DATA[{key}_errors]"] = \ { "link": "/entry/data_collection/data_error", "shape": np.index_exp[index, dindx, :] } - template[f"/ENTRY[entry]/plot/DATA[{key}_errors]/@units"] = "degrees" + # MK:: Carola, Ron, Flo, Tamas, Sandor refactor the following line + template[f"/ENTRY[entry]/plot/DATA[{key}_errors]/@units"] = "degree" # Define default plot showing Psi and Delta at all angles: template["/@default"] = "entry" @@ -455,6 +467,16 @@ def read(self, for index in range(1, len(data_list)): template["/ENTRY[entry]/plot/@auxiliary_signals"] += data_list[index] + template["/ENTRY[entry]/definition"] = "NXellipsometry" +
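# These assignments record provenance: the application definition used, + # the exact NeXus definitions commit behind it (hash-pinned URL below), + # and the pynxtools version that wrote the file, keeping outputs traceable. +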
template["/ENTRY[entry]/definition/@url"] = ( + "https://github.com/FAIRmat-NFDI/nexus_definitions/" + f"blob/{get_nexus_version_hash()}/contributed_definitions/NXellipsometry.nxdl.xml" + ) + template["/ENTRY[entry]/definition/@version"] = get_nexus_version() + template["/ENTRY[entry]/program_name"] = "pynxtools" + template["/ENTRY[entry]/program_name/@version"] = version("pynxtools") + template["/ENTRY[entry]/program_name/@url"] = "https://github.com/FAIRmat-NFDI/pynxtools" + return template diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/README.md b/pynxtools/dataconverter/readers/em_nion/map_concepts/README.md similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/README.md rename to pynxtools/dataconverter/readers/em_nion/map_concepts/README.md diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/swift_display_items_to_nx_concepts.py b/pynxtools/dataconverter/readers/em_nion/map_concepts/swift_display_items_to_nx.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/swift_display_items_to_nx_concepts.py rename to pynxtools/dataconverter/readers/em_nion/map_concepts/swift_display_items_to_nx.py diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/generic_eln_mapping.py b/pynxtools/dataconverter/readers/em_nion/map_concepts/swift_eln_to_nx_map.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/generic_eln_mapping.py rename to pynxtools/dataconverter/readers/em_nion/map_concepts/swift_eln_to_nx_map.py diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/nx_image_ang_space.py b/pynxtools/dataconverter/readers/em_nion/map_concepts/swift_to_nx_image_ang_space.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/nx_image_ang_space.py rename to pynxtools/dataconverter/readers/em_nion/map_concepts/swift_to_nx_image_ang_space.py diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/nx_image_real_space.py b/pynxtools/dataconverter/readers/em_nion/map_concepts/swift_to_nx_image_real_space.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/nx_image_real_space.py rename to pynxtools/dataconverter/readers/em_nion/map_concepts/swift_to_nx_image_real_space.py diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/nx_spectrum_eels.py b/pynxtools/dataconverter/readers/em_nion/map_concepts/swift_to_nx_spectrum_eels.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/nx_spectrum_eels.py rename to pynxtools/dataconverter/readers/em_nion/map_concepts/swift_to_nx_spectrum_eels.py diff --git a/pynxtools/dataconverter/readers/em_nion/reader.py b/pynxtools/dataconverter/readers/em_nion/reader.py index ac785fda3..e226aca91 100644 --- a/pynxtools/dataconverter/readers/em_nion/reader.py +++ b/pynxtools/dataconverter/readers/em_nion/reader.py @@ -23,10 +23,10 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader -from pynxtools.dataconverter.readers.em_nion.utils.use_case_selector \ +from pynxtools.dataconverter.readers.em_nion.utils.swift_define_io_cases \ import EmNionUseCaseSelector -from pynxtools.dataconverter.readers.em_nion.utils.em_generic_eln_io \ +from pynxtools.dataconverter.readers.em_nion.utils.swift_load_generic_eln \ import NxEmNionElnSchemaParser from pynxtools.dataconverter.readers.em_nion.utils.swift_zipped_project_parser \ diff --git a/pynxtools/dataconverter/readers/em_nion/utils/versioning.py 
b/pynxtools/dataconverter/readers/em_nion/utils/em_nion_versioning.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/utils/versioning.py rename to pynxtools/dataconverter/readers/em_nion/utils/em_nion_versioning.py diff --git a/pynxtools/dataconverter/readers/em_nion/utils/use_case_selector.py b/pynxtools/dataconverter/readers/em_nion/utils/swift_define_io_cases.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/utils/use_case_selector.py rename to pynxtools/dataconverter/readers/em_nion/utils/swift_define_io_cases.py diff --git a/pynxtools/dataconverter/readers/em_nion/utils/swift_dimscale_axes.py b/pynxtools/dataconverter/readers/em_nion/utils/swift_generate_dimscale_axes.py similarity index 96% rename from pynxtools/dataconverter/readers/em_nion/utils/swift_dimscale_axes.py rename to pynxtools/dataconverter/readers/em_nion/utils/swift_generate_dimscale_axes.py index cdc15e895..fbd9cfcf2 100644 --- a/pynxtools/dataconverter/readers/em_nion/utils/swift_dimscale_axes.py +++ b/pynxtools/dataconverter/readers/em_nion/utils/swift_generate_dimscale_axes.py @@ -23,7 +23,7 @@ import numpy as np -from pynxtools.dataconverter.readers.em_nion.concepts.swift_display_items_to_nx_concepts \ +from pynxtools.dataconverter.readers.em_nion.map_concepts.swift_display_items_to_nx \ import metadata_constraints, check_existence_of_required_fields # nexus_concept_dict diff --git a/pynxtools/dataconverter/readers/em_nion/utils/em_generic_eln_io.py b/pynxtools/dataconverter/readers/em_nion/utils/swift_load_generic_eln.py similarity index 95% rename from pynxtools/dataconverter/readers/em_nion/utils/em_generic_eln_io.py rename to pynxtools/dataconverter/readers/em_nion/utils/swift_load_generic_eln.py index 8be648477..4028e4986 100644 --- a/pynxtools/dataconverter/readers/em_nion/utils/em_generic_eln_io.py +++ b/pynxtools/dataconverter/readers/em_nion/utils/swift_load_generic_eln.py @@ -27,16 +27,16 @@ from ase.data import chemical_symbols -from pynxtools.dataconverter.readers.em_nion.utils.versioning \ +from pynxtools.dataconverter.readers.em_nion.utils.em_nion_versioning \ import NX_EM_NION_ADEF_NAME, NX_EM_NION_ADEF_VERSION -from pynxtools.dataconverter.readers.em_nion.utils.versioning \ +from pynxtools.dataconverter.readers.em_nion.utils.em_nion_versioning \ import NX_EM_NION_EXEC_NAME, NX_EM_NION_EXEC_VERSION -from pynxtools.dataconverter.readers.em_nion.concepts.swift_handle_nx_concepts \ +from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ import apply_modifier, variadic_path_to_specific_path -from pynxtools.dataconverter.readers.em_nion.concepts.generic_eln_mapping \ +from pynxtools.dataconverter.readers.em_nion.map_concepts.swift_eln_to_nx_map \ import NxEmElnInput, NxUserFromListOfDict, NxDetectorListOfDict, NxSample diff --git a/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py b/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py index f72f7d48c..17f74ba61 100644 --- a/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py +++ b/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py @@ -38,21 +38,21 @@ from pynxtools.dataconverter.readers.em_nion.utils.swift_uuid_to_file_name \ import uuid_to_file_name -from pynxtools.dataconverter.readers.em_nion.utils.swift_dimscale_axes \ +from pynxtools.dataconverter.readers.em_nion.utils.swift_generate_dimscale_axes \ import get_list_of_dimension_scale_axes -from 
pynxtools.dataconverter.readers.em_nion.concepts.swift_display_items_to_nx_concepts \ +from pynxtools.dataconverter.readers.em_nion.map_concepts.swift_display_items_to_nx \ import nexus_concept_dict, identify_nexus_concept_key -from pynxtools.dataconverter.readers.em_nion.concepts.swift_handle_nx_concepts \ +from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ import apply_modifier, variadic_path_to_specific_path -from pynxtools.dataconverter.readers.em_nion.concepts.nx_image_real_space \ +from pynxtools.dataconverter.readers.em_nion.map_concepts.swift_to_nx_image_real_space \ import NxImageRealSpaceDict -from pynxtools.dataconverter.readers.em_nion.utils.versioning \ +from pynxtools.dataconverter.readers.em_nion.utils.em_nion_versioning \ import NX_EM_NION_SWIFT_NAME, NX_EM_NION_SWIFT_VERSION -from pynxtools.dataconverter.readers.em_nion.utils.versioning \ +from pynxtools.dataconverter.readers.em_nion.utils.em_nion_versioning \ import NX_EM_NION_EXEC_NAME, NX_EM_NION_EXEC_VERSION diff --git a/pynxtools/dataconverter/readers/em_om/utils/image_transform.py b/pynxtools/dataconverter/readers/em_om/utils/image_transform.py index 34f98266f..7369ebef8 100644 --- a/pynxtools/dataconverter/readers/em_om/utils/image_transform.py +++ b/pynxtools/dataconverter/readers/em_om/utils/image_transform.py @@ -23,7 +23,6 @@ # f" how-do-i-make-pil-take-into-account-the-shortest-side-when-creating-a-thumbnail" import numpy as np -from PIL import Image as pil def thumbnail(img, size=300): @@ -39,16 +38,14 @@ def thumbnail(img, size=300): return img if old_width == old_height: - img.thumbnail((size, size), pil.ANTIALIAS) - + img.thumbnail((size, size)) elif old_height > old_width: ratio = float(old_width) / float(old_height) new_width = ratio * size - img = img.resize((int(np.floor(new_width)), size), pil.ANTIALIAS) - + img = img.resize((int(np.floor(new_width)), size)) elif old_width > old_height: ratio = float(old_height) / float(old_width) new_height = ratio * size - img = img.resize((size, int(np.floor(new_height))), pil.ANTIALIAS) + img = img.resize((size, int(np.floor(new_height)))) return img diff --git a/pynxtools/dataconverter/readers/example/reader.py b/pynxtools/dataconverter/readers/example/reader.py index 81b31b6de..83e7438b0 100644 --- a/pynxtools/dataconverter/readers/example/reader.py +++ b/pynxtools/dataconverter/readers/example/reader.py @@ -52,7 +52,8 @@ def read(self, for k in template.keys(): # The entries in the template dict should correspond with what the dataconverter # outputs with --generate-template for a provided NXDL file - if k.startswith("/ENTRY[entry]/required_group"): + if k.startswith("/ENTRY[entry]/required_group") \ + or k == "/ENTRY[entry]/optional_parent/req_group_in_opt_group": continue field_name = k[k.rfind("/") + 1:] @@ -61,6 +62,10 @@ def read(self, if f"{field_name}_units" in data.keys() and f"{k}/@units" in template.keys(): template[f"{k}/@units"] = data[f"{field_name}_units"] + template["required"]["/ENTRY[entry]/optional_parent/required_child"] = 1 + template["optional"][("/ENTRY[entry]/optional_parent/" + "req_group_in_opt_group/DATA[data]")] = [0, 1] + # Add non template key template["/ENTRY[entry]/does/not/exist"] = "None" template["/ENTRY[entry]/required_group/description"] = "A test description" diff --git a/pynxtools/dataconverter/readers/json_map/README.md b/pynxtools/dataconverter/readers/json_map/README.md index 4b4820c49..b81aec969 100644 --- a/pynxtools/dataconverter/readers/json_map/README.md +++ 
b/pynxtools/dataconverter/readers/json_map/README.md @@ -1,24 +1,63 @@ # JSON Map Reader -This reader allows you to convert either data from a .json file or an xarray exported as a .pickle using a flat .mapping.json file. +## What is this reader? + +This reader is designed to allow users of pynxtools to convert their existing data with the help of a map file. The map file tells the reader what to pick from your data files and convert them to FAIR NeXus files. The following formats are supported as input files: +* HDF5 (any extension works, e.g. h5, hdf5, nxs, etc.) +* JSON +* Python Dict Objects Pickled with [pickle](https://docs.python.org/3/library/pickle.html). These can contain [xarray.DataArray](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.html) objects as well as regular Python types and Numpy types. It accepts any NXDL file that you like as long as your mapping file contains all the fields. Please use the --generate-template function of the dataconverter to create a .mapping.json file. ```console -user@box:~$ python convert.py --nxdl NXmynxdl --generate-template > mynxdl.mapping.json +user@box:~$ dataconverter --nxdl NXmynxdl --generate-template > mynxdl.mapping.json ``` There are some example files you can use: +[data.mapping.json](/tests/data/dataconverter/readers/json_map/data.mapping.json) -[data.mapping.json](/tests/data/tools/dataconverter/readers/json_map/data.mapping.json) - -[data.json](/tests/data/tools/dataconverter/readers/json_map/data.json) +[data.json](/tests/data/dataconverter/readers/json_map/data.json) ```console -user@box:~$ python convert.py --nxdl NXtest --input-file data.json --input-file data.mapping.json --reader json_map +user@box:~$ dataconverter --nxdl NXtest --input-file data.json --mapping data.mapping.json +``` + +##### [Example](/examples/json_map/) with HDF5 files. + +## The mapping.json file + +This file is designed to let you fill in the requirements of a NeXus Application Definition without writing any code. If you already have data in the formats listed above, you just need to use this mapping file to help the dataconverter pick your data correctly. + +The mapping files will always be based on the Template the dataconverter generates. See above on how to generate a mapping file. +The right hand side values of the Template keys are what you can modify. + +Here are the three different ways you can fill the right hand side of the Template keys: +* Write the nested path in your datafile. This is indicated by a leading `/` before the word `entry` to make `/entry/data/current_295C` below. +Example: + +```json + "/ENTRY[entry]/DATA[data]/current_295C": "/entry/data/current_295C", + "/ENTRY[entry]/NXODD_name/posint_value": "/a_level_down/another_level_down/posint_value", +``` + +* Write the values directly in the mapping file for missing data from your data file. + +```json + + "/ENTRY[entry]/PROCESS[process]/program": "Bluesky", + "/ENTRY[entry]/PROCESS[process]/program/@version": "1.6.7" +``` + +* Write JSON objects with a link key. This follows the same link mechanism that the dataconverter implements. In the context of this reader, you can only use external links to your data files. In the example below, `current.nxs` is an already existing HDF5 file that we link to in our new NeXus file without copying over the data. The format is as follows: +`"link": "<file name>:<path to data in file>"` Note: This only works for HDF5 files currently.
+ +```json + "/ENTRY[entry]/DATA[data]/current_295C": {"link": "current.nxs:/entry/data/current_295C"}, + "/ENTRY[entry]/DATA[data]/current_300C": {"link": "current.nxs:/entry/data/current_300C"}, ``` ## Contact person in FAIRmat for this reader -Sherjeel Shabih \ No newline at end of file +Sherjeel Shabih diff --git a/pynxtools/dataconverter/readers/json_map/reader.py b/pynxtools/dataconverter/readers/json_map/reader.py index 25123dc94..d17bb075b 100644 --- a/pynxtools/dataconverter/readers/json_map/reader.py +++ b/pynxtools/dataconverter/readers/json_map/reader.py @@ -21,10 +21,10 @@ import pickle import numpy as np import xarray +from mergedeep import merge from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.template import Template -from pynxtools.dataconverter.helpers import ensure_all_required_fields_exist from pynxtools.dataconverter import hdfdict @@ -58,9 +58,26 @@ def get_val_nested_keystring_from_dict(keystring, data): return data[current_key] +def get_attrib_nested_keystring_from_dict(keystring, data): + """ + Fetches all attributes from the data dict using path strings without a leading '/': + 'path/to/data/in/dict' + """ + if isinstance(keystring, (list, dict)): + return keystring + + key_splits = keystring.split("/") + parents = key_splits[:-1] + target = key_splits[-1] + for key in parents: + data = data[key] + + return data[target + "@"] if target + "@" in data.keys() else None + + def is_path(keystring): """Checks whether a given value in the mapping is a mapping path or just data""" - return isinstance(keystring, str) and keystring[0] == "/" + return isinstance(keystring, str) and len(keystring) > 0 and keystring[0] == "/" def fill_undocumented(mapping, template, data): @@ -69,6 +86,7 @@ def fill_undocumented(mapping, template, data): if is_path(value): template["undocumented"][path] = get_val_nested_keystring_from_dict(value[1:], data) + fill_attributes(path, value[1:], data, template) else: template["undocumented"][path] = value @@ -82,6 +100,7 @@ def fill_documented(template, mapping, template_provided, data): if is_path(map_str): template[path] = get_val_nested_keystring_from_dict(map_str[1:], data) + fill_attributes(path, map_str[1:], data, template) else: template[path] = map_str @@ -90,6 +109,14 @@ def fill_documented(template, mapping, template_provided, data): pass +def fill_attributes(path, map_str, data, template): + """Fills in the template all attributes found in the data object""" + attribs = get_attrib_nested_keystring_from_dict(map_str, data) + if attribs: + for key, value in attribs.items(): + template[path + "/@" + key] = value + + def convert_shapes_to_slice_objects(mapping): """Converts shape slice strings to slice objects for indexing""" for key in mapping: @@ -98,6 +125,25 @@ def convert_shapes_to_slice_objects(mapping): mapping[key]["shape"] = parse_slice(mapping[key]["shape"]) +def get_map_from_partials(partials, template, data): + """Takes a list of partials and returns a mapping dictionary to fill partials in our template""" + mapping: dict = {} + for partial in partials: + path = "" + template_path = "" + for part in partial.split("/")[1:]: + path = path + "/" + part + attribs = get_attrib_nested_keystring_from_dict(path[1:], data) + if template_path + "/" + part in template.keys(): + template_path = template_path + "/" + part + else: + nx_name = f"{attribs['NX_class'][2:].upper()}[{part}]" if attribs and "NX_class" in attribs else part # pylint: disable=line-too-long + template_path = template_path + 
"/" + nx_name + mapping[template_path] = path + + return mapping + + class JsonMapReader(BaseReader): """A reader that takes a mapping json file and a data file/object to return a template.""" @@ -119,10 +165,10 @@ def read(self, The mapping is only accepted as file.mapping.json to the inputs. """ data: dict = {} - mapping: dict = {} + mapping: dict = None + partials: list = [] - if objects: - data = objects[0] + data = objects[0] if objects else data for file_path in file_paths: file_extension = file_path[file_path.rindex("."):] @@ -143,23 +189,26 @@ def read(self, if is_hdf5: hdf = hdfdict.load(file_path) hdf.unlazy() - data = dict(hdf) + merge(data, dict(hdf)) + if "entry@" in data and "partial" in data["entry@"]: + partials.extend(data["entry@"]["partial"]) if mapping is None: - template = Template({x: "/hierarchical/path/in/your/datafile" for x in template}) - raise IOError("Please supply a JSON mapping file: --input-file" - " my_nxdl_map.mapping.json\n\n You can use this " - "template for the required fields: \n" + str(template)) + if len(partials) > 0: + mapping = get_map_from_partials(partials, template, data) + else: + template = Template({x: "/hierarchical/path/in/your/datafile" for x in template}) + raise IOError("Please supply a JSON mapping file: --input-file" + " my_nxdl_map.mapping.json\n\n You can use this " + "template for the required fields: \n" + str(template)) + new_template = Template() convert_shapes_to_slice_objects(mapping) - new_template = Template() fill_documented(new_template, mapping, template, data) fill_undocumented(mapping, new_template, data) - ensure_all_required_fields_exist(template, new_template) - return new_template diff --git a/pynxtools/dataconverter/readers/mpes/reader.py b/pynxtools/dataconverter/readers/mpes/reader.py index fce988f76..7d860765c 100644 --- a/pynxtools/dataconverter/readers/mpes/reader.py +++ b/pynxtools/dataconverter/readers/mpes/reader.py @@ -198,20 +198,6 @@ def handle_h5_and_json_file(file_paths, objects): f"but {file_path} does not match.", ) - if not os.path.exists(file_path): - file_path = os.path.join( - os.path.dirname(__file__), - "..", - "..", - "..", - "..", - "tests", - "data", - "dataconverter", - "readers", - "mpes", - file_path, - ) if not os.path.exists(file_path): raise FileNotFoundError( errno.ENOENT, @@ -252,11 +238,30 @@ def _getattr(obj, attr): if "index" in attr: axis = attr.split(".")[0] - return str(obj.dims.index(f"{axis}")) + return obj.dims.index(f"{axis}") return reduce(_getattr, [obj] + attr.split(".")) +def fill_data_indices_in_config(config_file_dict, x_array_loaded): + """Add data indices key value pairs to the config_file + dictionary from the xarray dimensions if not already + present. 
+ """ + for key in list(config_file_dict): + if "*" in key: + value = config_file_dict[key] + for dim in x_array_loaded.dims: + new_key = key.replace("*", dim) + new_value = value.replace("*", dim) + + if new_key not in config_file_dict.keys() \ + and new_value not in config_file_dict.values(): + config_file_dict[new_key] = new_value + + config_file_dict.pop(key) + + class MPESReader(BaseReader): """MPES-specific reader class""" @@ -265,7 +270,7 @@ class MPESReader(BaseReader): # Whitelist for the NXDLs that the reader supports and can process supported_nxdls = ["NXmpes"] - def read( + def read( # pylint: disable=too-many-branches self, template: dict = None, file_paths: Tuple[str] = None, @@ -283,6 +288,8 @@ def read( eln_data_dict, ) = handle_h5_and_json_file(file_paths, objects) + fill_data_indices_in_config(config_file_dict, x_array_loaded) + for key, value in config_file_dict.items(): if isinstance(value, str) and ":" in value: diff --git a/pynxtools/dataconverter/readers/rii_database/reader.py b/pynxtools/dataconverter/readers/rii_database/reader.py index ae36b3884..32fb7c5fa 100644 --- a/pynxtools/dataconverter/readers/rii_database/reader.py +++ b/pynxtools/dataconverter/readers/rii_database/reader.py @@ -17,13 +17,12 @@ # """Convert refractiveindex.info yaml files to nexus""" from typing import Tuple, Any, Dict -import logging from pynxtools.dataconverter.readers.json_yml.reader import YamlJsonReader from pynxtools.dataconverter.readers.rii_database.dispersion_reader import ( DispersionReader, ) -from pynxtools.dataconverter.readers.utils import parse_json +from pynxtools.dataconverter.readers.utils import parse_json, handle_objects class RiiReader(YamlJsonReader): @@ -40,7 +39,7 @@ def __init__(self, *args, **kwargs): ".yaml": self.read_dispersion, ".json": self.parse_json_w_fileinfo, "default": lambda _: self.appdef_defaults(), - "objects": self.handle_objects, + "objects": self.handle_rii_objects, } def read_dispersion(self, filename: str): @@ -86,20 +85,9 @@ def parse_json_w_fileinfo(self, filename: str) -> Dict[str, Any]: return template - def handle_objects(self, objects: Tuple[Any]) -> Dict[str, Any]: + def handle_rii_objects(self, objects: Tuple[Any]) -> Dict[str, Any]: """Handle objects and generate template entries from them""" - if objects is None: - return {} - - template = {} - - for obj in objects: - if not isinstance(obj, dict): - logging.warning("Ignoring unknown object of type %s", type(obj)) - continue - - template.update(obj) - + template = handle_objects(objects) self.fill_dispersion_in(template) return template diff --git a/pynxtools/dataconverter/readers/em_nion/concepts/swift_handle_nx_concepts.py b/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py similarity index 100% rename from pynxtools/dataconverter/readers/em_nion/concepts/swift_handle_nx_concepts.py rename to pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py diff --git a/pynxtools/dataconverter/readers/shared/shared_utils.py b/pynxtools/dataconverter/readers/shared/shared_utils.py index 59d28ba6d..629e29a0f 100644 --- a/pynxtools/dataconverter/readers/shared/shared_utils.py +++ b/pynxtools/dataconverter/readers/shared/shared_utils.py @@ -22,15 +22,17 @@ # pylint: disable=E1101, R0801 -import git +# import git def get_repo_last_commit() -> str: """Identify the last commit to the repository.""" - repo = git.Repo(search_parent_directories=True) - sha = str(repo.head.object.hexsha) - if sha != "": - return sha + # repo = git.Repo(search_parent_directories=True) 
+ # sha = str(repo.head.object.hexsha) + # if sha != "": + # return sha + # currently update-north-markus branch on nomad-FAIR does not pick up + # git even though git is in the base image and gitpython is in the pynxtools deps return "unknown git commit id or unable to parse git reverse head" diff --git a/pynxtools/dataconverter/readers/transmission/reader.py b/pynxtools/dataconverter/readers/transmission/reader.py index 3d4f0e152..ccc94374e 100644 --- a/pynxtools/dataconverter/readers/transmission/reader.py +++ b/pynxtools/dataconverter/readers/transmission/reader.py @@ -22,7 +22,7 @@ from pynxtools.dataconverter.readers.json_yml.reader import YamlJsonReader import pynxtools.dataconverter.readers.transmission.metadata_parsers as mpars -from pynxtools.dataconverter.readers.utils import parse_json, parse_yml +from pynxtools.dataconverter.readers.utils import parse_json, parse_yml, handle_objects # Dictionary mapping metadata in the asc file to the paths in the NeXus file. @@ -254,6 +254,7 @@ class TransmissionReader(YamlJsonReader): ".yml": lambda fname: parse_yml(fname, CONVERT_DICT, REPLACE_NESTED), ".yaml": lambda fname: parse_yml(fname, CONVERT_DICT, REPLACE_NESTED), "default": lambda _: add_def_info(), + "objects": handle_objects, } diff --git a/pynxtools/dataconverter/readers/utils.py b/pynxtools/dataconverter/readers/utils.py index 23fbfbdd9..c1826d744 100644 --- a/pynxtools/dataconverter/readers/utils.py +++ b/pynxtools/dataconverter/readers/utils.py @@ -16,12 +16,15 @@ # limitations under the License. # """Utility functions for the NeXus reader classes.""" +import logging from dataclasses import dataclass, replace -from typing import List, Any, Dict, Optional +from typing import List, Any, Dict, Optional, Tuple from collections.abc import Mapping import json import yaml +logger = logging.getLogger(__name__) + @dataclass class FlattenSettings(): @@ -201,3 +204,20 @@ def parse_json(file_path: str) -> Dict[str, Any]: """ with open(file_path, "r", encoding="utf-8") as file: return json.load(file) + + +def handle_objects(objects: Tuple[Any]) -> Dict[str, Any]: + """Handle objects and generate template entries from them""" + if objects is None: + return {} + + template = {} + + for obj in objects: + if not isinstance(obj, dict): + logger.warning("Ignoring unknown object of type %s", type(obj)) + continue + + template.update(obj) + + return template diff --git a/pynxtools/dataconverter/readers/xrd/README.md b/pynxtools/dataconverter/readers/xrd/README.md new file mode 100644 index 000000000..53c64dfc7 --- /dev/null +++ b/pynxtools/dataconverter/readers/xrd/README.md @@ -0,0 +1,40 @@ +# XRD Reader +With the XRD reader, data from X-ray diffraction experiments can be read and written into a NeXus file (an HDF5-type file with extension .nxs) according to the NXxrd_pan application definition in [NeXus](https://github.com/FAIRmat-NFDI/nexus_definitions). There are a few different methods of measuring XRD: 1. θ:2θ instruments (e.g. Rigaku H3R), and 2. θ:θ instruments (e.g. PANalytical X’Pert Pro). The goal with this reader is to support both of these methods. + +**NOTE: This reader is still under development. As of now, the reader can only handle files with the extension `.xrdml`, obtained with PANalytical X’Pert Pro version 1.5 (method 2 described above). Currently we are working to include more file types and file versions.** + +## Contact Person in FAIRmat +In principle, you can reach out to any member of Area B of the FAIRmat consortium, but Rubel Mozumder is the most suitable contact for an early response.
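+ +## Programmatic use + +Besides the command line, the reader module also exposes a helper that can be called from Python. Below is a minimal sketch based on the `get_template_from_xrd_reader` function in this reader's reader.py; the input file names are placeholders: + +```python +from pynxtools.dataconverter.readers.xrd.reader import get_template_from_xrd_reader + +# Fill and validate an NXxrd_pan template from the measurement file and +# optional ELN metadata; returns a map from NeXus concept paths to values. +template = get_template_from_xrd_reader( + nxdl_name="NXxrd_pan", + file_paths=("experiment.xrdml", "eln_data.yaml"), +) +```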
+ +## Parsers +In computer science, a parser is a program that decomposes its input into smaller parts (called tokens) and relates the tokens in a tree structure; this is how a compiler comes to understand source code. + +Here, the XRD reader calls a parser class that reads the experimental input file and re-organises the different physical/experimental concepts or properties into a structure defined by the developer. + +### class pynxtools.dataconverter.readers.xrd.xrd_parser.XRDMLParser + + **inputs:** + file_path: Full path of the input file. + + **Important method:** + get_slash_separated_xrd_dict() -> dict + + This method can be used to check whether all the data from the input file have been read or not; it returns the slash-separated dict as described. + + +### Other Parsers + **Coming Soon!!** + +### How To +The reader can be run from Jupyter-notebook or Jupyter-lab with the following command: + +```sh + ! dataconverter \ +--reader xrd \ +--nxdl NXxrd_pan \ +--input-file <path-to-xrdml-file> \ +--input-file <path-to-eln-data-yaml> \ +--output <output-file-name>.nxs +``` + +An example file can be found in GitLab in [nomad-remote-tools-hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/-/tree/develop/docker/xrd); feel free to visit it and try out the reader. diff --git a/pynxtools/dataconverter/readers/xrd/__init__.py b/pynxtools/dataconverter/readers/xrd/__init__.py new file mode 100644 index 000000000..d4ec4a8cc --- /dev/null +++ b/pynxtools/dataconverter/readers/xrd/__init__.py @@ -0,0 +1,15 @@ +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +#     http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/pynxtools/dataconverter/readers/xrd/config.py b/pynxtools/dataconverter/readers/xrd/config.py new file mode 100644 index 000000000..4d3757b10 --- /dev/null +++ b/pynxtools/dataconverter/readers/xrd/config.py @@ -0,0 +1,117 @@ +"""This config file maps NeXus definitions to data paths in the raw file.""" + +# pylint: disable=C0301 +xrdml = { + "/ENTRY[entry]/2theta_plot/chi": {"xrdml_1.5": {"value": "", + "@units": "", + "@chi_indices": 0}, + }, + "/ENTRY[entry]/2theta_plot/intensity": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/intensities", + "@units": "counts/s"} + }, + "/ENTRY[entry]/2theta_plot/omega": {"xrdml_1.5": {"value": "", + "@units": "", + "@omega_indices": 1}, + }, + "/ENTRY[entry]/2theta_plot/title": "Intensity Vs.
Two Theta (deg.)", + "/ENTRY[entry]/2theta_plot/phi": {"xrdml_1.5": {"value": "", + "@units": "", + "@phi_indices": 0}, + }, + "/ENTRY[entry]/2theta_plot/two_theta": {"xrdml_1.5": {"value": "", + "@units": "deg", + "@two_theta_indices": 0}, + }, + "/ENTRY[entry]/COLLECTION[collection]/beam_attenuation_factors": {"xrdml_1.5": {"value": "/beamAttenuationFactors", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/omega/start": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_2/startPosition", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_2/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/omega/end": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_2/endPosition", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_2/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/omega/step": {"xrdml_1.5": {"value": "/xrdMeasurements/comment/entry_2/MinimumstepsizeOmega", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_2/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/2theta/start": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_1/startPosition", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_1/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/2theta/end": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_1/endPosition", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_1/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/2theta/step": {"xrdml_1.5": {"value": "/xrdMeasurements/comment/entry_2/Minimumstepsize2Theta", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_1/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/count_time": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/commonCountingTime", + "@units": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/commonCountingTime/unit"}, + }, + "/ENTRY[entry]/COLLECTION[collection]/data_file": {"xrdml_1.5": {"value": ""} + }, + "/ENTRY[entry]/COLLECTION[collection]/goniometer_x": {"xrdml_1.5": {"value": "/X", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/goniometer_y": {"xrdml_1.5": {"value": "/Y", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/goniometer_z": {"xrdml_1.5": {"value": "/Z", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/measurement_type": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/measurementType", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/integration_time": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/integration_time/@units": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/scanAxis", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_mode": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/mode", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha1", + "@units": "/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha1/unit"}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two": {"xrdml_1.5": {"value": 
"/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha2", + "@units": "/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha2/unit"}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/usedWavelength/kBeta", + "@units": "/xrdMeasurements/xrdMeasurement/usedWavelength/kBeta/unit"}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone": {"xrdml_1.5": {"value": "", + "@units": ""} + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/current", + "@units": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/current/unit"} + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/source_peak_wavelength": {"xrdml_1.5": {"value": "", + "@units": ""} + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/anodeMaterial", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/tension", + "@units": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/tension/unit"} + }, + "/ENTRY[entry]/SAMPLE[sample]/prepared_by": {"xrdml_1.5": {"value": ""} + }, + "/ENTRY[entry]/SAMPLE[sample]/sample_id": {"xrdml_1.5": {"value": ""}, + }, + "/ENTRY[entry]/SAMPLE[sample]/sample_mode": {"xrdml_1.5": {"value": ""}, + }, + "/ENTRY[entry]/SAMPLE[sample]/sample_name": {"xrdml_1.5": {"value": ""}, + }, + "/ENTRY[entry]/definition": "NXxrd_pan", + "/ENTRY[entry]/method": "X-Ray Diffraction (XRD)", + "/ENTRY[entry]/q_plot/intensity": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/dataPoints/intensities", + "@units": "counts/s"}, + }, + "/ENTRY[entry]/q_plot/q": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/@default": "entry", + "/ENTRY[entry]/@default": "2theta_plot", +} diff --git a/pynxtools/dataconverter/readers/xrd/reader.py b/pynxtools/dataconverter/readers/xrd/reader.py new file mode 100644 index 000000000..242498790 --- /dev/null +++ b/pynxtools/dataconverter/readers/xrd/reader.py @@ -0,0 +1,176 @@ +"""XRD reader.""" +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+from typing import Tuple, Any, Dict, Union
+import json
+from pathlib import Path
+import xml.etree.ElementTree as ET
+
+import yaml
+
+from pynxtools.dataconverter.helpers import (generate_template_from_nxdl,
+                                             validate_data_dict)
+from pynxtools.dataconverter.template import Template
+from pynxtools.dataconverter.readers.xrd.xrd_parser import parse_and_fill_template
+from pynxtools.dataconverter.readers.utils import flatten_and_replace, FlattenSettings
+from pynxtools.dataconverter.readers.base.reader import BaseReader
+
+CONVERT_DICT: Dict[str, str] = {
+    'unit': '@units',
+    'Instrument': 'INSTRUMENT[instrument]',
+    'Source': 'SOURCE[source]',
+    'Detector': 'DETECTOR[detector]',
+    'Collection': 'COLLECTION[collection]',
+    'Sample': 'SAMPLE[sample]',
+    'version': '@version',
+    'User': 'USER[user]',
+}
+
+
+# Global variable to collect the NXDL root element in get_template_from_nxdl_name()
+# and reuse it in validate_data_dict()
+ROOT: ET.Element = None
+REPLACE_NESTED: Dict[str, Any] = {}
+XRD_FILE_EXTENSIONS = [".xrdml", "xrdml", ".udf", ".raw", ".xye"]
+
+
+def get_template_from_nxdl_name(nxdl_name):
+    """Generate a template from an nxdl name.
+
+    An example of an nxdl name is NXxrd_pan.
+
+    Parameters
+    ----------
+    nxdl_name : str
+        Name of the nxdl file, e.g. NXmpes
+
+    Returns
+    -------
+    Template
+        Empty template.
+
+    Raises
+    ------
+    ValueError
+        Error if the nxdl file is not found.
+    """
+    nxdl_file = nxdl_name + ".nxdl.xml"
+    current_path = Path(__file__)
+    def_path = current_path.parent.parent.parent.parent / 'definitions'
+    # Check contributed definitions
+    full_nxdl_path = Path(def_path, 'contributed_definitions', nxdl_file)
+    root = None
+    if full_nxdl_path.exists():
+        root = ET.parse(full_nxdl_path).getroot()
+    else:
+        # Check application definitions
+        full_nxdl_path = Path(def_path, 'applications', nxdl_file)
+
+    if root is None and full_nxdl_path.exists():
+        root = ET.parse(full_nxdl_path).getroot()
+    else:
+        full_nxdl_path = Path(def_path, 'base_classes', nxdl_file)
+
+    if root is None and full_nxdl_path.exists():
+        root = ET.parse(full_nxdl_path).getroot()
+    elif root is None:
+        raise ValueError(f"No NXDL file found for {nxdl_name}; need a correct NXDL name.")
+
+    # Keep the root element available for validate_data_dict()
+    global ROOT  # pylint: disable=global-statement
+    ROOT = root
+
+    template = Template()
+    generate_template_from_nxdl(root=root, template=template)
+    return template
+
+
+def get_template_from_xrd_reader(nxdl_name, file_paths):
+    """Get a filled template from the reader.
+
+    Parameters
+    ----------
+    nxdl_name : str
+        Name of the nxdl definition.
+    file_paths : Tuple[str]
+        Tuple of file paths.
+
+    Returns
+    -------
+    Template
+        Template which is a map from NeXus concept path to value.
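+
+    Examples
+    --------
+    Illustrative call (file names are placeholders):
+
+    >>> data = get_template_from_xrd_reader(
+    ...     "NXxrd_pan", ("experiment.xrdml", "eln_data.yaml"))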
+    """
+
+    template = get_template_from_nxdl_name(nxdl_name)
+
+    data = XRDReader().read(template=template,
+                            file_paths=file_paths)
+    validate_data_dict(template=template, data=data, nxdl_root=ROOT)
+    return data
+
+
+# pylint: disable=too-few-public-methods
+class XRDReader(BaseReader):
+    """Reader for XRD."""
+
+    supported_nxdls = ["NXxrd_pan"]
+
+    def read(self,
+             template: dict = None,
+             file_paths: Tuple[str] = None,
+             objects: Tuple[Any] = None):
+        """General read method to prepare the template."""
+
+        if not isinstance(file_paths, tuple) and not isinstance(file_paths, list):
+            file_paths = (file_paths,)
+        filled_template: Union[Dict, None] = Template()
+        eln_dict: Union[Dict[str, Any], None] = None
+        config_dict: Dict = {}
+        xrd_file: str = ""
+        xrd_file_ext: str = ""
+        for file in file_paths:
+            ext = "".join(Path(file).suffixes)
+            if ext == '.json':
+                with open(file, mode="r", encoding="utf-8") as fl_obj:
+                    config_dict = json.load(fl_obj)
+            elif ext in ['.yaml', '.yml']:
+                with open(file, mode="r", encoding="utf-8") as fl_obj:
+                    eln_dict = flatten_and_replace(
+                        FlattenSettings(
+                            yaml.safe_load(fl_obj),
+                            CONVERT_DICT, REPLACE_NESTED
+                        )
+                    )
+            elif ext in XRD_FILE_EXTENSIONS:
+                xrd_file_ext = ext
+                xrd_file = file
+        if xrd_file:
+            parse_and_fill_template(template, xrd_file, config_dict, eln_dict)
+        else:
+            raise ValueError(f"An XRD file with an extension from {XRD_FILE_EXTENSIONS}"
+                             f" is expected, but found {xrd_file_ext!r}.")
+
+        # Keep only filled concepts; do not delete from template while iterating over it
+        for key, val in template.items():
+            if val is not None:
+                filled_template[key] = val
+        if not filled_template.keys():
+            raise ValueError("Reader could not read anything! Check the input files and the"
+                             " corresponding extension.")
+        return filled_template
+
+
+READER = XRDReader
diff --git a/pynxtools/dataconverter/readers/xrd/xrd_helper.py b/pynxtools/dataconverter/readers/xrd/xrd_helper.py
new file mode 100644
index 000000000..40874be50
--- /dev/null
+++ b/pynxtools/dataconverter/readers/xrd/xrd_helper.py
@@ -0,0 +1,293 @@
+"""XRD helpers."""
+
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+import numpy as np
+from pynxtools.dataconverter.helpers import transform_to_intended_dt
+from pynxtools.dataconverter.template import Template
+
+
+class KeyValueNotFoundWarning(Warning):
+    """Warning class for a key-value pair that could not be found."""
+
+
+def get_a_value_or_warn(return_value="",
+                        warning_category=KeyValueNotFoundWarning,
+                        message="Key-value not found.",
+                        stacklevel=2):
+    """Return a fallback value and raise a warning with the given message."""
+
+    warnings.warn(f"\033[1;31m{message}\033[0m", warning_category, stacklevel)
+    return return_value
+
+
+def check_unit(unit: str):
+    """Handle a conflicting unit.
+
+    Some units coming from vendor files do not follow the expected format.
+    """
+    if unit is None:
+        return unit
+    unit_map = {'Angstrom': '\u212B',
+                }
+    correct_unit = unit_map.get(unit, None)
+    if correct_unit is None:
+        return unit
+    return correct_unit
+
+
+# pylint: disable=too-many-statements
+def feed_xrdml_to_template(template, xrd_dict, eln_dict, file_term, config_dict=None):
+    """Fill the template with data from an xrdml type file.
+
+    Parameters
+    ----------
+    template : Dict
+        Template generated from the nxdl definition file.
+    xrd_dict : dict
+        Dict mapping slash-separated keys to the data; each key is the path
+        to the corresponding location in the data file.
+    eln_dict : dict
+        Data from the user's ELN, keyed by NeXus concept paths.
+    file_term : str
+        String describing the file extension and version (e.g. xrdml_1.5), used
+        to select the proper dict from the config file.
+    config_dict : Dict
+        Dictionary from the config file that maps NeXus concepts to data from
+        different data file versions. E.g.
+        {
+          "/ENTRY[entry]/2theta_plot/chi": {"file_exp": {"value": "",
+                                                         "@units": ""},},
+          "/ENTRY[entry]/2theta_plot/intensity": {"file_exp": {"value": "/detector",
+                                                               "@units": ""},}
+        }
+    """
+
+    def fill_template_from_config_data(config_dict: dict, template: Template,
+                                       xrd_dict: dict, file_term: str) -> None:
+        """
+        Parameters
+        ----------
+        config_dict : dict
+            Nested dict covering different file versions, e.g.
+            {"/ENTRY[entry]/2theta_plot/chi": {"file_exp": {"value": "",
+                                                            "@units": ""},},
+             "/ENTRY[entry]/2theta_plot/intensity": {"file_exp": {"value": "/detector",
+                                                                  "@units": ""},}
+            }
+        template : Template
+
+        Returns
+        -------
+        None
+        """
+        for nx_key, val in config_dict.items():
+            if isinstance(val, dict):
+                raw_data_des: dict = val.get(file_term, None)
+                if raw_data_des is None:
+                    raise ValueError(f"The config file does not provide any data mapping"
+                                     f" for the file type {file_term}.")
+                # the field does not have any value
+                if not raw_data_des.get('value', None):
+                    continue
+                # Note: path is the data path in the raw file
+                for val_atr_key, path in raw_data_des.items():
+                    # data or field value
+                    if val_atr_key == 'value':
+                        template[nx_key] = xrd_dict.get(path, None)
+                    elif path and val_atr_key == '@units':
+                        template[nx_key + '/' + val_atr_key] = check_unit(
+                            xrd_dict.get(path, None))
+                    # other attributes, e.g. @two_theta_indices
+                    elif path and val_atr_key.startswith('@'):
+                        template[nx_key + '/' + val_atr_key] = xrd_dict.get(path, None)
+            elif isinstance(val, str):
+                template[nx_key] = val
+
+    def two_theta_plot():
+
+        intensity = transform_to_intended_dt(template.get("/ENTRY[entry]/2theta_plot/intensity",
+                                                          None))
+        if intensity is not None:
+            intensity_len = np.shape(intensity)[0]
+        else:
+            raise ValueError("No intensity is found")
+
+        two_theta_gr = "/ENTRY[entry]/2theta_plot/"
+        if template.get(f"{two_theta_gr}omega", None) is None:
+            omega_start = template.get("/ENTRY[entry]/COLLECTION[collection]/omega/start", None)
+            omega_end = template.get("/ENTRY[entry]/COLLECTION[collection]/omega/end", None)
+
+            template["/ENTRY[entry]/2theta_plot/omega"] = np.linspace(float(omega_start),
+                                                                      float(omega_end),
+                                                                      intensity_len)
+
+        if template.get(f"{two_theta_gr}two_theta", None) is None:
+            tw_theta_start = template.get("/ENTRY[entry]/COLLECTION[collection]/2theta/start",
+                                          None)
+            tw_theta_end = template.get("/ENTRY[entry]/COLLECTION[collection]/2theta/end", None)
+            template[f"{two_theta_gr}two_theta"] = np.linspace(float(tw_theta_start),
+                                                               float(tw_theta_end),
+                                                               intensity_len)
+        # two_theta_gr already ends with a slash
+        template[f"{two_theta_gr}@axes"] = ["two_theta"]
+        template[f"{two_theta_gr}@signal"] = "intensity"
+
+    def q_plot():
+        q_plot_gr = "/ENTRY[entry]/q_plot"
+        alpha_2 = template.get("/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two",
+                               None)
+        alpha_1 = template.get("/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one",
+                               None)
+        two_theta: np.ndarray = template.get("/ENTRY[entry]/2theta_plot/two_theta", None)
+        if two_theta is None:
+            raise ValueError("Two-theta data is not found")
+        if not isinstance(two_theta, np.ndarray):
+            two_theta = np.asarray(two_theta)
+        theta: np.ndarray = two_theta / 2
+        ratio_k = "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone"
+        if alpha_1 and alpha_2:
+            ratio = alpha_2 / alpha_1
+            template[ratio_k] = ratio
+            lamda = ratio * alpha_1 + (1 - ratio) * alpha_2
+            q_vec = (4 * np.pi / lamda) * np.sin(np.deg2rad(theta))
+            template[q_plot_gr + "/" + "q_vec"] = q_vec
+            template[q_plot_gr + "/" + "@q_vec_indices"] = 0
+            template[q_plot_gr + "/" + "@axes"] = ["q_vec"]
+
+        template[q_plot_gr + "/" + "@signal"] = "intensity"
+
+    def handle_special_fields():
+        """Some fields need special treatment."""
+
+        # Scalars may arrive as single-element arrays; unwrap them.
+        for key in ("/ENTRY[entry]/COLLECTION[collection]/goniometer_x",
+                    "/ENTRY[entry]/COLLECTION[collection]/goniometer_y",
+                    "/ENTRY[entry]/COLLECTION[collection]/goniometer_z",
+                    "/ENTRY[entry]/COLLECTION[collection]/count_time"):
+            val = template.get(key, None)
+            if isinstance(val, np.ndarray) and val.shape == (1,):
+                template[key] = val[0]
+
+    fill_template_from_config_data(config_dict, template,
+                                   xrd_dict, file_term)
+    two_theta_plot()
+    q_plot()
+    handle_special_fields()
+
+    fill_template_from_eln_data(eln_dict, template)
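+
+
+# Editor's illustration of the q_plot() math above (not part of the reader):
+# assuming wavelengths in angstrom, with two_theta = 30 deg and an effective
+# wavelength lamda = 1.5406 angstrom,
+#   q = (4 * pi / 1.5406) * sin(deg2rad(30 / 2)) ~= 2.11 (1/angstrom).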
+
+
+# pylint: disable=unused-argument
+def feed_udf_to_template(template, xrd_dict, eln_dict, config_dict):
+    """Fill the template with data from a .udf file (placeholder, not implemented yet).
+
+    Parameters
+    ----------
+    template : Template
+        Template generated from the nxdl definition file.
+    xrd_dict : dict
+        Dict mapping slash-separated keys to the data from the file.
+    eln_dict : dict
+        Data from the user's ELN, keyed by NeXus concept paths.
+    config_dict : dict
+        Dict mapping NeXus concepts to data paths in the file.
+    """
+
+
+def feed_raw_to_template(template, xrd_dict, eln_dict, config_dict):
+    """Fill the template with data from a .raw file (placeholder, not implemented yet).
+
+    Parameters
+    ----------
+    template : Template
+        Template generated from the nxdl definition file.
+    xrd_dict : dict
+        Dict mapping slash-separated keys to the data from the file.
+    eln_dict : dict
+        Data from the user's ELN, keyed by NeXus concept paths.
+    config_dict : dict
+        Dict mapping NeXus concepts to data paths in the file.
+    """
+
+
+def feed_xye_to_template(template, xrd_dict, eln_dict, config_dict):
+    """Fill the template with data from a .xye file (placeholder, not implemented yet).
+
+    Parameters
+    ----------
+    template : Template
+        Template generated from the nxdl definition file.
+    xrd_dict : dict
+        Dict mapping slash-separated keys to the data from the file.
+    eln_dict : dict
+        Data from the user's ELN, keyed by NeXus concept paths.
+    config_dict : dict
+        Dict mapping NeXus concepts to data paths in the file.
+    """
+
+
+def fill_template_from_eln_data(eln_data_dict, template):
+    """Fill out the template from the dict generated from the ELN yaml file.
+
+    Parameters
+    ----------
+    eln_data_dict : dict[str, Any]
+        Python dictionary from the ELN file.
+    template : dict[str, Any]
+
+    Returns
+    -------
+    None
+    """
+
+    if eln_data_dict is None:
+        return
+    for e_key, e_val in eln_data_dict.items():
+        template[e_key] = transform_to_intended_dt(e_val)
+
+
+def fill_nxdata_from_xrdml(template,
+                           xrd_flattend_dict,
+                           dt_nevigator_from_config_file,
+                           data_group_concept
+                           ):
+    """Fill NXdata groups from xrdml data (placeholder, not implemented yet).
+
+    Parameters
+    ----------
+    template : Template
+        Template generated from the nxdl definition file.
+    xrd_flattend_dict : dict
+        Flattened dict with slash-separated keys from the xrdml file.
+    dt_nevigator_from_config_file : dict
+        Data-navigation map taken from the config file.
+    data_group_concept : str
+        NeXus concept path of the NXdata group to fill.
+    """
diff --git a/pynxtools/dataconverter/readers/xrd/xrd_parser.py b/pynxtools/dataconverter/readers/xrd/xrd_parser.py
new file mode 100644
index 000000000..9d944cad7
--- /dev/null
+++ b/pynxtools/dataconverter/readers/xrd/xrd_parser.py
@@ -0,0 +1,448 @@
+"""
+XRD file parser collection.
+"""
+
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Tuple, Optional, List
+
+from pathlib import Path
+import warnings
+import xml.etree.ElementTree as ET  # for XML parsing
+from pynxtools.dataconverter.helpers import transform_to_intended_dt, remove_namespace_from_tag
+from pynxtools.dataconverter.readers.xrd.xrd_helper import feed_xrdml_to_template
+
+
+def fill_slash_sep_dict_from_nested_dict(parent_path: str, nested_dict: dict,
+                                         slash_sep_dict: dict):
+    """Convert a nested dict into a slash-separated dict.
+
+    Extend slash_sep_dict by the slash-separated keys derived from the nested dict.
+
+    Parameters
+    ----------
+    parent_path : str
+        Parent path to prepend at the start of each slash-separated key.
+    nested_dict : dict
+        Dict nesting other dicts.
+    slash_sep_dict : dict
+        Plain dict to be extended with the keys and values generated from nested_dict.
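+
+    Example
+    -------
+    With parent_path '/' and nested_dict {'scan': {'mode': 'Continuous'}},
+    slash_sep_dict is extended by {'/scan/mode': 'Continuous'}.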
+    """
+    for key, val in nested_dict.items():
+        # Join with a single slash; the top-level parent_path is plain '/'
+        slash_sep_path = parent_path.rstrip('/') + '/' + key
+        if isinstance(val, dict):
+            fill_slash_sep_dict_from_nested_dict(slash_sep_path, val, slash_sep_dict)
+        else:
+            slash_sep_dict[slash_sep_path] = val
+
+
+class IgnoreNodeTextWarning(Warning):
+    """Special class to warn when node text is skipped."""
+
+
+class XRDMLParser:
+    """Parser for xrdml files, partly with the help of other XRD libraries, e.g. panalytical_xml."""
+
+    def __init__(self, file_path):
+        """Construct an XRDMLParser object.
+
+        Parameters
+        ----------
+        file_path : str
+            Path of the file.
+        """
+        # Reserved to distinguish different file versions in the future
+        # self.__version = None
+        self.__xrd_dict = {}
+        self.__file_path = file_path
+        self.xrdml_version: str = ""
+        self.xml_root = ET.parse(self.__file_path).getroot()
+        self.find_version()
+        # Important note on the key-val pair separator list: preceding elements
+        # take precedence over the following elements
+        self.key_val_pair_sprtr = (';', ',')
+        # Important note on the key-val separator list: preceding elements
+        # take precedence over the following elements
+        self.key_val_sprtr = ('=', ':')
+
+    def find_version(self):
+        """Find the xrdml file version."""
+        schema_loc = "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation"
+        # str: 'http://www.xrdml.com/XRDMeasurement/1.5
+        version = self.xml_root.get(schema_loc).split(' ')[0]
+        self.xrdml_version = version.split('/')[-1]
+
+    def get_slash_separated_xrd_dict(self):
+        """Return a dict with slash-separated keys and values from the xrd file.
+
+        Each key is the slash-separated string path of a nested xml element.
+
+        Returns
+        -------
+        dict:
+            Dictionary that maps nested xml elements to slash-separated string keys.
+        """
+        # Entry point for dispatching to different functions in the future,
+        # e.g. depending on the version or on the panalytical_xml analysis module
+        self.handle_with_panalytical_module()
+        return self.__xrd_dict
+
+    def handle_with_panalytical_module(self):
+        """Handle the XRDML file by parsing the xml tree (and, optionally, the panalytical_xml parser).
+
+        The panalytical module extends and constructs some array data from the
+        experiment settings that come with the xml file.
+        """
+        self.parse_each_elm(parent_path='/', xml_node=self.xml_root)
+        nested_data_dict: Dict[str, Any] = {}
+        # Note: To use the panalytical lib
+        # Extract other numerical data e.g. 'hkl', 'Omega', '2Theta', CountTime etc
+        # using the panalytical_xml module
+        # parsed_data = XRDMLFile(self.__file_path)
+        # nested_data_dict = parsed_data.scan.ddict
+        fill_slash_sep_dict_from_nested_dict('/', nested_data_dict, self.__xrd_dict)
+
+    def process_node_text(self, parent_path, node_txt) -> None:
+        """Process the text of a node.
+
+        Parameters
+        ----------
+        parent_path : str
+            Starting string of the key when forming the full string key.
+        node_txt : str
+            Text from the node.
+
+        Returns
+        -------
+        None
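+
+        Examples
+        --------
+        Illustrative only: a node text such as 'k1=0.0, k2=0.5' under the
+        parent path '/a/b' is stored as {'/a/b/k1': '0.0', '/a/b/k2': '0.5'};
+        spaces inside keys are removed.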
+        """
+        key_val_pairs = []
+        # get key-val pairs
+        for sep in self.key_val_pair_sprtr:
+            if sep in node_txt:
+                key_val_pairs.extend(node_txt.split(sep))
+                break
+        # Separate key and value, build the full path and
+        # store them in the dict
+        if key_val_pairs:
+            for key_val in key_val_pairs:
+                for k_v_sep in self.key_val_sprtr:
+                    if k_v_sep in key_val:
+                        key, val = key_val.split(k_v_sep)
+                        key = key.replace(' ', '')
+                        self.__xrd_dict['/'.join([parent_path, key])] = val
+                        break
+        # Handle array data that comes as node text
+        else:
+            try:
+                self.__xrd_dict[parent_path] = transform_to_intended_dt(node_txt)
+            except ValueError:
+                warnings.warn(f'Element text {node_txt} is ignored from parsing!',
+                              IgnoreNodeTextWarning)
+
+    def parse_each_elm(self, parent_path, xml_node,
+                       multi_childs_tag: str = '',
+                       tag_extensions: Optional[List[int]] = None):
+        """Check each xml element and dispatch it to the intended handler.
+
+        Parameters
+        ----------
+        parent_path : str
+            Path to place at the start of the key composed from the element, e.g. '/'.
+        xml_node : XML.Element
+            Any element except processing instruction nodes.
+        multi_childs_tag : str
+            Tag that occurs on several child nodes.
+        tag_extensions : List[int]
+            List of extensions for the child tag if several children share the
+            same tag.
+
+        Returns
+        -------
+        None
+        """
+
+        tag = remove_namespace_from_tag(xml_node.tag)
+        # Take care of the special node with the 'entry' tag
+        if tag == 'entry':
+            parent_path = self.parse_entry_elm(parent_path, xml_node,
+                                               multi_childs_tag, tag_extensions)
+        else:
+            parent_path = self.parse_general_elm(parent_path, xml_node,
+                                                 multi_childs_tag, tag_extensions)
+
+        _, multi_childs_tag = self.has_multi_childs_with_same_tag(xml_node)
+        # List of tag extensions for child nodes which have the same tag.
+        tag_extensions = [0]
+        for child in iter(xml_node):
+            if child is not None:
+                self.parse_each_elm(parent_path, child,
+                                    multi_childs_tag, tag_extensions)
+
+    def has_multi_childs_with_same_tag(self, parent_node: ET.Element) -> Tuple[bool, str]:
+        """Check for multiple children that share the same tag.
+
+        Parameters
+        ----------
+        parent_node : ET.Element
+            Parent node that might have multiple children with the same tag.
+
+        Returns
+        -------
+        Tuple[bool, str]
+            (True if multiple children share the same tag, the tag).
+        """
+        tag: Optional[str] = None
+        for child in iter(parent_node):
+            temp_tag = remove_namespace_from_tag(child.tag)
+            if tag is None:
+                tag = temp_tag
+            else:
+                if tag == temp_tag:
+                    return (True, tag)
+
+        return (False, '')
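+
+    # Editor's note (illustration): when several siblings share a tag, the
+    # generated keys are suffixed with a counter. E.g. two sibling
+    # <positions> elements become .../positions_1 and .../positions_2, which
+    # is how paths such as
+    # '/xrdMeasurements/xrdMeasurement/scan/dataPoints/positions_2/startPosition'
+    # in the config mapping above arise.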
+
+    def parse_general_elm(self, parent_path, xml_node,
+                          multi_childs_tag, tag_extensions: List[int]):
+        """Handle a general element (any element except the entry element).
+
+        Parameters
+        ----------
+        parent_path : str
+            Path to place at the start of the key composed from the element, e.g. '/'.
+        xml_node : XML.Element
+            Any element except processing instruction and entry nodes.
+        multi_childs_tag : str
+            Tag that occurs on several siblings.
+        tag_extensions : List[int]
+            List of extensions for the siblings' tag if several siblings share
+            the same tag.
+
+        Returns
+        -------
+        str:
+            The new parent path.
+        """
+
+        tag = remove_namespace_from_tag(xml_node.tag)
+        if tag == multi_childs_tag:
+            new_ext = tag_extensions[-1] + 1
+            tag = tag + '_' + str(new_ext)
+            tag_extensions.append(new_ext)
+
+        if parent_path == '/':
+            parent_path = parent_path + tag
+        else:
+            # New parent path ends with the element tag
+            parent_path = '/'.join([parent_path, tag])
+
+        node_attr = xml_node.attrib
+        if node_attr:
+            for key, val in node_attr.items():
+                # Some attributes have a namespace
+                key = remove_namespace_from_tag(key)
+                key = key.replace(' ', '_')
+                path_extend = '/'.join([parent_path, key])
+                self.__xrd_dict[path_extend] = val
+
+        node_txt = xml_node.text
+        if node_txt:
+            self.process_node_text(parent_path, node_txt)
+
+        return parent_path
+
+    def parse_entry_elm(self, parent_path: str, xml_node: ET.Element,
+                        multi_childs_tag: str, tag_extensions: List[int]):
+        """Handle the entry element.
+
+        Parameters
+        ----------
+        parent_path : str
+            Path to place at the start of the key composed from the element, e.g. '/'.
+        xml_node : XML.Element
+            Any entry node.
+        multi_childs_tag : str
+            Tag that occurs on several siblings.
+        tag_extensions : List[int]
+            List of extensions for the siblings' tag if several siblings share
+            the same tag.
+
+        Returns
+        -------
+        str:
+            Parent path.
+        """
+
+        tag = remove_namespace_from_tag(xml_node.tag)
+
+        if tag == multi_childs_tag:
+            new_ext = tag_extensions[-1] + 1
+            tag_extensions.append(new_ext)
+            tag = tag + '_' + str(new_ext)
+
+        if parent_path == '/':
+            parent_path = '/' + tag
+        else:
+            # Parent path ends with the element tag
+            parent_path = '/'.join([parent_path, tag])
+
+        node_attr = xml_node.attrib
+        if node_attr:
+            for key, val in node_attr.items():
+                # Some attributes have a namespace
+                key = remove_namespace_from_tag(key)
+                path_extend = '/'.join([parent_path, key])
+                self.__xrd_dict[path_extend] = val
+
+        # The text of an entry element needs special care
+        node_txt = xml_node.text
+        if node_txt:
+            self.process_node_text(parent_path, node_txt)
+
+        return parent_path
+
+
+class FormatParser:
+    """A class to identify and parse different file formats."""
+
+    def __init__(self, file_path):
+        """Construct a FormatParser object.
+
+        Parameters
+        ----------
+        file_path : str
+            XRD file to be parsed.
+
+        Returns
+        -------
+        None
+        """
+        self.file_path = file_path
+        self.file_parser = XRDMLParser(self.file_path)
+        # terminological name of the file, used to pick the proper mapping from the config file
+        self.file_term = 'xrdml_' + self.file_parser.xrdml_version
+
+    def get_file_format(self):
+        """Identify the format of a given file.
+
+        Returns
+        -------
+        str:
+            The file extension of the file.
+        """
+        file_extension = ''.join(Path(self.file_path).suffixes)
+        return file_extension
+
+    def parse_xrdml(self):
+        """Parse a Panalytical XRDML file.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the parsed XRDML data.
+        """
+        return self.file_parser.get_slash_separated_xrd_dict()
+
+    def parse_panalytical_udf(self):
+        """Parse a Panalytical .udf file.
+
+        Returns
+        -------
+        None
+            Placeholder for parsing .udf files.
+        """
+
+    def parse_bruker_raw(self):
+        """Parse a Bruker .raw file.
+
+        Returns
+        -------
+        None
+            Placeholder for parsing .raw files.
+        """
+
+    def parse_bruker_xye(self):
+        """Parse a Bruker .xye file.
+
+        Returns
+        -------
+        None
+            Placeholder for parsing .xye files.
+        """
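+
+    # Editor's sketch of typical use (file name is a placeholder):
+    #   parser = FormatParser("experiment.xrdml")
+    #   parser.parse_and_populate_template(template, config_dict={}, eln_dict={})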
+
+    # pylint: disable=import-outside-toplevel
+    def parse_and_populate_template(self, template, config_dict, eln_dict):
+        """Parse the xrd file into a dict and fill the template.
+
+        Parameters
+        ----------
+        template : Template
+            NeXus template generated from a NeXus application definition.
+        config_dict : dict
+            A dict generated from the Python config file.
+        eln_dict : dict
+            A dict generated from the ELN yaml file.
+
+        Returns
+        -------
+        None
+        """
+
+        xrd_dict = self.parse()
+        if len(config_dict) == 0 and self.file_parser.xrdml_version == '1.5':
+            from pynxtools.dataconverter.readers.xrd.config import xrdml
+            config_dict = xrdml
+        feed_xrdml_to_template(template, xrd_dict, eln_dict,
+                               file_term=self.file_term, config_dict=config_dict)
+
+    def parse(self):
+        """Parse the file based on its format.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the parsed data.
+
+        Raises
+        ------
+        ValueError: If the file format is unsupported.
+        """
+        file_format = self.get_file_format()
+        slash_sep_dict = {}
+        if file_format == ".xrdml":
+            slash_sep_dict = self.parse_xrdml()
+        # elif file_format == ".udf":
+        #     return self.parse_panalytical_udf()
+        # elif file_format == ".raw":
+        #     return self.parse_bruker_raw()
+        # elif file_format == ".xye":
+        #     return self.parse_bruker_xye()
+        # else:
+        #     raise ValueError(f"Unsupported file format: {file_format}")
+        return slash_sep_dict
+
+
+def parse_and_fill_template(template, xrd_file, config_dict, eln_dict):
+    """Parse the xrd file and fill the template with data from that file.
+
+    Parameters
+    ----------
+    template : Template[dict]
+        Template generated from an nxdl definition.
+    xrd_file : str
+        Name of the xrd file, including the extension.
+    config_dict : Dict
+        Dictionary from config.json or a similar file.
+    eln_dict : Dict
+        Plain, '/'-separated dictionary from the ELN yaml file.
+    """
+
+    format_parser = FormatParser(xrd_file)
+    format_parser.parse_and_populate_template(template, config_dict, eln_dict)
diff --git a/pynxtools/dataconverter/template.py b/pynxtools/dataconverter/template.py
index 286cbaaed..fa6907d36 100644
--- a/pynxtools/dataconverter/template.py
+++ b/pynxtools/dataconverter/template.py
@@ -114,6 +114,24 @@ def get_documented(self):
         """Returns a dictionary of all the optionalities merged into one."""
         return {**self.optional, **self.recommended, **self.required}
 
+    def __contains__(self, k):
+        """
+        Supports the in operator for the nested Template keys
+        """
+        return any([
+            k in self.optional,
+            k in self.recommended,
+            k in self.undocumented,
+            k in self.required
+        ])
+
+    def get(self, key: str, default=None):
+        """Proxies the get function to our internal __getitem__"""
+        try:
+            return self[key]
+        except KeyError:
+            return default
+
     def __getitem__(self, k):
         """Handles how values are accessed from the Template object."""
         # Try setting item in all else throw error. Does not append to default.
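The two methods added above give `Template` a dict-like lookup interface. A minimal sketch of the intended behavior (editor's illustration; which internal bucket an assignment lands in depends on `Template` internals):

```
from pynxtools.dataconverter.template import Template

template = Template()
template["/ENTRY[entry]/title"] = "XRD scan"

# __contains__ checks all optionality buckets at once
assert "/ENTRY[entry]/title" in template

# get() proxies __getitem__ and falls back to the default on KeyError
assert template.get("/ENTRY[entry]/missing", "n/a") == "n/a"
```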
@@ -130,7 +148,10 @@ def __getitem__(self, k):
                     return self.required[k]
                 except KeyError:
                     return self.undocumented[k]
-        return self.get_optionality(k)
+        if k in ("required", "optional", "recommended", "undocumented"):
+            return self.get_optionality(k)
+        raise KeyError("Only paths starting with '/' or one of [optional_parents, "
+                       "lone_groups, required, optional, recommended, undocumented] can be used.")
 
     def clear(self):
         """Clears all data stored in the Template object."""
@@ -171,12 +192,15 @@ def add_entry(self, entry_name):
 
     def __delitem__(self, key):
         """Delete a dictionary key or template key"""
-
         if key in self.optional.keys():
             del self.optional[key]
-
-        if key in self.required.keys():
+        elif key in self.required.keys():
             del self.required[key]
-
-        if key in self.recommended.keys():
+        elif key in self.recommended.keys():
             del self.recommended[key]
+        elif key in self.undocumented.keys():
+            del self.undocumented[key]
+        else:
+            raise KeyError(f"{key} does not exist.")
diff --git a/pynxtools/dataconverter/writer.py b/pynxtools/dataconverter/writer.py
index 486d48ace..81b3045da 100644
--- a/pynxtools/dataconverter/writer.py
+++ b/pynxtools/dataconverter/writer.py
@@ -105,6 +105,7 @@ def handle_shape_entries(data, file, path):
     return layout
 
 
+# pylint: disable=too-many-locals, inconsistent-return-statements
 def handle_dicts_entries(data, grp, entry_name, output_path, path):
     """Handle function for dictionaries found as value of the nexus file.
 
@@ -163,7 +164,13 @@ def handle_dicts_entries(data, grp, entry_name, output_path, path):
         raise InvalidDictProvided("A dictionary was provided to the template but it didn't"
                                   " fall into any of the know cases of handling"
                                   " dictionaries. This occured for: " + entry_name)
-    return grp[entry_name]
+    # Check whether the link has been established or not
+    try:
+        return grp[entry_name]
+    except KeyError:
+        logger.warning("No path '%s' available to be linked.", path)
+        del grp[entry_name]
+        return None
 
 
 class Writer:
@@ -171,26 +178,27 @@ class Writer:
 
     Args:
         data (dict): Dictionary containing the data to convert.
-        nxdl_path (str): Path to the nxdl file to use during conversion.
+        nxdl_f_path (str): Path to the nxdl file to use during conversion.
         output_path (str): Path to the output NeXus file.
 
     Attributes:
         data (dict): Dictionary containing the data to convert.
-        nxdl_path (str): Path to the nxdl file to use during conversion.
+        nxdl_f_path (str): Path to the nxdl file to use during conversion.
         output_path (str): Path to the output NeXus file.
         output_nexus (h5py.File): The h5py file object to manipulate output file.
         nxdl_data (dict): Stores xml data from given nxdl file to use during conversion.
         nxs_namespace (str): The namespace used in the NXDL tags. Helps search for XML children.
""" - def __init__(self, data: dict = None, nxdl_path: str = None, - output_path: str = None, io_mode: str = "w"): + def __init__(self, data: dict = None, + nxdl_f_path: str = None, + output_path: str = None): """Constructs the necessary objects required by the Writer class.""" self.data = data - self.nxdl_path = nxdl_path + self.nxdl_f_path = nxdl_f_path self.output_path = output_path - self.output_nexus = h5py.File(self.output_path, io_mode) - self.nxdl_data = ET.parse(self.nxdl_path).getroot() + self.output_nexus = h5py.File(self.output_path, "w") + self.nxdl_data = ET.parse(self.nxdl_f_path).getroot() self.nxs_namespace = get_namespace(self.nxdl_data) def __nxdl_to_attrs(self, path: str = '/') -> dict: @@ -235,8 +243,9 @@ def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Grou return grp return self.output_nexus[parent_path_hdf5] - def write(self): - """Writes the NeXus file with previously validated data from the reader with NXDL attrs.""" + def _put_data_into_hdf5(self): + """Store data in hdf5 in in-memory file or file.""" + hdf5_links_for_later = [] def add_units_key(dataset, path): @@ -274,6 +283,9 @@ def add_units_key(dataset, path): for links in hdf5_links_for_later: dataset = handle_dicts_entries(*links) + if dataset is None: + # If target of a link is invalid to be linked + del self.data[links[-1]] for path, value in self.data.items(): try: @@ -288,6 +300,7 @@ def add_units_key(dataset, path): if entry_name[0] != "@": path_hdf5 = helpers.convert_data_dict_path_to_hdf5_path(path) + add_units_key(self.output_nexus[path_hdf5], path) else: # consider changing the name here the lvalue can also be group! @@ -297,4 +310,9 @@ def add_units_key(dataset, path): raise IOError(f"Unknown error occured writing the path: {path} " f"with the following message: {str(exc)}") from exc - self.output_nexus.close() + def write(self): + """Writes the NeXus file with previously validated data from the reader with NXDL attrs.""" + try: + self._put_data_into_hdf5() + finally: + self.output_nexus.close() diff --git a/pynxtools/eln_mapper/README.md b/pynxtools/eln_mapper/README.md new file mode 100644 index 000000000..13f759466 --- /dev/null +++ b/pynxtools/eln_mapper/README.md @@ -0,0 +1,19 @@ +# ELN generator +This is a helper tool for generating eln +- The simple eln generator that can be used in a console or jupyter-notebook +- Scheme based eln generator that can be used in NOMAD and the eln can be used as a custom scheme in NOMAD. + +``` +$ eln_generator --options + +Options: + --nxdl TEXT Name of NeXus definition without extension + (.nxdl.xml). [required] + --skip-top-levels INTEGER To skip upto a level of parent hierarchical structure. + E.g. for default 1 the part Entry[ENTRY] from + /Entry[ENTRY]/Instrument[INSTRUMENT]/... will + be skiped. [default: 1] + --output-file TEXT Name of output file. + --eln-type [eln|scheme_eln] Choose a type from the eln or scheme_eln. [required] + --help Show this message and exit. +``` diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_utils.py b/pynxtools/eln_mapper/__init__.py similarity index 59% rename from pynxtools/dataconverter/readers/apm/utils/apm_utils.py rename to pynxtools/eln_mapper/__init__.py index f04c329ee..7f1819634 100644 --- a/pynxtools/dataconverter/readers/apm/utils/apm_utils.py +++ b/pynxtools/eln_mapper/__init__.py @@ -1,4 +1,3 @@ -# # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. 
@@ -15,12 +14,3 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-"""Set of utility tools for parsing file formats used by atom probe."""
-
-# pylint: disable=no-member
-
-# ifes_apt_tc_data_modeling replaces now the previously here stored
-# convenience functions which translated human-readable ion names into
-# isotope_vector descriptions and vice versa as proposed by M. Kuehbach et al. in
-# DOI: 10.1017/S1431927621012241 to the human-readable ion names which are use
-# in P. Felfer et al."s atom probe toolbox
diff --git a/pynxtools/eln_mapper/eln.py b/pynxtools/eln_mapper/eln.py
new file mode 100644
index 000000000..078dd4d18
--- /dev/null
+++ b/pynxtools/eln_mapper/eln.py
@@ -0,0 +1,189 @@
+"""Functions that directly or indirectly help to render an ELN.
+Note that this is not the schema ELN that is rendered in NOMAD, but rather
+the ELN that is generated from the schema ELN."""
+
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import re
+from typing import Any, Dict
+import xml.etree.ElementTree as ET
+import yaml
+
+from pynxtools.dataconverter.helpers import generate_template_from_nxdl
+from pynxtools.dataconverter.template import Template
+from pynxtools.nexus.nexus import get_nexus_definitions_path
+
+
+def retrieve_nxdl_file(nexus_def: str) -> str:
+    """Retrieve the full path of a NeXus definition file.
+
+    Parameters
+    ----------
+    nexus_def : str
+        Name of a NeXus definition, e.g. NXmpes
+
+    Returns
+    -------
+    str
+        Full path of the file, e.g. <definitions-path>/NXmpes.nxdl.xml
+
+    Raises
+    ------
+    ValueError
+        Need a correct definition name, e.g. NXmpes, not NXmpes.nxdl.xml
+    """
+    definition_path = get_nexus_definitions_path()
+
+    def_path = os.path.join(definition_path,
+                            'contributed_definitions',
+                            f"{nexus_def}.nxdl.xml")
+    if os.path.exists(def_path):
+        return def_path
+
+    # Note: base classes live in 'base_classes' in the definitions repository
+    def_path = os.path.join(definition_path,
+                            'base_classes',
+                            f"{nexus_def}.nxdl.xml")
+    if os.path.exists(def_path):
+        return def_path
+
+    def_path = os.path.join(definition_path,
+                            'applications',
+                            f"{nexus_def}.nxdl.xml")
+    if os.path.exists(def_path):
+        return def_path
+
+    raise ValueError(f"Could not find an NXDL file for {nexus_def};"
+                     " check the definition name.")
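+
+# Editor's note (illustration): retrieve_nxdl_file('NXmpes') looks for
+# NXmpes.nxdl.xml first under contributed_definitions, then base_classes,
+# then applications of the local NeXus definitions checkout.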
+
+
+def get_empty_template(nexus_def: str) -> Template:
+    """Generate an empty template for the given NeXus definition.
+
+    Parameters
+    ----------
+    nexus_def : str
+        Name of a NeXus definition, e.g. NXmpes
+
+    Returns
+    -------
+    Template
+    """
+
+    nxdl_file = retrieve_nxdl_file(nexus_def)
+    nxdl_root = ET.parse(nxdl_file).getroot()
+    template = Template()
+    generate_template_from_nxdl(nxdl_root, template)
+
+    return template
+
+
+def take_care_of_special_concepts(key: str):
+    """Handle special concepts such as @units."""
+    def unit_concept():
+        return {'value': None,
+                'unit': None}
+
+    if key == '@units':
+        return unit_concept()
+
+
+def get_recursive_dict(concatenated_key: str,
+                       recursive_dict: Dict[str, Any],
+                       level_to_skip: int) -> None:
+    """Build up a recursive dict from a concatenated (slash-separated) key.
+
+    Parameters
+    ----------
+    concatenated_key : str
+        String of keys separated by slashes.
+    recursive_dict : dict
+        Dict for recursively storing the data.
+    level_to_skip : int
+        Number of leading hierarchy levels to skip.
+    """
+    # Split keys like '/entry[ENTRY]/position[POSITION]/xx',
+    # skipping the first empty '' and as many top parts as directed by the user.
+    key_li = concatenated_key.split('/')[level_to_skip + 1:]
+    # list of keys that need special consideration
+    sp_key_li = ['@units']
+    last_key = ""
+    last_dict = {}
+    for key in key_li:
+        if '[' in key and '/' not in key:
+            key = re.findall(r'\[(.*?)\]', key,)[0].capitalize()
+        if not key:
+            continue
+        last_key = key
+        last_dict = recursive_dict
+        if key in recursive_dict:
+            if recursive_dict[key] is None:
+                recursive_dict[key] = {}
+                recursive_dict = recursive_dict[key]
+
+            else:
+                if key in sp_key_li:
+                    recursive_dict.update(take_care_of_special_concepts(key))
+                else:
+                    recursive_dict = recursive_dict[key]
+        else:
+            if key in sp_key_li:
+                recursive_dict.update(take_care_of_special_concepts(key))
+            else:
+                recursive_dict[key] = {}
+                recursive_dict = recursive_dict[key]
+    # For special keys the cleaning part happens inside take_care_of_special_concepts().
+    if last_key not in sp_key_li:
+        last_dict[last_key] = None
+
+
+def generate_eln(nexus_def: str, eln_file: str = '', level_to_skip: int = 1) -> None:
+    """Generate an ELN from an application definition.
+
+    Parameters
+    ----------
+    nexus_def : str
+        Name of a NeXus definition, e.g. NXmpes
+    eln_file : str
+        Name of the output yaml file; derived from nexus_def if empty.
+    level_to_skip : int
+        Number of leading hierarchy levels to skip.
+
+    Returns
+    -------
+    None
+    """
+
+    template = get_empty_template(nexus_def)
+    recursive_dict: Dict[str, Any] = {}
+    for key, _ in template.items():
+        get_recursive_dict(key, recursive_dict, level_to_skip)
+
+    name_split = eln_file.rsplit('.')
+    if not eln_file:
+        if nexus_def[0:2] == 'NX':
+            raw_name = nexus_def[2:]
+            eln_file = raw_name + '.yaml'
+
+    elif len(name_split) == 1:
+        eln_file = eln_file + '.yaml'
+
+    elif len(name_split) == 2 and name_split[1] == 'yaml':
+        pass
+    else:
+        raise ValueError("The ELN file should come with a 'yaml' extension or without any extension.")
+
+    with open(eln_file, encoding='utf-8', mode='w') as eln_f:
+        yaml.dump(recursive_dict, sort_keys=False, stream=eln_f)
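A minimal usage sketch of `generate_eln` (editor's illustration; NXmpes is just an example definition name):

```
from pynxtools.eln_mapper.eln import generate_eln

# Writes mpes.yaml (name derived from the definition) into the working directory.
generate_eln("NXmpes", eln_file="", level_to_skip=1)
```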
diff --git a/pynxtools/eln_mapper/eln_mapper.py b/pynxtools/eln_mapper/eln_mapper.py
new file mode 100644
index 000000000..d23918f73
--- /dev/null
+++ b/pynxtools/eln_mapper/eln_mapper.py
@@ -0,0 +1,75 @@
+"""This module generates an ELN in a hierarchical format according to a NeXus definition."""
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import click
+from pynxtools.eln_mapper.eln import generate_eln
+from pynxtools.eln_mapper.scheme_eln import generate_scheme_eln
+
+
+@click.command()
+@click.option(
+    '--nxdl',
+    required=True,
+    help="Name of the NeXus definition without the extension (.nxdl.xml)."
+)
+@click.option(
+    '--skip-top-levels',
+    default=1,
+    required=False,
+    type=int,
+    show_default=True,
+    help=("Number of levels of the parent hierarchy to skip. E.g. with the default 1"
+          " the part Entry[ENTRY] of /Entry[ENTRY]/Instrument[INSTRUMENT]/... will be skipped.")
+)
+@click.option(
+    '--output-file',
+    required=False,
+    default='eln_data',
+    help=('Name of the output file.')
+)
+@click.option(
+    '--eln-type',
+    required=True,
+    type=click.Choice(['eln', 'scheme_eln'], case_sensitive=False),
+    default='eln'
+)
+def get_eln(nxdl: str,
+            skip_top_levels: int,
+            output_file: str,
+            eln_type: str):
+    """Generate an ELN in yaml file format.
+
+    Parameters
+    ----------
+    nxdl : str
+        Name of a NeXus definition, e.g. NXmpes
+    skip_top_levels : int
+        Number of hierarchy levels to skip.
+    output_file : str
+        Name of the output file.
+    eln_type : str
+        Type of ELN to generate, either 'eln' or 'scheme_eln'.
+    """
+    eln_type = eln_type.lower()
+    if eln_type == 'eln':
+        generate_eln(nxdl, output_file, skip_top_levels)
+    elif eln_type == 'scheme_eln':
+        generate_scheme_eln(nxdl, eln_file_name=output_file)
+
+
+if __name__ == "__main__":
+    get_eln()  # pylint: disable=no-value-for-parameter
diff --git a/pynxtools/eln_mapper/scheme_eln.py b/pynxtools/eln_mapper/scheme_eln.py
new file mode 100644
index 000000000..1152bbd08
--- /dev/null
+++ b/pynxtools/eln_mapper/scheme_eln.py
@@ -0,0 +1,281 @@
+"""This module is intended to generate a schema ELN, which is usually rendered in NOMAD."""
+
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Dict, Any
+import xml.etree.ElementTree as ET
+import yaml
+from pynxtools.eln_mapper.eln import retrieve_nxdl_file
+from pynxtools.dataconverter.helpers import remove_namespace_from_tag
+
+
+NEXUS_TYPE_TO_NUMPY_TYPE = {'NX_CHAR': {'convert_typ': 'str',
+                                        'component_nm': 'StringEditQuantity',
+                                        'default_unit_display': ''},
+                            'NX_BOOLEAN': {'convert_typ': 'bool',
+                                           'component_nm': 'BoolEditQuantity',
+                                           'default_unit_display': ''},
+                            'NX_DATE_TIME': {'convert_typ': 'Datetime',
+                                             'component_nm': 'DateTimeEditQuantity',
+                                             'default_unit_display': ''},
+                            'NX_FLOAT': {'convert_typ': 'np.float64',
+                                         'component_nm': 'NumberEditQuantity',
+                                         'default_unit_display': ''},
+                            'NX_INT': {'convert_typ': 'int',
+                                       'component_nm': 'NumberEditQuantity',
+                                       'default_unit_display': ''},
+                            'NX_NUMBER': {'convert_typ': 'np.float64',
+                                          'component_nm': 'NumberEditQuantity',
+                                          'default_unit_display': ''},
+                            '': {'convert_typ': '',
+                                 'component_nm': '',
+                                 'default_unit_display': ''},
+                            }
+
+
+def construct_field_structure(fld_elem, quantities_dict):
+    """Construct the field structure (type, value, unit, annotations) for a field.
+
+    Parameters
+    ----------
+    fld_elem : ET.Element
+        XML element of the NXDL field.
+    quantities_dict : dict
+        Dict collecting the quantities of the enclosing section.
+    """
+    elm_attr = fld_elem.attrib
+    fld_nm = elm_attr['name'].lower()
+    quantities_dict[fld_nm] = {}
+    fld_dict = quantities_dict[fld_nm]
+
+    # handle type
+    if 'type' in elm_attr:
+        nx_fld_typ = elm_attr['type']
+    else:
+        nx_fld_typ = 'NX_CHAR'
+
+    # Fall back to the generic entry for types without a known mapping
+    if nx_fld_typ not in NEXUS_TYPE_TO_NUMPY_TYPE:
+        nx_fld_typ = ''
+    cov_fld_typ = NEXUS_TYPE_TO_NUMPY_TYPE[nx_fld_typ]['convert_typ']
+
+    fld_dict['type'] = cov_fld_typ
+    if 'units' in elm_attr:
+        fld_dict['unit'] = ""
+    fld_dict['value'] = ""
+
+    # handle m_annotation
+    m_annotation = {'m_annotations': {'eln':
+                                      {'component':
+                                       NEXUS_TYPE_TO_NUMPY_TYPE[nx_fld_typ]['component_nm'],
+                                       'defaultDisplayUnit':
+                                       (NEXUS_TYPE_TO_NUMPY_TYPE[nx_fld_typ]
+                                        ['default_unit_display'])}}}
+    fld_dict.update(m_annotation)
+
+    # handle description
+    construct_description(fld_elem, fld_dict)
+
+
+def construct_description(elm: ET.Element, concept_dict: Dict) -> None:
+    """Collect the description from the concept's doc element."""
+    desc_text = ''
+    for child_elm in elm:
+        tag = remove_namespace_from_tag(child_elm.tag)
+        if tag == 'doc':
+            desc_text = child_elm.text
+            desc_text = ' '.join([x.strip() for x in desc_text.split('\n')])
+            break
+
+    concept_dict['description'] = desc_text
+
+
+def construct_group_structure(grp_elm: ET.Element, subsections: Dict) -> None:
+    """Construct a group structure as follows:
+
+    <group_name>:
+        section:
+            m_annotations:
+                eln:
+                    overview: true
+
+    Parameters
+    ----------
+    grp_elm : ET.Element
+        Group element.
+    subsections : Dict
+        Dict to include the group recursively.
+    """
+
+    default_m_annot = {'m_annotations': {'eln': {'overview': True}}}
+
+    elm_attrib = grp_elm.attrib
+    grp_desig = ""
+    if 'name' in elm_attrib:
+        grp_desig = elm_attrib['name'].capitalize()
+    elif 'type' in elm_attrib:
+        grp_desig = elm_attrib['type'][2:].capitalize()
+
+    subsections[grp_desig] = {}
+    grp_dict = subsections[grp_desig]
+
+    # add section in group
+    grp_dict['section'] = {}
+    section = grp_dict['section']
+    section.update(default_m_annot)
+
+    # pass the group element on for recursive search
+    scan_xml_element_recursively(grp_elm, section)
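+
+
+# Editor's illustration (not part of the module): for an NXDL field
+# <field name="start_time" type="NX_DATE_TIME">, construct_field_structure()
+# yields roughly
+#   {'start_time': {'type': 'Datetime',
+#                   'value': '',
+#                   'm_annotations': {'eln': {'component': 'DateTimeEditQuantity',
+#                                             'defaultDisplayUnit': ''}},
+#                   'description': '...'}}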
+
+
+def _should_skip_iteration(elm: ET.Element) -> bool:
+    """Define the elements that should be skipped during iteration.
+
+    Parameters
+    ----------
+    elm : ET.Element
+        The element to investigate for skipping.
+    """
+    attr = elm.attrib
+    elm_type = ''
+    if 'type' in attr:
+        elm_type = attr['type']
+    if elm_type in ['NXentry']:
+        return True
+    return False
+
+
+def scan_xml_element_recursively(nxdl_element: ET.Element,
+                                 recursive_dict: Dict,
+                                 root_name: str = "",
+                                 reader_name: str = '',
+                                 is_root: bool = False) -> None:
+    """Scan xml elements and pass each element to the handler for its element type.
+
+    Parameters
+    ----------
+    nxdl_element : ET.Element
+        The xml element whose descendants will be scanned.
+    recursive_dict : Dict
+        A dict that stores the hierarchical structure of the scheme eln.
+    root_name : str, optional
+        Name of the root that users want to see as the name of their
+        application, e.g. MPES; by default "".
+    reader_name : str, optional
+        Preferred name of the reader.
+    is_root : bool, optional
+        Declare the element as root or not, by default False.
+    """
+
+    if is_root:
+        # Note for later: create a new function to handle the root part
+        nxdl = 'NX.nxdl'
+        recursive_dict[root_name] = {'base_sections':
+                                     ['nomad.datamodel.metainfo.eln.NexusDataConverter',
+                                      'nomad.datamodel.data.EntryData']}
+
+        m_annotations: Dict = {'m_annotations': {'template': {'reader': reader_name,
+                                                              'nxdl': nxdl},
+                                                 'eln': {'hide': []}}}
+
+        recursive_dict[root_name].update(m_annotations)
+
+        recursive_dict = recursive_dict[root_name]
+
+    # Define quantities to take care of fields
+    quantities: Dict = None
+    subsections: Dict = None
+    for elm in nxdl_element:
+        tag = remove_namespace_from_tag(elm.tag)
+        # Skip the NXentry group itself but still consider its child elements
+        if _should_skip_iteration(elm):
+            scan_xml_element_recursively(elm, recursive_dict)
+            continue
+        if tag == 'field':
+            if quantities is None:
+                recursive_dict['quantities'] = {}
+                quantities = recursive_dict['quantities']
+            construct_field_structure(elm, quantities)
+        if tag == 'group':
+            if subsections is None:
+                recursive_dict['sub_sections'] = {}
+                subsections = recursive_dict['sub_sections']
+            construct_group_structure(elm, subsections)
+
+
+def get_eln_recursive_dict(recursive_dict: Dict, nexus_full_file: str) -> None:
+    """Develop a recursive dict that holds the hierarchical structure of the scheme eln.
+
+    Parameters
+    ----------
+    recursive_dict : Dict
+        A dict that stores the hierarchical structure of the scheme eln.
+    nexus_full_file : str
+        Full path of the NeXus definition file, e.g. <path>/NXmpes.nxdl.xml
+    """
+
+    nxdl_root = ET.parse(nexus_full_file).getroot()
+    root_name = nxdl_root.attrib['name'][2:] if 'name' in nxdl_root.attrib else ""
+    recursive_dict['definitions'] = {'name': '',
+                                     'sections': {}}
+    sections = recursive_dict['definitions']['sections']
+
+    scan_xml_element_recursively(nxdl_root, sections,
+                                 root_name=root_name, is_root=True)
+
+
+def generate_scheme_eln(nexus_def: str, eln_file_name: str = None) -> None:
+    """Generate a schema eln that should go to NOMAD while running the reader.
+
+    The output file will be <name>.scheme.archive.yaml.
+
+    Parameters
+    ----------
+    nexus_def : str
+        Name of a NeXus definition, e.g. NXmpes
+    eln_file_name : str
+        Name of the output file, e.g. mpes
+
+    Returns
+    -------
+    None
+    """
+
+    file_parts: list = []
+    out_file_ext = 'scheme.archive.yaml'
+    raw_name = ""
+    out_file = ""
+
+    nxdl_file = retrieve_nxdl_file(nexus_def)
+
+    if eln_file_name is None:
+        # raw_name from e.g. 
//NXmpes.nxdl.xml + raw_name = nxdl_file.split('/')[-1].split('.')[0][2:] + out_file = '.'.join([raw_name, out_file_ext]) + else: + file_parts = eln_file_name.split('.') + if len(file_parts) == 1: + raw_name = file_parts[0] + out_file = '.'.join([raw_name, out_file_ext]) + elif len(file_parts) == 4 and '.'.join(file_parts[1:]) == out_file_ext: + out_file = eln_file_name + elif nexus_def[0:2] == 'NX': + raw_name = nexus_def[2:] + out_file = '.'.join([raw_name, out_file_ext]) + else: + raise ValueError("Check for correct NeXus definition and output file name.") + + recursive_dict: Dict[str, Any] = {} + get_eln_recursive_dict(recursive_dict, nxdl_file) + + with open(out_file, mode='w', encoding='utf-8') as out_f: + yaml.dump(recursive_dict, sort_keys=False, stream=out_f) diff --git a/pynxtools/nexus/nexus.py b/pynxtools/nexus/nexus.py index 9afa711fb..ef5f64cd5 100644 --- a/pynxtools/nexus/nexus.py +++ b/pynxtools/nexus/nexus.py @@ -258,8 +258,9 @@ def get_hdf_path(hdf_info): return hdf_info['hdf_node'].name.split('/')[1:] +# pylint: disable=too-many-arguments,too-many-locals @lru_cache(maxsize=None) -def get_inherited_hdf_nodes(nx_name: str = None, elem: ET.Element = None, # pylint: disable=too-many-arguments,too-many-locals +def get_inherited_hdf_nodes(nx_name: str = None, elem: ET.Element = None, hdf_node=None, hdf_path=None, hdf_root=None, attr=False): """Returns a list of ET.Element for the given path.""" # let us start with the given definition file @@ -563,8 +564,11 @@ def hdf_node_to_self_concept_path(hdf_info, logger): class HandleNexus: """documentation""" + + # pylint: disable=too-many-instance-attributes def __init__(self, logger, nexus_file, - d_inq_nd=None, c_inq_nd=None): + d_inq_nd=None, c_inq_nd=None, + is_in_memory_file=False): self.logger = logger local_dir = os.path.abspath(os.path.dirname(__file__)) @@ -572,6 +576,7 @@ def __init__(self, logger, nexus_file, os.path.join(local_dir, '../../tests/data/nexus/201805_WSe2_arpes.nxs') self.parser = None self.in_file = None + self.is_hdf5_file_obj = is_in_memory_file self.d_inq_nd = d_inq_nd self.c_inq_nd = c_inq_nd # Aggregating hdf path corresponds to concept query node @@ -638,19 +643,28 @@ def full_visit(self, root, hdf_node, name, func): def process_nexus_master_file(self, parser): """Process a nexus master file by processing all its nodes and their attributes""" self.parser = parser - self.in_file = h5py.File( - self.input_file_name[0] - if isinstance(self.input_file_name, list) - else self.input_file_name, 'r' - ) - self.full_visit(self.in_file, self.in_file, '', self.visit_node) - if self.d_inq_nd is None and self.c_inq_nd is None: - get_default_plotable(self.in_file, self.logger) - # To log the provided concept and concepts founded - if self.c_inq_nd is not None: - for hdf_path in self.hdf_path_list_for_c_inq_nd: - self.logger.info(hdf_path) - self.in_file.close() + try: + if not self.is_hdf5_file_obj: + self.in_file = h5py.File( + self.input_file_name[0] + if isinstance(self.input_file_name, list) + else self.input_file_name, 'r' + ) + else: + self.in_file = self.input_file_name + + self.full_visit(self.in_file, self.in_file, '', self.visit_node) + + if self.d_inq_nd is None and self.c_inq_nd is None: + get_default_plotable(self.in_file, self.logger) + # To log the provided concept and concepts founded + if self.c_inq_nd is not None: + for hdf_path in self.hdf_path_list_for_c_inq_nd: + self.logger.info(hdf_path) + finally: + # To test if hdf_file is open print(self.in_file.id.valid) + self.in_file.close() + # To test if 
hdf_file is open print(self.in_file.id.valid) @click.command() diff --git a/pynxtools/nexus/nxdl_utils.py b/pynxtools/nexus/nxdl_utils.py index 706390a7c..aa64d5caa 100644 --- a/pynxtools/nexus/nxdl_utils.py +++ b/pynxtools/nexus/nxdl_utils.py @@ -701,6 +701,9 @@ def get_node_at_nxdl_path(nxdl_path: str = None, we are looking for or the root elem from a previously loaded NXDL file and finds the corresponding XML element with the needed attributes.""" try: + if nxdl_path.count("/") == 1 and nxdl_path not in ("/ENTRY", "/entry"): + elem = None + nx_name = "NXroot" (class_path, nxdlpath, elist) = get_inherited_nodes(nxdl_path, nx_name, elem) except ValueError as value_error: if exc: diff --git a/pyproject.toml b/pyproject.toml index 93917652e..d2c7853f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [build-system] requires = ["setuptools>=64.0.1", "setuptools-scm[toml]>=6.2"] -build-backend = "setuptools.build_meta" +backend-path = ["pynxtools"] +build-backend = "_build_wrapper" [project] name = "pynxtools" @@ -8,14 +9,15 @@ dynamic = ["version"] authors = [ { name = "The NOMAD Authors" }, ] -description = "Extend NeXus for materials science experiment and serve as a NOMAD parser implementation for NeXus." +description = "Extend NeXus for experiments and characterization in Materials Science and Materials Engineering and serve as a NOMAD parser implementation for NeXus." readme = "README.md" -license = { file = "LICENSE.txt" } -requires-python = ">=3.8,<3.11" +license = { file = "LICENSE" } +requires-python = ">=3.8,!=3.12" classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ] @@ -29,20 +31,23 @@ dependencies = [ "ase>=3.19.0", "flatdict>=4.0.1", "hyperspy>=1.7.5", - "ifes_apt_tc_data_modeling>=0.0.9", + "ifes_apt_tc_data_modeling>=0.1", "gitpython>=3.1.24", "pytz>=2021.1", "kikuchipy>=0.9.0", "pyxem>=0.15.1", "zipfile37==0.1.3", - "nionswift==0.16.8", + "nionswift>=0.16.8", "tzlocal<=4.3", "scipy>=1.7.1", "lark>=1.1.5", "requests", "requests_cache", + "mergedeep" ] +[options] +install_requires = "importlib-metadata ; python_version < '3.10'" [project.urls] "Homepage" = "https://github.com/FAIRmat-NFDI/pynxtools" "Bug Tracker" = "https://github.com/FAIRmat-NFDI/pynxtools/issues" @@ -66,6 +71,7 @@ dev = [ read_nexus = "pynxtools.nexus.nexus:main" dataconverter = "pynxtools.dataconverter.convert:convert_cli" nyaml2nxdl = "pynxtools.nyaml2nxdl.nyaml2nxdl:launch_tool" +generate_eln = "pynxtools.eln_mapper.eln_mapper:get_eln" [tool.setuptools.package-data] pynxtools = ["definitions/**/*.xml", "definitions/**/*.xsd"] @@ -77,5 +83,5 @@ pynxtools = ["definitions/**/*.xml", "definitions/**/*.xsd"] exclude = ["pynxtools/definitions*"] [tool.setuptools_scm] -version_scheme = "guess-next-dev" +version_scheme = "no-guess-dev" local_scheme = "node-and-date" diff --git a/tests/data/dataconverter/NXtest.nxdl.xml b/tests/data/dataconverter/NXtest.nxdl.xml index a2cc553fa..f4aa0aab4 100644 --- a/tests/data/dataconverter/NXtest.nxdl.xml +++ b/tests/data/dataconverter/NXtest.nxdl.xml @@ -60,6 +60,9 @@ A dummy entry to test optional parent check for required child. + + This is a required group in an optional group. 
+ diff --git a/tests/data/dataconverter/readers/apm/nomad_oasis_eln_schema_for_nx_apm/nxapm.schema.archive.yaml b/tests/data/dataconverter/readers/apm/nomad_oasis_eln_schema_for_nx_apm/nxapm.schema.archive.yaml index a750d3a80..ba4a00b3b 100644 --- a/tests/data/dataconverter/readers/apm/nomad_oasis_eln_schema_for_nx_apm/nxapm.schema.archive.yaml +++ b/tests/data/dataconverter/readers/apm/nomad_oasis_eln_schema_for_nx_apm/nxapm.schema.archive.yaml @@ -18,8 +18,7 @@ definitions: # This would be useful to make the default values set in `template` fixed. # Leave the hide key even if you want to pass an empty list like in this example. eln: - # hide: ['nxdl', 'reader'] - hide: [] + hide: ['nxdl', 'reader'] sub_sections: entry: section: @@ -29,24 +28,6 @@ definitions: eln: overview: true quantities: - attr_version: - type: - type_kind: Enum - type_data: - - 'nexus-fairmat-proposal successor of 9636feecb79bb32b828b1a9804269573256d7696' - description: Hashvalue of the NeXus application definition file - m_annotations: - eln: - component: RadioEnumEditQuantity - definition: - type: - type_kind: Enum - type_data: - - NXapm - description: NeXus NXDL schema to which this file conforms - m_annotations: - eln: - component: RadioEnumEditQuantity experiment_identifier: type: str description: GUID of the experiment @@ -58,40 +39,31 @@ definitions: description: Free text details about the experiment m_annotations: eln: - component: StringEditQuantity + component: RichTextEditQuantity start_time: type: Datetime - description: ISO 8601 time code with local time zone offset to UTC when the experiment started. + description: | + ISO 8601 time code with local time zone offset + to UTC when the experiment started. m_annotations: eln: component: DateTimeEditQuantity end_time: type: Datetime - description: ISO 8601 time code with local time zone offset to UTC when the experiment ended. + description: | + ISO 8601 time code with local time zone offset + to UTC when the experiment ended. m_annotations: eln: component: DateTimeEditQuantity - program: - type: str - description: Name of the program used to create this file. - m_annotations: - eln: - component: StringEditQuantity - program__attr_version: - type: str - description: Version plus build number, commit hash, or description of the program to support reproducibility. - m_annotations: - eln: - component: StringEditQuantity run_number: type: str - description: Identifier in the instrument control software given for this experiment. + description: | + Identifier in the instrument control software + given for this experiment. m_annotations: eln: component: StringEditQuantity - # experiment_documentation(NXnote): - # thumbnail(NXnote): - # attr_type: operation_mode: type: type_kind: Enum @@ -124,6 +96,173 @@ definitions: # m_annotations: # eln: # component: FileEditQuantity + + sample: + section: + description: | + Description of the sample from which the specimen was prepared or + site-specifically cut out using e.g. a focused-ion beam instrument. + m_annotations: + eln: + quantities: + composition: + type: str + shape: ['*'] + description: | + Chemical composition of the sample. The composition from e.g. + a composition table can be added as individual strings. + One string for each element with statements separated via a + single space. The string is expected to have the following format: + Symbol value unit +- stdev + + An example: B 1. +- 0.2, means + composition of boron 1. at.-% +- 0.2 at.%. 
+                  If a string contains only a symbol, this is interpreted
+                  such that the symbol specifies the matrix or remainder element
+                  of the composition table.
+
+                  If the unit is omitted or named %, this is interpreted as at.-%.
+                  The unit can be at% or wt%, but all strings have to use either
+                  atom or weight percent; mixtures are not allowed.
+                  No unit should be given for stdev, as it has to be the
+                  same unit as is used for the composition value.
+                m_annotations:
+                  eln:
+                    component: StringEditQuantity
+              grain_diameter:
+                type: np.float64
+                unit: micrometer
+                description: |
+                  Qualitative information about the grain size, here specifically
+                  described as the equivalent spherical diameter of an assumed
+                  average grain size for the crystal ensemble.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    minValue: 0.0
+                    defaultDisplayUnit: micrometer
+              grain_diameter_error:
+                type: np.float64
+                unit: micrometer
+                description: |
+                  Magnitude of the standard deviation of the grain_diameter.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    minValue: 0.0
+                    defaultDisplayUnit: micrometer
+              heat_treatment_temperature:
+                type: np.float64
+                unit: kelvin
+                description: |
+                  The temperature of the last heat treatment step before quenching.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    minValue: 0.0
+                    defaultDisplayUnit: kelvin
+              heat_treatment_temperature_error:
+                type: np.float64
+                unit: kelvin
+                description: |
+                  Magnitude of the standard deviation of the heat_treatment_temperature.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    minValue: 0.0
+                    defaultDisplayUnit: kelvin
+              heat_treatment_quenching_rate:
+                type: np.float64
+                unit: kelvin/second
+                description: |
+                  Rate of the last quenching step.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    minValue: 0.0
+                    defaultDisplayUnit: kelvin/second
+              heat_treatment_quenching_rate_error:
+                type: np.float64
+                unit: K/s
+                description: |
+                  Magnitude of the standard deviation of the heat_treatment_quenching_rate.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    minValue: 0.0
+                    defaultDisplayUnit: K/s
+        specimen:
+          section:
+            description: |
+              Details about the specimen and its immediate environment.
+            m_annotations:
+              eln:
+            quantities:
+              name:
+                type: str
+                description: |
+                  GUID which distinguishes the specimen from all others and especially
+                  the predecessor/origin from where the specimen was cut.
+                  In cases where the specimen was e.g. site-specifically cut from
+                  samples or in cases of an instrument session during which multiple
+                  specimens are loaded, the name has to be descriptive enough to
+                  resolve which specimen on e.g. the microtip array was taken.
+                  This field must not be used for an alias of the specimen.
+                  Instead, use short_title.
+                m_annotations:
+                  eln:
+                    component: StringEditQuantity
+              # sample_history:
+              #   type: str
+              #   description: |
+              #     Reference to the location of or a GUID providing as many details
+              #     as possible of the material, its microstructure, and its
+              #     thermo-chemo-mechanical processing/preparation history.
+              #   m_annotations:
+              #     eln:
+              #       component: StringEditQuantity
+              preparation_date:
+                type: Datetime
+                description: |
+                  ISO 8601 time code with local time zone offset to UTC
+                  when the measured specimen surface was prepared last time.
+                m_annotations:
+                  eln:
+                    component: DateTimeEditQuantity
+              is_polycrystalline:
+                type: bool
+                description: |
+                  Is the specimen, i.e. the tip, polycrystalline, i.e. does
+                  it include a grain or phase boundary?
+                m_annotations:
+                  eln:
+                    component: BoolEditQuantity
+              alias:
+                type: str
+                description: |
+                  Possibility to give an abbreviation of the specimen name field.
+                m_annotations:
+                  eln:
+                    component: StringEditQuantity
+              # atom_types should be a list of strings
+              # atom_types:
+              #   type: str
+              #   shape: ['*']
+              #   description: |
+              #     Use Hill's system for listing elements of the periodic table which
+              #     are inside or attached to the surface of the specimen and thus
+              #     relevant from a scientific point of view.
+              #   m_annotations:
+              #     eln:
+              #       component: StringEditQuantity
+              description:
+                type: str
+                description: |
+                  Discouraged free text field to be used in the case when properly
+                  designed records for the sample_history are not available.
+                m_annotations:
+                  eln:
+                    component: RichTextEditQuantity
         user:
           repeats: true
           section:
@@ -193,102 +332,6 @@ definitions:
                 m_annotations:
                   eln:
                     component: StringEditQuantity
-        specimen:
-          section:
-            description: |
-              Details about the specimen and its immediate environment.
-            m_annotations:
-              eln:
-            quantities:
-              name:
-                type: str
-                description: |
-                  GUID which distinguishes the specimen from all others and especially
-                  the predecessor/origin from where the specimen was cut.
-                  In cases where the specimen was e.g. site-specifically cut from
-                  samples or in cases of an instrument session during which multiple
-                  specimens are loaded, the name has to be descriptive enough to
-                  resolve which specimen on e.g. the microtip array was taken.
-                  This field must not be used for an alias of the specimen.
-                  Instead, use short_title.
-                m_annotations:
-                  eln:
-                    component: StringEditQuantity
-              sample_history:
-                type: str
-                description: |
-                  Reference to the location of or a GUID providing as many details
-                  as possible of the material, its microstructure, and its
-                  thermo-chemo-mechanical processing/preparation history.
-                m_annotations:
-                  eln:
-                    component: StringEditQuantity
-              preparation_date:
-                type: Datetime
-                description: |
-                  ISO 8601 time code with local time zone offset to UTC information when
-                  the measured specimen surface was actively prepared.
-                m_annotations:
-                  eln:
-                    component: DateTimeEditQuantity
-              short_title:
-                type: str
-                description: Possibility to give an abbreviation of the specimen name field.
-                m_annotations:
-                  eln:
-                    component: StringEditQuantity
-              # atom_types should be a list of strings
-              atom_types:
-                type: str
-                shape: ['*']
-                description: |
-                  Use Hill's system for listing elements of the periodic table which
-                  are inside or attached to the surface of the specimen and thus
-                  relevant from a scientific point of view.
-                m_annotations:
-                  eln:
-                    component: StringEditQuantity
-              description:
-                type: str
-                description: |
-                  Discouraged free text field to be used in the case when properly
-                  designed records for the sample_history are not available.
-                m_annotations:
-                  eln:
-                    component: StringEditQuantity
-              # composition_element_symbol:
-              #   type: str
-              #   shape: ['*']
-              #   description: |
-              #     Chemical symbol.
-              #   m_annotations:
-              #     eln:
-              #       component: StringEditQuantity
-              # composition_mass_fraction:
-              #   type: np.float64
-              #   shape: ['*']
-              #   description: |
-              #     Composition but this can be atomic or mass fraction.
-              #     Best is you specify which you want. Under the hood oasis uses pint
-              #     /nomad/nomad/units is the place where you can predefine exotic
-              #     constants and units for a local oasis instance
-              #   m_annotations:
-              #     eln:
-              #       component: NumberEditQuantity
-              #       minValue: 0.
-              #       maxValue: 1.
-              # composition_mass_fraction_error:
-              #   type: np.float64
-              #   shape: ['*']
-              #   description: |
-              #     Composition but this can be atomic or mass fraction.
-              #     Also here best to be specific. If people write at.-% but mean wt.-% you
-              #     cannot guard yourself against this
-              #   m_annotations:
-              #     eln:
-              #       component: NumberEditQuantity
-              #       minValue: 0.
-              #       maxValue: 1.
         atom_probe:
           section:
             description: |
@@ -302,6 +345,7 @@ definitions:
                   type_data:
                     - success
                     - failure
+                    - unknown
                 description: |
                   A statement whether the measurement was successful or
                   failed prematurely.
@@ -314,6 +358,14 @@ definitions:
                 m_annotations:
                   eln:
                     component: StringEditQuantity
+              location:
+                type: str
+                description: |
+                  Location of the lab or place where the instrument is installed.
+                  Using GEOREF is preferred.
+                m_annotations:
+                  eln:
+                    component: StringEditQuantity
               # (NXfabrication):
               flight_path_length:
                 type: np.float64
@@ -327,6 +379,18 @@ definitions:
                     defaultDisplayUnit: meter
                     minValue: 0.0
                     maxValue: 10.0
+              field_of_view:
+                type: np.float64
+                unit: nanometer
+                description: |
+                  The nominal diameter of the specimen ROI which is measured in the
+                  experiment. Physically, the specimen cannot be measured completely
+                  because ions may launch but not become detected or hit elsewhere.
+                m_annotations:
+                  eln:
+                    component: NumberEditQuantity
+                    defaultDisplayUnit: nanometer
+                    minValue: 0.0
               fabrication_vendor:
                 type: str
                 description: Name of the manufacturer/company, i.e. AMETEK/Cameca.
@@ -415,7 +479,7 @@ definitions:
                     component: NumberEditQuantity
                     defaultDisplayUnit: kelvin
                     minValue: 0.0
-                    maxValue: 273.15
+                    maxValue: 300.0
               analysis_chamber_pressure:
                 type: np.float64
                 unit: torr
@@ -485,8 +549,8 @@ definitions:
                   type_kind: Enum
                   type_data:
                     - laser
-                    - high_voltage
-                    - laser_and_high_voltage
+                    - voltage
+                    - laser_and_voltage
                 description: |
                   Which pulsing mode was used?
                 m_annotations:
@@ -510,41 +574,53 @@ definitions:
                     component: NumberEditQuantity
                     minValue: 0.0
                     maxValue: 1.0
-              laser_source_name:
-                type: str
-                description: Given name/alias.
-                m_annotations:
-                  eln:
-                    component: StringEditQuantity
-              laser_source_wavelength:
-                type: np.float64
-                unit: meter
-                description: Nominal wavelength of the laser radiation.
-                m_annotations:
-                  eln:
-                    component: NumberEditQuantity
-                    defaultDisplayUnit: nanometer
-                    minValue: 0.0
-              laser_source_power:
-                type: np.float64
-                unit: watt
-                description: |
-                  Nominal power of the laser source while
-                  illuminating the specimen.
-                m_annotations:
-                  eln:
-                    component: NumberEditQuantity
-                    defaultDisplayUnit: nanowatt
-                    minValue: 0.0
-              laser_source_pulse_energy:
-                type: np.float64
-                unit: joule
-                description: Average energy of the laser at peak of each pulse.
-                m_annotations:
-                  eln:
-                    component: NumberEditQuantity
-                    defaultDisplayUnit: picojoule
-                    minValue: 0.0
+              # LEAP 6000 instrument has up to two lasers
+            sub_sections:
+              laser_source:
+                repeats: True
+                section:
+                  description: |
+                    Details about each laser pulsing unit.
+                    LEAP6000 instruments can use up to two lasers.
+                  m_annotations:
+                    eln:
+                  quantities:
+                    name:
+                      type: str
+                      description: Given name/alias.
+                      m_annotations:
+                        eln:
+                          component: StringEditQuantity
+                    wavelength:
+                      type: np.float64
+                      unit: nanometer
+                      description: Nominal wavelength of the laser radiation.
+                      m_annotations:
+                        eln:
+                          component: NumberEditQuantity
+                          defaultDisplayUnit: nanometer
+                          minValue: 0.0
+                    power:
+                      type: np.float64
+                      unit: nanowatt
+                      description: |
+                        Nominal power of the laser source while
+                        illuminating the specimen.
+                      m_annotations:
+                        eln:
+                          component: NumberEditQuantity
+                          defaultDisplayUnit: nanowatt
+                          minValue: 0.0
+                    pulse_energy:
+                      type: np.float64
+                      unit: picojoule
+                      description: |
+                        Average energy of the laser at peak of each pulse.
+                      m_annotations:
+                        eln:
+                          component: NumberEditQuantity
+                          defaultDisplayUnit: picojoule
+                          minValue: 0.0
               # control_software:
               #   section:
               #     description: Which control software was used e.g. IVAS/APSuite
diff --git a/tests/data/dataconverter/readers/ellips/eln_data.yaml b/tests/data/dataconverter/readers/ellips/eln_data.yaml
index 70b708ef3..785e8e1e6 100644
--- a/tests/data/dataconverter/readers/ellips/eln_data.yaml
+++ b/tests/data/dataconverter/readers/ellips/eln_data.yaml
@@ -58,9 +58,6 @@ colnames:
   - Delta
   - err.Psi
   - err.Delta
-definition: NXellipsometry
-definition/@url: https://github.com/FAIRmat-NFDI/nexus_definitions/blob/fairmat/contributed_definitions/NXellipsometry.nxdl.xml
-definition/@version: 0.0.2
 derived_parameter_type: depolarization
 experiment_description: RC2 scan on 2nm SiO2 on Si in air
 experiment_identifier: exp-ID
diff --git a/tests/data/dataconverter/readers/json_map/data.json b/tests/data/dataconverter/readers/json_map/data.json
index 28fb71b48..ae0cf6c88 100644
--- a/tests/data/dataconverter/readers/json_map/data.json
+++ b/tests/data/dataconverter/readers/json_map/data.json
@@ -17,5 +17,6 @@
   "type": "2nd type",
   "date_value": "2022-01-22T12:14:12.05018+00:00",
   "required_child": 1,
-  "optional_child": 1
+  "optional_child": 1,
+  "random_data": [0, 1]
 }
\ No newline at end of file
diff --git a/tests/data/dataconverter/readers/json_map/data.mapping.json b/tests/data/dataconverter/readers/json_map/data.mapping.json
index 5fc7b95c5..055b0977e 100644
--- a/tests/data/dataconverter/readers/json_map/data.mapping.json
+++ b/tests/data/dataconverter/readers/json_map/data.mapping.json
@@ -18,5 +18,6 @@
   "/ENTRY[entry]/optional_parent/required_child": "/required_child",
   "/ENTRY[entry]/program_name": "Example for listing exact data in the map file: Nexus Parser",
   "/ENTRY[entry]/required_group/description": "An example description",
-  "/ENTRY[entry]/required_group2/description": "An example description"
+  "/ENTRY[entry]/required_group2/description": "An example description",
+  "/ENTRY[entry]/optional_parent/req_group_in_opt_group/DATA[data]": "/random_data"
 }
\ No newline at end of file
diff --git a/tests/data/dataconverter/readers/mpes/Ref_nexus_mpes.log b/tests/data/dataconverter/readers/mpes/Ref_nexus_mpes.log
index 35c7fb42f..d4a58e2ee 100644
--- a/tests/data/dataconverter/readers/mpes/Ref_nexus_mpes.log
+++ b/tests/data/dataconverter/readers/mpes/Ref_nexus_mpes.log
@@ -8,12 +8,13 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY):
 DEBUG -
 DEBUG - documentation (NXentry.nxdl.xml:):
 DEBUG -
-     (**required**) :ref:`NXentry` describes the measurement.
-
-     The top-level NeXus group which contains all the data and associated
-     information that comprise a single measurement.
-     It is mandatory that there is at least one
-     group of this type in the NeXus file.
+    (**required**) :ref:`NXentry` describes the measurement.
+
+    The top-level NeXus group which contains all the data and associated
+    information that comprise a single measurement.
+    It is mandatory that there is at least one
+    group of this type in the NeXus file.
+
 DEBUG - ===== ATTRS (//entry@NX_class)
 DEBUG - value: NXentry
 DEBUG - classpath: ['NXentry']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY
 NXentry.nxdl.xml:
 DEBUG - @NX_class [NX_CHAR]
 DEBUG -
@@ -32,23 +33,23 @@ DEBUG - NXmpes.nxdl.xml:/ENTRY@default - [NX_CHAR]
 DEBUG - NXentry.nxdl.xml:@default - [NX_CHAR]
 DEBUG - documentation (NXentry.nxdl.xml:/default):
 DEBUG -
-     .. index:: find the default plottable data
-     .. index:: plotting
-     .. index:: default attribute value
-
-     Declares which :ref:`NXdata` group contains the data
-     to be shown by default.
-     It is used to resolve ambiguity when
-     one :ref:`NXdata` group exists.
-     The value :ref:`names ` a child group. If that group
-     itself has a ``default`` attribute, continue this chain until an
-     :ref:`NXdata` group is reached.
-
-     For more information about how NeXus identifies the default
-     plottable data, see the
-     :ref:`Find Plottable Data, v3 `
-     section.
-
+    .. index:: find the default plottable data
+    .. index:: plotting
+    .. index:: default attribute value
+
+    Declares which :ref:`NXdata` group contains the data
+    to be shown by default.
+    It is used to resolve ambiguity when
+    one :ref:`NXdata` group exists.
+    The value :ref:`names ` a child group. If that group
+    itself has a ``default`` attribute, continue this chain until an
+    :ref:`NXdata` group is reached.
+
+    For more information about how NeXus identifies the default
+    plottable data, see the
+    :ref:`Find Plottable Data, v3 `
+    section.
+
 DEBUG - ===== FIELD (//entry/collection_time):
 DEBUG - value: 2317.343
 DEBUG - classpath: ['NXentry', 'NX_FLOAT']
 DEBUG - classes:
@@ -57,9 +58,9 @@ NXentry.nxdl.xml:/collection_time
 DEBUG - <>
 DEBUG - documentation (NXentry.nxdl.xml:/collection_time):
 DEBUG -
-     Time transpired actually collecting data i.e. taking out time when collection was
-     suspended due to e.g. temperature out of range
-
+    Time transpired actually collecting data i.e. taking out time when collection was
+    suspended due to e.g. temperature out of range
+
 DEBUG - ===== ATTRS (//entry/collection_time@units)
 DEBUG - value: s
 DEBUG - classpath: ['NXentry', 'NX_FLOAT']
 DEBUG - classes:
 NXentry.nxdl.xml:/collection_time
 DEBUG - <>
 DEBUG - ===== GROUP (//entry/data [NXmpes::/NXentry/NXdata]):
 DEBUG - classpath: ['NXentry', 'NXdata']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/DATA
 NXdata.nxdl.xml:
 DEBUG - <>
@@ -77,34 +78,33 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/DATA):
 DEBUG -
 DEBUG - documentation (NXentry.nxdl.xml:/DATA):
 DEBUG -
-     The data group
-
-     .. note:: Before the NIAC2016 meeting [#]_, at least one
-        :ref:`NXdata` group was required in each :ref:`NXentry` group.
-        At the NIAC2016 meeting, it was decided to make :ref:`NXdata`
-        an optional group in :ref:`NXentry` groups for data files that
-        do not use an application definition.
-        It is recommended strongly that all NeXus data files provide
-        a NXdata group.
-        It is permissable to omit the NXdata group only when
-        defining the default plot is not practical or possible
-        from the available data.
-
-        For example, neutron event data may not have anything that
-        makes a useful plot without extensive processing.
-
-        Certain application definitions override this decision and
-        require an :ref:`NXdata` group
-        in the :ref:`NXentry` group. The ``minOccurs=0`` attribute
-        in the application definition will indicate the
-        :ref:`NXdata` group
-        is optional, otherwise, it is required.
-
-        .. [#] NIAC2016:
-           https://www.nexusformat.org/NIAC2016.html,
-           https://github.com/nexusformat/NIAC/issues/16
-
-
+    The data group
+
+    .. note:: Before the NIAC2016 meeting [#]_, at least one
+       :ref:`NXdata` group was required in each :ref:`NXentry` group.
+       At the NIAC2016 meeting, it was decided to make :ref:`NXdata`
+       an optional group in :ref:`NXentry` groups for data files that
+       do not use an application definition.
+       It is recommended strongly that all NeXus data files provide
+       a NXdata group.
+       It is permissable to omit the NXdata group only when
+       defining the default plot is not practical or possible
+       from the available data.
+
+       For example, neutron event data may not have anything that
+       makes a useful plot without extensive processing.
+
+       Certain application definitions override this decision and
+       require an :ref:`NXdata` group
+       in the :ref:`NXentry` group. The ``minOccurs=0`` attribute
+       in the application definition will indicate the
+       :ref:`NXdata` group
+       is optional, otherwise, it is required.
+
+       .. [#] NIAC2016:
+          https://www.nexusformat.org/NIAC2016.html,
+          https://github.com/nexusformat/NIAC/issues/16
+
 DEBUG - documentation (NXdata.nxdl.xml:):
 DEBUG -
     :ref:`NXdata` describes the plottable data and related dimension scales.
@@ -466,21 +466,21 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/definition):
 DEBUG -
 DEBUG - documentation (NXentry.nxdl.xml:/definition):
 DEBUG -
-     (alternate use: see same field in :ref:`NXsubentry` for preferred)
-
-     Official NeXus NXDL schema to which this entry conforms which must be
-     the name of the NXDL file (case sensitive without the file extension)
-     that the NXDL schema is defined in.
-
-     For example the ``definition`` field for a file that conformed to the
-     *NXarpes.nxdl.xml* definition must contain the string **NXarpes**.
-
-     This field is provided so that :ref:`NXentry` can be the overlay position
-     in a NeXus data file for an application definition and its
-     set of groups, fields, and attributes.
-
-     *It is advised* to use :ref:`NXsubentry`, instead, as the overlay position.
-
+    (alternate use: see same field in :ref:`NXsubentry` for preferred)
+
+    Official NeXus NXDL schema to which this entry conforms which must be
+    the name of the NXDL file (case sensitive without the file extension)
+    that the NXDL schema is defined in.
+
+    For example the ``definition`` field for a file that conformed to the
+    *NXarpes.nxdl.xml* definition must contain the string **NXarpes**.
+
+    This field is provided so that :ref:`NXentry` can be the overlay position
+    in a NeXus data file for an application definition and its
+    set of groups, fields, and attributes.
+
+    *It is advised* to use :ref:`NXsubentry`, instead, as the overlay position.
+
 DEBUG - ===== ATTRS (//entry/definition@version)
 DEBUG - value: None
 DEBUG - classpath: ['NXentry', 'NX_CHAR']
 DEBUG - classes:
@@ -493,7 +493,9 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/definition/version):
 DEBUG -
 DEBUG - NXentry.nxdl.xml:/definition@version - [NX_CHAR]
 DEBUG - documentation (NXentry.nxdl.xml:/definition/version):
-DEBUG - NXDL version number
+DEBUG -
+    NXDL version number
+
 DEBUG - ===== FIELD (//entry/duration):
 DEBUG - value: 2317
 DEBUG - classpath: ['NXentry', 'NX_INT']
 DEBUG - classes:
 NXentry.nxdl.xml:/duration
 DEBUG - <>
 DEBUG - documentation (NXentry.nxdl.xml:/duration):
-DEBUG - Duration of measurement
+DEBUG -
+    Duration of measurement
+
 DEBUG - ===== ATTRS (//entry/duration@units)
 DEBUG - value: s
 DEBUG - classpath: ['NXentry', 'NX_INT']
@@ -515,7 +519,9 @@ DEBUG - classes:
 NXentry.nxdl.xml:/end_time
 DEBUG - <>
 DEBUG - documentation (NXentry.nxdl.xml:/end_time):
-DEBUG - Ending time of measurement
+DEBUG -
+    Ending time of measurement
+
 DEBUG - ===== FIELD (//entry/entry_identifier):
 DEBUG - value: b'2019/2019_05/2019_05_23/Scan005'
 DEBUG - classpath: ['NXentry', 'NX_CHAR']
@@ -523,22 +529,39 @@ DEBUG - classes:
 NXentry.nxdl.xml:/entry_identifier
 DEBUG - <>
 DEBUG - documentation (NXentry.nxdl.xml:/entry_identifier):
-DEBUG - unique identifier for the measurement, defined by the facility.
+DEBUG -
+    unique identifier for the measurement, defined by the facility.
+
 DEBUG - ===== FIELD (//entry/experiment_facility):
 DEBUG - value: b'Time Resolved ARPES'
-DEBUG - classpath: ['NXentry']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NX_CHAR']
+DEBUG - classes:
+NXentry.nxdl.xml:/experiment_facility
+DEBUG - <>
+DEBUG - documentation (NXentry.nxdl.xml:/experiment_facility):
 DEBUG -
+    Name of the experimental facility
+
 DEBUG - ===== FIELD (//entry/experiment_institution):
 DEBUG - value: b'Fritz Haber Institute - Max Planck Society'
-DEBUG - classpath: ['NXentry']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NX_CHAR']
+DEBUG - classes:
+NXentry.nxdl.xml:/experiment_institution
+DEBUG - <>
+DEBUG - documentation (NXentry.nxdl.xml:/experiment_institution):
 DEBUG -
+    Name of the institution hosting the facility
+
 DEBUG - ===== FIELD (//entry/experiment_laboratory):
 DEBUG - value: b'Clean Room 4'
-DEBUG - classpath: ['NXentry']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NX_CHAR']
+DEBUG - classes:
+NXentry.nxdl.xml:/experiment_laboratory
+DEBUG - <>
+DEBUG - documentation (NXentry.nxdl.xml:/experiment_laboratory):
 DEBUG -
+    Name of the laboratory or beamline
+
 DEBUG - ===== GROUP (//entry/instrument [NXmpes::/NXentry/NXinstrument]):
 DEBUG - classpath: ['NXentry', 'NXinstrument']
 DEBUG - classes:
@@ -552,15 +575,15 @@ DEBUG - documentation (NXentry.nxdl.xml:/INSTRUMENT):
 DEBUG -
 DEBUG - documentation (NXinstrument.nxdl.xml:):
 DEBUG -
-     Collection of the components of the instrument or beamline.
-
-     Template of instrument descriptions comprising various beamline components.
-     Each component will also be a NeXus group defined by its distance from the
-     sample. Negative distances represent beamline components that are before the
-     sample while positive distances represent components that are after the sample.
-     This device allows the unique identification of beamline components in a way
-     that is valid for both reactor and pulsed instrumentation.
-
+    Collection of the components of the instrument or beamline.
+
+    Template of instrument descriptions comprising various beamline components.
+    Each component will also be a NeXus group defined by its distance from the
+    sample. Negative distances represent beamline components that are before the
+    sample while positive distances represent components that are after the sample.
+    This device allows the unique identification of beamline components in a way
+    that is valid for both reactor and pulsed instrumentation.
+
 DEBUG - ===== ATTRS (//entry/instrument@NX_class)
 DEBUG - value: NXinstrument
 DEBUG - classpath: ['NXentry', 'NXinstrument']
@@ -583,22 +606,22 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/BEAM):
 DEBUG -
 DEBUG - documentation (NXbeam.nxdl.xml:):
 DEBUG -
-     Properties of the neutron or X-ray beam at a given location.
-
-     This group is intended to be referenced
-     by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is
-     especially valuable in storing the results of instrument simulations in which it is useful
-     to specify the beam profile, time distribution etc. at each beamline component. Otherwise,
-     its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron
-     scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is
-     considered as a beamline component and this group may be defined as a subgroup directly inside
-     :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an
-     :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample).
-
-     Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case.
-     To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred
-     by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam.
-
+    Properties of the neutron or X-ray beam at a given location.
+
+    This group is intended to be referenced
+    by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is
+    especially valuable in storing the results of instrument simulations in which it is useful
+    to specify the beam profile, time distribution etc. at each beamline component. Otherwise,
+    its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron
+    scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is
+    considered as a beamline component and this group may be defined as a subgroup directly inside
+    :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an
+    :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample).
+
+    Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case.
+    To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred
+    by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam.
+
 DEBUG - ===== ATTRS (//entry/instrument/beam@NX_class)
 DEBUG - value: NXbeam
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
@@ -632,8 +655,8 @@ NXbeam.nxdl.xml:/extent
 DEBUG - <>
 DEBUG - documentation (NXbeam.nxdl.xml:/extent):
 DEBUG -
-     Size of the beam entering this component. Note this represents
-     a rectangular beam aperture, and values represent FWHM
+    Size of the beam entering this component. Note this represents
+    a rectangular beam aperture, and values represent FWHM

 DEBUG - ===== ATTRS (//entry/instrument/beam/extent@units)
 DEBUG - value: µm
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXbeam.nxdl.xml:/extent
 DEBUG - NXbeam.nxdl.xml:/extent@units [NX_LENGTH]
 DEBUG - ===== FIELD (//entry/instrument/beam/incident_energy):
 DEBUG - value: 36.49699020385742
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy
 NXbeam.nxdl.xml:/incident_energy
@@ -651,7 +674,24 @@ DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy):
 DEBUG -
 DEBUG - documentation (NXbeam.nxdl.xml:/incident_energy):
-DEBUG - Energy carried by each particle of the beam on entering the beamline component
+DEBUG -
+    Energy carried by each particle of the beam on entering the beamline component.
+
+    In the case of a monochromatic beam this is the scalar energy.
+    Several other use cases are permitted, depending on the
+    presence of other incident_energy_X fields.
+
+    * In the case of a polychromatic beam this is an array of length m of energies, with the relative weights in incident_energy_weights.
+    * In the case of a monochromatic beam that varies shot-to-shot, this is an array of energies, one for each recorded shot.
+      Here, incident_energy_weights and incident_energy_spread are not set.
+    * In the case of a polychromatic beam that varies shot-to-shot,
+      this is an array of length m with the relative weights in incident_energy_weights as a 2D array.
+    * In the case of a polychromatic beam that varies shot-to-shot and where the channels also vary,
+      this is a 2D array of dimensions nP by m (slow to fast) with the relative weights in incident_energy_weights as a 2D array.
+
+    Note, variants are a good way to represent several of these use cases in a single dataset,
+    e.g. if a calibrated, single-value energy value is available along with the original spectrum from which it was calibrated.
+
 DEBUG - ===== ATTRS (//entry/instrument/beam/incident_energy@units)
 DEBUG - value: eV
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
@@ -665,15 +705,25 @@ DEBUG - value: 0.11
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread
+NXbeam.nxdl.xml:/incident_energy_spread
 DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread):
 DEBUG -
+DEBUG - documentation (NXbeam.nxdl.xml:/incident_energy_spread):
+DEBUG -
+    The energy spread FWHM for the corresponding energy(ies) in incident_energy. In the case of shot-to-shot variation in
+    the energy spread, this is a 2D array of dimension nP by m
+    (slow to fast) of the spreads of the corresponding
+    wavelength in incident_wavelength.
+
 DEBUG - ===== ATTRS (//entry/instrument/beam/incident_energy_spread@units)
 DEBUG - value: eV
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread
+NXbeam.nxdl.xml:/incident_energy_spread
 DEBUG - NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread@units [NX_ENERGY]
+DEBUG - NXbeam.nxdl.xml:/incident_energy_spread@units [NX_ENERGY]
 DEBUG - ===== FIELD (//entry/instrument/beam/incident_polarization):
 DEBUG - value: [1. 1. 0. 0.]
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_polarization
 NXbeam.nxdl.xml:/incident_polarization
 DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_polarization):
 DEBUG -
 DEBUG - documentation (NXbeam.nxdl.xml:/incident_polarization):
-DEBUG - Polarization vector on entering beamline component
+DEBUG -
+    Incident polarization as a Stokes vector
+    on entering beamline component
+
 DEBUG - ===== ATTRS (//entry/instrument/beam/incident_polarization@units)
 DEBUG - value: V^2/mm^2
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
@@ -695,14 +748,20 @@ DEBUG - NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_polarization@units [NX_A
 DEBUG - NXbeam.nxdl.xml:/incident_polarization@units [NX_ANY]
 DEBUG - ===== FIELD (//entry/instrument/beam/pulse_duration):
 DEBUG - value: 20.0
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/pulse_duration
+DEBUG - <>
+DEBUG - documentation (NXbeam.nxdl.xml:/pulse_duration):
 DEBUG -
+    FWHM duration of the pulses at the diagnostic point
+
 DEBUG - ===== ATTRS (//entry/instrument/beam/pulse_duration@units)
 DEBUG - value: fs
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/pulse_duration
+DEBUG - NXbeam.nxdl.xml:/pulse_duration@units [NX_TIME]
 DEBUG - ===== GROUP (//entry/instrument/beam_pump [NXmpes::/NXentry/NXinstrument/NXbeam]):
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
 DEBUG - classes:
@@ -716,22 +775,22 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/BEAM):
 DEBUG -
 DEBUG - documentation (NXbeam.nxdl.xml:):
 DEBUG -
-     Properties of the neutron or X-ray beam at a given location.
-
-     This group is intended to be referenced
-     by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is
-     especially valuable in storing the results of instrument simulations in which it is useful
-     to specify the beam profile, time distribution etc. at each beamline component. Otherwise,
-     its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron
-     scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is
-     considered as a beamline component and this group may be defined as a subgroup directly inside
-     :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an
-     :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample).
-
-     Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case.
-     To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred
-     by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam.
-
+    Properties of the neutron or X-ray beam at a given location.
+
+    This group is intended to be referenced
+    by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is
+    especially valuable in storing the results of instrument simulations in which it is useful
+    to specify the beam profile, time distribution etc. at each beamline component. Otherwise,
+    its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron
+    scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is
+    considered as a beamline component and this group may be defined as a subgroup directly inside
+    :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an
+    :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample).
+
+    Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case.
+    To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred
+    by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam.
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump@NX_class)
 DEBUG - value: NXbeam
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
 DEBUG - classes:
 NXinstrument.nxdl.xml:/BEAM
 NXbeam.nxdl.xml:
 DEBUG - @NX_class [NX_CHAR]
 DEBUG -
@@ -743,14 +802,20 @@ DEBUG - ===== FIELD (//entry/instrument/beam_pump/average_power):
 DEBUG - value: 444.0
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/average_power
+DEBUG - <>
+DEBUG - documentation (NXbeam.nxdl.xml:/average_power):
 DEBUG -
+    Average power at the diagnostic point
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/average_power@units)
 DEBUG - value: mW
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/average_power
+DEBUG - NXbeam.nxdl.xml:/average_power@units [NX_POWER]
 DEBUG - ===== FIELD (//entry/instrument/beam_pump/distance):
 DEBUG - value: 0.0
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
@@ -775,8 +840,8 @@ NXbeam.nxdl.xml:/extent
 DEBUG - <>
 DEBUG - documentation (NXbeam.nxdl.xml:/extent):
 DEBUG -
-     Size of the beam entering this component. Note this represents
-     a rectangular beam aperture, and values represent FWHM
+    Size of the beam entering this component. Note this represents
+    a rectangular beam aperture, and values represent FWHM

 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/extent@units)
 DEBUG - value: µm
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXbeam.nxdl.xml:/extent
 DEBUG - NXbeam.nxdl.xml:/extent@units [NX_LENGTH]
 DEBUG - ===== FIELD (//entry/instrument/beam_pump/fluence):
 DEBUG - value: 1.3
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/fluence
+DEBUG - <>
+DEBUG - documentation (NXbeam.nxdl.xml:/fluence):
 DEBUG -
+    Incident fluence at the diagnostic point
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/fluence@units)
 DEBUG - value: mJ/cm^2
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/fluence
+DEBUG - NXbeam.nxdl.xml:/fluence@units [NX_ANY]
 DEBUG - ===== FIELD (//entry/instrument/beam_pump/incident_energy):
 DEBUG - value: 1.2
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
@@ -804,7 +875,24 @@ DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy):
 DEBUG -
 DEBUG - documentation (NXbeam.nxdl.xml:/incident_energy):
-DEBUG - Energy carried by each particle of the beam on entering the beamline component
+DEBUG -
+    Energy carried by each particle of the beam on entering the beamline component.
+
+    In the case of a monochromatic beam this is the scalar energy.
+    Several other use cases are permitted, depending on the
+    presence of other incident_energy_X fields.
+
+    * In the case of a polychromatic beam this is an array of length m of energies, with the relative weights in incident_energy_weights.
+    * In the case of a monochromatic beam that varies shot-to-shot, this is an array of energies, one for each recorded shot.
+      Here, incident_energy_weights and incident_energy_spread are not set.
+    * In the case of a polychromatic beam that varies shot-to-shot,
+      this is an array of length m with the relative weights in incident_energy_weights as a 2D array.
+    * In the case of a polychromatic beam that varies shot-to-shot and where the channels also vary,
+      this is a 2D array of dimensions nP by m (slow to fast) with the relative weights in incident_energy_weights as a 2D array.
+
+    Note, variants are a good way to represent several of these use cases in a single dataset,
+    e.g. if a calibrated, single-value energy value is available along with the original spectrum from which it was calibrated.
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/incident_energy@units)
 DEBUG - value: eV
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
@@ -818,15 +906,25 @@ DEBUG - value: 0.05
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread
+NXbeam.nxdl.xml:/incident_energy_spread
 DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread):
 DEBUG -
+DEBUG - documentation (NXbeam.nxdl.xml:/incident_energy_spread):
+DEBUG -
+    The energy spread FWHM for the corresponding energy(ies) in incident_energy. In the case of shot-to-shot variation in
+    the energy spread, this is a 2D array of dimension nP by m
+    (slow to fast) of the spreads of the corresponding
+    wavelength in incident_wavelength.
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/incident_energy_spread@units)
 DEBUG - value: eV
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread
+NXbeam.nxdl.xml:/incident_energy_spread
 DEBUG - NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_energy_spread@units [NX_ENERGY]
+DEBUG - NXbeam.nxdl.xml:/incident_energy_spread@units [NX_ENERGY]
 DEBUG - ===== FIELD (//entry/instrument/beam_pump/incident_polarization):
 DEBUG - value: [1 1 0 0]
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_polarization
 NXbeam.nxdl.xml:/incident_polarization
 DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_polarization):
 DEBUG -
 DEBUG - documentation (NXbeam.nxdl.xml:/incident_polarization):
-DEBUG - Polarization vector on entering beamline component
+DEBUG -
+    Incident polarization as a Stokes vector
+    on entering beamline component
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/incident_polarization@units)
 DEBUG - value: V^2/mm^2
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_NUMBER']
 DEBUG - NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/BEAM/incident_polarization@units [NX_ANY]
 DEBUG - NXbeam.nxdl.xml:/incident_polarization@units [NX_ANY]
 DEBUG - ===== FIELD (//entry/instrument/beam_pump/incident_wavelength):
@@ -854,38 +955,38 @@ NXbeam.nxdl.xml:/incident_wavelength
 DEBUG - <>
 DEBUG - documentation (NXbeam.nxdl.xml:/incident_wavelength):
 DEBUG -
-     In the case of a monochromatic beam this is the scalar
-     wavelength.
-
-     Several other use cases are permitted, depending on the
-     presence or absence of other incident_wavelength_X
-     fields.
-
-     In the case of a polychromatic beam this is an array of
-     length **m** of wavelengths, with the relative weights
-     in ``incident_wavelength_weights``.
-
-     In the case of a monochromatic beam that varies shot-
-     to-shot, this is an array of wavelengths, one for each
-     recorded shot. Here, ``incident_wavelength_weights`` and
-     incident_wavelength_spread are not set.
-
-     In the case of a polychromatic beam that varies shot-to-
-     shot, this is an array of length **m** with the relative
-     weights in ``incident_wavelength_weights`` as a 2D array.
-
-     In the case of a polychromatic beam that varies shot-to-
-     shot and where the channels also vary, this is a 2D array
-     of dimensions **nP** by **m** (slow to fast) with the
-     relative weights in ``incident_wavelength_weights`` as a 2D
-     array.
-
-     Note, :ref:`variants ` are a good way
-     to represent several of these use cases in a single dataset,
-     e.g. if a calibrated, single-value wavelength value is
-     available along with the original spectrum from which it
-     was calibrated.
-     Wavelength on entering beamline component
+    In the case of a monochromatic beam this is the scalar
+    wavelength.
+
+    Several other use cases are permitted, depending on the
+    presence or absence of other incident_wavelength_X
+    fields.
+
+    In the case of a polychromatic beam this is an array of
+    length **m** of wavelengths, with the relative weights
+    in ``incident_wavelength_weights``.
+
+    In the case of a monochromatic beam that varies shot-
+    to-shot, this is an array of wavelengths, one for each
+    recorded shot. Here, ``incident_wavelength_weights`` and
+    incident_wavelength_spread are not set.
+
+    In the case of a polychromatic beam that varies shot-to-
+    shot, this is an array of length **m** with the relative
+    weights in ``incident_wavelength_weights`` as a 2D array.
+
+    In the case of a polychromatic beam that varies shot-to-
+    shot and where the channels also vary, this is a 2D array
+    of dimensions **nP** by **m** (slow to fast) with the
+    relative weights in ``incident_wavelength_weights`` as a 2D
+    array.
+
+    Note, :ref:`variants ` are a good way
+    to represent several of these use cases in a single dataset,
+    e.g. if a calibrated, single-value wavelength value is
+    available along with the original spectrum from which it
+    was calibrated.
+    Wavelength on entering beamline component
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/incident_wavelength@units)
 DEBUG - value: nm
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
 DEBUG - classes:
 NXbeam.nxdl.xml:/incident_wavelength
 DEBUG - NXbeam.nxdl.xml:/incident_wavelength@units [NX_WAVELENGTH]
@@ -895,24 +996,36 @@ DEBUG - ===== FIELD (//entry/instrument/beam_pump/pulse_duration):
 DEBUG - value: 140.0
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/pulse_duration
+DEBUG - <>
+DEBUG - documentation (NXbeam.nxdl.xml:/pulse_duration):
 DEBUG -
+    FWHM duration of the pulses at the diagnostic point
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/pulse_duration@units)
 DEBUG - value: fs
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/pulse_duration
+DEBUG - NXbeam.nxdl.xml:/pulse_duration@units [NX_TIME]
 DEBUG - ===== FIELD (//entry/instrument/beam_pump/pulse_energy):
 DEBUG - value: 0.889
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/pulse_energy
+DEBUG - <>
+DEBUG - documentation (NXbeam.nxdl.xml:/pulse_energy):
 DEBUG -
+    Energy of a single pulse at the diagnostic point
+
 DEBUG - ===== ATTRS (//entry/instrument/beam_pump/pulse_energy@units)
 DEBUG - value: µJ
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT']
+DEBUG - classes:
+NXbeam.nxdl.xml:/pulse_energy
+DEBUG - NXbeam.nxdl.xml:/pulse_energy@units [NX_ENERGY]
 DEBUG - ===== GROUP (//entry/instrument/electronanalyser [NXmpes::/NXentry/NXinstrument/NXelectronanalyser]):
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser']
 DEBUG - classes:
@@ -948,8 +1061,8 @@ DEBUG -
 DEBUG - documentation (NXcollectioncolumn.nxdl.xml:):
 DEBUG -
-     Subclass of NXelectronanalyser to describe the electron collection column of a
-     photoelectron analyser.
+    Subclass of NXelectronanalyser to describe the electron collection
+    column of a photoelectron analyser.

 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn@NX_class)
 DEBUG - value: NXcollectioncolumn
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn']
 DEBUG - classes:
 NXelectronanalyser.nxdl.xml:/COLLECTIONCOLUMN
 NXcollectioncolumn.nxdl.xml:
 DEBUG - @NX_class [NX_CHAR]
 DEBUG -
 DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture]):
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
 DEBUG - classes:
 NXcollectioncolumn.nxdl.xml:/APERTURE
 NXaperture.nxdl.xml:
 DEBUG - <>
 DEBUG - documentation (NXcollectioncolumn.nxdl.xml:/APERTURE):
 DEBUG -
     or contrast aperture
 DEBUG - documentation (NXaperture.nxdl.xml:):
-DEBUG - A beamline aperture. This group is deprecated, use NXslit instead.
+DEBUG -
+    A beamline aperture. This group is deprecated, use NXslit instead.
+
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture@NX_class)
 DEBUG - value: NXaperture
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
 DEBUG - classes:
 NXcollectioncolumn.nxdl.xml:/APERTURE
 NXaperture.nxdl.xml:
 DEBUG - @NX_class [NX_CHAR]
 DEBUG -
-DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/ca_m3 [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture/ca_m3]):
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
+DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/ca_m3 [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture/NXpositioner]):
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner']
+DEBUG - classes:
+NXaperture.nxdl.xml:/POSITIONER
+NXpositioner.nxdl.xml:
+DEBUG - <>
+DEBUG - documentation (NXaperture.nxdl.xml:/POSITIONER):
+DEBUG -
+    Stores the raw positions of aperture motors.
+
+DEBUG - documentation (NXpositioner.nxdl.xml:):
+DEBUG -
+    A generic positioner such as a motor or piezo-electric transducer.
+
+DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/ca_m3@NX_class)
+DEBUG - value: NXpositioner
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner']
+DEBUG - classes:
+NXaperture.nxdl.xml:/POSITIONER
+NXpositioner.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
 DEBUG -
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/ca_m3/value):
 DEBUG - value: -11.49979350759219
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner', 'NX_NUMBER']
+DEBUG - classes:
+NXpositioner.nxdl.xml:/value
+DEBUG - <>
+DEBUG - documentation (NXpositioner.nxdl.xml:/value):
+DEBUG - best known value of positioner - need [n] as may be scanned
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/ca_m3/value@units)
 DEBUG - value: mm
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner', 'NX_NUMBER']
+DEBUG - classes:
+NXpositioner.nxdl.xml:/value
+DEBUG - NXpositioner.nxdl.xml:/value@units [NX_ANY]
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/shape):
 DEBUG - value: b'open'
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NX_CHAR']
+DEBUG - classes:
+NXaperture.nxdl.xml:/shape
+DEBUG - <>
+DEBUG - enumeration (NXaperture.nxdl.xml:/shape):
+DEBUG - -> straight slit
+DEBUG - -> curved slit
+DEBUG - -> pinhole
+DEBUG - -> circle
+DEBUG - -> square
+DEBUG - -> hexagon
+DEBUG - -> octagon
+DEBUG - -> bladed
+DEBUG - -> open
+DEBUG - -> grid
+DEBUG - documentation (NXaperture.nxdl.xml:/shape):
+DEBUG -
+    Shape of the aperture.
+
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/size):
 DEBUG - value: nan
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NX_NUMBER']
+DEBUG - classes:
+NXaperture.nxdl.xml:/size
+DEBUG - <>
+DEBUG - documentation (NXaperture.nxdl.xml:/size):
 DEBUG -
+    The relevant dimension for the aperture, i.e. slit width, pinhole and iris
+    diameter
+
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/contrast_aperture/size@units)
 DEBUG - value: µm
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NX_NUMBER']
+DEBUG - classes:
+NXaperture.nxdl.xml:/size
+DEBUG - NXaperture.nxdl.xml:/size@units [NX_LENGTH]
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/extractor_current):
 DEBUG - value: -0.1309711275510204
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NX_FLOAT']
@@ -1068,7 +1228,9 @@ DEBUG -
     or contrast aperture
 DEBUG - documentation (NXaperture.nxdl.xml:):
-DEBUG - A beamline aperture. This group is deprecated, use NXslit instead.
+DEBUG -
+    A beamline aperture. This group is deprecated, use NXslit instead.
+
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/field_aperture@NX_class)
 DEBUG - value: NXaperture
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
 DEBUG - classes:
 NXcollectioncolumn.nxdl.xml:/APERTURE
 NXaperture.nxdl.xml:
 DEBUG - @NX_class [NX_CHAR]
 DEBUG -
-DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m1 [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture/fa_m1]):
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
+DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m1 [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture/NXpositioner]):
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner']
+DEBUG - classes:
+NXaperture.nxdl.xml:/POSITIONER
+NXpositioner.nxdl.xml:
+DEBUG - <>
+DEBUG - documentation (NXaperture.nxdl.xml:/POSITIONER):
+DEBUG -
+    Stores the raw positions of aperture motors.
+
+DEBUG - documentation (NXpositioner.nxdl.xml:):
+DEBUG -
+    A generic positioner such as a motor or piezo-electric transducer.
+
+DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m1@NX_class)
+DEBUG - value: NXpositioner
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner']
+DEBUG - classes:
+NXaperture.nxdl.xml:/POSITIONER
+NXpositioner.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
 DEBUG -
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m1/value):
 DEBUG - value: 3.749874153422982
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner', 'NX_NUMBER']
+DEBUG - classes:
+NXpositioner.nxdl.xml:/value
+DEBUG - <>
+DEBUG - documentation (NXpositioner.nxdl.xml:/value):
+DEBUG - best known value of positioner - need [n] as may be scanned
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m1/value@units)
 DEBUG - value: mm
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner', 'NX_NUMBER']
+DEBUG - classes:
+NXpositioner.nxdl.xml:/value
+DEBUG - NXpositioner.nxdl.xml:/value@units [NX_ANY]
+DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m2 [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture/NXpositioner]):
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner']
+DEBUG - classes:
+NXaperture.nxdl.xml:/POSITIONER
+NXpositioner.nxdl.xml:
+DEBUG - <>
+DEBUG - documentation (NXaperture.nxdl.xml:/POSITIONER):
 DEBUG -
-DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m2 [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXaperture/fa_m2]):
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
+    Stores the raw positions of aperture motors.
+
+DEBUG - documentation (NXpositioner.nxdl.xml:):
+DEBUG -
+    A generic positioner such as a motor or piezo-electric transducer.
+
+DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m2@NX_class)
+DEBUG - value: NXpositioner
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner']
+DEBUG - classes:
+NXaperture.nxdl.xml:/POSITIONER
+NXpositioner.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
 DEBUG -
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m2/value):
 DEBUG - value: -5.200156936301793
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner', 'NX_NUMBER']
+DEBUG - classes:
+NXpositioner.nxdl.xml:/value
+DEBUG - <>
+DEBUG - documentation (NXpositioner.nxdl.xml:/value):
+DEBUG - best known value of positioner - need [n] as may be scanned
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/fa_m2/value@units)
 DEBUG - value: mm
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NXpositioner', 'NX_NUMBER']
+DEBUG - classes:
+NXpositioner.nxdl.xml:/value
+DEBUG - NXpositioner.nxdl.xml:/value@units [NX_ANY]
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/shape):
 DEBUG - value: b'circle'
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NX_CHAR']
+DEBUG - classes:
+NXaperture.nxdl.xml:/shape
+DEBUG - <>
+DEBUG - enumeration (NXaperture.nxdl.xml:/shape):
+DEBUG - -> straight slit
+DEBUG - -> curved slit
+DEBUG - -> pinhole
+DEBUG - -> circle
+DEBUG - -> square
+DEBUG - -> hexagon
+DEBUG - -> octagon
+DEBUG - -> bladed
+DEBUG - -> open
+DEBUG - -> grid
+DEBUG - documentation (NXaperture.nxdl.xml:/shape):
+DEBUG -
+    Shape of the aperture.
+
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/size):
 DEBUG - value: 200.0
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NX_NUMBER']
+DEBUG - classes:
+NXaperture.nxdl.xml:/size
+DEBUG - <>
+DEBUG - documentation (NXaperture.nxdl.xml:/size):
 DEBUG -
+    The relevant dimension for the aperture, i.e. slit width, pinhole and iris
+    diameter
+
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/field_aperture/size@units)
 DEBUG - value: µm
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXaperture', 'NX_NUMBER']
+DEBUG - classes:
+NXaperture.nxdl.xml:/size
+DEBUG - NXaperture.nxdl.xml:/size@units [NX_LENGTH]
 DEBUG - ===== GROUP (//entry/instrument/electronanalyser/collectioncolumn/lens_A [NXmpes::/NXentry/NXinstrument/NXelectronanalyser/NXcollectioncolumn/NXlens_em]):
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXlens_em']
 DEBUG - classes:
 DEBUG -
 DEBUG - documentation (NXlens_em.nxdl.xml:):
 DEBUG -
-     Description of an electro-magnetic lens or a compound lens.
-
-     For NXtransformations the origin of the coordinate system is placed
-     in the center of the lens
-     (its polepiece, pinhole, or another point of reference).
-     The origin should be specified in the NXtransformations.
-
-     For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_
+    Base class for an electro-magnetic lens or a compound lens.
+
+    For :ref:`NXtransformations` the origin of the coordinate system is placed
+    in the center of the lens (its polepiece, pinhole, or another
+    point of reference). The origin should be specified in the :ref:`NXtransformations`.
+
+    For details of electro-magnetic lenses in the literature
+    see e.g. `L. Reimer `_

 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_A@NX_class)
 DEBUG - value: NXlens_em
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXlens_em']
 DEBUG - classes:
 NXlens_em.nxdl.xml:/voltage
 DEBUG - <>
 DEBUG - documentation (NXlens_em.nxdl.xml:/voltage):
 DEBUG -
-     Excitation voltage of the lens. For dipoles it is a single number. For higher
-     orders, it is an array.
+    Excitation voltage of the lens.
+
+    For dipoles it is a single number.
+    For higher order multipoles, it is an array.

 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_A/voltage@units)
 DEBUG - value: V
 DEBUG - classes:
 DEBUG -
 DEBUG - documentation (NXlens_em.nxdl.xml:):
 DEBUG -
-     Description of an electro-magnetic lens or a compound lens.
-
-     For NXtransformations the origin of the coordinate system is placed
-     in the center of the lens
-     (its polepiece, pinhole, or another point of reference).
-     The origin should be specified in the NXtransformations.
-
-     For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_
+    Base class for an electro-magnetic lens or a compound lens.
+
+    For :ref:`NXtransformations` the origin of the coordinate system is placed
+    in the center of the lens (its polepiece, pinhole, or another
+    point of reference). The origin should be specified in the :ref:`NXtransformations`.
+
+    For details of electro-magnetic lenses in the literature
+    see e.g. `L. Reimer `_

 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_B@NX_class)
 DEBUG - value: NXlens_em
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXlens_em']
 DEBUG - classes:
 NXlens_em.nxdl.xml:/voltage
 DEBUG - <>
 DEBUG - documentation (NXlens_em.nxdl.xml:/voltage):
 DEBUG -
-     Excitation voltage of the lens. For dipoles it is a single number. For higher
-     orders, it is an array.
+    Excitation voltage of the lens.
+
+    For dipoles it is a single number.
+    For higher order multipoles, it is an array.
DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_B/voltage@units) DEBUG - value: V @@ -1247,14 +1480,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_C@NX_class) DEBUG - value: NXlens_em @@ -1283,8 +1516,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_C/voltage@units) DEBUG - value: V @@ -1304,14 +1539,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_D@NX_class) DEBUG - value: NXlens_em @@ -1340,8 +1575,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_D/voltage@units) DEBUG - value: V @@ -1361,14 +1598,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). 
The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_E@NX_class) DEBUG - value: NXlens_em @@ -1397,8 +1634,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_E/voltage@units) DEBUG - value: V @@ -1418,14 +1657,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_F@NX_class) DEBUG - value: NXlens_em @@ -1454,8 +1693,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_F/voltage@units) DEBUG - value: V @@ -1475,14 +1716,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_Foc@NX_class) DEBUG - value: NXlens_em @@ -1511,8 +1752,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. 
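Because the collection column holds a whole series of these NXlens_em groups (lens_A through lens_UFA below), a consumer would typically discover them through their NX_class attribute rather than hard-coding the names; a sketch under the same assumptions as above:

```python
import h5py

def decode(value):
    """HDF5 string attributes may come back as bytes or str depending on the writer."""
    return value.decode() if isinstance(value, bytes) else value

with h5py.File("mpes_example.nxs", "r") as nxs:
    column = nxs["/entry/instrument/electronanalyser/collectioncolumn"]
    for name, node in column.items():
        if decode(node.attrs.get("NX_class", "")) != "NXlens_em":
            continue  # skip apertures, deflectors, and other column components
        if "voltage" in node:
            print(name, node["voltage"][()], decode(node["voltage"].attrs["units"]))
```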
DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_Foc/voltage@units) DEBUG - value: V @@ -1532,14 +1775,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_G@NX_class) DEBUG - value: NXlens_em @@ -1568,8 +1811,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_G/voltage@units) DEBUG - value: V @@ -1589,14 +1834,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_H@NX_class) DEBUG - value: NXlens_em @@ -1625,8 +1870,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_H/voltage@units) DEBUG - value: V @@ -1646,14 +1893,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). 
The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_I@NX_class) DEBUG - value: NXlens_em @@ -1682,8 +1929,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_I/voltage@units) DEBUG - value: V @@ -1703,14 +1952,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_UCA@NX_class) DEBUG - value: NXlens_em @@ -1739,8 +1988,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_UCA/voltage@units) DEBUG - value: V @@ -1760,14 +2011,14 @@ DEBUG - DEBUG - documentation (NXlens_em.nxdl.xml:): DEBUG - - Description of an electro-magnetic lens or a compound lens. + Base class for an electro-magnetic lens or a compound lens. - For NXtransformations the origin of the coordinate system is placed - in the center of the lens - (its polepiece, pinhole, or another point of reference). - The origin should be specified in the NXtransformations. + For :ref:`NXtransformations` the origin of the coordinate system is placed + in the center of the lens (its polepiece, pinhole, or another + point of reference). The origin should be specified in the :ref:`NXtransformations`. - For details of electro-magnetic lenses in the literature see e.g. `L. Reimer `_ + For details of electro-magnetic lenses in the literature + see e.g. `L. Reimer `_ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_UFA@NX_class) DEBUG - value: NXlens_em @@ -1796,8 +2047,10 @@ NXlens_em.nxdl.xml:/voltage DEBUG - <> DEBUG - documentation (NXlens_em.nxdl.xml:/voltage): DEBUG - - Excitation voltage of the lens. For dipoles it is a single number. For higher - orders, it is an array. + Excitation voltage of the lens. + + For dipoles it is a single number. + For higher order multipoles, it is an array. 
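The `classpath` printed with every entry is the chain of NX_class values from the NXentry down to the inspected node, with the resolved NXDL data type (here NX_FLOAT) appended for fields and attributes. Conceptually it can be reproduced by walking the parent groups; a simplified sketch, not the actual pynxtools implementation:

```python
import h5py

def classpath(node):
    """Collect the NX_class attributes of all groups above `node` (simplified sketch)."""
    group = node.parent if isinstance(node, h5py.Dataset) else node
    path = []
    while group.name != "/":
        nx_class = group.attrs.get("NX_class", b"")
        nx_class = nx_class.decode() if isinstance(nx_class, bytes) else nx_class
        if nx_class:
            path.append(nx_class)
        group = group.parent
    return list(reversed(path))

# classpath(nxs[".../collectioncolumn/lens_UFA/voltage"]) would give
# ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXcollectioncolumn', 'NXlens_em'];
# the log then appends the NXDL type of the field itself, e.g. 'NX_FLOAT'.
```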
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/collectioncolumn/lens_UFA/voltage@units)
 DEBUG - value: V
@@ -1912,8 +2165,8 @@ DEBUG - 
 DEBUG - documentation (NXdetector.nxdl.xml:):
 DEBUG - 
-    A detector, detector bank, or multidetector.
-    
+    A detector, detector bank, or multidetector.
+    
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/detector@NX_class)
 DEBUG - value: NXdetector
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector']
@@ -1925,19 +2178,26 @@ DEBUG - @NX_class [NX_CHAR]
 DEBUG - 
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/detector/amplifier_bias):
 DEBUG - value: 30.0
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_FLOAT']
+DEBUG - classes:
+NXdetector.nxdl.xml:/amplifier_bias
+DEBUG - <>
+DEBUG - documentation (NXdetector.nxdl.xml:/amplifier_bias):
 DEBUG - 
+    The low voltage of the amplifier might not be the ground.
+    
 DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/detector/amplifier_bias@units)
 DEBUG - value: V
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_FLOAT']
+DEBUG - classes:
+NXdetector.nxdl.xml:/amplifier_bias
+DEBUG - NXdetector.nxdl.xml:/amplifier_bias@units [NX_VOLTAGE]
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/detector/amplifier_type):
 DEBUG - value: b'MCP'
 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_CHAR']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/ELECTRONANALYSER/DETECTOR/amplifier_type
+NXdetector.nxdl.xml:/amplifier_type
 DEBUG - <>
 DEBUG - enumeration (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/ELECTRONANALYSER/DETECTOR/amplifier_type):
 DEBUG - -> MCP
@@ -1946,21 +2206,32 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/ELECTRONANALYSER/DETECT
 DEBUG - 
     Type of electron amplifier in the first amplification step.
 
+DEBUG - documentation (NXdetector.nxdl.xml:/amplifier_type):
+DEBUG - 
+    Type of electron amplifier, MCP, channeltron, etc.
+    
 DEBUG - ===== FIELD (//entry/instrument/electronanalyser/detector/amplifier_voltage):
 DEBUG - value: 2340.0
-DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_FLOAT']
+DEBUG - classes:
+NXdetector.nxdl.xml:/amplifier_voltage
+DEBUG - <>
+DEBUG - documentation (NXdetector.nxdl.xml:/amplifier_voltage):
 DEBUG - 
+    Voltage applied to the amplifier.
+ DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/detector/amplifier_voltage@units) DEBUG - value: V -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_FLOAT'] +DEBUG - classes: +NXdetector.nxdl.xml:/amplifier_voltage +DEBUG - NXdetector.nxdl.xml:/amplifier_voltage@units [NX_VOLTAGE] DEBUG - ===== FIELD (//entry/instrument/electronanalyser/detector/detector_type): DEBUG - value: b'DLD' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_CHAR'] DEBUG - classes: NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/ELECTRONANALYSER/DETECTOR/detector_type +NXdetector.nxdl.xml:/detector_type DEBUG - <> DEBUG - enumeration (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/ELECTRONANALYSER/DETECTOR/detector_type): DEBUG - -> DLD @@ -1973,21 +2244,36 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/ELECTRONANALYSER/DETECT DEBUG - Description of the detector type. +DEBUG - documentation (NXdetector.nxdl.xml:/detector_type): +DEBUG - + Description of the detector type, DLD, Phosphor+CCD, CMOS. + DEBUG - ===== FIELD (//entry/instrument/electronanalyser/detector/detector_voltage): DEBUG - value: 399.99712810186986 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_FLOAT'] +DEBUG - classes: +NXdetector.nxdl.xml:/detector_voltage +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/detector_voltage): +DEBUG - + Voltage applied to detector. + DEBUG - ===== ATTRS (//entry/instrument/electronanalyser/detector/detector_voltage@units) DEBUG - value: V -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_FLOAT'] +DEBUG - classes: +NXdetector.nxdl.xml:/detector_voltage +DEBUG - NXdetector.nxdl.xml:/detector_voltage@units [NX_VOLTAGE] DEBUG - ===== FIELD (//entry/instrument/electronanalyser/detector/sensor_pixels): DEBUG - value: [1800 1800] -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NXdetector', 'NX_INT'] +DEBUG - classes: +NXdetector.nxdl.xml:/sensor_pixels +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/sensor_pixels): DEBUG - + Number of raw active elements in each dimension. Important for swept scans. 
+ DEBUG - ===== FIELD (//entry/instrument/electronanalyser/energy_resolution): DEBUG - value: 110.0 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXelectronanalyser', 'NX_FLOAT'] @@ -2441,15 +2727,22 @@ DEBUG - value: 140.0 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] DEBUG - classes: NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/energy_resolution +NXinstrument.nxdl.xml:/energy_resolution DEBUG - <> DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/energy_resolution): DEBUG - +DEBUG - documentation (NXinstrument.nxdl.xml:/energy_resolution): +DEBUG - + Energy resolution of the experiment (FWHM or gaussian broadening) + DEBUG - ===== ATTRS (//entry/instrument/energy_resolution@units) DEBUG - value: meV DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] DEBUG - classes: NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/energy_resolution +NXinstrument.nxdl.xml:/energy_resolution DEBUG - NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/energy_resolution@units [NX_ENERGY] +DEBUG - NXinstrument.nxdl.xml:/energy_resolution@units [NX_ENERGY] DEBUG - ===== GROUP (//entry/instrument/manipulator [NXmpes::/NXentry/NXinstrument/NXmanipulator]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXmanipulator'] DEBUG - classes: @@ -2854,14 +3147,20 @@ DEBUG - DEBUG - ===== FIELD (//entry/instrument/momentum_resolution): DEBUG - value: 0.08 -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/momentum_resolution +DEBUG - <> +DEBUG - documentation (NXinstrument.nxdl.xml:/momentum_resolution): DEBUG - + Momentum resolution of the experiment (FWHM) + DEBUG - ===== ATTRS (//entry/instrument/momentum_resolution@units) DEBUG - value: 1/angstrom -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/momentum_resolution +DEBUG - NXinstrument.nxdl.xml:/momentum_resolution@units [NX_WAVENUMBER] DEBUG - ===== FIELD (//entry/instrument/name): DEBUG - value: b'Time-of-flight momentum microscope equipped delay line detector, at the endstation of the high rep-rate HHG source at FHI' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_CHAR'] @@ -2869,7 +3168,9 @@ DEBUG - classes: NXinstrument.nxdl.xml:/name DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/name): -DEBUG - Name of instrument +DEBUG - + Name of instrument + DEBUG - ===== ATTRS (//entry/instrument/name@short_name) DEBUG - value: TR-ARPES @ FHI DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_CHAR'] @@ -2878,7 +3179,9 @@ NXinstrument.nxdl.xml:/name DEBUG - NXinstrument.nxdl.xml:/name@short_name - [NX_CHAR] DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/name/short_name): -DEBUG - short name for instrument, perhaps the acronym +DEBUG - + short name for instrument, perhaps the acronym + DEBUG - ===== GROUP (//entry/instrument/source [NXmpes::/NXentry/NXinstrument/NXsource]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] DEBUG - classes: @@ -2896,7 +3199,9 @@ DEBUG - DEBUG - documentation (NXinstrument.nxdl.xml:/SOURCE): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:): -DEBUG - The neutron or x-ray storage ring/facility. +DEBUG - + The neutron or x-ray storage ring/facility. 
+ DEBUG - ===== ATTRS (//entry/instrument/source@NX_class) DEBUG - value: NXsource DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -2913,7 +3218,9 @@ DEBUG - classes: NXsource.nxdl.xml:/frequency DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/frequency): -DEBUG - Frequency of pulsed source +DEBUG - + Frequency of pulsed source + DEBUG - ===== ATTRS (//entry/instrument/source/frequency@units) DEBUG - value: kHz DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -2930,7 +3237,9 @@ DEBUG - enumeration (NXsource.nxdl.xml:/mode): DEBUG - -> Single Bunch DEBUG - -> Multi Bunch DEBUG - documentation (NXsource.nxdl.xml:/mode): -DEBUG - source operating mode +DEBUG - + source operating mode + DEBUG - ===== FIELD (//entry/instrument/source/name): DEBUG - value: b'HHG @ TR-ARPES @ FHI' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -2941,17 +3250,26 @@ DEBUG - <> DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/name): -DEBUG - Name of source +DEBUG - + Name of source + DEBUG - ===== FIELD (//entry/instrument/source/photon_energy): DEBUG - value: 21.7 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] +DEBUG - classes: +NXsource.nxdl.xml:/photon_energy +DEBUG - <> +DEBUG - documentation (NXsource.nxdl.xml:/photon_energy): DEBUG - + The center photon energy of the source, before it is + monochromatized or converted + DEBUG - ===== ATTRS (//entry/instrument/source/photon_energy@units) DEBUG - value: eV -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] +DEBUG - classes: +NXsource.nxdl.xml:/photon_energy +DEBUG - NXsource.nxdl.xml:/photon_energy@units [NX_ENERGY] DEBUG - ===== FIELD (//entry/instrument/source/probe): DEBUG - value: b'ultraviolet' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -2978,7 +3296,9 @@ DEBUG - restricted. DEBUG - documentation (NXsource.nxdl.xml:/probe): -DEBUG - type of radiation probe (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation probe (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/source/type): DEBUG - value: b'HHG laser' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -3013,7 +3333,9 @@ DEBUG - -> Metal Jet X-ray DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/type): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/type): -DEBUG - type of radiation source (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation source (pick one from the enumerated list and spell exactly) + DEBUG - ===== GROUP (//entry/instrument/source_pump [NXmpes::/NXentry/NXinstrument/NXsource]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] DEBUG - classes: @@ -3031,7 +3353,9 @@ DEBUG - DEBUG - documentation (NXinstrument.nxdl.xml:/SOURCE): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:): -DEBUG - The neutron or x-ray storage ring/facility. +DEBUG - + The neutron or x-ray storage ring/facility. 
+ DEBUG - ===== ATTRS (//entry/instrument/source_pump@NX_class) DEBUG - value: NXsource DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -3048,7 +3372,9 @@ DEBUG - classes: NXsource.nxdl.xml:/frequency DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/frequency): -DEBUG - Frequency of pulsed source +DEBUG - + Frequency of pulsed source + DEBUG - ===== ATTRS (//entry/instrument/source_pump/frequency@units) DEBUG - value: kHz DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -3065,7 +3391,9 @@ DEBUG - enumeration (NXsource.nxdl.xml:/mode): DEBUG - -> Single Bunch DEBUG - -> Multi Bunch DEBUG - documentation (NXsource.nxdl.xml:/mode): -DEBUG - source operating mode +DEBUG - + source operating mode + DEBUG - ===== FIELD (//entry/instrument/source_pump/name): DEBUG - value: b'OPCPA @ TR-ARPES @ FHI' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -3076,17 +3404,26 @@ DEBUG - <> DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/name): -DEBUG - Name of source +DEBUG - + Name of source + DEBUG - ===== FIELD (//entry/instrument/source_pump/photon_energy): DEBUG - value: 1.2 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] +DEBUG - classes: +NXsource.nxdl.xml:/photon_energy +DEBUG - <> +DEBUG - documentation (NXsource.nxdl.xml:/photon_energy): DEBUG - + The center photon energy of the source, before it is + monochromatized or converted + DEBUG - ===== ATTRS (//entry/instrument/source_pump/photon_energy@units) DEBUG - value: eV -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] +DEBUG - classes: +NXsource.nxdl.xml:/photon_energy +DEBUG - NXsource.nxdl.xml:/photon_energy@units [NX_ENERGY] DEBUG - ===== FIELD (//entry/instrument/source_pump/probe): DEBUG - value: b'visible light' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -3113,7 +3450,9 @@ DEBUG - restricted. 
DEBUG - documentation (NXsource.nxdl.xml:/probe): -DEBUG - type of radiation probe (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation probe (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/source_pump/type): DEBUG - value: b'Optical Laser' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -3148,17 +3487,25 @@ DEBUG - -> Metal Jet X-ray DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/type): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/type): -DEBUG - type of radiation source (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation source (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/temporal_resolution): DEBUG - value: 35.0 -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/temporal_resolution +DEBUG - <> +DEBUG - documentation (NXinstrument.nxdl.xml:/temporal_resolution): DEBUG - + Temporal resolution of the experiment (FWHM) + DEBUG - ===== ATTRS (//entry/instrument/temporal_resolution@units) DEBUG - value: fs -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/temporal_resolution +DEBUG - NXinstrument.nxdl.xml:/temporal_resolution@units [NX_TIME] DEBUG - ===== GROUP (//entry/process [NXmpes::/NXentry/NXprocess]): DEBUG - classpath: ['NXentry', 'NXprocess'] DEBUG - classes: @@ -3175,7 +3522,9 @@ DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/PROCESS): DEBUG - DEBUG - documentation (NXprocess.nxdl.xml:): -DEBUG - Document an event of data processing, reconstruction, or analysis for this data. +DEBUG - + Document an event of data processing, reconstruction, or analysis for this data. + DEBUG - ===== ATTRS (//entry/process@NX_class) DEBUG - value: NXprocess DEBUG - classpath: ['NXentry', 'NXprocess'] @@ -3185,48 +3534,109 @@ NXentry.nxdl.xml:/PROCESS NXprocess.nxdl.xml: DEBUG - @NX_class [NX_CHAR] DEBUG - -DEBUG - ===== GROUP (//entry/process/distortion [NXmpes::/NXentry/NXprocess/distortion]): -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - ===== GROUP (//entry/process/distortion [NXmpes::/NXentry/NXprocess/NXdistortion]): +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion'] +DEBUG - classes: +NXprocess.nxdl.xml:/DISTORTION +NXdistortion.nxdl.xml: +DEBUG - <> +DEBUG - documentation (NXprocess.nxdl.xml:/DISTORTION): +DEBUG - + Describes the operations of image distortion correction + +DEBUG - documentation (NXdistortion.nxdl.xml:): +DEBUG - + Subclass of NXprocess to describe post-processing distortion correction. + +DEBUG - ===== ATTRS (//entry/process/distortion@NX_class) +DEBUG - value: NXdistortion +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion'] +DEBUG - classes: +NXprocess.nxdl.xml:/DISTORTION +NXdistortion.nxdl.xml: +DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/process/distortion/applied): DEBUG - value: True -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion', 'NX_BOOLEAN'] +DEBUG - classes: +NXdistortion.nxdl.xml:/applied +DEBUG - <> +DEBUG - documentation (NXdistortion.nxdl.xml:/applied): DEBUG - + Has the distortion correction been applied? 
+ DEBUG - ===== FIELD (//entry/process/distortion/cdeform_field): DEBUG - value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ... -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion', 'NX_FLOAT'] +DEBUG - classes: +NXdistortion.nxdl.xml:/cdeform_field +DEBUG - <> +DEBUG - documentation (NXdistortion.nxdl.xml:/cdeform_field): DEBUG - + Column deformation field for general non-rigid distortion corrections. 2D matrix + holding the column information of the mapping of each original coordinate. + DEBUG - ===== FIELD (//entry/process/distortion/original_centre): DEBUG - value: [203. 215.] -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion', 'NX_FLOAT'] +DEBUG - classes: +NXdistortion.nxdl.xml:/original_centre +DEBUG - <> +DEBUG - documentation (NXdistortion.nxdl.xml:/original_centre): DEBUG - + For symmetry-guided distortion correction. Here we record the coordinates of the + symmetry centre point. + DEBUG - ===== FIELD (//entry/process/distortion/original_points): DEBUG - value: [166. 283.] -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion', 'NX_FLOAT'] +DEBUG - classes: +NXdistortion.nxdl.xml:/original_points +DEBUG - <> +DEBUG - documentation (NXdistortion.nxdl.xml:/original_points): DEBUG - + For symmetry-guided distortion correction. Here we record the coordinates of the + relevant symmetry points. + DEBUG - ===== FIELD (//entry/process/distortion/rdeform_field): DEBUG - value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ... -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion', 'NX_FLOAT'] +DEBUG - classes: +NXdistortion.nxdl.xml:/rdeform_field +DEBUG - <> +DEBUG - documentation (NXdistortion.nxdl.xml:/rdeform_field): DEBUG - + Row deformation field for general non-rigid distortion corrections. 2D matrix + holding the row information of the mapping of each original coordinate. + DEBUG - ===== FIELD (//entry/process/distortion/symmetry): DEBUG - value: 6 -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXdistortion', 'NX_INT'] +DEBUG - classes: +NXdistortion.nxdl.xml:/symmetry +DEBUG - <> +DEBUG - documentation (NXdistortion.nxdl.xml:/symmetry): DEBUG - + For `symmetry-guided distortion correction`_, + where a pattern of features is mapped to the regular geometric structure expected + from the symmetry. Here we record the number of elementary symmetry operations. + + .. _symmetry-guided distortion correction: https://www.sciencedirect.com/science/article/abs/pii/S0304399118303474?via%3Dihub + DEBUG - ===== GROUP (//entry/process/energy_calibration [NXmpes::/NXentry/NXprocess/NXcalibration]): DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration'] DEBUG - classes: NXmpes.nxdl.xml:/ENTRY/PROCESS/energy_calibration +NXprocess.nxdl.xml:/CALIBRATION NXcalibration.nxdl.xml: DEBUG - <> DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/PROCESS/energy_calibration): DEBUG - +DEBUG - documentation (NXprocess.nxdl.xml:/CALIBRATION): +DEBUG - + Describes the operations of calibration procedures, e.g. axis calibrations. + DEBUG - documentation (NXcalibration.nxdl.xml:): DEBUG - Subclass of NXprocess to describe post-processing calibrations. 
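The kx_calibration and ky_calibration groups that follow store a linear axis calibration; the NXcalibration documentation quoted below gives the relation `calibrated_axis` = `scaling` * `original_axis` + `offset`. A quick numpy cross-check with the values from this log suggests the reader applies the offset in the centred form k = scaling * (pixel - offset) (an inference from the first calibrated value, not something the log states); the axis length of 512 is likewise an assumption:

```python
import numpy as np

scaling = 0.01046958495673419  # from kx_calibration/scaling below, in 1/angstrom per pixel
offset = 256.0                 # from kx_calibration/offset below, in pixels

pixels = np.arange(512)               # assumed length of the uncalibrated axis
k_axis = scaling * (pixels - offset)  # centred variant of the documented linear relation
print(k_axis[:3])  # [-2.68021375 -2.66974416 -2.65927458], matching calibrated_axis below
```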
@@ -3236,6 +3646,7 @@ DEBUG - value: NXcalibration
 DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration']
 DEBUG - classes:
 NXmpes.nxdl.xml:/ENTRY/PROCESS/energy_calibration
+NXprocess.nxdl.xml:/CALIBRATION
 NXcalibration.nxdl.xml:
 DEBUG - @NX_class [NX_CHAR]
 DEBUG - 
@@ -3282,7 +3693,13 @@ DEBUG - 
     Use a0, a1, ..., an for the coefficients, corresponding to the values in the coefficients field.
 
-    Use x0, x1, ..., xn for the variables.
+    Use x0, x1, ..., xn for the nth position in the `original_axis` field.
+    If there is the symbol attribute specified for the `original_axis` this may be used instead of x.
+    If you want to use the whole axis use `x`.
+    Alternate axis can also be available as specified by the `input_SYMBOL` field.
+    The data should then be referred here by the `SYMBOL` name, e.g., for a field
+    name `input_my_field` it should be referred here by `my_field` or `my_field0` if
+    you want to read the zeroth element of the array.
 
     The formula should be numpy compliant.
 
@@ -3296,152 +3713,508 @@ DEBUG - documentation (NXcalibration.nxdl.xml:/original_axis):
 DEBUG - 
     Vector containing the data coordinates in the original uncalibrated axis
-DEBUG - ===== GROUP (//entry/process/kx_calibration [NXmpes::/NXentry/NXprocess/kx_calibration]):
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - ===== GROUP (//entry/process/kx_calibration [NXmpes::/NXentry/NXprocess/NXcalibration]):
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration']
+DEBUG - classes:
+NXprocess.nxdl.xml:/CALIBRATION
+NXcalibration.nxdl.xml:
+DEBUG - <>
+DEBUG - documentation (NXprocess.nxdl.xml:/CALIBRATION):
+DEBUG - 
+    Describes the operations of calibration procedures, e.g. axis calibrations.
+
+DEBUG - documentation (NXcalibration.nxdl.xml:):
+DEBUG - 
+    Subclass of NXprocess to describe post-processing calibrations.
+
+DEBUG - ===== ATTRS (//entry/process/kx_calibration@NX_class)
+DEBUG - value: NXcalibration
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration']
+DEBUG - classes:
+NXprocess.nxdl.xml:/CALIBRATION
+NXcalibration.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
 DEBUG - 
 DEBUG - ===== FIELD (//entry/process/kx_calibration/applied):
 DEBUG - value: True
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_BOOLEAN']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/applied
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/applied):
 DEBUG - 
+    Has the calibration been applied?
+    
 DEBUG - ===== FIELD (//entry/process/kx_calibration/calibrated_axis):
 DEBUG - value: [-2.68021375 -2.66974416 -2.65927458 -2.64880499 -2.63833541 -2.62786582 ...
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_FLOAT']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/calibrated_axis
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/calibrated_axis):
 DEBUG - 
+    A vector representing the axis after calibration, matching the data length
+    
 DEBUG - ===== FIELD (//entry/process/kx_calibration/offset):
 DEBUG - value: 256.0
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_FLOAT']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/offset
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/offset):
 DEBUG - 
+    For linear calibration. Offset parameter.
+    This should yield the relation `calibrated_axis` = `scaling` * `original_axis` + `offset`.
+    
 DEBUG - ===== FIELD (//entry/process/kx_calibration/scaling):
 DEBUG - value: 0.01046958495673419
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_FLOAT']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/scaling
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/scaling):
 DEBUG - 
-DEBUG - ===== GROUP (//entry/process/ky_calibration [NXmpes::/NXentry/NXprocess/ky_calibration]):
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+    For linear calibration. Scaling parameter.
+    This should yield the relation `calibrated_axis` = `scaling` * `original_axis` + `offset`.
+
+DEBUG - ===== GROUP (//entry/process/ky_calibration [NXmpes::/NXentry/NXprocess/NXcalibration]):
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration']
+DEBUG - classes:
+NXprocess.nxdl.xml:/CALIBRATION
+NXcalibration.nxdl.xml:
+DEBUG - <>
+DEBUG - documentation (NXprocess.nxdl.xml:/CALIBRATION):
+DEBUG - 
+    Describes the operations of calibration procedures, e.g. axis calibrations.
+
+DEBUG - documentation (NXcalibration.nxdl.xml:):
+DEBUG - 
+    Subclass of NXprocess to describe post-processing calibrations.
+
+DEBUG - ===== ATTRS (//entry/process/ky_calibration@NX_class)
+DEBUG - value: NXcalibration
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration']
+DEBUG - classes:
+NXprocess.nxdl.xml:/CALIBRATION
+NXcalibration.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
 DEBUG - 
 DEBUG - ===== FIELD (//entry/process/ky_calibration/applied):
 DEBUG - value: True
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_BOOLEAN']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/applied
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/applied):
 DEBUG - 
+    Has the calibration been applied?
+    
 DEBUG - ===== FIELD (//entry/process/ky_calibration/calibrated_axis):
 DEBUG - value: [-2.68021375 -2.66974416 -2.65927458 -2.64880499 -2.63833541 -2.62786582 ...
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_FLOAT']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/calibrated_axis
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/calibrated_axis):
 DEBUG - 
+    A vector representing the axis after calibration, matching the data length
+    
 DEBUG - ===== FIELD (//entry/process/ky_calibration/offset):
 DEBUG - value: 256.0
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_FLOAT']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/offset
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/offset):
 DEBUG - 
+    For linear calibration. Offset parameter.
+    This should yield the relation `calibrated_axis` = `scaling` * `original_axis` + `offset`.
+    
 DEBUG - ===== FIELD (//entry/process/ky_calibration/scaling):
 DEBUG - value: 0.01046958495673419
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXcalibration', 'NX_FLOAT']
+DEBUG - classes:
+NXcalibration.nxdl.xml:/scaling
+DEBUG - <>
+DEBUG - documentation (NXcalibration.nxdl.xml:/scaling):
 DEBUG - 
-DEBUG - ===== GROUP (//entry/process/registration [NXmpes::/NXentry/NXprocess/registration]):
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+    For linear calibration. Scaling parameter.
+    This should yield the relation `calibrated_axis` = `scaling` * `original_axis` + `offset`.
+
+DEBUG - ===== GROUP (//entry/process/registration [NXmpes::/NXentry/NXprocess/NXregistration]):
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration']
+DEBUG - classes:
+NXprocess.nxdl.xml:/REGISTRATION
+NXregistration.nxdl.xml:
+DEBUG - <>
+DEBUG - documentation (NXprocess.nxdl.xml:/REGISTRATION):
+DEBUG - 
+    Describes the operations of image registration
+
+DEBUG - documentation (NXregistration.nxdl.xml:):
+DEBUG - 
+    Describes image registration procedures.
+
+DEBUG - ===== ATTRS (//entry/process/registration@NX_class)
+DEBUG - value: NXregistration
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration']
+DEBUG - classes:
+NXprocess.nxdl.xml:/REGISTRATION
+NXregistration.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
 DEBUG - 
 DEBUG - ===== FIELD (//entry/process/registration/applied):
 DEBUG - value: True
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NX_BOOLEAN']
+DEBUG - classes:
+NXregistration.nxdl.xml:/applied
+DEBUG - <>
+DEBUG - documentation (NXregistration.nxdl.xml:/applied):
 DEBUG - 
+    Has the registration been applied?
+    
 DEBUG - ===== FIELD (//entry/process/registration/depends_on):
 DEBUG - value: b'/entry/process/registration/tranformations/rot_z'
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== GROUP (//entry/process/registration/tranformations [NXmpes::/NXentry/NXprocess/registration/tranformations]):
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== FIELD (//entry/process/registration/tranformations/rot_z):
-DEBUG - value: -1.0
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@depends_on)
-DEBUG - value: trans_y
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@offset)
-DEBUG - value: [256. 256. 0.]
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@transformation_type)
-DEBUG - value: rotation
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@units)
-DEBUG - value: degrees
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@vector)
-DEBUG - value: [0. 0. 1.]
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
 DEBUG - 
-DEBUG - ===== FIELD (//entry/process/registration/tranformations/trans_x):
-DEBUG - value: 43.0
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NX_CHAR']
+DEBUG - classes:
+NXregistration.nxdl.xml:/depends_on
 DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@depends_on)
-DEBUG - value: .
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - <>
+DEBUG - documentation (NXregistration.nxdl.xml:/depends_on):
 DEBUG - 
-DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@transformation_type)
-DEBUG - value: translation
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+    Specifies the position by pointing to the last transformation in the
+    transformation chain in the NXtransformations group.
+ +DEBUG - ===== GROUP (//entry/process/registration/tranformations [NXmpes::/NXentry/NXprocess/NXregistration/NXtransformations]): +DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations'] +DEBUG - classes: +NXregistration.nxdl.xml:/TRANSFORMATIONS +NXtransformations.nxdl.xml: +DEBUG - <> +DEBUG - documentation (NXregistration.nxdl.xml:/TRANSFORMATIONS): DEBUG - -DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@depends_on) -DEBUG - value: . -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA + To describe the operations of image registration (combinations of rigid + translations and rotations) + +DEBUG - documentation (NXtransformations.nxdl.xml:): DEBUG - -DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@transformation_type) -DEBUG - value: translation -DEBUG - classpath: ['NXentry', 'NXprocess'] -DEBUG - NOT IN SCHEMA + Collection of axis-based translations and rotations to describe a geometry. + May also contain axes that do not move and therefore do not have a transformation + type specified, but are useful in understanding coordinate frames within which + transformations are done, or in documenting important directions, such as the + direction of gravity. + + A nested sequence of transformations lists the translation and rotation steps + needed to describe the position and orientation of any movable or fixed device. + + There will be one or more transformations (axes) defined by one or more fields + for each transformation. Transformations can also be described by NXlog groups when + the values change with time. The all-caps name ``AXISNAME`` designates the + particular axis generating a transformation (e.g. a rotation axis or a translation + axis or a general axis). The attribute ``units="NX_TRANSFORMATION"`` designates the + units will be appropriate to the ``transformation_type`` attribute: + + * ``NX_LENGTH`` for ``translation`` + * ``NX_ANGLE`` for ``rotation`` + * ``NX_UNITLESS`` for axes for which no transformation type is specified + + This class will usually contain all axes of a sample stage or goniometer or + a detector. The NeXus default McSTAS coordinate frame is assumed, but additional + useful coordinate axes may be defined by using axes for which no transformation + type has been specified. + + The entry point (``depends_on``) will be outside of this class and point to a + field in here. Following the chain may also require following ``depends_on`` + links to transformations outside, for example to a common base table. If + a relative path is given, it is relative to the group enclosing the ``depends_on`` + specification. + + For a chain of three transformations, where :math:`T_1` depends on :math:`T_2` + and that in turn depends on :math:`T_3`, the final transformation :math:`T_f` is + + .. math:: T_f = T_3 T_2 T_1 + + In explicit terms, the transformations are a subset of affine transformations + expressed as 4x4 matrices that act on homogeneous coordinates, :math:`w=(x,y,z,1)^T`. + + For rotation and translation, + + .. math:: T_r &= \begin{pmatrix} R & o \\ 0_3 & 1 \end{pmatrix} \\ T_t &= \begin{pmatrix} I_3 & t + o \\ 0_3 & 1 \end{pmatrix} + + where :math:`R` is the usual 3x3 rotation matrix, :math:`o` is an offset vector, + :math:`0_3` is a row of 3 zeros, :math:`I_3` is the 3x3 identity matrix and + :math:`t` is the translation vector. 
+
+    :math:`o` is given by the ``offset`` attribute, :math:`t` is given by the ``vector``
+    attribute multiplied by the field value, and :math:`R` is defined as a rotation
+    about an axis in the direction of ``vector``, of angle of the field value.
+
+    NOTE
+
+    One possible use of ``NXtransformations`` is to define the motors and
+    transformations for a diffractometer (goniometer). Such use is mentioned
+    in the ``NXinstrument`` base class. Use one ``NXtransformations`` group
+    for each diffractometer and name the group appropriate to the device.
+    Collecting the motors of a sample table or xyz-stage in an NXtransformations
+    group is equally possible.
+
+
+    Following the section on the general description of axes in NXtransformations is a section which
+    documents the fields commonly used within NeXus for positioning purposes and their meaning. Whenever
+    there is a need for positioning a beam line component please use the existing names. Use as many fields
+    as needed in order to position the component. Feel free to add more axes if required. In the description
+    given below, only those attributes which are defined through the name are specified. Add the other attributes
+    of the full set:
+
+    * vector
+    * offset
+    * transformation_type
+    * depends_on
+
+    as needed.
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations@NX_class)
+DEBUG - value: NXtransformations
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations']
+DEBUG - classes:
+NXregistration.nxdl.xml:/TRANSFORMATIONS
+NXtransformations.nxdl.xml:
+DEBUG - @NX_class [NX_CHAR]
+DEBUG - 
+DEBUG - ===== FIELD (//entry/process/registration/tranformations/rot_z):
+DEBUG - value: -1.0
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME):
+DEBUG - 
+    Units need to be appropriate for translation or rotation
+
+    The name of this field is not forced. The user is free to use any name
+    that does not cause confusion. When using more than one ``AXISNAME`` field,
+    make sure that each field name is unique in the same group, as required
+    by HDF5.
+
+    The values given should be the start points of exposures for the corresponding
+    frames. The end points should be given in ``AXISNAME_end``.
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@depends_on)
+DEBUG - value: trans_y
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@depends_on - [NX_CHAR]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/depends_on):
+DEBUG - 
+    Points to the path to a field defining the axis on which this
+    depends or the string ".".
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@offset)
+DEBUG - value: [256. 256. 0.]
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@offset - [NX_NUMBER]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/offset):
+DEBUG - 
+    A fixed offset applied before the transformation (three vector components).
+    This is not intended to be a substitute for a fixed ``translation`` axis but, for example,
+    as the mechanical offset from mounting the axis to its dependency.
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@transformation_type)
+DEBUG - value: rotation
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@transformation_type - [NX_CHAR]
+DEBUG - <>
+DEBUG - enumeration (NXtransformations.nxdl.xml:/AXISNAME/transformation_type):
+DEBUG - -> translation
+DEBUG - -> rotation
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/transformation_type):
+DEBUG - 
+    The transformation_type may be ``translation``, in which case the
+    values are linear displacements along the axis, ``rotation``,
+    in which case the values are angular rotations around the axis.
+
+    If this attribute is omitted, this is an axis for which there
+    is no motion to be specified, such as the direction of gravity,
+    or the direction to the source, or a basis vector of a
+    coordinate frame.
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@units)
+DEBUG - value: degrees
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@units [NX_TRANSFORMATION]
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/rot_z@vector)
+DEBUG - value: [0. 0. 1.]
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@vector - [NX_NUMBER]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/vector):
+DEBUG - 
+    Three values that define the axis for this transformation.
+    The axis should be normalized to unit length, making it
+    dimensionless. For ``rotation`` axes, the direction should be
+    chosen for a right-handed rotation with increasing angle.
+    For ``translation`` axes the direction should be chosen for
+    increasing displacement. For general axes, an appropriate direction
+    should be chosen.
+
+DEBUG - ===== FIELD (//entry/process/registration/tranformations/trans_x):
+DEBUG - value: 43.0
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME):
+DEBUG - 
+    Units need to be appropriate for translation or rotation
+
+    The name of this field is not forced. The user is free to use any name
+    that does not cause confusion. When using more than one ``AXISNAME`` field,
+    make sure that each field name is unique in the same group, as required
+    by HDF5.
+
+    The values given should be the start points of exposures for the corresponding
+    frames. The end points should be given in ``AXISNAME_end``.
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@depends_on)
+DEBUG - value: .
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@depends_on - [NX_CHAR]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/depends_on):
+DEBUG - 
+    Points to the path to a field defining the axis on which this
+    depends or the string ".".
+
+DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@transformation_type)
+DEBUG - value: translation
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@transformation_type - [NX_CHAR]
+DEBUG - <>
+DEBUG - enumeration (NXtransformations.nxdl.xml:/AXISNAME/transformation_type):
+DEBUG - -> translation
+DEBUG - -> rotation
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/transformation_type):
 DEBUG - 
+    The transformation_type may be ``translation``, in which case the
+    values are linear displacements along the axis, ``rotation``,
+    in which case the values are angular rotations around the axis.
+
+    If this attribute is omitted, this is an axis for which there
+    is no motion to be specified, such as the direction of gravity,
+    or the direction to the source, or a basis vector of a
+    coordinate frame.
+
 DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@units)
 DEBUG - value: pixels
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG - 
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@units [NX_TRANSFORMATION]
 DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_x@vector)
 DEBUG - value: [1. 0. 0.]
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@vector - [NX_NUMBER]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/vector):
 DEBUG - 
+    Three values that define the axis for this transformation.
+    The axis should be normalized to unit length, making it
+    dimensionless. For ``rotation`` axes, the direction should be
+    chosen for a right-handed rotation with increasing angle.
+    For ``translation`` axes the direction should be chosen for
+    increasing displacement. For general axes, an appropriate direction
+    should be chosen.
+
 DEBUG - ===== FIELD (//entry/process/registration/tranformations/trans_y):
 DEBUG - value: 55.0
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME):
 DEBUG - 
+    Units need to be appropriate for translation or rotation
+
+    The name of this field is not forced. The user is free to use any name
+    that does not cause confusion. When using more than one ``AXISNAME`` field,
+    make sure that each field name is unique in the same group, as required
+    by HDF5.
+
+    The values given should be the start points of exposures for the corresponding
+    frames. The end points should be given in ``AXISNAME_end``.
 DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_y@depends_on)
 DEBUG - value: trans_x
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@depends_on - [NX_CHAR]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/depends_on):
 DEBUG -
+        Points to the path to a field defining the axis on which this
+        depends or the string ".".
+
 DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_y@transformation_type)
 DEBUG - value: translation
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@transformation_type - [NX_CHAR]
+DEBUG - <>
+DEBUG - enumeration (NXtransformations.nxdl.xml:/AXISNAME/transformation_type):
+DEBUG - -> translation
+DEBUG - -> rotation
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/transformation_type):
 DEBUG -
+        The transformation_type may be ``translation``, in which case the
+        values are linear displacements along the axis, ``rotation``,
+        in which case the values are angular rotations around the axis.
+
+        If this attribute is omitted, this is an axis for which there
+        is no motion to be specifies, such as the direction of gravity,
+        or the direction to the source, or a basis vector of a
+        coordinate frame.
+
 DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_y@units)
 DEBUG - value: pixels
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@units [NX_TRANSFORMATION]
 DEBUG - ===== ATTRS (//entry/process/registration/tranformations/trans_y@vector)
 DEBUG - value: [0. 1. 0.]
-DEBUG - classpath: ['NXentry', 'NXprocess']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXprocess', 'NXregistration', 'NXtransformations', 'NX_NUMBER']
+DEBUG - classes:
+NXtransformations.nxdl.xml:/AXISNAME
+DEBUG - NXtransformations.nxdl.xml:/AXISNAME@vector - [NX_NUMBER]
+DEBUG - <>
+DEBUG - documentation (NXtransformations.nxdl.xml:/AXISNAME/vector):
 DEBUG -
+        Three values that define the axis for this transformation.
+        The axis should be normalized to unit length, making it
+        dimensionless. For ``rotation`` axes, the direction should be
+        chosen for a right-handed rotation with increasing angle.
+        For ``translation`` axes the direction should be chosen for
+        increasing displacement. For general axes, an appropriate direction
+        should be chosen.
+
 DEBUG - ===== GROUP (//entry/sample [NXmpes::/NXentry/NXsample]):
 DEBUG - classpath: ['NXentry', 'NXsample']
 DEBUG - classes:
@@ -3455,12 +4228,12 @@ DEBUG - documentation (NXentry.nxdl.xml:/SAMPLE):
 DEBUG -
 DEBUG - documentation (NXsample.nxdl.xml:):
 DEBUG -
-    Any information on the sample.
-
-    This could include scanned variables that
-    are associated with one of the data dimensions, e.g. the magnetic field, or
-    logged data, e.g. monitored temperature vs elapsed time.
-
+        Any information on the sample.
+
+        This could include scanned variables that
+        are associated with one of the data dimensions, e.g. the magnetic field, or
+        logged data, e.g. monitored temperature vs elapsed time.
+
 DEBUG - ===== ATTRS (//entry/sample@NX_class)
 DEBUG - value: NXsample
 DEBUG - classpath: ['NXentry', 'NXsample']
 DEBUG - classes:
@@ -3472,14 +4245,20 @@ DEBUG - @NX_class [NX_CHAR]
 DEBUG -
 DEBUG - ===== FIELD (//entry/sample/bias):
 DEBUG - value: 17.799719004221362
-DEBUG - classpath: ['NXentry', 'NXsample']
-DEBUG - NOT IN SCHEMA
+DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT']
+DEBUG - classes:
+NXmpes.nxdl.xml:/ENTRY/SAMPLE/bias
+DEBUG - <>
+DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/SAMPLE/bias):
 DEBUG -
+        Voltage applied to sample and sample holder.
+
 DEBUG - ===== ATTRS (//entry/sample/bias@units)
 DEBUG - value: V
-DEBUG - classpath: ['NXentry', 'NXsample']
-DEBUG - NOT IN SCHEMA
-DEBUG -
+DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT']
+DEBUG - classes:
+NXmpes.nxdl.xml:/ENTRY/SAMPLE/bias
+DEBUG - NXmpes.nxdl.xml:/ENTRY/SAMPLE/bias@units [NX_VOLTAGE]
 DEBUG - ===== FIELD (//entry/sample/chemical_formula):
 DEBUG - value: b'MoTe2'
 DEBUG - classpath: ['NXentry', 'NXsample', 'NX_CHAR']
@@ -3494,25 +4273,25 @@ DEBUG -
 DEBUG - documentation (NXsample.nxdl.xml:/chemical_formula):
 DEBUG -
-    The chemical formula specified using CIF conventions.
-    Abbreviated version of CIF standard:
-
-    * Only recognized element symbols may be used.
-    * Each element symbol is followed by a 'count' number. A count of '1' may be omitted.
-    * A space or parenthesis must separate each cluster of (element symbol + count).
-    * Where a group of elements is enclosed in parentheses, the multiplier for the
-      group must follow the closing parentheses. That is, all element and group
-      multipliers are assumed to be printed as subscripted numbers.
-    * Unless the elements are ordered in a manner that corresponds to their chemical
-      structure, the order of the elements within any group or moiety depends on
-      whether or not carbon is present.
-    * If carbon is present, the order should be:
-
-      - C, then H, then the other elements in alphabetical order of their symbol.
-      - If carbon is not present, the elements are listed purely in alphabetic order of their symbol.
-
-    * This is the *Hill* system used by Chemical Abstracts.
-
+        The chemical formula specified using CIF conventions.
+        Abbreviated version of CIF standard:
+
+        * Only recognized element symbols may be used.
+        * Each element symbol is followed by a 'count' number. A count of '1' may be omitted.
+        * A space or parenthesis must separate each cluster of (element symbol + count).
+        * Where a group of elements is enclosed in parentheses, the multiplier for the
+          group must follow the closing parentheses. That is, all element and group
+          multipliers are assumed to be printed as subscripted numbers.
+        * Unless the elements are ordered in a manner that corresponds to their chemical
+          structure, the order of the elements within any group or moiety depends on
+          whether or not carbon is present.
+        * If carbon is present, the order should be:
+
+          - C, then H, then the other elements in alphabetical order of their symbol.
+          - If carbon is not present, the elements are listed purely in alphabetic order of their symbol.
+
+        * This is the *Hill* system used by Chemical Abstracts.
+
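The Hill convention quoted above is mechanical enough to sketch. The following illustrative Python is not part of the patch; it reproduces the ordering rule and the clause that a count of 1 is omitted, matching the b'MoTe2' value being validated:

```python
# Sketch of the Hill ordering rule documented above: with carbon present the
# order is C, then H, then the remaining symbols alphabetically; without
# carbon, all symbols are ordered alphabetically. A count of 1 is omitted.
def hill_formula(counts):
    symbols = sorted(counts)
    if "C" in counts:
        rest = sorted(s for s in symbols if s not in ("C", "H"))
        symbols = ["C"] + (["H"] if "H" in counts else []) + rest
    return "".join(s + (str(counts[s]) if counts[s] != 1 else "") for s in symbols)

print(hill_formula({"Mo": 1, "Te": 2}))        # MoTe2, as in the log above
print(hill_formula({"C": 2, "H": 6, "O": 1}))  # C2H6O
```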
 DEBUG - ===== FIELD (//entry/sample/depends_on):
 DEBUG - value: b'/entry/sample/transformations/corrected_phi'
 DEBUG - classpath: ['NXentry', 'NXsample', 'NX_CHAR']
@@ -3521,12 +4300,12 @@ NXsample.nxdl.xml:/depends_on
 DEBUG - <>
 DEBUG - documentation (NXsample.nxdl.xml:/depends_on):
 DEBUG -
-    NeXus positions components by applying a set of translations and rotations
-    to apply to the component starting from 0, 0, 0. The order of these operations
-    is critical and forms what NeXus calls a dependency chain. The depends_on
-    field defines the path to the top most operation of the dependency chain or the
-    string "." if located in the origin. Usually these operations are stored in a
-    NXtransformations group. But NeXus allows them to be stored anywhere.
+        NeXus positions components by applying a set of translations and rotations
+        to apply to the component starting from 0, 0, 0. The order of these operations
+        is critical and forms what NeXus calls a dependency chain. The depends_on
+        field defines the path to the top most operation of the dependency chain or the
+        string "." if located in the origin. Usually these operations are stored in a
+        NXtransformations group. But NeXus allows them to be stored anywhere.
 DEBUG - ===== FIELD (//entry/sample/description):
 DEBUG - value: b'MoTe2'
@@ -3536,8 +4315,8 @@ NXsample.nxdl.xml:/description
 DEBUG - <>
 DEBUG - documentation (NXsample.nxdl.xml:/description):
 DEBUG -
-    Description of the sample
-
+        Description of the sample
+
 DEBUG - ===== FIELD (//entry/sample/gas_pressure):
 DEBUG - value: 4.5599999999999996e-11
 DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT']
@@ -3562,7 +4341,9 @@ DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/SAMPLE/name):
 DEBUG -
 DEBUG - documentation (NXsample.nxdl.xml:/name):
-DEBUG - Descriptive name of sample
+DEBUG -
+        Descriptive name of sample
+
 DEBUG - ===== FIELD (//entry/sample/preparation_date):
 DEBUG - value: b'2019-05-22T14:00:00+00:00'
 DEBUG - classpath: ['NXentry', 'NXsample', 'NX_DATE_TIME']
@@ -3576,7 +4357,9 @@ DEBUG - annealing).
 DEBUG - documentation (NXsample.nxdl.xml:/preparation_date):
-DEBUG - Date of preparation of the sample
+DEBUG -
+        Date of preparation of the sample
+
 DEBUG - ===== GROUP (//entry/sample/preparation_description [NXmpes::/NXentry/NXsample/NXnote]):
 DEBUG - classpath: ['NXentry', 'NXsample', 'NXnote']
 DEBUG - classes:
@@ -3675,10 +4458,10 @@ DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/SAMPLE/situation):
 DEBUG -
 DEBUG - documentation (NXsample.nxdl.xml:/situation):
 DEBUG -
-    The atmosphere will be one of the components, which is where
-    its details will be stored; the relevant components will be
-    indicated by the entry in the sample_component member.
-
+        The atmosphere will be one of the components, which is where
+        its details will be stored; the relevant components will be
+        indicated by the entry in the sample_component member.
+
 DEBUG - ===== FIELD (//entry/sample/temperature):
 DEBUG - value: 23.050763803680983
 DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT']
@@ -3694,7 +4477,9 @@ DEBUG - /entry/instrument/manipulator/sample_temperature.
 DEBUG - documentation (NXsample.nxdl.xml:/temperature):
-DEBUG - Sample temperature. This could be a scanned variable
+DEBUG -
+        Sample temperature. This could be a scanned variable
+
 DEBUG - ===== ATTRS (//entry/sample/temperature@units)
 DEBUG - value: K
 DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT']
@@ -3711,10 +4496,10 @@ NXtransformations.nxdl.xml:
 DEBUG - <>
 DEBUG - documentation (NXsample.nxdl.xml:/TRANSFORMATIONS):
 DEBUG -
-    This is the group recommended for holding the chain of translation
-    and rotation operations necessary to position the component within
-    the instrument. The dependency chain may however traverse similar groups in
-    other component groups.
+        This is the group recommended for holding the chain of translation
+        and rotation operations necessary to position the component within
+        the instrument. The dependency chain may however traverse similar groups in
+        other component groups.
 DEBUG - documentation (NXtransformations.nxdl.xml:):
 DEBUG -
@@ -4331,7 +5116,9 @@ DEBUG - Datetime of the start of the measurement.
 DEBUG - documentation (NXentry.nxdl.xml:/start_time):
-DEBUG - Starting time of measurement
+DEBUG -
+        Starting time of measurement
+
 DEBUG - ===== FIELD (//entry/title):
 DEBUG - value: b'Valence Band Dynamics - 1030 nm linear p-polarized pump, 0.6 mJ/cm2 absorbed fluence'
 DEBUG - classpath: ['NXentry', 'NX_CHAR']
 DEBUG - classes:
@@ -4342,7 +5129,9 @@ DEBUG - <>
 DEBUG - documentation (NXmpes.nxdl.xml:/ENTRY/title):
 DEBUG -
 DEBUG - documentation (NXentry.nxdl.xml:/title):
-DEBUG - Extended title for entry
+DEBUG -
+        Extended title for entry
+
 DEBUG - ===== GROUP (//entry/user [NXmpes::/NXentry/NXuser]):
 DEBUG - classpath: ['NXentry', 'NXuser']
 DEBUG - classes:
diff --git a/tests/data/dataconverter/readers/mpes/config_file.json b/tests/data/dataconverter/readers/mpes/config_file.json
index c584a5011..125243397 100644
--- a/tests/data/dataconverter/readers/mpes/config_file.json
+++ b/tests/data/dataconverter/readers/mpes/config_file.json
@@ -332,19 +332,10 @@
     "/ENTRY[entry]/PROCESS[process]/CALIBRATION[ky_calibration]/offset": "@attrs:metadata/momentum_correction/offset_ky",
     "/ENTRY[entry]/PROCESS[process]/CALIBRATION[ky_calibration]/calibrated_axis": "@attrs:metadata/momentum_correction/calibration/axis_ky",
     "/ENTRY[entry]/DATA[data]/@axes": "@data:dims",
-    "/ENTRY[entry]/DATA[data]/AXISNAME_indices[@kx_indices]": "@data:kx.index",
-    "/ENTRY[entry]/DATA[data]/AXISNAME_indices[@ky_indices]": "@data:ky.index",
-    "/ENTRY[entry]/DATA[data]/AXISNAME_indices[@energy_indices]": "@data:energy.index",
-    "/ENTRY[entry]/DATA[data]/AXISNAME_indices[@delay_indices]": "@data:delay.index",
+    "/ENTRY[entry]/DATA[data]/AXISNAME_indices[@*_indices]": "@data:*.index",
     "/ENTRY[entry]/DATA[data]/@signal": "data",
     "/ENTRY[entry]/DATA[data]/data": "@data:data",
     "/ENTRY[entry]/DATA[data]/data/@units": "counts",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[kx]": "@data:kx.data",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[kx]/@units": "@data:kx.unit",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[ky]": "@data:ky.data",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[ky]/@units": "@data:ky.unit",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[energy]": "@data:energy.data",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[energy]/@units": "@data:energy.unit",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[delay]": "@data:delay.data",
-    "/ENTRY[entry]/DATA[data]/VARIABLE[delay]/@units": "@data:delay.unit"
+    "/ENTRY[entry]/DATA[data]/AXISNAME[*]": "@data:*.data",
+    "/ENTRY[entry]/DATA[data]/AXISNAME[*]/@units": "@data:*.unit"
 }
\ No newline at end of file
diff --git a/tests/data/dataconverter/readers/xrd/ACZCTS_5-60_181.xrdml b/tests/data/dataconverter/readers/xrd/ACZCTS_5-60_181.xrdml
new file mode 100644
index 000000000..5af61b718 --- /dev/null +++ b/tests/data/dataconverter/readers/xrd/ACZCTS_5-60_181.xrdml @@ -0,0 +1,106 @@ + + + + Configuration=XYZ Stage, Owner=User-1, Creation date=02-Nov-17 2:08:05 PM + Goniometer=PW3050/60 (Theta/Theta); Minimum step size 2Theta:0.001; Minimum step size Omega:0.001 + Sample stage=Programmable x,y,z stage; Minimum step size X:0.01; Minimum step size Y:0.01; Minimum step size Z:0.001 + Diffractometer system=XPERT-PRO + Measurement program=D:\user\Pepe\program_files\BB_10-80_18min.xrdmp, Identifier={5202C7B3-EFFD-43D7-83CA-1A77018B086F} + Batch program=D:\user\Pepe\program_files\batch_5samples.xrdmp, Identifier={49B9A751-97B5-49F0-9CAA-E07AE5E71B6A} + + + + + + + + + PHD Lower Level = 4.02 (keV), PHD Upper Level = 11.26 (keV) + + + 1.5405980 + 1.5444260 + 1.3922500 + 0.5000 + + + 240.00 + + 40 + 40 + Cu + + 12.0 + 0.4 + 6.0 + + + + 0.0400 + + + 11.60 + + + 140.00 + 0.38 + + + + 240.00 + + 8.00 + + + 0.0400 + + + Ni + 0.020 + + + + 25.0 + 70.0 + + Scanning + 3.347 + + + +
+ 2018-06-12T18:34:49+02:00 + 2018-06-12T18:53:34+02:00 + + cnu_xlab + + + Data Collector + XPERT-PRO + 0000000011035964 + +
+ + + 10.00656514 + 79.99097181 + + + 5.00328257 + 39.99548591 + + + 0.00 + + + 50.00 + + + 7.868 + + 49.470 + 2086 2053 2118 2024 2127 2128 2115 2093 2063 1985 2038 2118 2107 2136 2080 2150 2036 2039 2073 2025 2069 2028 2070 1975 1978 2068 2116 2075 2000 2082 1944 2037 1994 2061 2035 2132 2046 2133 1981 2066 1986 2008 2028 2052 2065 2136 2104 2096 1935 2029 2036 2023 1933 2023 2040 2080 1964 2003 1960 1940 2079 2102 1959 2058 1970 2002 1974 1955 1998 2025 2060 1908 2014 2037 1925 2028 1944 2052 2012 2002 2018 1977 2063 1963 2073 1963 1964 1904 1971 1979 1975 2012 2008 1988 1946 1990 1894 1990 1897 1979 1934 2009 1986 1978 1915 1915 2010 1953 1891 1978 1983 1997 1984 1959 2032 1990 1949 1959 1925 1978 1963 1879 2055 1883 1974 1949 1929 1987 2000 1893 1926 1961 1978 1920 1965 1962 1919 1930 2007 1970 1960 1904 2005 1946 1893 1950 1908 1906 1983 1885 1928 1882 1835 1916 1951 2040 1986 1905 1970 1889 1916 2011 1973 1898 1954 1935 1839 1977 1876 1834 1850 1844 1928 2021 1913 1858 1860 1800 1907 1844 1850 1856 1924 1896 1902 1910 1904 1881 1916 2005 1825 1881 1902 1986 1936 1860 1928 1935 1858 1918 1873 1854 1954 1807 1871 1778 1847 1898 1836 1861 1855 1907 1804 1907 1861 1833 1904 1835 1899 1862 1826 1881 1905 1876 1969 1962 1869 1888 1808 1770 1888 1865 1794 1852 1834 1851 1840 1846 1872 1850 1829 1818 1817 1869 1760 1888 1895 1866 1896 1807 1867 1834 1743 1835 1866 1817 1816 1821 1781 1828 1790 1836 1841 1774 1831 1825 1804 1795 1776 1888 1805 1771 1837 1857 1777 1758 1801 1864 1796 1808 1801 1798 1786 1758 1815 1865 1809 1767 1790 1785 1719 1869 1762 1819 1739 1855 1746 1796 1804 1748 1745 1787 1835 1826 1753 1823 1867 1833 1777 1761 1778 1806 1826 1833 1740 1735 1792 1788 1738 1704 1711 1767 1841 1749 1791 1850 1794 1735 1761 1863 1800 1673 1684 1738 1651 1779 1686 1700 1767 1737 1722 1805 1812 1746 1720 1647 1744 1746 1724 1739 1734 1697 1780 1684 1672 1699 1667 1685 1777 1647 1695 1761 1750 1700 1724 1719 1678 1726 1727 1653 1766 1753 1753 1729 1816 1708 1720 1661 1686 1812 1733 1823 1696 1716 1663 1727 1665 1731 1756 1698 1692 1726 1676 1751 1761 1628 1774 1719 1565 1690 1709 1667 1660 1663 1682 1759 1685 1673 1653 1724 1648 1745 1698 1615 1708 1691 1619 1694 1603 1620 1647 1663 1673 1646 1616 1627 1618 1615 1624 1630 1625 1629 1705 1657 1661 1633 1613 1611 1673 1643 1653 1576 1696 1649 1582 1601 1565 1696 1637 1592 1639 1579 1665 1561 1640 1716 1640 1632 1674 1712 1723 1647 1688 1666 1728 1706 1780 1731 1737 1801 1785 1910 1889 1967 1925 1833 1872 1855 1773 1799 1784 1684 1751 1765 1688 1633 1661 1699 1747 1668 1631 1607 1620 1690 1590 1594 1633 1665 1666 1698 1652 1642 1628 1662 1647 1621 1646 1674 1669 1696 1640 1717 1634 1611 1637 1670 1653 1583 1606 1620 1690 1528 1627 1596 1689 1625 1576 1612 1543 1716 1575 1690 1571 1586 1556 1637 1611 1608 1586 1670 1579 1646 1570 1695 1602 1518 1491 1679 1755 1685 1729 1572 1564 1592 1655 1661 1609 1621 1652 1676 1612 1631 1617 1571 1584 1690 1624 1660 1667 1635 1606 1670 1555 1604 1709 1646 1652 1613 1663 1588 1648 1625 1548 1654 1639 1542 1495 1639 1628 1602 1615 1609 1541 1721 1580 1640 1567 1604 1685 1610 1619 1677 1753 1667 1723 1697 1782 1777 1682 1737 1746 1654 1730 1687 1592 1552 1590 1576 1606 1608 1541 1589 1489 1596 1494 1598 1591 1610 1553 1527 1602 1626 1664 1549 1542 1580 1587 1544 1562 1563 1533 1482 1508 1602 1555 1557 1585 1507 1469 1582 1552 1575 1575 1528 1515 1518 1539 1560 1615 1491 1523 1537 1474 1537 1571 1521 1523 1560 1522 1488 1466 1627 1528 1637 1553 1580 1619 1505 1472 1552 1443 1558 1501 1542 1589 1574 1532 1566 
1446 1503 1555 1563 1467 1492 1476 1570 1540 1562 1515 1480 1491 1483 1509 1455 1549 1502 1543 1531 1546 1516 1447 1527 1480 1592 1550 1496 1496 1474 1431 1468 1537 1519 1526 1507 1525 1500 1494 1503 1504 1522 1541 1584 1522 1466 1498 1435 1533 1520 1584 1415 1490 1406 1520 1481 1548 1440 1471 1559 1511 1571 1518 1544 1440 1410 1451 1516 1437 1549 1459 1493 1510 1533 1487 1419 1475 1452 1500 1415 1438 1474 1493 1436 1449 1422 1441 1404 1407 1422 1413 1477 1408 1419 1475 1425 1414 1501 1335 1431 1434 1487 1430 1496 1440 1489 1420 1435 1537 1352 1456 1438 1460 1455 1443 1533 1434 1506 1417 1441 1440 1429 1483 1400 1475 1512 1424 1454 1485 1401 1513 1449 1403 1393 1417 1432 1438 1391 1393 1380 1376 1473 1430 1387 1405 1434 1376 1441 1394 1407 1389 1363 1396 1452 1420 1396 1360 1412 1432 1427 1375 1424 1367 1372 1407 1523 1410 1372 1383 1417 1449 1330 1343 1407 1377 1416 1448 1429 1498 1464 1412 1390 1402 1313 1351 1434 1360 1410 1412 1370 1343 1359 1402 1381 1352 1419 1361 1417 1388 1388 1359 1396 1366 1369 1368 1338 1333 1406 1336 1351 1366 1369 1364 1365 1358 1346 1372 1340 1251 1314 1363 1472 1391 1344 1398 1394 1363 1410 1312 1430 1450 1381 1410 1371 1371 1378 1370 1316 1326 1355 1257 1303 1326 1388 1322 1368 1324 1350 1403 1413 1423 1403 1400 1414 1363 1403 1318 1324 1309 1360 1325 1376 1402 1391 1419 1323 1338 1477 1372 1430 1425 1427 1426 1414 1478 1515 1547 1635 1621 1653 1795 1726 1706 1611 1538 1541 1466 1505 1430 1449 1358 1377 1375 1350 1315 1356 1312 1279 1285 1303 1285 1300 1385 1283 1302 1413 1344 1327 1288 1288 1317 1321 1322 1377 1346 1385 1331 1419 1350 1271 1405 1356 1318 1283 1328 1303 1358 1352 1333 1310 1267 1276 1287 1318 1304 1277 1321 1274 1294 1292 1333 1235 1233 1249 1299 1334 1313 1266 1321 1278 1293 1267 1257 1247 1381 1313 1342 1268 1300 1239 1286 1328 1319 1300 1333 1300 1285 1355 1202 1251 1340 1255 1306 1300 1270 1277 1292 1182 1265 1332 1291 1211 1296 1366 1242 1257 1323 1296 1255 1241 1312 1310 1206 1198 1234 1269 1322 1272 1341 1339 1195 1247 1284 1278 1323 1287 1242 1251 1225 1273 1271 1231 1266 1208 1246 1289 1229 1221 1224 1266 1248 1256 1273 1227 1254 1246 1207 1296 1292 1236 1245 1275 1252 1216 1250 1241 1218 1246 1249 1253 1291 1250 1269 1266 1186 1229 1283 1243 1297 1224 1201 1200 1192 1246 1211 1275 1297 1274 1202 1239 1243 1223 1211 1250 1300 1267 1232 1233 1256 1213 1168 1234 1349 1331 1229 1280 1344 1281 1302 1265 1316 1297 1229 1284 1241 1275 1268 1292 1315 1323 1357 1363 1362 1387 1428 1480 1582 1459 1478 1425 1303 1284 1233 1312 1269 1249 1218 1215 1258 1183 1238 1224 1276 1238 1216 1148 1141 1144 1158 1159 1140 1203 1234 1224 1269 1147 1167 1192 1225 1160 1175 1169 1266 1204 1175 1227 1173 1211 1202 1143 1201 1170 1171 1255 1246 1222 1211 1132 1184 1160 1163 1158 1107 1197 1202 1143 1243 1137 1212 1182 1156 1166 1213 1232 1217 1229 1170 1203 1172 1175 1246 1214 1185 1185 1280 1136 1201 1170 1210 1164 1180 1205 1272 1156 1104 1202 1187 1190 1162 1207 1186 1184 1289 1187 1165 1156 1144 1125 1250 1222 1215 1224 1204 1247 1176 1213 1226 1270 1237 1336 1323 1295 1384 1445 1468 1431 1459 1497 1540 1510 1544 1524 1612 1554 1417 1518 1441 1484 1376 1415 1396 1443 1360 1359 1317 1347 1352 1378 1335 1360 1331 1303 1429 1285 1338 1341 1417 1356 1437 1353 1380 1410 1412 1360 1425 1516 1457 1554 1527 1489 1613 1672 1661 1901 1849 1830 2014 2074 2072 2293 2524 2763 2892 3235 3594 4081 4820 5534 6332 7671 9236 11473 14125 17900 23015 29549 33330 33088 27079 20836 18840 18681 17777 14397 9708 6420 4517 3701 3046 2821 2422 2290 2084 1966 1863 1790 1692 1678 
1594 1564 1529 1448 1395 1441 1498 1455 1417 1441 1394 1403 1384 1394 1352 1303 1333 1309 1321 1279 1271 1318 1254 1327 1285 1258 1198 1261 1278 1294 1196 1197 1174 1202 1253 1179 1211 1214 1233 1231 1283 1273 1231 1340 1337 1360 1363 1362 1396 1431 1401 1420 1445 1465 1394 1373 1325 1298 1284 1235 1245 1276 1199 1192 1156 1157 1237 1179 1148 1088 1159 1145 1172 1160 1075 1136 1159 1134 1135 1158 1143 1194 1114 1122 1081 1131 1077 1119 1091 1061 1091 1113 1159 1037 1109 1065 1105 1117 1073 1134 1090 1073 1101 1094 1043 1039 1121 1075 1125 1082 1023 1045 1042 1133 1115 1070 1014 1019 1069 998 1116 983 1030 1053 1024 1022 1063 1058 1018 1089 1034 1040 1041 1099 1002 1066 1008 1048 978 1110 1041 1032 1064 1032 1047 1086 1049 1076 1088 1003 1041 1056 1099 1101 1084 1074 1110 1095 1106 1058 1145 1141 1113 1142 1177 1087 1096 1122 1108 1097 1076 1036 1041 1020 1045 1116 1056 1040 1030 1053 1030 1022 1052 1030 954 1044 1052 1013 1018 1035 1013 1010 1043 989 1009 1027 994 997 1043 962 1013 1033 1044 971 988 989 988 991 1044 1000 967 1005 1016 1037 1011 991 957 965 1031 1053 992 996 1044 969 953 986 1021 986 989 972 939 1001 1043 974 981 1020 1019 957 967 1002 1016 969 949 988 912 983 975 952 990 1015 927 1035 996 1043 933 992 1016 1005 989 920 1060 1003 955 969 1014 961 985 1030 932 987 1021 1034 1019 998 1031 1051 954 1008 946 977 980 993 973 1004 1081 1070 1027 1053 1010 1072 1120 1079 1051 1087 1064 1098 1081 1102 1189 1120 1224 1377 1318 1410 1504 1582 1717 1620 1629 1555 1488 1500 1486 1385 1416 1364 1329 1200 1227 1226 1149 1208 1182 1181 1083 1164 1187 1191 1186 1178 1154 1166 1200 1227 1249 1292 1205 1329 1246 1279 1353 1384 1390 1364 1287 1281 1338 1325 1273 1220 1303 1157 1210 1195 1148 1170 1176 1259 1163 1127 1185 1174 1123 1107 1146 1196 1065 1077 1128 1119 1200 1164 1144 1166 1248 1154 1154 1115 1122 1112 1136 1086 1155 1273 1124 1190 1151 1071 1154 1074 1167 1134 1114 1082 1077 1105 1022 1070 1051 1053 1015 980 1021 1005 997 1009 1021 952 1012 1061 1032 1008 1006 1019 1006 947 965 994 1056 907 970 981 955 955 993 1011 1002 951 1013 1017 976 984 889 984 964 970 984 931 933 933 932 916 945 946 944 930 921 924 907 917 907 1024 953 904 927 937 921 950 948 975 869 926 861 946 915 886 891 923 895 879 825 899 878 919 896 879 860 832 928 911 864 882 921 888 892 879 846 963 919 952 893 910 913 884 954 824 884 879 891 860 879 917 887 928 857 933 857 927 881 875 844 873 910 881 840 887 854 899 860 909 857 885 835 854 887 896 932 893 896 865 879 899 878 887 844 843 864 850 800 869 905 857 894 897 928 873 841 822 886 849 828 854 922 882 855 825 817 858 901 863 840 894 886 902 862 842 877 854 845 870 823 864 892 890 866 854 860 917 871 888 863 900 860 794 866 912 935 884 827 914 891 924 964 899 892 890 884 907 851 876 924 944 996 974 1058 1011 1002 1055 1089 1091 1147 1209 1167 1237 1157 1164 1107 1082 1046 1075 1041 1022 982 979 956 953 881 831 861 889 894 834 901 873 839 878 895 891 954 916 958 959 908 969 913 950 835 887 910 902 948 909 875 877 905 856 900 926 872 843 854 829 836 857 904 800 817 899 894 871 778 834 821 873 878 799 877 853 847 847 874 902 901 864 845 835 897 850 833 810 804 850 876 897 905 887 860 858 899 887 834 889 863 893 830 914 925 927 909 885 965 940 947 907 911 916 873 885 847 923 846 836 850 856 804 784 847 862 824 844 881 837 813 818 848 887 887 853 808 804 853 840 862 919 837 820 808 905 838 861 857 817 800 841 871 880 874 839 889 855 844 875 874 872 848 881 896 831 822 833 840 876 858 847 854 841 841 930 977 950 939 940 956 932 952 959 1014 992 967 984 973 950 984 
1008 997 994 991 1006 1012 1063 1010 1091 1115 1160 1105 1179 1198 1202 1222 1282 1314 1311 1389 1377 1405 1312 1393 1377 1416 1385 1349 1347 1410 1391 1327 1442 1382 1349 1386 1397 1445 1386 1474 1458 1450 1414 1420 1448 1469 1521 1501 1487 1537 1531 1456 1594 1529 1635 1574 1574 1611 1675 1568 1634 1589 1642 1717 1679 1708 1737 1659 1745 1763 1717 1730 1847 1782 1803 1864 1952 2040 2017 2101 2098 2067 2177 2236 2309 2344 2535 2521 2625 2722 2983 2906 3164 3413 3643 3771 4111 4374 4700 5170 5606 5920 6519 6981 7807 8535 9324 10317 11260 12671 13820 15930 17757 19828 22311 25843 29316 33738 38285 43955 50089 56440 62996 69428 74307 77322 77919 76144 73310 68798 65125 61819 58419 55442 51345 46303 41361 35801 31152 26874 23004 19856 17349 15284 13441 12150 10675 9276 8485 7737 6932 6383 5863 5307 4915 4516 4192 4020 3706 3556 3398 3187 3064 3015 2861 2714 2625 2473 2409 2405 2286 2247 2259 2141 2098 2051 2054 1956 1979 1973 1864 1824 1795 1720 1800 1753 1688 1682 1568 1642 1570 1530 1523 1570 1583 1501 1501 1572 1478 1499 1405 1369 1423 1472 1457 1392 1361 1440 1305 1326 1352 1329 1302 1309 1278 1278 1295 1284 1252 1222 1234 1274 1212 1236 1183 1202 1161 1198 1265 1141 1123 1131 1172 1237 1158 1115 1172 1226 1122 1127 1172 1135 1089 1077 1100 1073 1033 1042 1064 1088 1121 1092 1095 1104 1061 1058 1049 1057 1040 1007 1050 1014 1042 1026 979 1082 1040 963 953 965 999 1017 1014 1025 907 935 1023 983 982 990 959 1025 939 897 917 925 998 984 948 964 936 1010 921 922 883 982 930 872 912 955 922 924 937 894 982 964 934 911 908 928 981 956 985 959 972 990 946 962 1004 967 1025 1085 1038 1068 1072 1065 1095 1119 1100 1065 1160 1138 1098 1134 1032 1072 1072 1024 1033 1015 1016 996 945 937 909 961 984 903 908 955 880 893 866 874 903 877 892 865 942 895 904 892 900 887 914 865 896 841 876 916 940 936 934 987 912 945 959 947 980 979 997 1050 1052 1105 1066 1156 1137 1118 1094 1225 1177 1199 1180 1166 1105 1174 1144 1095 1112 1066 1063 1089 1029 1004 1026 919 991 944 916 893 879 903 949 953 879 934 917 878 875 872 918 923 910 897 866 808 857 827 816 837 814 820 768 802 777 731 818 759 782 808 815 797 724 792 745 759 714 787 777 749 758 865 767 707 810 772 737 739 748 727 764 787 753 761 763 766 766 765 730 828 789 862 801 816 795 797 832 888 888 821 772 802 848 831 795 883 789 794 828 761 784 797 810 764 785 774 738 728 736 745 773 733 753 731 742 675 736 741 721 728 726 731 726 733 761 741 734 711 762 716 702 726 742 749 688 726 758 775 728 746 709 694 687 694 639 724 727 777 671 693 747 667 711 743 738 741 730 723 756 713 721 680 675 718 708 704 745 728 738 675 686 738 708 758 711 702 756 793 749 717 792 693 756 763 775 821 840 877 932 934 1009 1090 1302 1464 1448 1383 1347 1208 1126 1163 1175 1169 1176 1209 1098 1067 1056 1049 977 964 917 1034 963 1071 1236 1316 1477 1628 1928 2090 2115 1943 1715 1525 1332 1346 1373 1375 1474 1490 1406 1238 1076 994 885 851 813 804 834 780 796 713 717 689 698 803 719 735 653 718 690 617 692 702 687 671 684 742 695 720 714 716 688 701 725 681 725 642 709 713 600 680 659 648 601 642 657 682 629 689 638 685 601 635 662 665 669 666 645 682 641 663 644 672 698 669 639 636 637 697 677 603 616 669 648 619 663 621 608 678 672 663 640 667 655 683 638 631 623 626 655 598 611 622 589 642 626 636 642 668 579 639 588 667 638 685 649 609 605 671 625 608 630 622 633 627 630 629 677 647 652 623 646 622 629 621 583 682 656 630 662 659 611 596 593 590 604 585 568 676 657 605 643 638 657 598 607 597 645 578 620 590 587 635 590 644 625 601 596 656 648 612 635 653 663 646 631 631 684 646 
706 664 695 714 652 657 705 702 600 691 693 708 648 694 638 651 669 643 640 705 651 728 638 611 610 578 697 729 662 686 667 690 640 615 643 638 599 665 609 635 619 614 595 582 623 612 578 614 617 587 608 586 596 564 608 600 625 597 554 592 622 602 588 624 581 589 664 610 586 659 583 598 605 607 587 584 610 600 589 593 587 605 598 581 576 580 607 615 593 641 560 585 614 619 594 593 562 598 585 551 563 605 581 644 568 578 613 616 573 609 634 601 600 593 607 606 610 596 595 624 649 631 614 642 588 592 630 664 623 610 587 598 571 580 582 595 581 604 589 564 544 557 581 593 560 608 570 573 583 602 587 574 576 586 609 588 609 593 550 588 574 577 595 576 548 557 559 553 583 514 514 602 506 564 564 619 581 578 595 580 581 524 579 596 540 588 588 540 543 557 586 578 544 573 574 581 539 572 573 607 579 567 547 540 589 619 584 542 551 548 548 571 591 574 518 525 534 534 542 547 573 548 549 521 572 560 510 569 550 589 558 547 584 552 564 548 538 591 539 585 564 598 557 566 583 551 532 581 593 497 553 621 590 565 589 593 580 570 515 565 531 553 542 583 576 545 583 530 534 558 580 548 553 580 582 541 564 541 547 549 532 566 535 536 543 560 536 550 566 530 455 525 530 550 578 533 585 576 562 525 521 542 522 575 548 529 603 522 515 510 522 571 582 521 548 577 575 588 570 589 593 604 577 599 584 612 579 615 621 577 625 579 574 551 546 592 578 558 602 567 566 559 564 551 559 534 553 613 568 498 536 504 531 544 493 563 518 527 538 539 551 534 553 529 602 533 530 542 528 541 582 586 541 512 602 540 504 507 557 516 552 527 548 539 512 504 506 539 529 527 530 552 522 516 540 510 544 550 521 556 578 517 594 520 576 490 582 563 543 588 603 570 558 561 580 521 533 542 604 512 573 549 551 548 542 546 545 536 541 572 523 600 580 577 570 559 575 561 607 591 545 601 625 571 639 647 612 617 655 690 756 864 895 957 1087 1232 1266 1418 1328 1237 1165 1057 1027 1008 944 929 1025 1043 1038 989 977 851 897 739 761 670 684 671 660 615 601 607 600 575 602 594 608 576 584 619 577 564 630 665 667 687 741 719 758 768 786 873 915 859 870 846 791 737 729 713 667 752 747 739 689 704 704 678 604 643 588 583 520 546 567 578 577 570 579 607 616 597 575 572 587 595 596 624 568 598 580 598 600 624 624 596 520 572 602 560 529 490 567 612 596 545 578 558 522 543 574 578 591 522 488 528 524 555 509 554 525 525 495 533 550 487 511 518 508 547 528 523 498 547 512 497 542 545 549 503 523 538 559 524 559 523 541 553 543 535 525 489 564 587 541 530 575 501 533 535 530 518 495 565 566 544 530 540 510 548 527 530 508 523 527 562 516 529 473 543 553 498 516 489 517 572 557 509 527 533 510 586 546 544 527 583 567 552 503 563 548 517 534 531 555 542 563 553 555 587 553 559 579 625 600 602 624 574 623 694 643 733 749 841 893 1000 974 1011 925 848 760 725 720 702 706 741 810 784 828 823 761 732 678 668 670 593 634 647 621 577 675 640 644 660 671 633 634 608 601 631 627 642 624 566 620 617 632 594 651 643 657 648 620 585 593 609 667 537 591 590 589 609 642 607 591 591 555 544 622 571 607 585 554 594 595 612 585 584 572 632 595 535 547 559 570 562 542 537 532 567 541 592 568 567 586 527 555 591 480 541 506 573 544 584 572 512 562 536 560 533 521 574 478 466 550 508 505 513 525 497 541 543 530 575 558 522 497 519 488 574 571 476 516 537 479 514 503 482 512 512 489 557 505 476 512 537 493 476 509 507 505 517 539 522 495 473 509 518 502 501 528 511 555 525 512 502 528 483 516 522 501 521 496 545 503 519 505 504 509 480 536 536 508 518 551 508 497 538 509 497 501 525 499 532 484 508 504 518 497 513 531 492 477 530 527 496 553 536 513 486 465 519 458 446 490 
467 497 482 499 506 494 531 502 509 487 480 482 485 485 508 468 475 489 473 508 504 503 494 486 490 483 474 474 528 482 504 497 491 477 504 498 488 484 505 472 502 426 514 487 506 540 519 488 486 524 457 473 484 466 501 469 498 503 528 430 467 497 481 509 505 525 500 457 495 493 520 492 459 482 512 487 485 467 452 474 481 431 491 466 492 505 515 512 467 477 489 565 476 490 482 488 468 482 507 460 494 511 514 470 401 436 498 462 462 471 480 496 447 494 497 449 497 438 465 466 460 468 472 483 468 465 460 496 494 460 479 472 477 476 484 487 470 473 461 439 466 471 492 472 463 438 409 477 507 444 478 440 467 449 446 461 483 456 494 462 462 510 459 527 474 469 477 480 467 464 444 505 435 459 463 528 525 509 521 510 486 564 536 479 514 480 498 499 501 508 436 485 464 522 519 500 524 512 462 459 470 492 446 477 450 468 469 465 466 426 475 472 472 489 449 446 460 471 462 455 468 440 484 497 431 471 445 492 500 465 464 462 466 449 524 482 471 427 489 486 497 461 424 462 482 417 436 494 470 451 483 446 446 460 491 470 465 496 449 494 494 451 447 477 451 464 463 443 430 477 508 416 476 448 445 486 471 449 478 447 473 470 423 476 457 452 459 454 552 449 468 467 444 441 435 419 436 476 433 424 509 416 459 434 459 462 491 457 466 456 448 460 468 448 439 437 449 425 447 443 474 450 491 417 444 405 467 454 461 459 466 477 440 448 480 468 422 404 464 418 493 464 453 472 460 501 461 447 462 466 432 453 419 412 450 427 415 470 450 454 458 413 411 477 476 470 465 497 437 437 498 472 492 435 494 458 453 473 459 443 436 467 438 392 419 445 448 435 436 453 406 502 486 503 427 432 482 425 451 433 432 446 461 421 451 413 479 446 419 457 429 458 450 449 434 433 491 481 450 465 428 447 465 464 458 426 416 484 479 435 438 425 454 431 483 455 460 451 462 441 443 460 443 430 444 457 444 432 471 436 414 474 452 437 462 457 406 459 465 449 450 452 441 442 456 479 412 440 474 424 435 432 429 429 449 450 456 439 456 464 431 458 460 409 434 443 451 442 438 426 476 448 409 435 445 415 422 434 398 479 465 477 425 442 460 441 423 447 405 477 406 443 414 440 474 450 461 440 440 479 418 474 511 465 472 460 458 454 472 466 513 520 498 511 501 576 509 532 473 479 487 517 499 452 493 487 474 513 444 493 478 485 484 472 484 483 461 497 494 460 424 467 460 425 448 473 413 430 426 440 417 463 444 445 421 473 423 421 449 433 485 441 455 424 460 460 399 424 455 446 455 437 444 406 453 443 456 429 436 459 443 453 438 469 452 432 420 462 403 471 504 465 440 493 478 469 454 475 446 446 466 526 481 436 422 446 508 456 429 471 425 450 405 431 446 469 460 459 455 425 486 465 510 438 405 430 460 445 423 413 434 443 430 439 417 394 471 472 458 449 445 428 431 457 406 458 452 434 431 432 426 446 445 478 439 399 439 434 418 381 423 441 470 431 421 459 412 427 440 463 392 419 431 407 437 457 431 407 412 430 423 439 459 454 419 451 430 465 452 445 422 435 404 441 416 448 420 442 464 419 431 431 439 443 439 457 423 467 420 413 395 421 386 456 418 434 438 424 424 442 446 448 426 425 436 464 433 419 464 416 440 418 440 444 399 433 456 504 446 477 459 451 461 451 426 437 472 434 449 445 453 430 425 428 450 453 461 481 423 442 453 433 445 430 403 413 454 439 414 465 397 416 430 423 404 379 419 424 413 428 400 408 429 475 431 425 399 453 409 353 421 417 423 402 425 401 439 383 405 412 417 410 409 420 425 462 430 423 489 435 420 422 420 412 430 393 420 414 390 409 454 402 422 392 419 385 433 428 438 401 419 441 402 405 417 444 438 417 395 428 380 450 415 403 437 425 406 409 444 405 398 387 427 387 396 379 436 422 428 441 422 416 403 438 443 419 425 438 
428 397 427 428 408 424 428 415 429 374 474 439 449 427 415 428 419 407 432 419 431 455 439 446 426 438 449 446 473 430 419 451 417 404 469 453 491 457 456 472 481 482 460 492 488 480 464 507 471 508 511 530 534 531 538 553 569 545 546 578 597 602 588 606 592 650 655 658 642 687 679 727 676 676 706 753 753 737 759 716 696 754 790 771 778 797 807 757 734 805 724 691 764 721 767 794 746 678 754 723 717 662 691 675 634 616 655 638 651 633 676 614 537 578 565 569 537 606 554 533 565 520 568 535 461 517 482 538 473 487 529 490 474 461 440 451 478 455 419 438 434 420 445 460 430 425 440 488 455 457 487 423 422 464 425 448 441 429 405 435 457 436 455 446 400 412 440 426 498 413 428 393 423 401 412 379 435 449 494 475 423 444 440 487 488 526 522 487 506 532 497 467 465 472 470 455 469 409 484 503 469 437 459 483 461 467 481 491 473 517 441 419 458 469 480 460 473 426 466 487 491 506 511 543 602 662 651 651 563 558 506 503 507 461 427 474 462 512 463 522 498 481 532 534 493 472 454 456 426 446 440 452 394 410 412 434 423 416 361 378 390 416 391 390 391 384 366 394 401 360 393 424 408 346 384 404 381 397 383 385 398 415 392 390 388 405 395 383 399 397 385 398 386 371 386 386 378 356 369 392 351 411 383 468 365 416 392 374 420 380 395 370 403 349 378 371 410 380 357 387 393 374 405 398 388 398 376 384 401 356 381 400 389 384 395 344 397 406 370 402 400 433 430 376 369 403 399 384 382 379 389 430 370 402 372 406 424 374 401 424 466 365 412 415 403 409 412 414 384 393 388 390 407 398 425 415 414 424 401 369 422 384 392 396 373 355 380 421 359 356 408 443 369 428 423 389 370 372 367 371 400 372 402 382 363 357 360 428 391 357 379 382 392 365 380 366 392 385 366 389 430 376 402 389 390 413 363 375 414 358 382 392 363 393 423 395 439 412 373 364 392 402 351 379 424 410 370 356 378 372 369 369 405 393 419 367 404 383 387 387 376 375 390 343 382 364 411 362 372 388 345 351 392 396 388 349 372 391 396 388 347 375 350 379 375 381 373 397 379 392 342 373 378 325 400 352 388 349 364 389 345 358 382 360 379 355 359 348 369 412 374 348 429 361 399 353 391 368 385 357 376 358 355 383 354 346 351 373 402 364 361 435 375 405 391 423 381 359 351 378 362 406 368 335 404 334 384 380 356 359 365 397 398 348 342 396 365 357 354 365 388 365 342 404 378 372 333 344 362 397 393 346 333 384 387 365 357 394 347 + +
+
+
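The .xrdml fixture above lost its XML markup in this rendering, but the numbers survive: the full count trace and the scan limits (2Theta appears to run from 10.00656514 to 79.99097181 degrees). As a hedged sketch of consuming such a file, not part of the patch, the namespace URI and the counts/intensities element names below are assumptions based on the common PANalytical XRDML layout:

```python
# Hedged sketch: read the intensity trace and a linearly spaced 2Theta axis
# from an .xrdml file. Element and namespace names are assumed, not taken
# from this patch; adjust the namespace version to the file at hand.
import numpy as np
import xml.etree.ElementTree as ET

NS = {"x": "http://www.xrdml.com/XRDMeasurement/1.5"}  # assumed schema version

root = ET.parse("ACZCTS_5-60_181.xrdml").getroot()
data_points = root.find(".//x:dataPoints", NS)

raw = data_points.find("x:counts", NS)
if raw is None:  # some schema versions name the element differently
    raw = data_points.find("x:intensities", NS)
counts = np.array(raw.text.split(), dtype=float)

pos = data_points.find("x:positions[@axis='2Theta']", NS)
two_theta = np.linspace(float(pos.find("x:startPosition", NS).text),
                        float(pos.find("x:endPosition", NS).text),
                        counts.size)
print(two_theta[counts.argmax()], counts.max())  # strongest reflection
```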
diff --git a/tests/data/eln_mapper/eln.yaml b/tests/data/eln_mapper/eln.yaml new file mode 100644 index 000000000..6a6884017 --- /dev/null +++ b/tests/data/eln_mapper/eln.yaml @@ -0,0 +1,103 @@ +Data: + '@signal': null + data: + value: null + unit: null +Instrument: + Beam: + distance: + value: null + unit: null + incident_energy: + value: null + unit: null + incident_energy_spread: + value: null + unit: null + incident_polarization: + value: null + unit: null + Electronanalyser: + Collectioncolumn: + contrast_aperture: null + field_aperture: null + mode: null + projection: null + scheme: null + Detector: + Data: + '@signal': null + raw: null + amplifier_type: null + detector_type: null + Energydispersion: + energy_scan_mode: null + entrance_slit: null + exit_slit: null + pass_energy: + value: null + unit: null + scheme: null + description: null + energy_resolution: + value: null + unit: null + fast_axes: null + slow_axes: null + Manipulator: + drain_current: + value: null + unit: null + sample_bias: + value: null + unit: null + sample_temperature: + value: null + unit: null + Source: + name: null + probe: null + type: null + energy_resolution: + value: null + unit: null +Process: + angular_calibration: + applied: null + calibrated_axis: null + energy_calibration: + applied: null + calibrated_axis: null + momentum_calibration: + applied: null + calibrated_axis: null + spatial_calibration: + applied: null + calibrated_axis: null +Sample: + atom_types: null + bias: + value: null + unit: null + chemical_formula: null + gas_pressure: + value: null + unit: null + name: null + preparation_date: null + preparation_description: null + sample_history: null + situation: null + temperature: + value: null + unit: null +User: + address: null + affiliation: null + email: null + name: null + orcid: null +definition: + '@version': null +start_time: null +title: null diff --git a/tests/data/eln_mapper/mpes.scheme.archive.yaml b/tests/data/eln_mapper/mpes.scheme.archive.yaml new file mode 100644 index 000000000..0f704ced9 --- /dev/null +++ b/tests/data/eln_mapper/mpes.scheme.archive.yaml @@ -0,0 +1,537 @@ +definitions: + name: + sections: + mpes: + base_sections: + - nomad.datamodel.metainfo.eln.NexusDataConverter + - nomad.datamodel.data.EntryData + m_annotations: + template: + reader: + nxdl: NX.nxdl + eln: + hide: [] + quantities: + title: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + start_time: + type: Datetime + m_annotations: + eln: + component: DateTimeEditQuantity + defaultDisplayUnit: + description: ' Datetime of the start of the measurement. ' + definition: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + sub_sections: + User: + section: + m_annotations: + eln: + overview: true + quantities: + name: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Name of the user. ' + affiliation: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Name of the affiliation of the user at the point in + time when the experiment was performed. ' + address: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Full address (street, street number, ZIP, city, country) + of the user''s affiliation. 
' + email: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Email address of the user. ' + orcid: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Author ID defined by https://orcid.org/. ' + Instrument: + section: + m_annotations: + eln: + overview: true + quantities: + energy_resolution: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + sub_sections: + Source: + section: + m_annotations: + eln: + overview: true + quantities: + type: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + name: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + probe: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Type of probe. In photoemission it''s always + photons, so the full NIAC list is restricted. ' + Beam: + section: + m_annotations: + eln: + overview: true + quantities: + distance: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' Distance of the point of evaluation of the beam + from the sample surface. ' + incident_energy: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + incident_energy_spread: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + incident_polarization: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + Electronanalyser: + section: + m_annotations: + eln: + overview: true + quantities: + description: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + energy_resolution: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' Energy resolution of the analyser with the current + setting. May be linked from a NXcalibration. ' + fast_axes: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + slow_axes: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + sub_sections: + Collectioncolumn: + section: + m_annotations: + eln: + overview: true + quantities: + scheme: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Scheme of the electron collection column. 
' + mode: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + projection: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + sub_sections: + Field_aperture: + section: + m_annotations: + eln: + overview: true + Contrast_aperture: + section: + m_annotations: + eln: + overview: true + Energydispersion: + section: + m_annotations: + eln: + overview: true + quantities: + scheme: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + pass_energy: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + energy_scan_mode: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + sub_sections: + Entrance_slit: + section: + m_annotations: + eln: + overview: true + Exit_slit: + section: + m_annotations: + eln: + overview: true + Detector: + section: + m_annotations: + eln: + overview: true + quantities: + amplifier_type: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Type of electron amplifier in the first + amplification step. ' + detector_type: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' Description of the detector type. ' + sub_sections: + Data: + section: + m_annotations: + eln: + overview: true + quantities: + raw: + type: np.float64 + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' Raw data before calibration. ' + Manipulator: + section: + m_annotations: + eln: + overview: true + quantities: + sample_temperature: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + drain_current: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + sample_bias: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + Process: + section: + m_annotations: + eln: + overview: true + sub_sections: + Energy_calibration: + section: + m_annotations: + eln: + overview: true + quantities: + applied: + type: bool + m_annotations: + eln: + component: BoolEditQuantity + defaultDisplayUnit: + description: ' Has an energy calibration been applied? ' + calibrated_axis: + type: np.float64 + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' This is the calibrated energy axis to be used + for data plotting. ' + Angular_calibration: + section: + m_annotations: + eln: + overview: true + quantities: + applied: + type: bool + m_annotations: + eln: + component: BoolEditQuantity + defaultDisplayUnit: + description: ' Has an angular calibration been applied? ' + calibrated_axis: + type: np.float64 + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' This is the calibrated angular axis to be used + for data plotting. ' + Spatial_calibration: + section: + m_annotations: + eln: + overview: true + quantities: + applied: + type: bool + m_annotations: + eln: + component: BoolEditQuantity + defaultDisplayUnit: + description: ' Has an spatial calibration been applied? 
' + calibrated_axis: + type: np.float64 + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' This is the calibrated spatial axis to be used + for data plotting. ' + Momentum_calibration: + section: + m_annotations: + eln: + overview: true + quantities: + applied: + type: bool + m_annotations: + eln: + component: BoolEditQuantity + defaultDisplayUnit: + description: ' Has an momentum calibration been applied? ' + calibrated_axis: + type: np.float64 + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' This is the momentum axis to be used for data + plotting. ' + Sample: + section: + m_annotations: + eln: + overview: true + quantities: + name: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + chemical_formula: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' The chemical formula of the sample. For mixtures use + the NXsample_component group in NXsample instead. ' + atom_types: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: ' List of comma-separated elements from the periodic + table that are contained in the sample. If the sample substance + has multiple components, all elements from each component must be + included in `atom_types`. ' + preparation_date: + type: Datetime + m_annotations: + eln: + component: DateTimeEditQuantity + defaultDisplayUnit: + description: ' Date of preparation of the sample for the XPS experiment + (i.e. cleaving, last annealing). ' + temperature: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' In the case of a fixed temperature measurement this + is the scalar temperature of the sample. In the case of an experiment + in which the temperature is changed and recoded, this is an array + of length m of temperatures. This should be a link to /entry/instrument/manipulator/sample_temperature. ' + situation: + type: str + m_annotations: + eln: + component: StringEditQuantity + defaultDisplayUnit: + description: '' + gas_pressure: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: '' + sub_sections: + Sample_history: + section: + m_annotations: + eln: + overview: true + Preparation_description: + section: + m_annotations: + eln: + overview: true + Data: + section: + m_annotations: + eln: + overview: true + quantities: + data: + type: np.float64 + unit: '' + value: + m_annotations: + eln: + component: NumberEditQuantity + defaultDisplayUnit: + description: ' Represents a measure of one- or more-dimensional photoemission + counts, where the varied axis may be for example energy, momentum, + spatial coordinate, pump-probe delay, spin index, temperature, etc. + The axes traces should be linked to the actual encoder position + in NXinstrument or calibrated axes in NXprocess. ' diff --git a/tests/data/nexus/NXtest2.nxdl.xml b/tests/data/nexus/NXtest2.nxdl.xml new file mode 100644 index 000000000..7b33b2165 --- /dev/null +++ b/tests/data/nexus/NXtest2.nxdl.xml @@ -0,0 +1,455 @@ + + + + + + + Characterization of a sample during a session on an electron microscope. + + + + + + + + Metadata and numerical data of the microscope and the lab in which it stands. + + + + + + Given name of the microscope at the hosting institution. This is an alias. 
+ Examples could be NionHermes, Titan, JEOL, Gemini, etc. + + + + + Location of the lab or place where the instrument is installed. + Using GEOREF is preferred. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the lens is described at least one of the fields + voltage, current, or value should be defined. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Description of the type of the detector. + + Electron microscopes have typically multiple detectors. + Different technologies are in use like CCD, scintillator, + direct electron, CMOS, or image plate to name but a few. + + + + Instrument-specific alias/name + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A container for storing a set of NXevent_data_em instances. + + + + + \ No newline at end of file diff --git a/tests/data/nexus/Ref_nexus_test.log b/tests/data/nexus/Ref_nexus_test.log index 0b9f8bebd..ec7214cc4 100644 --- a/tests/data/nexus/Ref_nexus_test.log +++ b/tests/data/nexus/Ref_nexus_test.log @@ -8,12 +8,13 @@ DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:): DEBUG - - (**required**) :ref:`NXentry` describes the measurement. - - The top-level NeXus group which contains all the data and associated - information that comprise a single measurement. - It is mandatory that there is at least one - group of this type in the NeXus file. + (**required**) :ref:`NXentry` describes the measurement. + + The top-level NeXus group which contains all the data and associated + information that comprise a single measurement. + It is mandatory that there is at least one + group of this type in the NeXus file. + DEBUG - ===== ATTRS (//entry@NX_class) DEBUG - value: NXentry DEBUG - classpath: ['NXentry'] @@ -30,9 +31,9 @@ NXentry.nxdl.xml:/collection_time DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/collection_time): DEBUG - - Time transpired actually collecting data i.e. taking out time when collection was - suspended due to e.g. temperature out of range - + Time transpired actually collecting data i.e. taking out time when collection was + suspended due to e.g. temperature out of range + DEBUG - ===== ATTRS (//entry/collection_time@units) DEBUG - value: s DEBUG - classpath: ['NXentry', 'NX_FLOAT'] @@ -50,34 +51,33 @@ DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/DATA): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/DATA): DEBUG - - The data group - - .. note:: Before the NIAC2016 meeting [#]_, at least one - :ref:`NXdata` group was required in each :ref:`NXentry` group. - At the NIAC2016 meeting, it was decided to make :ref:`NXdata` - an optional group in :ref:`NXentry` groups for data files that - do not use an application definition. - It is recommended strongly that all NeXus data files provide - a NXdata group. - It is permissable to omit the NXdata group only when - defining the default plot is not practical or possible - from the available data. 
-
-    For example, neutron event data may not have anything that
-    makes a useful plot without extensive processing.
-
-    Certain application definitions override this decision and
-    require an :ref:`NXdata` group
-    in the :ref:`NXentry` group. The ``minOccurs=0`` attribute
-    in the application definition will indicate the
-    :ref:`NXdata` group
-    is optional, otherwise, it is required.
-
-    .. [#] NIAC2016:
-       https://www.nexusformat.org/NIAC2016.html,
-       https://github.com/nexusformat/NIAC/issues/16
-
-
+        The data group
+
+        .. note:: Before the NIAC2016 meeting [#]_, at least one
+           :ref:`NXdata` group was required in each :ref:`NXentry` group.
+           At the NIAC2016 meeting, it was decided to make :ref:`NXdata`
+           an optional group in :ref:`NXentry` groups for data files that
+           do not use an application definition.
+           It is recommended strongly that all NeXus data files provide
+           a NXdata group.
+           It is permissable to omit the NXdata group only when
+           defining the default plot is not practical or possible
+           from the available data.
+
+           For example, neutron event data may not have anything that
+           makes a useful plot without extensive processing.
+
+           Certain application definitions override this decision and
+           require an :ref:`NXdata` group
+           in the :ref:`NXentry` group. The ``minOccurs=0`` attribute
+           in the application definition will indicate the
+           :ref:`NXdata` group
+           is optional, otherwise, it is required.
+
+           .. [#] NIAC2016:
+              https://www.nexusformat.org/NIAC2016.html,
+              https://github.com/nexusformat/NIAC/issues/16
+
 DEBUG - documentation (NXdata.nxdl.xml:):
 DEBUG -
     :ref:`NXdata` describes the plottable data and related dimension scales.
@@ -395,21 +395,21 @@ DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/definition):
 DEBUG - Official NeXus NXDL schema to which this file conforms.
 DEBUG - documentation (NXentry.nxdl.xml:/definition):
 DEBUG -
-    (alternate use: see same field in :ref:`NXsubentry` for preferred)
-
-    Official NeXus NXDL schema to which this entry conforms which must be
-    the name of the NXDL file (case sensitive without the file extension)
-    that the NXDL schema is defined in.
-
-    For example the ``definition`` field for a file that conformed to the
-    *NXarpes.nxdl.xml* definition must contain the string **NXarpes**.
-
-    This field is provided so that :ref:`NXentry` can be the overlay position
-    in a NeXus data file for an application definition and its
-    set of groups, fields, and attributes.
-
-    *It is advised* to use :ref:`NXsubentry`, instead, as the overlay position.
-
+        (alternate use: see same field in :ref:`NXsubentry` for preferred)
+
+        Official NeXus NXDL schema to which this entry conforms which must be
+        the name of the NXDL file (case sensitive without the file extension)
+        that the NXDL schema is defined in.
+
+        For example the ``definition`` field for a file that conformed to the
+        *NXarpes.nxdl.xml* definition must contain the string **NXarpes**.
+
+        This field is provided so that :ref:`NXentry` can be the overlay position
+        in a NeXus data file for an application definition and its
+        set of groups, fields, and attributes.
+
+        *It is advised* to use :ref:`NXsubentry`, instead, as the overlay position.
+
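As the documentation just quoted explains, the definition field is the hook that ties an entry to an application definition such as NXarpes. A minimal sketch, assuming h5py and a hypothetical file name, of an entry making that claim:

```python
# Hedged sketch: declare the application definition an NXentry conforms to.
# Validation tooling can then resolve NXarpes.nxdl.xml from this string.
import h5py

with h5py.File("example.nxs", "w") as f:  # hypothetical file name
    entry = f.create_group("entry")
    entry.attrs["NX_class"] = "NXentry"
    entry["definition"] = "NXarpes"  # NXDL name without the file extension
```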
+ DEBUG - ===== FIELD (//entry/duration): DEBUG - value: 7200 DEBUG - classpath: ['NXentry', 'NX_INT'] @@ -417,7 +417,9 @@ DEBUG - classes: NXentry.nxdl.xml:/duration DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/duration): -DEBUG - Duration of measurement +DEBUG - + Duration of measurement + DEBUG - ===== ATTRS (//entry/duration@units) DEBUG - value: s DEBUG - classpath: ['NXentry', 'NX_INT'] @@ -431,7 +433,9 @@ DEBUG - classes: NXentry.nxdl.xml:/end_time DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/end_time): -DEBUG - Ending time of measurement +DEBUG - + Ending time of measurement + DEBUG - ===== FIELD (//entry/entry_identifier): DEBUG - value: b'Run 22118' DEBUG - classpath: ['NXentry', 'NX_CHAR'] @@ -439,7 +443,9 @@ DEBUG - classes: NXentry.nxdl.xml:/entry_identifier DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/entry_identifier): -DEBUG - unique identifier for the measurement, defined by the facility. +DEBUG - + unique identifier for the measurement, defined by the facility. + DEBUG - ===== FIELD (//entry/experiment_identifier): DEBUG - value: b'F-20170538' DEBUG - classpath: ['NXentry', 'NX_CHAR'] @@ -448,10 +454,10 @@ NXentry.nxdl.xml:/experiment_identifier DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/experiment_identifier): DEBUG - - Unique identifier for the experiment, - defined by the facility, - possibly linked to the proposals - + Unique identifier for the experiment, + defined by the facility, + possibly linked to the proposals + DEBUG - ===== GROUP (//entry/instrument [NXarpes::/NXentry/NXinstrument]): DEBUG - classpath: ['NXentry', 'NXinstrument'] DEBUG - classes: @@ -465,15 +471,15 @@ DEBUG - documentation (NXentry.nxdl.xml:/INSTRUMENT): DEBUG - DEBUG - documentation (NXinstrument.nxdl.xml:): DEBUG - - Collection of the components of the instrument or beamline. - - Template of instrument descriptions comprising various beamline components. - Each component will also be a NeXus group defined by its distance from the - sample. Negative distances represent beamline components that are before the - sample while positive distances represent components that are after the sample. - This device allows the unique identification of beamline components in a way - that is valid for both reactor and pulsed instrumentation. - + Collection of the components of the instrument or beamline. + + Template of instrument descriptions comprising various beamline components. + Each component will also be a NeXus group defined by its distance from the + sample. Negative distances represent beamline components that are before the + sample while positive distances represent components that are after the sample. + This device allows the unique identification of beamline components in a way + that is valid for both reactor and pulsed instrumentation. + DEBUG - ===== ATTRS (//entry/instrument@NX_class) DEBUG - value: NXinstrument DEBUG - classpath: ['NXentry', 'NXinstrument'] @@ -496,8 +502,8 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/DETECTOR): DEBUG - DEBUG - documentation (NXdetector.nxdl.xml:): DEBUG - - A detector, detector bank, or multidetector. - + A detector, detector bank, or multidetector. 
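The records that follow walk the `analyser` group field by field against NXdetector. Reading those fields back is plain h5py; a sketch using the paths and values reported in this log:

```python
# Sketch: re-reading the analyser fields that the log below resolves
# against NXdetector. Expected values are taken from the log records.
import h5py

with h5py.File("201805_WSe2_arpes.nxs", "r") as f:
    analyser = f["/entry/instrument/analyser"]
    print(analyser.attrs["NX_class"])      # NXdetector
    print(analyser["amplifier_type"][()])  # b'MCP'
    print(analyser["detector_type"][()])   # b'DLD'
    print(analyser["sensor_count"][()])    # 4
```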
+ DEBUG - ===== ATTRS (//entry/instrument/analyser@NX_class) DEBUG - value: NXdetector DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] @@ -527,12 +533,19 @@ DEBUG - -> decimated DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/acquisition_mode): DEBUG - DEBUG - documentation (NXdetector.nxdl.xml:/acquisition_mode): -DEBUG - The acquisition mode of the detector. +DEBUG - + The acquisition mode of the detector. + DEBUG - ===== FIELD (//entry/instrument/analyser/amplifier_type): DEBUG - value: b'MCP' -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_CHAR'] +DEBUG - classes: +NXdetector.nxdl.xml:/amplifier_type +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/amplifier_type): DEBUG - + Type of electron amplifier, MCP, channeltron, etc. + DEBUG - ===== FIELD (//entry/instrument/analyser/angles): DEBUG - value: [-1.96735314 -1.91500657 -1.86266001 -1.81031344 -1.75796688 -1.70562031 ... DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -574,29 +587,29 @@ DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/data): DEBUG - DEBUG - documentation (NXdetector.nxdl.xml:/data): DEBUG - - Data values from the detector. The rank and dimension ordering should follow a principle of - slowest to fastest measurement axes and may be explicitly specified in application definitions. - - Mechanical scanning of objects (e.g. sample position/angle, incident beam energy, etc) tends to be - the slowest part of an experiment and so any such scan axes should be allocated to the first dimensions - of the array. Note that in some cases it may be useful to represent a 2D set of scan points as a single - scan-axis in the data array, especially if the scan pattern doesn't fit a rectangular array nicely. - Repetition of an experiment in a time series tends to be used similar to a slow scan axis - and so will often be in the first dimension of the data array. - - The next fastest axes are typically the readout of the detector. A point detector will not add any dimensions - (as it is just a single value per scan point) to the data array, a strip detector will add one dimension, an - imaging detector will add two dimensions (e.g. X, Y axes) and detectors outputting higher dimensional data - will add the corresponding number of dimensions. Note that the detector dimensions don't necessarily have to - be written in order of the actual readout speeds - the slowest to fastest rule principle is only a guide. - - Finally, detectors that operate in a time-of-flight mode, such as a neutron spectrometer or a silicon drift - detector (used for X-ray fluorescence) tend to have their dimension(s) added to the last dimensions in the data array. - - The type of each dimension should should follow the order of scan points, detector pixels, - then time-of-flight (i.e. spectroscopy, spectrometry). The rank and dimension sizes (see symbol list) - shown here are merely illustrative of coordination between related datasets. - + Data values from the detector. The rank and dimension ordering should follow a principle of + slowest to fastest measurement axes and may be explicitly specified in application definitions. + + Mechanical scanning of objects (e.g. sample position/angle, incident beam energy, etc) tends to be + the slowest part of an experiment and so any such scan axes should be allocated to the first dimensions + of the array. 
Note that in some cases it may be useful to represent a 2D set of scan points as a single + scan-axis in the data array, especially if the scan pattern doesn't fit a rectangular array nicely. + Repetition of an experiment in a time series tends to be used similar to a slow scan axis + and so will often be in the first dimension of the data array. + + The next fastest axes are typically the readout of the detector. A point detector will not add any dimensions + (as it is just a single value per scan point) to the data array, a strip detector will add one dimension, an + imaging detector will add two dimensions (e.g. X, Y axes) and detectors outputting higher dimensional data + will add the corresponding number of dimensions. Note that the detector dimensions don't necessarily have to + be written in order of the actual readout speeds - the slowest to fastest rule principle is only a guide. + + Finally, detectors that operate in a time-of-flight mode, such as a neutron spectrometer or a silicon drift + detector (used for X-ray fluorescence) tend to have their dimension(s) added to the last dimensions in the data array. + + The type of each dimension should should follow the order of scan points, detector pixels, + then time-of-flight (i.e. spectroscopy, spectrometry). The rank and dimension sizes (see symbol list) + shown here are merely illustrative of coordination between related datasets. + DEBUG - ===== ATTRS (//entry/instrument/analyser/data@target) DEBUG - value: /entry/instrument/analyser/data DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_NUMBER'] @@ -630,9 +643,14 @@ DEBUG - NOT IN SCHEMA DEBUG - DEBUG - ===== FIELD (//entry/instrument/analyser/detector_type): DEBUG - value: b'DLD' -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_CHAR'] +DEBUG - classes: +NXdetector.nxdl.xml:/detector_type +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/detector_type): DEBUG - + Description of the detector type, DLD, Phosphor+CCD, CMOS. + DEBUG - ===== FIELD (//entry/instrument/analyser/dispersion_scheme): DEBUG - value: b'Time of flight' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] @@ -776,9 +794,14 @@ DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/analyser/region_size): DEBUG - size of rectangular region selected for readout DEBUG - ===== FIELD (//entry/instrument/analyser/sensor_count): DEBUG - value: 4 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_INT'] +DEBUG - classes: +NXdetector.nxdl.xml:/sensor_count +DEBUG - <> +DEBUG - documentation (NXdetector.nxdl.xml:/sensor_count): DEBUG - + Number of imaging sensor chips on the detector. + DEBUG - ===== FIELD (//entry/instrument/analyser/sensor_size): DEBUG - value: [ 80 146] DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXdetector', 'NX_INT'] @@ -821,22 +844,22 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/BEAM): DEBUG - DEBUG - documentation (NXbeam.nxdl.xml:): DEBUG - - Properties of the neutron or X-ray beam at a given location. - - This group is intended to be referenced - by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is - especially valuable in storing the results of instrument simulations in which it is useful - to specify the beam profile, time distribution etc. at each beamline component. 
Otherwise, - its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron - scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is - considered as a beamline component and this group may be defined as a subgroup directly inside - :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an - :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). - - Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case. - To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred - by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. - + Properties of the neutron or X-ray beam at a given location. + + This group is intended to be referenced + by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is + especially valuable in storing the results of instrument simulations in which it is useful + to specify the beam profile, time distribution etc. at each beamline component. Otherwise, + its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron + scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is + considered as a beamline component and this group may be defined as a subgroup directly inside + :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an + :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). + + Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case. + To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred + by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. + DEBUG - ===== ATTRS (//entry/instrument/beam_probe_0@NX_class) DEBUG - value: NXbeam DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -852,7 +875,9 @@ DEBUG - classes: NXbeam.nxdl.xml:/distance DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/distance): -DEBUG - Distance from sample. Note, it is recommended to use NXtransformations instead. +DEBUG - + Distance from sample. Note, it is recommended to use NXtransformations instead. 
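The recommendation above, preferring NXtransformations over a bare `distance` field, looks as follows when writing; the group names and the numeric offset are illustrative only:

```python
# Sketch: expressing a beam position through an NXtransformations chain
# rather than a plain `distance` dataset. Names and values illustrative.
import h5py

with h5py.File("example.nxs", "w") as f:
    beam = f.create_group("entry/instrument/beam_probe_0")
    beam.attrs["NX_class"] = "NXbeam"
    t = beam.create_group("transformations")
    t.attrs["NX_class"] = "NXtransformations"
    z = t.create_dataset("translation", data=0.35)  # offset from the sample
    z.attrs["transformation_type"] = "translation"
    z.attrs["vector"] = [0.0, 0.0, 1.0]  # direction along the beam axis
    z.attrs["units"] = "m"
    z.attrs["depends_on"] = "."          # end of the transformation chain
    beam["depends_on"] = "transformations/translation"
```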
+ DEBUG - ===== ATTRS (//entry/instrument/beam_probe_0/distance@units) DEBUG - value: cm DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] @@ -886,14 +911,20 @@ DEBUG - NOT IN SCHEMA DEBUG - DEBUG - ===== FIELD (//entry/instrument/beam_probe_0/pulse_duration): DEBUG - value: 70 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/pulse_duration +DEBUG - <> +DEBUG - documentation (NXbeam.nxdl.xml:/pulse_duration): DEBUG - + FWHM duration of the pulses at the diagnostic point + DEBUG - ===== ATTRS (//entry/instrument/beam_probe_0/pulse_duration@units) DEBUG - value: fs -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/pulse_duration +DEBUG - NXbeam.nxdl.xml:/pulse_duration@units [NX_TIME] DEBUG - ===== FIELD (//entry/instrument/beam_probe_0/size_x): DEBUG - value: 500 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -924,22 +955,22 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/BEAM): DEBUG - DEBUG - documentation (NXbeam.nxdl.xml:): DEBUG - - Properties of the neutron or X-ray beam at a given location. - - This group is intended to be referenced - by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is - especially valuable in storing the results of instrument simulations in which it is useful - to specify the beam profile, time distribution etc. at each beamline component. Otherwise, - its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron - scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is - considered as a beamline component and this group may be defined as a subgroup directly inside - :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an - :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). - - Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case. - To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred - by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. - + Properties of the neutron or X-ray beam at a given location. + + This group is intended to be referenced + by beamline component groups within the :ref:`NXinstrument` group or by the :ref:`NXsample` group. This group is + especially valuable in storing the results of instrument simulations in which it is useful + to specify the beam profile, time distribution etc. at each beamline component. Otherwise, + its most likely use is in the :ref:`NXsample` group in which it defines the results of the neutron + scattering by the sample, e.g., energy transfer, polarizations. Finally, There are cases where the beam is + considered as a beamline component and this group may be defined as a subgroup directly inside + :ref:`NXinstrument`, in which case it is recommended that the position of the beam is specified by an + :ref:`NXtransformations` group, unless the beam is at the origin (which is the sample). + + Note that incident_wavelength and related fields can be a scalar values or arrays, depending on the use case. 
+ To support these use cases, the explicit dimensionality of these fields is not specified, but it can be inferred + by the presense of and shape of accompanying fields, such as incident_wavelength_weights for a polychromatic beam. + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0@NX_class) DEBUG - value: NXbeam DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -950,14 +981,20 @@ DEBUG - @NX_class [NX_CHAR] DEBUG - DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/average_power): DEBUG - value: 6.21289 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/average_power +DEBUG - <> +DEBUG - documentation (NXbeam.nxdl.xml:/average_power): DEBUG - + Average power at the diagnostic point + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/average_power@units) DEBUG - value: uW -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/average_power +DEBUG - NXbeam.nxdl.xml:/average_power@units [NX_POWER] DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/center_wavelength): DEBUG - value: 800 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -975,7 +1012,9 @@ DEBUG - classes: NXbeam.nxdl.xml:/distance DEBUG - <> DEBUG - documentation (NXbeam.nxdl.xml:/distance): -DEBUG - Distance from sample. Note, it is recommended to use NXtransformations instead. +DEBUG - + Distance from sample. Note, it is recommended to use NXtransformations instead. + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/distance@units) DEBUG - value: cm DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] @@ -984,14 +1023,20 @@ NXbeam.nxdl.xml:/distance DEBUG - NXbeam.nxdl.xml:/distance@units [NX_LENGTH] DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/fluence): DEBUG - value: 5 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/fluence +DEBUG - <> +DEBUG - documentation (NXbeam.nxdl.xml:/fluence): DEBUG - + Incident fluence at the diagnostic point + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/fluence@units) DEBUG - value: mJ/cm^2 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/fluence +DEBUG - NXbeam.nxdl.xml:/fluence@units [NX_ANY] DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/photon_energy): DEBUG - value: 1.55 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -1019,24 +1064,36 @@ DEBUG - NOT IN SCHEMA DEBUG - DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/pulse_duration): DEBUG - value: 50 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/pulse_duration +DEBUG - <> +DEBUG - documentation (NXbeam.nxdl.xml:/pulse_duration): DEBUG - + FWHM duration of the pulses at the diagnostic point + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/pulse_duration@units) DEBUG - value: fs -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: 
+NXbeam.nxdl.xml:/pulse_duration +DEBUG - NXbeam.nxdl.xml:/pulse_duration@units [NX_TIME] DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/pulse_energy): DEBUG - value: 1.24258 -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/pulse_energy +DEBUG - <> +DEBUG - documentation (NXbeam.nxdl.xml:/pulse_energy): DEBUG - + Energy of a single pulse at the diagnostic point + DEBUG - ===== ATTRS (//entry/instrument/beam_pump_0/pulse_energy@units) DEBUG - value: nJ -DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam', 'NX_FLOAT'] +DEBUG - classes: +NXbeam.nxdl.xml:/pulse_energy +DEBUG - NXbeam.nxdl.xml:/pulse_energy@units [NX_ENERGY] DEBUG - ===== FIELD (//entry/instrument/beam_pump_0/size_x): DEBUG - value: 500 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXbeam'] @@ -1059,14 +1116,20 @@ DEBUG - NOT IN SCHEMA DEBUG - DEBUG - ===== FIELD (//entry/instrument/energy_resolution): DEBUG - value: 100 -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/energy_resolution +DEBUG - <> +DEBUG - documentation (NXinstrument.nxdl.xml:/energy_resolution): DEBUG - + Energy resolution of the experiment (FWHM or gaussian broadening) + DEBUG - ===== ATTRS (//entry/instrument/energy_resolution@units) DEBUG - value: meV -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/energy_resolution +DEBUG - NXinstrument.nxdl.xml:/energy_resolution@units [NX_ENERGY] DEBUG - ===== GROUP (//entry/instrument/manipulator [NXarpes::/NXentry/NXinstrument/NXpositioner]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXpositioner'] DEBUG - classes: @@ -1272,7 +1335,9 @@ DEBUG - classes: NXinstrument.nxdl.xml:/name DEBUG - <> DEBUG - documentation (NXinstrument.nxdl.xml:/name): -DEBUG - Name of instrument +DEBUG - + Name of instrument + DEBUG - ===== GROUP (//entry/instrument/source [NXarpes::/NXentry/NXinstrument/NXsource]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] DEBUG - classes: @@ -1285,7 +1350,9 @@ DEBUG - DEBUG - documentation (NXinstrument.nxdl.xml:/SOURCE): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:): -DEBUG - The neutron or x-ray storage ring/facility. +DEBUG - + The neutron or x-ray storage ring/facility. 
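A small consistency check that the `beam_pump_0` records above afford: average power divided by pulse energy gives the pump repetition rate, which for the logged values (6.21289 uW and 1.24258 nJ) comes out at roughly 5 kHz:

```python
# Consistency check on the beam_pump_0 values logged above.
average_power = 6.21289e-6  # W, logged as 6.21289 uW
pulse_energy = 1.24258e-9   # J, logged as 1.24258 nJ

repetition_rate = average_power / pulse_energy
print(f"repetition rate ~ {repetition_rate:.0f} Hz")  # ~5000 Hz
```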
+ DEBUG - ===== ATTRS (//entry/instrument/source@NX_class) DEBUG - value: NXsource DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1302,7 +1369,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_distance DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_distance): -DEBUG - For storage rings, time between bunches +DEBUG - + For storage rings, time between bunches + DEBUG - ===== ATTRS (//entry/instrument/source/bunch_distance@units) DEBUG - value: us DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1316,7 +1385,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_length DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_length): -DEBUG - For storage rings, temporal length of the bunch +DEBUG - + For storage rings, temporal length of the bunch + DEBUG - ===== ATTRS (//entry/instrument/source/bunch_length@units) DEBUG - value: fs DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1360,7 +1431,9 @@ DEBUG - classes: NXsource.nxdl.xml:/current DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/current): -DEBUG - Accelerator, X-ray tube, or storage ring current +DEBUG - + Accelerator, X-ray tube, or storage ring current + DEBUG - ===== ATTRS (//entry/instrument/source/current@units) DEBUG - value: uA DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1375,10 +1448,10 @@ NXsource.nxdl.xml:/energy DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/energy): DEBUG - - Source energy. - For storage rings, this would be the particle beam energy. - For X-ray tubes, this would be the excitation voltage. - + Source energy. + For storage rings, this would be the particle beam energy. + For X-ray tubes, this would be the excitation voltage. + DEBUG - ===== ATTRS (//entry/instrument/source/energy@units) DEBUG - value: MeV DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1392,7 +1465,9 @@ DEBUG - classes: NXsource.nxdl.xml:/frequency DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/frequency): -DEBUG - Frequency of pulsed source +DEBUG - + Frequency of pulsed source + DEBUG - ===== ATTRS (//entry/instrument/source/frequency@units) DEBUG - value: Hz DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1409,7 +1484,9 @@ DEBUG - enumeration (NXsource.nxdl.xml:/mode): DEBUG - -> Single Bunch DEBUG - -> Multi Bunch DEBUG - documentation (NXsource.nxdl.xml:/mode): -DEBUG - source operating mode +DEBUG - + source operating mode + DEBUG - ===== FIELD (//entry/instrument/source/name): DEBUG - value: b'FLASH' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -1420,7 +1497,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/name): -DEBUG - Name of source +DEBUG - + Name of source + DEBUG - ===== FIELD (//entry/instrument/source/number_of_bunches): DEBUG - value: 500 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_INT'] @@ -1428,7 +1507,9 @@ DEBUG - classes: NXsource.nxdl.xml:/number_of_bunches DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/number_of_bunches): -DEBUG - For storage rings, the number of bunches in use. +DEBUG - + For storage rings, the number of bunches in use. 
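Fields like `number_of_bunches` (500 for this source) and `bunch_distance` together fix the temporal extent of a bunch train. A sketch of that arithmetic; the bunch distance value below is illustrative, since this log excerpt shows only its unit (us):

```python
# Sketch: temporal length of a bunch train from the source fields above.
number_of_bunches = 500   # logged for //entry/instrument/source
bunch_distance_us = 1.0   # illustrative; only the unit (us) is shown above

train_length_us = number_of_bunches * bunch_distance_us
print(f"bunch train spans {train_length_us:.0f} us")
```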
+ DEBUG - ===== FIELD (//entry/instrument/source/number_of_bursts): DEBUG - value: 1 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1455,7 +1536,9 @@ DEBUG - -> proton DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/probe): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/probe): -DEBUG - type of radiation probe (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation probe (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/source/top_up): DEBUG - value: True DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_BOOLEAN'] @@ -1463,7 +1546,9 @@ DEBUG - classes: NXsource.nxdl.xml:/top_up DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/top_up): -DEBUG - Is the synchrotron operating in top_up mode? +DEBUG - + Is the synchrotron operating in top_up mode? + DEBUG - ===== FIELD (//entry/instrument/source/type): DEBUG - value: b'Free Electron Laser' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -1488,7 +1573,9 @@ DEBUG - -> Metal Jet X-ray DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/type): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/type): -DEBUG - type of radiation source (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation source (pick one from the enumerated list and spell exactly) + DEBUG - ===== GROUP (//entry/instrument/source_pump [NXarpes::/NXentry/NXinstrument/NXsource]): DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] DEBUG - classes: @@ -1501,7 +1588,9 @@ DEBUG - DEBUG - documentation (NXinstrument.nxdl.xml:/SOURCE): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:): -DEBUG - The neutron or x-ray storage ring/facility. +DEBUG - + The neutron or x-ray storage ring/facility. 
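`probe` and `type` above are enumerated fields whose documentation asks to pick one entry and spell it exactly, so a writer can guard against near-miss spellings before the file is written. A minimal sketch; the allowed list below reflects the NXsource probe enumeration as of recent NeXus releases:

```python
# Sketch: guarding an enumerated NXsource field against misspellings.
NXSOURCE_PROBES = {"neutron", "x-ray", "muon", "electron",
                   "ultraviolet", "visible light", "positron", "proton"}

def checked_probe(value: str) -> str:
    """Return value unchanged if it is a legal NXsource/probe entry."""
    if value not in NXSOURCE_PROBES:
        raise ValueError(f"{value!r} not in NXsource probe enumeration")
    return value

probe = checked_probe("x-ray")  # fine; checked_probe("xray") would raise
```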
+ DEBUG - ===== ATTRS (//entry/instrument/source_pump@NX_class) DEBUG - value: NXsource DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1518,7 +1607,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_distance DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_distance): -DEBUG - For storage rings, time between bunches +DEBUG - + For storage rings, time between bunches + DEBUG - ===== ATTRS (//entry/instrument/source_pump/bunch_distance@units) DEBUG - value: us DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1532,7 +1623,9 @@ DEBUG - classes: NXsource.nxdl.xml:/bunch_length DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/bunch_length): -DEBUG - For storage rings, temporal length of the bunch +DEBUG - + For storage rings, temporal length of the bunch + DEBUG - ===== ATTRS (//entry/instrument/source_pump/bunch_length@units) DEBUG - value: fs DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1566,7 +1659,9 @@ DEBUG - classes: NXsource.nxdl.xml:/frequency DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/frequency): -DEBUG - Frequency of pulsed source +DEBUG - + Frequency of pulsed source + DEBUG - ===== ATTRS (//entry/instrument/source_pump/frequency@units) DEBUG - value: Hz DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_FLOAT'] @@ -1583,7 +1678,9 @@ DEBUG - enumeration (NXsource.nxdl.xml:/mode): DEBUG - -> Single Bunch DEBUG - -> Multi Bunch DEBUG - documentation (NXsource.nxdl.xml:/mode): -DEBUG - source operating mode +DEBUG - + source operating mode + DEBUG - ===== FIELD (//entry/instrument/source_pump/name): DEBUG - value: b'User Laser @ FLASH' DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_CHAR'] @@ -1594,7 +1691,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/name): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/name): -DEBUG - Name of source +DEBUG - + Name of source + DEBUG - ===== FIELD (//entry/instrument/source_pump/number_of_bunches): DEBUG - value: 400 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource', 'NX_INT'] @@ -1602,7 +1701,9 @@ DEBUG - classes: NXsource.nxdl.xml:/number_of_bunches DEBUG - <> DEBUG - documentation (NXsource.nxdl.xml:/number_of_bunches): -DEBUG - For storage rings, the number of bunches in use. +DEBUG - + For storage rings, the number of bunches in use. 
+ DEBUG - ===== FIELD (//entry/instrument/source_pump/number_of_bursts): DEBUG - value: 1 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1629,7 +1730,9 @@ DEBUG - -> proton DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/probe): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/probe): -DEBUG - type of radiation probe (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation probe (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/source_pump/rms_jitter): DEBUG - value: 204.68816194453154 DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXsource'] @@ -1664,27 +1767,41 @@ DEBUG - -> Metal Jet X-ray DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/INSTRUMENT/SOURCE/type): DEBUG - DEBUG - documentation (NXsource.nxdl.xml:/type): -DEBUG - type of radiation source (pick one from the enumerated list and spell exactly) +DEBUG - + type of radiation source (pick one from the enumerated list and spell exactly) + DEBUG - ===== FIELD (//entry/instrument/spatial_resolution): DEBUG - value: 500 -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/spatial_resolution +DEBUG - <> +DEBUG - documentation (NXinstrument.nxdl.xml:/spatial_resolution): DEBUG - + Spatial resolution of the experiment (Airy disk radius) + DEBUG - ===== ATTRS (//entry/instrument/spatial_resolution@units) DEBUG - value: um -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/spatial_resolution +DEBUG - NXinstrument.nxdl.xml:/spatial_resolution@units [NX_LENGTH] DEBUG - ===== FIELD (//entry/instrument/temporal_resolution): DEBUG - value: 100 -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/temporal_resolution +DEBUG - <> +DEBUG - documentation (NXinstrument.nxdl.xml:/temporal_resolution): DEBUG - + Temporal resolution of the experiment (FWHM) + DEBUG - ===== ATTRS (//entry/instrument/temporal_resolution@units) DEBUG - value: fs -DEBUG - classpath: ['NXentry', 'NXinstrument'] -DEBUG - NOT IN SCHEMA -DEBUG - +DEBUG - classpath: ['NXentry', 'NXinstrument', 'NX_FLOAT'] +DEBUG - classes: +NXinstrument.nxdl.xml:/temporal_resolution +DEBUG - NXinstrument.nxdl.xml:/temporal_resolution@units [NX_TIME] DEBUG - ===== FIELD (//entry/run_cycle): DEBUG - value: b'2018 User Run Block 2' DEBUG - classpath: ['NXentry', 'NX_CHAR'] @@ -1692,7 +1809,9 @@ DEBUG - classes: NXentry.nxdl.xml:/run_cycle DEBUG - <> DEBUG - documentation (NXentry.nxdl.xml:/run_cycle): -DEBUG - Such as "2007-3". Some user facilities organize their beam time into run cycles. +DEBUG - + Such as "2007-3". Some user facilities organize their beam time into run cycles. + DEBUG - ===== GROUP (//entry/sample [NXarpes::/NXentry/NXsample]): DEBUG - classpath: ['NXentry', 'NXsample'] DEBUG - classes: @@ -1706,12 +1825,12 @@ DEBUG - documentation (NXentry.nxdl.xml:/SAMPLE): DEBUG - DEBUG - documentation (NXsample.nxdl.xml:): DEBUG - - Any information on the sample. - - This could include scanned variables that - are associated with one of the data dimensions, e.g. the magnetic field, or - logged data, e.g. monitored temperature vs elapsed time. - + Any information on the sample. 
+ + This could include scanned variables that + are associated with one of the data dimensions, e.g. the magnetic field, or + logged data, e.g. monitored temperature vs elapsed time. + DEBUG - ===== ATTRS (//entry/sample@NX_class) DEBUG - value: NXsample DEBUG - classpath: ['NXentry', 'NXsample'] @@ -1766,7 +1885,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/SAMPLE/name): DEBUG - Descriptive name of sample DEBUG - documentation (NXsample.nxdl.xml:/name): -DEBUG - Descriptive name of sample +DEBUG - + Descriptive name of sample + DEBUG - ===== FIELD (//entry/sample/preparation_method): DEBUG - value: b'in-vacuum cleave' DEBUG - classpath: ['NXentry', 'NXsample'] @@ -1779,7 +1900,9 @@ DEBUG - classes: NXsample.nxdl.xml:/pressure DEBUG - <> DEBUG - documentation (NXsample.nxdl.xml:/pressure): -DEBUG - Applied pressure +DEBUG - + Applied pressure + DEBUG - ===== ATTRS (//entry/sample/pressure@units) DEBUG - value: mbar DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT'] @@ -1829,7 +1952,9 @@ DEBUG - classes: NXsample.nxdl.xml:/thickness DEBUG - <> DEBUG - documentation (NXsample.nxdl.xml:/thickness): -DEBUG - sample thickness +DEBUG - + sample thickness + DEBUG - ===== ATTRS (//entry/sample/thickness@units) DEBUG - value: mm DEBUG - classpath: ['NXentry', 'NXsample', 'NX_FLOAT'] @@ -1851,7 +1976,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/start_time): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/start_time): -DEBUG - Starting time of measurement +DEBUG - + Starting time of measurement + DEBUG - ===== FIELD (//entry/title): DEBUG - value: b'Excited-state dynamics of WSe2 in the Valence Band and Core-Levels' DEBUG - classpath: ['NXentry', 'NX_CHAR'] @@ -1862,7 +1989,9 @@ DEBUG - <> DEBUG - documentation (NXarpes.nxdl.xml:/ENTRY/title): DEBUG - DEBUG - documentation (NXentry.nxdl.xml:/title): -DEBUG - Extended title for entry +DEBUG - + Extended title for entry + DEBUG - ======================== DEBUG - === Default Plotable === DEBUG - ======================== diff --git a/tests/dataconverter/test_convert.py b/tests/dataconverter/test_convert.py index f6702bf01..a317c1470 100644 --- a/tests/dataconverter/test_convert.py +++ b/tests/dataconverter/test_convert.py @@ -61,6 +61,8 @@ def restore_xarray_file_from_tmp(tmp_path): ]) def test_find_nxdl(cli_inputs): """Unit test to check if dataconverter can find NXDLs in contributed/applications folder.""" + cli_inputs.extend(["--reader", "example"]) + runner = CliRunner() result = runner.invoke(dataconverter.convert_cli, cli_inputs) if "NXdoesnotexist" in cli_inputs: @@ -110,7 +112,7 @@ def test_cli(caplog, cli_inputs): def test_links_and_virtual_datasets(tmp_path): """A test for the convert CLI to check whether a Dataset object is created, -when the template contains links.""" + when the template contains links.""" move_xarray_file_to_tmp(tmp_path) dirpath = os.path.join(os.path.dirname(__file__), diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 540cf07bb..421f8ce9b 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -19,6 +19,7 @@ import xml.etree.ElementTree as ET import os +import logging from setuptools import distutils import pytest import numpy as np @@ -27,6 +28,16 @@ from pynxtools.dataconverter.template import Template +def remove_optional_parent(data_dict: Template): + """Completely removes the optional group from the test Template.""" + internal_dict = Template(data_dict) + del 
internal_dict["/ENTRY[my_entry]/optional_parent/required_child"] + del internal_dict["/ENTRY[my_entry]/optional_parent/optional_child"] + del internal_dict["/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]"] + + return internal_dict + + def alter_dict(data_dict: Template, key: str, value: object): """Helper function to alter a single entry in dict for parametrize.""" if data_dict is not None: @@ -69,6 +80,29 @@ def listify_template(data_dict: Template): return listified_template +@pytest.mark.parametrize("input_data, expected_output", [ + ('2.4E-23', 2.4e-23), + ('28', 28), + ('45.98', 45.98), + ('test', 'test'), + (['59', '3.00005', '498E-36'], np.array([59.0, 3.00005, 4.98e-34])), + ('23 34 444 5000', np.array([23., 34., 444., 5000.])), + ('xrd experiment', 'xrd experiment'), + (None, None), +]) +def test_transform_to_intended_dt(input_data, expected_output): + """Transform to possible numerical method.""" + result = helpers.transform_to_intended_dt(input_data) + + # Use pytest.approx for comparing floating-point numbers + if isinstance(expected_output, np.ndarray): + np.testing.assert_allclose(result, expected_output, rtol=1e-3) + elif isinstance(expected_output, float): + assert result == pytest.approx(expected_output, rel=1e-5) + else: + assert result == expected_output + + @pytest.fixture(name="nxdl_root") def fixture_nxdl_root(): """pytest fixture to load the same NXDL file for all tests.""" @@ -101,31 +135,31 @@ def fixture_filled_test_data(template, tmp_path): tmp_path) template.clear() - template["optional"]["/ENTRY[my_entry]/NXODD_name/float_value"] = 2.0 - template["optional"]["/ENTRY[my_entry]/NXODD_name/float_value/@units"] = "nm" - template["optional"]["/ENTRY[my_entry]/optional_parent/required_child"] = 1 - template["optional"]["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 - template["required"]["/ENTRY[my_entry]/NXODD_name/bool_value"] = True - template["required"]["/ENTRY[my_entry]/NXODD_name/int_value"] = 2 - template["required"]["/ENTRY[my_entry]/NXODD_name/int_value/@units"] = "eV" - template["required"]["/ENTRY[my_entry]/NXODD_name/posint_value"] = np.array([1, 2, 3], - dtype=np.int8) - template["required"]["/ENTRY[my_entry]/NXODD_name/posint_value/@units"] = "kg" - template["required"]["/ENTRY[my_entry]/NXODD_name/char_value"] = "just chars" - template["required"]["/ENTRY[my_entry]/definition"] = "NXtest" - template["required"]["/ENTRY[my_entry]/definition/@version"] = "2.4.6" - template["required"]["/ENTRY[my_entry]/program_name"] = "Testing program" - template["required"]["/ENTRY[my_entry]/NXODD_name/type"] = "2nd type" - template["required"]["/ENTRY[my_entry]/NXODD_name/date_value"] = ("2022-01-22T12" - ":14:12.05018+00:00") - template["optional"]["/ENTRY[my_entry]/required_group/description"] = "An example description" - template["optional"]["/ENTRY[my_entry]/required_group2/description"] = "An example description" - template["undocumented"]["/ENTRY[my_entry]/does/not/exist"] = "random" - template["undocumented"]["/ENTRY[my_entry]/links/ext_link"] = {"link": - f"{tmp_path}/" - f"xarray_saved_small_cali" - f"bration.h5:/axes/ax3" - } + template["/ENTRY[my_entry]/NXODD_name/float_value"] = 2.0 + template["/ENTRY[my_entry]/NXODD_name/float_value/@units"] = "nm" + template["/ENTRY[my_entry]/optional_parent/required_child"] = 1 + template["/ENTRY[my_entry]/optional_parent/optional_child"] = 1 + template["/ENTRY[my_entry]/NXODD_name/bool_value"] = True + template["/ENTRY[my_entry]/NXODD_name/int_value"] = 2 + 
template["/ENTRY[my_entry]/NXODD_name/int_value/@units"] = "eV" + template["/ENTRY[my_entry]/NXODD_name/posint_value"] = np.array([1, 2, 3], + dtype=np.int8) + template["/ENTRY[my_entry]/NXODD_name/posint_value/@units"] = "kg" + template["/ENTRY[my_entry]/NXODD_name/char_value"] = "just chars" + template["/ENTRY[my_entry]/definition"] = "NXtest" + template["/ENTRY[my_entry]/definition/@version"] = "2.4.6" + template["/ENTRY[my_entry]/program_name"] = "Testing program" + template["/ENTRY[my_entry]/NXODD_name/type"] = "2nd type" + template["/ENTRY[my_entry]/NXODD_name/date_value"] = ("2022-01-22T12" + ":14:12.05018+00:00") + template["/ENTRY[my_entry]/required_group/description"] = "An example description" + template["/ENTRY[my_entry]/required_group2/description"] = "An example description" + template["/ENTRY[my_entry]/does/not/exist"] = "random" + template["/ENTRY[my_entry]/links/ext_link"] = {"link": + f"{tmp_path}/" + f"xarray_saved_small_cali" + f"bration.h5:/axes/ax3" + } yield template @@ -148,7 +182,11 @@ def fixture_filled_test_data(template, tmp_path): TEMPLATE["required"]["/ENTRY[my_entry]/NXODD_name/date_value"] = "2022-01-22T12:14:12.05018+00:00" # pylint: disable=E1126 TEMPLATE["optional"]["/ENTRY[my_entry]/required_group/description"] = "An example description" TEMPLATE["optional"]["/ENTRY[my_entry]/required_group2/description"] = "An example description" -# TEMPLATE["optional_parents"].append("/ENTRY[entry]/optional_parent") +TEMPLATE["required"]["/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]"] = 1 +TEMPLATE["lone_groups"] = ['/ENTRY[entry]/required_group', + '/ENTRY[entry]/required_group2', + '/ENTRY[entry]/optional_parent/req_group_in_opt_group'] +TEMPLATE["optional"]["/@default"] = "Some NXroot attribute" @pytest.mark.parametrize("data_dict,error_message", [ @@ -241,13 +279,11 @@ def fixture_filled_test_data(template, tmp_path): id="valid-data-dict"), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group/description"), - ("The data entry corresponding to /ENTRY[entry]/required_group " - "is required and hasn't been supplied by the reader."), + "The required group, /ENTRY[entry]/required_group, hasn't been supplied.", id="missing-empty-yet-required-group"), pytest.param( remove_from_dict(TEMPLATE, "/ENTRY[my_entry]/required_group2/description"), - ("The data entry corresponding to /ENTRY[entry]/required_group2 " - "is required and hasn't been supplied by the reader."), + "The required group, /ENTRY[entry]/required_group2, hasn't been supplied.", id="missing-empty-yet-required-group2"), pytest.param( alter_dict( @@ -258,6 +294,21 @@ def fixture_filled_test_data(template, tmp_path): (""), id="allow-required-and-empty-group" ), + pytest.param( + remove_from_dict(TEMPLATE, + "/ENTRY[my_entry]/optional_parent/req_group_in_opt_group/DATA[data]", + "required" + ), + ("The required group, /ENTRY[entry]/optional_parent/req_group_in_opt_group, hasn't been " + "supplied while its optional parent, /ENTRY[entry]/optional_parent/" + "req_group_in_opt_group, is supplied."), + id="req-group-in-opt-parent-removed" + ), + pytest.param( + remove_optional_parent(TEMPLATE), + (""), + id="opt-group-completely-removed" + ), ]) def test_validate_data_dict(data_dict, error_message, template, nxdl_root, request): """Unit test for the data validation routine""" @@ -269,12 +320,12 @@ def test_validate_data_dict(data_dict, error_message, template, nxdl_root, reque "no-child-provided-optional-parent", "int-instead-of-chars", "link-dict-instead-of-bool", - 
"allow-required-and-empty-group"): + "allow-required-and-empty-group", + "opt-group-completely-removed"): helpers.validate_data_dict(template, data_dict, nxdl_root) else: with pytest.raises(Exception) as execinfo: helpers.validate_data_dict(template, data_dict, nxdl_root) - assert (error_message) == str(execinfo.value) @@ -285,7 +336,7 @@ def test_validate_data_dict(data_dict, error_message, template, nxdl_root, reque id="path-exists-in-dict"), pytest.param( "/RANDOM/does/not/@exist", - (False, ""), + (False, None), id="path-does-not-exist-in-dict") ]) def test_path_in_data_dict(nxdl_path, expected, template): @@ -304,3 +355,47 @@ def test_atom_type_extractor_and_hill_conversion(): atom_list = helpers.extract_atom_types(test_chemical_formula) assert expected_atom_types == atom_list + + +def test_writing_of_root_attributes(caplog): + """ + Tests if all root attributes are populated + """ + template = Template() + filename = "my_nexus_file.nxs" + with caplog.at_level(logging.WARNING): + helpers.add_default_root_attributes(template, filename) + + assert "" == caplog.text + + keys_added = template.keys() + assert "/@NX_class" in keys_added + assert template["/@NX_class"] == "NXroot" + assert "/@file_name" in keys_added + assert template["/@file_name"] == filename + assert "/@file_time" in keys_added + assert "/@file_update_time" in keys_added + assert "/@NeXus_repository" in keys_added + assert "/@NeXus_version" in keys_added + assert "/@HDF5_version" in keys_added + assert "/@h5py_version" in keys_added + + +def test_warning_on_root_attribute_overwrite(caplog): + """ + A warning is emitted when a root attribute is overwritten + by pynxtools. + """ + template = Template() + template["/@NX_class"] = "NXwrong" + filname = "my_nexus_file.nxs" + with caplog.at_level(logging.WARNING): + helpers.add_default_root_attributes(template, filname) + error_text = ( + "The NXroot entry '/@NX_class' (value: NXwrong) should not be populated by the reader. " + "This is overwritten by the actually used value 'NXroot'" + ) + assert error_text in caplog.text + + assert "/@NX_class" in template.keys() + assert template["/@NX_class"] == "NXroot" diff --git a/tests/dataconverter/test_readers.py b/tests/dataconverter/test_readers.py index 3d2c86efd..d75344541 100644 --- a/tests/dataconverter/test_readers.py +++ b/tests/dataconverter/test_readers.py @@ -102,3 +102,32 @@ def test_has_correct_read_func(reader): assert isinstance(read_data, Template) assert validate_data_dict(template, read_data, root) + + +@pytest.mark.parametrize("reader_name,nxdl,undocumented_keys", [ + ('mpes', 'NXmpes', []) +]) +def test_shows_correct_warnings(reader_name, nxdl, undocumented_keys): + """ + Checks whether the read function generates the correct warnings. 
+ """ + def_dir = os.path.join(os.getcwd(), "pynxtools", "definitions") + dataconverter_data_dir = os.path.join("tests", "data", "dataconverter") + + input_files = sorted( + glob.glob(os.path.join(dataconverter_data_dir, "readers", reader_name, "*")) + ) + nxdl_file = os.path.join( + def_dir, "contributed_definitions", f"{nxdl}.nxdl.xml" + ) + + root = ET.parse(nxdl_file).getroot() + template = Template() + generate_template_from_nxdl(root, template) + + read_data = get_reader(reader_name)().read( + template=Template(template), file_paths=tuple(input_files) + ) + + assert validate_data_dict(template, read_data, root) + assert list(read_data.undocumented.keys()) == undocumented_keys diff --git a/tests/eln_mapper/__init__.py b/tests/eln_mapper/__init__.py new file mode 100644 index 000000000..7f1819634 --- /dev/null +++ b/tests/eln_mapper/__init__.py @@ -0,0 +1,16 @@ +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/eln_mapper/test_eln_mapper.py b/tests/eln_mapper/test_eln_mapper.py new file mode 100644 index 000000000..17f9130dd --- /dev/null +++ b/tests/eln_mapper/test_eln_mapper.py @@ -0,0 +1,107 @@ +"""This test is dedicated generate_eln converter tool. +""" + +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import os +from typing import Dict +from click import testing + + +import yaml +from pynxtools.eln_mapper import eln_mapper + + +def check_keys_from_two_dict(dict1: Dict, dict2: Dict): + """Compare keys of two dicts. + + Parameters + ---------- + dict1 : Dict + Dict-1 to compare the key with Dict-2 + dict2 : Dict + Dict-2 to compare the key with Dict-1 + """ + for (key1, val1), (key2, val2) in zip(dict1.items(), dict2.items()): + assert key1 == key2, "Test and Ref yaml file have different keys." + if isinstance(val1, dict) and isinstance(val2, dict): + check_keys_from_two_dict(val1, val2) + + +def test_reader_eln(tmp_path): + """Test eln that goes with reader. 
+ + Parameters + ---------- + tmp_path : pathlib.Path + A temporary path that is created for pytest + """ + + local_dir = os.path.abspath(os.path.dirname(__file__)) + ref_file = os.path.join(local_dir, '../data/eln_mapper/eln.yaml') + + test_file = os.path.join(tmp_path, 'eln.yaml') + cli_run = testing.CliRunner() + cli_run.invoke(eln_mapper.get_eln, [ + "--nxdl", + "NXmpes", + "--skip-top-levels", + 1, + "--output-file", + test_file, + "--eln-type", + 'eln']) + + with open(ref_file, encoding='utf-8', mode='r') as ref_f: + ref_dict = yaml.safe_load(ref_f) + + with open(test_file, encoding='utf-8', mode='r') as test_f: + test_dict = yaml.safe_load(test_f) + + check_keys_from_two_dict(ref_dict, test_dict) + + +def test_scheme_eln(tmp_path): + """Test Eln that goes in Nomad + + Parameters + ---------- + tmp_path : pathlib.Path + A temporary path that is created for pytest + """ + + local_dir = os.path.abspath(os.path.dirname(__file__)) + ref_file = os.path.join(local_dir, '../data/eln_mapper/mpes.scheme.archive.yaml') + + test_file = os.path.join(tmp_path, '.scheme.archive.yaml') + cli_run = testing.CliRunner() + cli_run.invoke(eln_mapper.get_eln, [ + "--nxdl", + "NXmpes", + "--output-file", + test_file, + "--eln-type", + 'scheme_eln']) + with open(ref_file, encoding='utf-8', mode='r') as ref_f: + ref_dict = yaml.safe_load(ref_f) + + with open(test_file, encoding='utf-8', mode='r') as test_f: + test_dict = yaml.safe_load(test_f) + + check_keys_from_two_dict(ref_dict, test_dict) diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index 894657d7a..d69b0fae2 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -49,9 +49,9 @@ def test_get_nexus_classes_units_attributes(): def test_nexus(tmp_path): - """The nexus test function - -""" + """ + The nexus test function + """ local_dir = os.path.abspath(os.path.dirname(__file__)) example_data = os.path.join(local_dir, '../data/nexus/201805_WSe2_arpes.nxs') logger = logging.getLogger(__name__) @@ -73,15 +73,12 @@ def test_nexus(tmp_path): encoding='utf-8' ) as reffile: ref = reffile.readlines() - assert log == ref - # didn't work with filecmp library - # log = os.path.join(local_dir, 'data/nexus_test_data/nexus_test.log') - # ref = os.path.join(local_dir, 'data/nexus_test_data/Ref2_nexus_test.log') - # print('yoyo', filecmp.cmp(log, ref, shallow=False)) - - # print('Testing of nexus.py is SUCCESSFUL.') + # import filecmp + # # didn't work with filecmp library + # log = os.path.join(local_dir, '../data/nexus_test_data/nexus_test.log') + # ref = os.path.join(local_dir, '../data/nexus_test_data/Ref_nexus_test.log') def test_get_node_at_nxdl_path(): @@ -102,7 +99,7 @@ def test_get_node_at_nxdl_path(): nxdl_file_path = os.path.join( local_dir, - "../../pynxtools/definitions/contributed_definitions/NXem.nxdl.xml" + "../data/nexus/NXtest2.nxdl.xml" ) elem = ET.parse(nxdl_file_path).getroot() node = nexus.get_node_at_nxdl_path( diff --git a/tests/nexus/test_version.py b/tests/nexus/test_version.py new file mode 100644 index 000000000..3fa915ce3 --- /dev/null +++ b/tests/nexus/test_version.py @@ -0,0 +1,16 @@ +""" +Tests the version retrieval for the nexus definitions submodule +""" +import re + +from pynxtools import get_nexus_version + + +def test_get_nexus_version(): + """ + Tests if we get a version string from nexus definitions + """ + version = get_nexus_version() + + assert version is not None + assert re.match(r"v\d{4}\.\d{2}\.post1\.dev\d+\+g[a-z0-9]", version) From 460e49ebc4e767610907f22435913312f6bcef67 Mon Sep 17 
00:00:00 2001 From: mkuehbach Date: Mon, 4 Dec 2023 13:09:24 +0100 Subject: [PATCH 29/84] Revert nxdefs to 9998376 --- pynxtools/definitions | 2 +- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pynxtools/definitions b/pynxtools/definitions index 615ff37cb..999837671 160000 --- a/pynxtools/definitions +++ b/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 615ff37cbafd2ca017fb61c119c0f5c0cf052a34 +Subproject commit 999837671373b962fed932829becd42acb7482f6 diff --git a/pyproject.toml b/pyproject.toml index d2c7853f0..91a1fea5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,6 @@ dev = [ [project.scripts] read_nexus = "pynxtools.nexus.nexus:main" dataconverter = "pynxtools.dataconverter.convert:convert_cli" -nyaml2nxdl = "pynxtools.nyaml2nxdl.nyaml2nxdl:launch_tool" generate_eln = "pynxtools.eln_mapper.eln_mapper:get_eln" [tool.setuptools.package-data] From abc99b6ef816d03ee54377a22b70397aa61036e2 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 4 Dec 2023 13:12:47 +0100 Subject: [PATCH 30/84] Removed nyaml2nxdl and related tools and test to manually sync with pynxtools/master cd54fde --- pynxtools/nyaml2nxdl/README.md | 72 - pynxtools/nyaml2nxdl/__init__.py | 22 - pynxtools/nyaml2nxdl/comment_collector.py | 508 -------- pynxtools/nyaml2nxdl/nyaml2nxdl.py | 227 ---- .../nyaml2nxdl/nyaml2nxdl_backward_tools.py | 947 -------------- .../nyaml2nxdl/nyaml2nxdl_forward_tools.py | 1161 ----------------- pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py | 224 ---- tests/data/nyaml2nxdl/NXattributes.yaml | 41 - .../data/nyaml2nxdl/NXcomment_yaml2nxdl.yaml | 68 - .../nyaml2nxdl/NXellipsometry-docCheck.yaml | 543 -------- tests/data/nyaml2nxdl/NXfilelineError1.yaml | 30 - tests/data/nyaml2nxdl/NXfilelineError2.yaml | 30 - tests/data/nyaml2nxdl/NXfilelineError3.yaml | 30 - tests/data/nyaml2nxdl/NXmytests.yaml | 39 - tests/data/nyaml2nxdl/NXnested_symbols.yaml | 19 - tests/data/nyaml2nxdl/NXtest_links.yaml | 8 - .../data/nyaml2nxdl/Ref_NXattributes.nxdl.xml | 88 -- tests/data/nyaml2nxdl/Ref_NXcomment.yaml | 68 - .../Ref_NXcomment_yaml2nxdl.nxdl.xml | 101 -- tests/data/nyaml2nxdl/Ref_NXellips.nxdl.xml | 586 --------- .../Ref_NXellipsometry-docCheck.nxdl.xml | 777 ----------- tests/data/nyaml2nxdl/Ref_NXellipsometry.yaml | 271 ---- tests/data/nyaml2nxdl/Ref_NXentry.nxdl.xml | 76 -- tests/data/nyaml2nxdl/Ref_NXentry.yaml | 136 -- tests/data/nyaml2nxdl/Ref_NXmytests.nxdl.xml | 112 -- .../nyaml2nxdl/Ref_NXnested_symbols.nxdl.xml | 89 -- .../data/nyaml2nxdl/Ref_NXtest_links.nxdl.xml | 32 - tests/nyaml2nxdl/README.md | 5 - tests/nyaml2nxdl/test_nyaml2nxdl.py | 372 ------ 29 files changed, 6682 deletions(-) delete mode 100644 pynxtools/nyaml2nxdl/README.md delete mode 100644 pynxtools/nyaml2nxdl/__init__.py delete mode 100644 pynxtools/nyaml2nxdl/comment_collector.py delete mode 100755 pynxtools/nyaml2nxdl/nyaml2nxdl.py delete mode 100755 pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py delete mode 100644 pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py delete mode 100644 pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py delete mode 100644 tests/data/nyaml2nxdl/NXattributes.yaml delete mode 100644 tests/data/nyaml2nxdl/NXcomment_yaml2nxdl.yaml delete mode 100644 tests/data/nyaml2nxdl/NXellipsometry-docCheck.yaml delete mode 100644 tests/data/nyaml2nxdl/NXfilelineError1.yaml delete mode 100644 tests/data/nyaml2nxdl/NXfilelineError2.yaml delete mode 100644 tests/data/nyaml2nxdl/NXfilelineError3.yaml delete mode 100644 tests/data/nyaml2nxdl/NXmytests.yaml delete mode 100644 
tests/data/nyaml2nxdl/NXnested_symbols.yaml delete mode 100644 tests/data/nyaml2nxdl/NXtest_links.yaml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXattributes.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXcomment.yaml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXcomment_yaml2nxdl.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXellips.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXellipsometry.yaml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXentry.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXentry.yaml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXmytests.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXnested_symbols.nxdl.xml delete mode 100644 tests/data/nyaml2nxdl/Ref_NXtest_links.nxdl.xml delete mode 100644 tests/nyaml2nxdl/README.md delete mode 100755 tests/nyaml2nxdl/test_nyaml2nxdl.py diff --git a/pynxtools/nyaml2nxdl/README.md b/pynxtools/nyaml2nxdl/README.md deleted file mode 100644 index ff083e189..000000000 --- a/pynxtools/nyaml2nxdl/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# YAML to NXDL converter and NXDL to YAML converter - -**NOTE: Please use python3.8 or above to run this converter** - -**Tools purpose**: Offer a simple YAML-based schema and a XML-based schema to describe NeXus instances. These can be NeXus application definitions or classes -such as base or contributed classes. Users either create NeXus instances by writing a YAML file or a XML file which details a hierarchy of data/metadata elements. -The forward (YAML -> NXDL.XML) and backward (NXDL.XML -> YAML) conversions are implemented. - -**How the tool works**: -- yaml2nxdl.py -1. Reads the user-specified NeXus instance, either in YML or XML format. -2. If input is in YAML, creates an instantiated NXDL schema XML tree by walking the dictionary nest. - If input is in XML, creates a YML file walking the dictionary nest. -3. Write the tree into a YAML file or a properly formatted NXDL XML schema file to disk. -4. Optionally, if --append argument is given, - the XML or YAML input file is interpreted as an extension of a base class and the entries contained in it - are appended below a standard NeXus base class. - You need to specify both your input file (with YAML or XML extension) and NeXus class (with no extension). - Both .yml and .nxdl.xml file of the extended class are printed. - -```console -user@box:~$ python yaml2nxdl.py - -Usage: python yaml2nxdl.py [OPTIONS] - -Options: - --input-file TEXT The path to the input data file to read. - --append TEXT Parse xml NeXus file and append to specified base class, - write the base class name with no extension. - --check-consistency Check consistency by generating another version of the input file. - E.g. for input file: NXexample.nxdl.xml the output file - NXexample_consistency.nxdl.xml. - --verbose Addictional std output info is printed to help debugging. - --help Show this message and exit. - -``` - -## Documentation - -**Rule set**: From transcoding YAML files we need to follow several rules. -* Named NeXus groups, which are instances of NeXus classes especially base or contributed classes. Creating (NXbeam) is a simple example of a request to define a group named according to NeXus default rules. mybeam1(NXbeam) or mybeam2(NXbeam) are examples how to create multiple named instances at the same hierarchy level. -* Members of groups so-called fields or attributes. A simple example of a member is voltage. 
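To make the naming rules above concrete, here is a minimal sketch (hypothetical instance names, parsed with PyYAML as the converter itself does):

```python
import yaml

# Hedged sketch of the rule set above: two named NXbeam instances at the same
# hierarchy level, each with a 'voltage' member (hypothetical schema, not a
# shipped NeXus definition).
schema = yaml.safe_load("""
mybeam1(NXbeam):
  voltage:
mybeam2(NXbeam):
  voltage:
""")
assert set(schema) == {'mybeam1(NXbeam)', 'mybeam2(NXbeam)'}
```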
Here the datatype is implied automatically as the default NeXus NX_CHAR type. By contrast, voltage(NX_FLOAT) can be used to instantiate a member of class which should be of NeXus type NX_FLOAT. -* And attributes of either groups or fields. Names of attributes have to be preceeded by \@ to mark them as attributes. -* Optionality: For all fields, groups and attributes in `application definitions` are `required` by default, except anything (`recommended` or `optional`) mentioned. - -**Special keywords**: Several keywords can be used as childs of groups, fields, and attributes to specify the members of these. Groups, fields and attributes are nodes of the XML tree. -* **doc**: A human-readable description/docstring -* **exists** Options are recommended, required, [min, 1, max, infty] numbers like here 1 can be replaced by any uint, or infty to indicate no restriction on how frequently the entry can occur inside the NXDL schema at the same hierarchy level. -* **link** Define links between nodes. -* **units** A statement introducing NeXus-compliant NXDL units arguments, like NX_VOLTAGE -* **dimensions** Details which dimensional arrays to expect -* **enumeration** Python list of strings which are considered as recommended entries to choose from. -* **dim_parameters** `dim` which is a child of `dimension` and the `dim` might have several attributes `ref`, -`incr` including `index` and `value`. So while writting `yaml` file schema definition please following structure: -``` -dimensions: - rank: integer value - dim: [[ind_1, val_1], [ind_2, val_2], ...] - dim_parameters: - ref: [ref_value_1, ref_value_2, ...] - incr: [incr_value_1, incr_value_2, ...] -``` -Keep in mind that length of all the lists must be same. - -## Next steps - -The NOMAD team is currently working on the establishing of a one-to-one mapping between -NeXus definitions and the NOMAD MetaInfo. As soon as this is in place the YAML files will -be annotated with further metadata so that they can serve two purposes. -On the one hand they can serve as an instance for a schema to create a GUI representation -of a NOMAD Oasis ELN schema. On the other hand the YAML to NXDL converter will skip all -those pieces of information which are irrelevant from a NeXus perspective. diff --git a/pynxtools/nyaml2nxdl/__init__.py b/pynxtools/nyaml2nxdl/__init__.py deleted file mode 100644 index 22eb35f68..000000000 --- a/pynxtools/nyaml2nxdl/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -""" -# Load paths -""" -# -*- coding: utf-8 -*- -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/pynxtools/nyaml2nxdl/comment_collector.py b/pynxtools/nyaml2nxdl/comment_collector.py deleted file mode 100644 index 5f0c5e3bc..000000000 --- a/pynxtools/nyaml2nxdl/comment_collector.py +++ /dev/null @@ -1,508 +0,0 @@ -#!usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. 
See https://nomad-lab.eu for further info.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""
-Collect comments in a list via the CommentCollector class. Each comment is an
-instance of Comment and carries the comment text together with the line or
-neighbour info to which the comment must be assigned.
-
-The class Comment is an abstract class for the general functions and methods
-to be implemented by the XMLComment and YAMLComment classes.
-
-NOTE: Here a comment block mainly stands for (comment text + the line or
-element for which the comment is intended).
-"""
-
-
-from typing import List, Type, Any, Tuple, Union, Dict
-from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader
-
-__all__ = ['Comment', 'CommentCollector', 'XMLComment', 'YAMLComment']
-
-
-# pylint: disable=inconsistent-return-statements
-class CommentCollector:
-    """CommentCollector will store a full comment ('Comment') object in
-    _comment_chain.
-    """
-
-    def __init__(self, input_file: str = None,
-                 loaded_obj: Union[object, Dict] = None):
-        """
-        Initialise CommentCollector
-        parameters:
-            input_file: raw input file (xml, yml)
-            loaded_obj: file loaded by a third-party library
-        """
-        self._comment_chain: List = []
-        self.file = input_file
-        self._comment_tracker = 0
-        self._comment_hash: Dict[Tuple, Type[Comment]] = {}
-        self.comment: Type[Comment]
-        if self.file and not loaded_obj:
-            if self.file.split('.')[-1] == 'xml':
-                self.comment = XMLComment
-            if self.file.split('.')[-1] == 'yaml':
-                self.comment = YAMLComment
-                with open(self.file, "r", encoding="utf-8") as plain_text_yaml:
-                    loader = LineLoader(plain_text_yaml)
-                    self.comment.__yaml_dict__ = loader.get_single_data()
-        elif self.file and loaded_obj:
-            if self.file.split('.')[-1] == 'yaml' and isinstance(loaded_obj, dict):
-                self.comment = YAMLComment
-                self.comment.__yaml_dict__ = loaded_obj
-            else:
-                raise ValueError("Incorrect inputs for CommentCollector, e.g. wrong file extension.")
-
-        else:
-            raise ValueError("Incorrect inputs for CommentCollector")
-
-    def extract_all_comment_blocks(self):
-        """
-        Collect all comments. Note that here a comment means (comment text + the
-        element or line info intended for the comment.
- """ - id_ = 0 - single_comment = self.comment(comment_id=id_) - with open(self.file, mode='r', encoding='UTF-8') as enc_f: - lines = enc_f.readlines() - # Make an empty line for last comment if no empty lines in original file - if lines[-1] != '': - lines.append('') - for line_num, line in enumerate(lines): - if single_comment.is_storing_single_comment(): - # If the last comment comes without post nxdl fields, groups and attributes - if '++ SHA HASH ++' in line: - # Handle with stored nxdl.xml file that is not part of yaml - line = '' - single_comment.process_each_line(line + 'post_comment', (line_num + 1)) - self._comment_chain.append(single_comment) - break - if line_num < (len(lines) - 1): - # Processing file from Line number 1 - single_comment.process_each_line(line, (line_num + 1)) - else: - # For processing last line of file - single_comment.process_each_line(line + 'post_comment', (line_num + 1)) - self._comment_chain.append(single_comment) - else: - self._comment_chain.append(single_comment) - single_comment = self.comment(last_comment=single_comment) - single_comment.process_each_line(line, (line_num + 1)) - - def get_comment(self): - """ - Return comment from comment_chain that must come earlier in order. - """ - return self._comment_chain[self._comment_tracker] - - def get_coment_by_line_info(self, comment_locs: Tuple[str, Union[int, str]]): - """ - Get comment using line information. - """ - if comment_locs in self._comment_hash: - return self._comment_hash[comment_locs] - - line_annot, line_loc = comment_locs - for cmnt in self._comment_chain: - if line_annot in cmnt: - line_loc_ = cmnt.get_line_number(line_annot) - if line_loc == line_loc_: - self._comment_hash[comment_locs] = cmnt - return cmnt - - def remove_comment(self, ind): - """Remove a comment from comment list. - """ - if ind < len(self._comment_chain): - del self._comment_chain[ind] - else: - raise ValueError("Oops! Index is out of range.") - - def reload_comment(self): - """ - Update self._comment_tracker after done with last comment. - """ - self._comment_tracker += 1 - - def __contains__(self, comment_locs: tuple): - """ - Confirm wether the comment corresponds to key_line and line_loc - is exist or not. - comment_locs is equvalant to (line_annotation, line_loc) e.g. - (__line__doc and 35) - """ - if not isinstance(comment_locs, tuple): - raise TypeError("Comment_locs should be 'tuple' containing line annotation " - "(e.g.__line__doc) and line_loc (e.g. 35).") - line_annot, line_loc = comment_locs - for cmnt in self._comment_chain: - if line_annot in cmnt: - line_loc_ = cmnt.get_line_number(line_annot) - if line_loc == line_loc_: - self._comment_hash[comment_locs] = cmnt - return True - return False - - def __getitem__(self, ind): - """Get comment from self.obj._comment_chain by index. - """ - if isinstance(ind, int): - if ind >= len(self._comment_chain): - raise IndexError(f'Oops! Comment index {ind} in {__class__} is out of range!') - return self._comment_chain[ind] - - if isinstance(ind, slice): - start_n = ind.start or 0 - end_n = ind.stop or len(self._comment_chain) - return self._comment_chain[start_n:end_n] - - def __iter__(self): - """get comment ieratively - """ - return iter(self._comment_chain) - - -# pylint: disable=too-many-instance-attributes -class Comment: - """ - This class is building yaml comment and the intended line for what comment is written. 
- """ - - def __init__(self, - comment_id: int = -1, - last_comment: 'Comment' = None) -> None: - """Comment object can be considered as a block element that includes - document element (an entity for what the comment is written). - """ - self._elemt: Any = None - self._elemt_text: str = None - self._is_elemt_found: bool = None - self._is_elemt_stored: bool = None - - self._comnt: str = '' - # If Multiple comments for one element or entity - self._comnt_list: List[str] = [] - self.last_comment: 'Comment' = last_comment if last_comment else None - if comment_id >= 0 and last_comment: - self.cid = comment_id - self.last_comment = last_comment - elif comment_id == 0 and not last_comment: - self.cid = comment_id - self.last_comment = None - elif last_comment: - self.cid = self.last_comment.cid + 1 - self.last_comment = last_comment - else: - raise ValueError("Neither last comment nor comment id dound") - self._comnt_start_found: bool = False - self._comnt_end_found: bool = False - self.is_storing_single_comment = lambda: not (self._comnt_end_found - and self._is_elemt_stored) - - def get_comment_text(self) -> Union[List, str]: - """ - Extract comment text from entrire comment (comment text + elment or - line for what comment is intended) - """ - - def append_comment(self, text: str) -> None: - """ - Append lines of the same comment. - """ - - def store_element(self, args) -> None: - """ - Strore comment text and line or element that is intended for comment. - """ - - -class XMLComment(Comment): - """ - XMLComment to store xml comment element. - """ - - def __init__(self, comment_id: int = -1, last_comment: 'Comment' = None) -> None: - super().__init__(comment_id, last_comment) - - def process_each_line(self, text, line_num): - """Take care of each line of text. Through which function the text - must be passed should be decide here. 
- """ - text = text.strip() - if text and line_num: - self.append_comment(text) - if self._comnt_end_found and not self._is_elemt_found: - # for multiple comment if exist - if self._comnt: - self._comnt_list.append(self._comnt) - self._comnt = '' - - if self._comnt_end_found: - self.store_element(text) - - def append_comment(self, text: str) -> None: - # Comment in single line - if '' == text[-4:]: - self._comnt_end_found = True - self._comnt_start_found = False - self._comnt = self._comnt.replace('-->', '') - - elif '-->' == text[0:4] and self._comnt_start_found: - self._comnt_end_found = True - self._comnt_start_found = False - self._comnt = self._comnt + '\n' + text.replace('-->', '') - elif self._comnt_start_found: - self._comnt = self._comnt + '\n' + text - - # pylint: disable=arguments-differ, arguments-renamed - def store_element(self, text) -> None: - def collect_xml_attributes(text_part): - for part in text_part: - part = part.strip() - if part and '">' == ''.join(part[-2:]): - self._is_elemt_stored = True - self._is_elemt_found = False - part = ''.join(part[0:-2]) - elif part and '"/>' == ''.join(part[-3:]): - self._is_elemt_stored = True - self._is_elemt_found = False - part = ''.join(part[0:-3]) - elif part and '/>' == ''.join(part[-2:]): - self._is_elemt_stored = True - self._is_elemt_found = False - part = ''.join(part[0:-2]) - elif part and '>' == part[-1]: - self._is_elemt_stored = True - self._is_elemt_found = False - part = ''.join(part[0:-1]) - elif part and '"' == part[-1]: - part = ''.join(part[0:-1]) - - if '="' in part: - lf_prt, rt_prt = part.split('="') - else: - continue - if ':' in lf_prt: - continue - self._elemt[lf_prt] = str(rt_prt) - if not self._elemt: - self._elemt = {} - # First check for comment part has been collected prefectly - if ' Union[List, str]: - """ - This method returns list of commnent text. As some xml element might have - multiple separated comment intended for a single element. - """ - return self._comnt_list - - -class YAMLComment(Comment): - """ - This class for stroing comment text as well as location of the comment e.g. line - number of other in the file. - NOTE: - 1. Do not delete any element form yaml dictionary (for loaded_obj. check: Comment_collector - class. because this loaded file has been exploited in nyaml2nxdl forward tools.) - """ - # Class level variable. The main reason behind that to follow structure of - # abstract class 'Comment' - __yaml_dict__: dict = {} - __yaml_line_info: dict = {} - __comment_escape_char = {'--': '-\\-'} - - def __init__(self, comment_id: int = -1, last_comment: 'Comment' = None) -> None: - """Initialization of YAMLComment follow Comment class. - """ - super().__init__(comment_id, last_comment) - self.collect_yaml_line_info(YAMLComment.__yaml_dict__, YAMLComment.__yaml_line_info) - - def process_each_line(self, text, line_num): - """Take care of each line of text. Through which function the text - must be passed should be decide here. 
- """ - text = text.strip() - self.append_comment(text) - if self._comnt_end_found and not self._is_elemt_found: - if self._comnt: - self._comnt_list.append(self._comnt) - self._comnt = '' - - if self._comnt_end_found: - line_key = '' - if ':' in text: - ind = text.index(':') - line_key = '__line__' + ''.join(text[0:ind]) - - for l_num, l_key in self.__yaml_line_info.items(): - if line_num == int(l_num) and line_key == l_key: - self.store_element(line_key, line_num) - break - # Comment comes very end of the file - if text == 'post_comment' and line_key == '': - line_key = '__line__post_comment' - self.store_element(line_key, line_num) - - def has_post_comment(self): - """ - Ensure is this a post coment or not. - Post comment means the comment that come at the very end without having any - nxdl element(class, group, filed and attribute.) - """ - for key, _ in self._elemt.items(): - if '__line__post_comment' == key: - return True - return False - - def append_comment(self, text: str) -> None: - """ - Collects all the line of the same comment and - append them with that single comment. - """ - # check for escape char - text = self.replace_scape_char(text) - # Empty line after last line of comment - if not text and self._comnt_start_found: - self._comnt_end_found = True - self._comnt_start_found = False - # For empty line inside doc or yaml file. - elif not text: - return - elif '# ' == ''.join(text[0:2]): - self._comnt_start_found = True - self._comnt_end_found = False - self._comnt = '' if not self._comnt else self._comnt + '\n' - self._comnt = self._comnt + ''.join(text[2:]) - elif '#' == text[0]: - self._comnt_start_found = True - self._comnt_end_found = False - self._comnt = '' if not self._comnt else self._comnt + '\n' - self._comnt = self._comnt + ''.join(text[1:]) - elif 'post_comment' == text: - self._comnt_end_found = True - self._comnt_start_found = False - # for any line after 'comment block' found - elif self._comnt_start_found: - self._comnt_start_found = False - self._comnt_end_found = True - - # pylint: disable=arguments-differ - def store_element(self, line_key, line_number): - """ - Store comment content and information of commen location (for what comment is - created.). - """ - self._elemt = {} - self._elemt[line_key] = int(line_number) - self._is_elemt_found = False - self._is_elemt_stored = True - - def get_comment_text(self): - """ - Return list of comments if there are multiple comment for same yaml line. - """ - return self._comnt_list - - def get_line_number(self, line_key): - """ - Retrun line number for what line the comment is created - """ - return self._elemt[line_key] - - def get_line_info(self): - """ - Return line annotation and line number from a comment. - """ - for line_anno, line_loc in self._elemt.items(): - return line_anno, line_loc - - def replace_scape_char(self, text): - """Replace escape char according to __comment_escape_char dict - """ - for ecp_char, ecp_alt in YAMLComment.__comment_escape_char.items(): - if ecp_char in text: - text = text.replace(ecp_char, ecp_alt) - return text - - def get_element_location(self): - """ - Retrun yaml line '__line__KEY' info and and line numner - """ - if len(self._elemt) > 1: - raise ValueError(f"Comment element should be one but got " - f"{self._elemt}") - - for key, val in self._elemt.items(): - yield key, val - - def collect_yaml_line_info(self, yaml_dict, line_info_dict): - """Collect __line__key and corresponding value from - a yaml file dictonary in another dictionary. 
- """ - for line_key, line_n in yaml_dict.items(): - if '__line__' in line_key: - line_info_dict[line_n] = line_key - - for _, val in yaml_dict.items(): - if isinstance(val, dict): - self.collect_yaml_line_info(val, line_info_dict) - - def __contains__(self, line_key): - """For Checking whether __line__NAME is in _elemt dict or not.""" - return line_key in self._elemt - - def __eq__(self, comment_obj): - """Check the self has same value as right comment. - """ - if len(self._comnt_list) != len(comment_obj._comnt_list): - return False - for left_cmnt, right_cmnt in zip(self._comnt_list, comment_obj._comnt_list): - left_cmnt = left_cmnt.split('\n') - right_cmnt = right_cmnt.split('\n') - for left_line, right_line in zip(left_cmnt, right_cmnt): - if left_line.strip() != right_line.strip(): - return False - return True diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl.py b/pynxtools/nyaml2nxdl/nyaml2nxdl.py deleted file mode 100755 index 160b3f830..000000000 --- a/pynxtools/nyaml2nxdl/nyaml2nxdl.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python3 -"""Main file of yaml2nxdl tool. -Users create NeXus instances by writing a YAML file -which details a hierarchy of data/metadata elements - -""" -# -*- coding: utf-8 -*- -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import xml.etree.ElementTree as ET - -import click -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_sha256_hash, - extend_yamlfile_with_comment, - separate_hash_yaml_and_nxdl) -from pynxtools.nyaml2nxdl.nyaml2nxdl_forward_tools import nyaml2nxdl, pretty_print_xml -from pynxtools.nyaml2nxdl.nyaml2nxdl_backward_tools import (Nxdl2yaml, - compare_niac_and_my) - - -DEPTH_SIZE = 4 * " " - -# NOTE: Some handful links for nyaml2nxdl converter: -# https://manual.nexusformat.org/nxdl_desc.html?highlight=optional - - -def generate_nxdl_or_retrieve_nxdl(yaml_file, out_xml_file, verbose): - """ - Generate yaml, nxdl and hash. - if the extracted hash is exactly the same as producd from generated yaml then - retrieve the nxdl part from provided yaml. - Else, generate nxdl from separated yaml with the help of nyaml2nxdl function - """ - pa_path, rel_file = os.path.split(yaml_file) - sep_yaml = os.path.join(pa_path, f'temp_{rel_file}') - hash_found = separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, out_xml_file) - - if hash_found: - gen_hash = get_sha256_hash(sep_yaml) - if hash_found == gen_hash: - os.remove(sep_yaml) - return - - nyaml2nxdl(sep_yaml, out_xml_file, verbose) - os.remove(sep_yaml) - - -# pylint: disable=too-many-locals -def append_yml(input_file, append, verbose): - """Append to an existing NeXus base class new elements provided in YML input file \ -and print both an XML and YML file of the extended base class. 
- -""" - nexus_def_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../definitions') - assert [s for s in os.listdir(os.path.join(nexus_def_path, 'base_classes') - ) if append.strip() == s.replace('.nxdl.xml', '')], \ - 'Your base class extension does not match any existing NeXus base classes' - tree = ET.parse(os.path.join(nexus_def_path + '/base_classes', append + '.nxdl.xml')) - root = tree.getroot() - # warning: tmp files are printed on disk and removed at the ends!! - pretty_print_xml(root, 'tmp.nxdl.xml') - input_tmp_xml = 'tmp.nxdl.xml' - out_tmp_yml = 'tmp_parsed.yaml' - converter = Nxdl2yaml([], []) - converter.print_yml(input_tmp_xml, out_tmp_yml, verbose) - nyaml2nxdl(input_file=out_tmp_yml, - out_file='tmp_parsed.nxdl.xml', - verbose=verbose) - tree = ET.parse('tmp_parsed.nxdl.xml') - tree2 = ET.parse(input_file) - root_no_duplicates = ET.Element( - 'definition', {'xmlns': 'http://definition.nexusformat.org/nxdl/3.1', - 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation': 'http://www.w3.org/2001/XMLSchema-instance' - } - ) - for attribute_keys in root.attrib.keys(): - if attribute_keys != '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': - attribute_value = root.attrib[attribute_keys] - root_no_duplicates.set(attribute_keys, attribute_value) - for elems in root.iter(): - if 'doc' in elems.tag: - root_doc = ET.SubElement(root_no_duplicates, 'doc') - root_doc.text = elems.text - break - group = '{http://definition.nexusformat.org/nxdl/3.1}group' - root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, - group, - root_no_duplicates) - field = '{http://definition.nexusformat.org/nxdl/3.1}field' - root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, - field, - root_no_duplicates) - attribute = '{http://definition.nexusformat.org/nxdl/3.1}attribute' - root_no_duplicates = compare_niac_and_my(tree, tree2, verbose, - attribute, - root_no_duplicates) - pretty_print_xml(root_no_duplicates, f"{input_file.replace('.nxdl.xml', '')}" - f"_appended.nxdl.xml") - - input_file_xml = input_file.replace('.nxdl.xml', "_appended.nxdl.xml") - out_file_yml = input_file.replace('.nxdl.xml', "_appended_parsed.yaml") - converter = Nxdl2yaml([], []) - converter.print_yml(input_file_xml, out_file_yml, verbose) - nyaml2nxdl(input_file=out_file_yml, - out_file=out_file_yml.replace('.yaml', '.nxdl.xml'), - verbose=verbose) - os.rename(f"{input_file.replace('.nxdl.xml', '_appended_parsed.yaml')}", - f"{input_file.replace('.nxdl.xml', '_appended.yaml')}") - os.rename(f"{input_file.replace('.nxdl.xml', '_appended_parsed.nxdl.xml')}", - f"{input_file.replace('.nxdl.xml', '_appended.nxdl.xml')}") - os.remove('tmp.nxdl.xml') - os.remove('tmp_parsed.yaml') - os.remove('tmp_parsed.nxdl.xml') - - -def split_name_and_extension(file_name): - """ - Split file name into extension and rest of the file name. - return file raw nam and extension - """ - parts = file_name.rsplit('.', 3) - if len(parts) == 2: - raw = parts[0] - ext = parts[1] - if len(parts) == 3: - raw = parts[0] - ext = '.'.join(parts[1:]) - - return raw, ext - - -@click.command() -@click.option( - '--input-file', - required=True, - prompt=True, - help='The path to the XML or YAML input data file to read and create \ -a YAML or XML file from, respectively.' 
-) -@click.option( - '--append', - help='Parse xml file and append to base class, given that the xml file has same name \ -of an existing base class' -) -@click.option( - '--check-consistency', - is_flag=True, - default=False, - help=('Check wether yaml or nxdl has followed general rules of scema or not' - 'check whether your comment in the right place or not. The option render an ' - 'output file of the same extension(*_consistency.yaml or *_consistency.nxdl.xml)') -) -@click.option( - '--verbose', - is_flag=True, - default=False, - help='Print in standard output keywords and value types to help \ -possible issues in yaml files' -) -def launch_tool(input_file, verbose, append, check_consistency): - """ - Main function that distiguishes the input file format and launches the tools. - """ - if os.path.isfile(input_file): - raw_name, ext = split_name_and_extension(input_file) - else: - raise ValueError("Need a valid input file.") - - if ext == 'yaml': - xml_out_file = raw_name + '.nxdl.xml' - generate_nxdl_or_retrieve_nxdl(input_file, xml_out_file, verbose) - if append: - append_yml(raw_name + '.nxdl.xml', - append, - verbose - ) - # For consistency running - if check_consistency: - yaml_out_file = raw_name + '_consistency.' + ext - converter = Nxdl2yaml([], []) - converter.print_yml(xml_out_file, yaml_out_file, verbose) - os.remove(xml_out_file) - elif ext == 'nxdl.xml': - if not append: - yaml_out_file = raw_name + '_parsed' + '.yaml' - converter = Nxdl2yaml([], []) - converter.print_yml(input_file, yaml_out_file, verbose) - # Append nxdl.xml file with yaml output file - yaml_hash = get_sha256_hash(yaml_out_file) - # Lines as divider between yaml and nxdl - top_lines = [('\n# ++++++++++++++++++++++++++++++++++ SHA HASH' - ' ++++++++++++++++++++++++++++++++++\n'), - f'# {yaml_hash}\n'] - - extend_yamlfile_with_comment(yaml_file=yaml_out_file, - file_to_be_appended=input_file, - top_lines_list=top_lines) - else: - append_yml(input_file, append, verbose) - # Taking care of consistency running - if check_consistency: - xml_out_file = raw_name + '_consistency.' + ext - generate_nxdl_or_retrieve_nxdl(yaml_out_file, xml_out_file, verbose) - os.remove(yaml_out_file) - else: - raise ValueError("Provide correct file with extension '.yaml or '.nxdl.xml") - - -if __name__ == '__main__': - launch_tool().parse() # pylint: disable=no-value-for-parameter diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py b/pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py deleted file mode 100755 index 72f5a6c42..000000000 --- a/pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py +++ /dev/null @@ -1,947 +0,0 @@ -#!/usr/bin/env python3 -"""This file collects the function used in the reverse tool nxdl2yaml. - -""" -# -*- coding: utf-8 -*- -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
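A conversion can then be triggered from Python via click's test runner, mirroring the CLI tests elsewhere in this series ('NXexample.yaml' is a placeholder):

```python
from click.testing import CliRunner
from pynxtools.nyaml2nxdl.nyaml2nxdl import launch_tool

# Converting a YAML definition writes NXexample.nxdl.xml next to the input.
result = CliRunner().invoke(launch_tool, ['--input-file', 'NXexample.yaml'])
assert result.exit_code == 0
```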
-# -import sys -from typing import List, Dict -import xml.etree.ElementTree as ET -import os - -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_node_parent_info, - get_yaml_escape_char_dict, - cleaning_empty_lines) -from pynxtools.dataconverter.helpers import remove_namespace_from_tag - - -DEPTH_SIZE = " " -CMNT_TAG = '!--' - - -def separate_pi_comments(input_file): - """ - Separate PI comments from ProcessesInstruction (pi) - """ - comments_list = [] - comment = [] - xml_lines = [] - - with open(input_file, "r", encoding='utf-8') as file: - lines = file.readlines() - has_pi = True - for line in lines: - c_start = '' - def_tag = ' 0 and has_pi: - comment.append(line.replace(cmnt_end, '')) - comments_list.append(''.join(comment)) - comment = [] - elif def_tag in line or not has_pi: - has_pi = False - xml_lines.append(line) - elif len(comment) > 0 and has_pi: - comment.append(line) - else: - xml_lines.append(line) - return comments_list, ''.join(xml_lines) - - -# Collected: https://dustinoprea.com/2019/01/22/python-parsing-xml-and-retaining-the-comments/ -class _CommentedTreeBuilder(ET.TreeBuilder): - - def comment(self, text): - """ - defining comment builder in TreeBuilder - """ - self.start('!--', {}) - self.data(text) - self.end('--') - - -def parse(filepath): - """ - Construct parse function for modified tree builder for including modified TreeBuilder - and rebuilding XMLParser. - """ - comments, xml_str = separate_pi_comments(filepath) - ctb = _CommentedTreeBuilder() - xp_parser = ET.XMLParser(target=ctb) - root = ET.fromstring(xml_str, parser=xp_parser) - return comments, root - - -def handle_mapping_char(text, depth=-1, skip_n_line_on_top=False): - """Check for ":" char and replace it by "':'". """ - - escape_char = get_yaml_escape_char_dict() - for esc_key, val in escape_char.items(): - if esc_key in text: - text = text.replace(esc_key, val) - if not skip_n_line_on_top: - if depth > 0: - text = add_new_line_with_pipe_on_top(text, depth) - else: - raise ValueError("Need depth size to co-ordinate text line in yaml file.") - return text - - -def add_new_line_with_pipe_on_top(text, depth): - """ - Return modified text for what we get error in converter, such as ':'. After adding a - new line at the start of text the error is solved. 
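For example, a field value containing ':' would collide with YAML mapping syntax, so it is rewritten as a literal block:

```python
# '|' is put on the key line and the text moves one indentation level deeper;
# the exact indentation width follows DEPTH_SIZE defined above.
text = add_new_line_with_pipe_on_top('ratio: 1:2', depth=2)
assert text.startswith('|\n') and text.endswith('ratio: 1:2')
```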
- """ - char_list_to_add_new_line_on_top_of_text = [":"] - for char in char_list_to_add_new_line_on_top_of_text: - if char in text: - return '|' + '\n' + depth * DEPTH_SIZE + text - return text - - -# pylint: disable=too-many-instance-attributes -class Nxdl2yaml(): - """ - Parse XML file and print a YML file - """ - - def __init__( - self, - symbol_list: List[str], - root_level_definition: List[str], - root_level_doc='', - root_level_symbols=''): - - # updated part of yaml_dict - self.found_definition = False - self.root_level_doc = root_level_doc - self.root_level_symbols = root_level_symbols - self.root_level_definition = root_level_definition - self.symbol_list = symbol_list - self.is_last_element_comment = False - self.include_comment = True - self.pi_comments = None - # NOTE: Here is how root_level_comments organised for storing comments - # root_level_comment= {'root_doc': comment, - # 'symbols': comment, - # The 'symbol_doc_comments' list is for comments from all 'symbol doc' - # 'symbol_doc_comments' : [comments] - # 'symbol_list': [symbols], - # The 'symbol_comments' contains comments for 'symbols doc' and all 'symbol' - # 'symbol_comments': [comments]} - self.root_level_comment: Dict[str, str] = {} - - def print_yml(self, input_file, output_yml, verbose): - """ - Parse an XML file provided as input and print a YML file - """ - if os.path.isfile(output_yml): - os.remove(output_yml) - - depth = 0 - - self.pi_comments, root = parse(input_file) - xml_tree = {'tree': root, 'node': root} - self.xmlparse(output_yml, xml_tree, depth, verbose) - - def handle_symbols(self, depth, node): - """Handle symbols field and its childs symbol""" - - # pylint: disable=consider-using-f-string - self.root_level_symbols = ( - f"{remove_namespace_from_tag(node.tag)}: " - f"{node.text.strip() if node.text else ''}" - ) - depth += 1 - last_comment = '' - sbl_doc_cmnt_list = [] - # Comments that come above symbol tag - symbol_cmnt_list = [] - for child in list(node): - tag = remove_namespace_from_tag(child.tag) - if tag == CMNT_TAG and self.include_comment: - last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, child.text) - if tag == 'doc': - symbol_cmnt_list.append(last_comment) - # The bellow line is for handling lenth of 'symbol_comments' and - # 'symbol_doc_comments'. 
Otherwise print_root_level_info() gets inconsistency - # over for the loop while writting comment on file - sbl_doc_cmnt_list.append('') - last_comment = '' - self.symbol_list.append(self.handle_not_root_level_doc(depth, - text=child.text)) - elif tag == 'symbol': - # place holder is symbol name - symbol_cmnt_list.append(last_comment) - last_comment = '' - if 'doc' in child.attrib: - self.symbol_list.append( - self.handle_not_root_level_doc(depth, - tag=child.attrib['name'], - text=child.attrib['doc'])) - else: - for symbol_doc in list(child): - tag = remove_namespace_from_tag(symbol_doc.tag) - if tag == CMNT_TAG and self.include_comment: - last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, - symbol_doc.text) - if tag == 'doc': - sbl_doc_cmnt_list.append(last_comment) - last_comment = '' - self.symbol_list.append( - self.handle_not_root_level_doc(depth, - tag=child.attrib['name'], - text=symbol_doc.text)) - self.store_root_level_comments('symbol_doc_comments', sbl_doc_cmnt_list) - self.store_root_level_comments('symbol_comments', symbol_cmnt_list) - - def store_root_level_comments(self, holder, comment): - """Store yaml text or section line and the comments inteded for that lines or section""" - - self.root_level_comment[holder] = comment - - def handle_definition(self, node): - """ - Handle definition group and its attributes - NOTE: Here we tried to store the order of the xml element attributes. So that we get - exactly the same file in nxdl from yaml. - """ - # pylint: disable=consider-using-f-string - # self.root_level_definition[0] = '' - keyword = '' - # tmp_word for reseving the location - tmp_word = "#xx#" - attribs = node.attrib - # for tracking the order of name and type - keyword_order = -1 - for item in attribs: - if "name" in item: - keyword = keyword + attribs[item] - if keyword_order == -1: - self.root_level_definition.append(tmp_word) - keyword_order = self.root_level_definition.index(tmp_word) - elif "extends" in item: - keyword = f"{keyword}({attribs[item]})" - if keyword_order == -1: - self.root_level_definition.append(tmp_word) - keyword_order = self.root_level_definition.index(tmp_word) - elif 'schemaLocation' not in item \ - and 'extends' != item: - text = f"{item}: {attribs[item]}" - self.root_level_definition.append(text) - self.root_level_definition[keyword_order] = f"{keyword}:" - - def handle_root_level_doc(self, node): - """ - Handle the documentation field found at root level. - """ - # tag = remove_namespace_from_tag(node.tag) - text = node.text - text = self.handle_not_root_level_doc(depth=0, text=text) - self.root_level_doc = text - - # pylint: disable=too-many-branches - def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None): - """ - Handle docs field along the yaml file. In this function we also tried to keep - the track of intended indentation. E.g. the bollow doc block. - * Topic name - Description of topic - """ - - # Handling empty doc - if not text: - text = "" - else: - text = handle_mapping_char(text, -1, True) - if "\n" in text: - # To remove '\n' character as it will be added before text. 
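Schematically, handle_definition() above fuses the root attributes into the YAML root keyword (attribute values assumed for illustration):

```python
# Assumed attributes of the <definition> root element:
attribs = {'name': 'NXexample', 'extends': 'NXobject', 'category': 'application'}
# handle_definition() keeps the original attribute order and fuses name and
# extends into a single 'name(extends)' keyword, so root_level_definition
# afterwards holds: ['NXexample(NXobject):', 'category: application']
```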
- text = cleaning_empty_lines(text.split('\n')) - text_tmp = [] - yaml_indent_n = len((depth + 1) * DEPTH_SIZE) - # Find indentaion in the first text line with alphabet - tmp_i = 0 - while tmp_i != -1: - first_line_indent_n = 0 - # Taking care of empty text whitout any character - if len(text) == 1 and text[0] == '': - break - for ch_ in text[tmp_i]: - if ch_ == ' ': - first_line_indent_n = first_line_indent_n + 1 - elif ch_ != '': - tmp_i = -2 - break - tmp_i = tmp_i + 1 - # Taking care of doc like bellow: - # Text liness - # text continues - # So no indentaion at the staring or doc. So doc group will come along general - # alignment - if first_line_indent_n == 0: - first_line_indent_n = yaml_indent_n - - # for indent_diff -ve all lines will move left by the same ammout - # for indect_diff +ve all lines will move right the same amount - indent_diff = yaml_indent_n - first_line_indent_n - # CHeck for first line empty if not keep first line empty - - for _, line in enumerate(text): - line_indent_n = 0 - # Collect first empty space without alphabate - for ch_ in line: - if ch_ == ' ': - line_indent_n = line_indent_n + 1 - else: - break - line_indent_n = line_indent_n + indent_diff - if line_indent_n < yaml_indent_n: - # if line still under yaml identation - text_tmp.append(yaml_indent_n * ' ' + line.strip()) - else: - text_tmp.append(line_indent_n * ' ' + line.strip()) - - text = '\n' + '\n'.join(text_tmp) - if "}" in tag: - tag = remove_namespace_from_tag(tag) - indent = depth * DEPTH_SIZE - elif text: - text = '\n' + (depth + 1) * DEPTH_SIZE + text.strip() - if "}" in tag: - tag = remove_namespace_from_tag(tag) - indent = depth * DEPTH_SIZE - else: - text = "" - if "}" in tag: - tag = remove_namespace_from_tag(tag) - indent = depth * DEPTH_SIZE - - doc_str = f"{indent}{tag}: |{text}\n" - if file_out: - file_out.write(doc_str) - return None - return doc_str - - def write_out(self, indent, text, file_out): - """ - Write text line in output file. 
- """ - line_string = f"{indent}{text.rstrip()}\n" - file_out.write(line_string) - - def print_root_level_doc(self, file_out): - """ - Print at the root level of YML file \ - the general documentation field found in XML file - """ - indent = 0 * DEPTH_SIZE - - if ('root_doc' in self.root_level_comment - and self.root_level_comment['root_doc'] != ''): - text = self.root_level_comment['root_doc'] - self.write_out(indent, text, file_out) - - text = self.root_level_doc - self.write_out(indent, text, file_out) - self.root_level_doc = '' - - def comvert_to_ymal_comment(self, indent, text): - """ - Convert into yaml comment by adding exta '#' char in front of comment lines - """ - lines = text.split('\n') - mod_lines = [] - for line in lines: - line = line.strip() - if line and line[0] != '#': - line = indent + '# ' + line - mod_lines.append(line) - elif line: - line = indent + line - mod_lines.append(line) - # The starting '\n' to keep multiple comments separate - return '\n' + '\n'.join(mod_lines) - - def print_root_level_info(self, depth, file_out): - """ - Print at the root level of YML file \ - the information stored as definition attributes in the XML file - """ - # pylint: disable=consider-using-f-string - if depth < 0: - raise ValueError("Somthing wrong with indentaion in root level.") - - has_categoty = False - for def_line in self.root_level_definition: - if def_line in ("category: application", "category: base"): - self.write_out(indent=0 * DEPTH_SIZE, text=def_line, file_out=file_out) - # file_out.write(f"{def_line}\n") - has_categoty = True - - if not has_categoty: - raise ValueError("Definition dose not get any category from 'base or application'.") - self.print_root_level_doc(file_out) - if 'symbols' in self.root_level_comment and self.root_level_comment['symbols'] != '': - indent = depth * DEPTH_SIZE - text = self.root_level_comment['symbols'] - self.write_out(indent, text, file_out) - if self.root_level_symbols: - self.write_out(indent=0 * DEPTH_SIZE, text=self.root_level_symbols, file_out=file_out) - # symbol_list include 'symbols doc', and all 'symbol' - for ind, symbol in enumerate(self.symbol_list): - # Taking care of comments that come on to of 'symbols doc' and 'symbol' - if 'symbol_comments' in self.root_level_comment and \ - self.root_level_comment['symbol_comments'][ind] != '': - indent = depth * DEPTH_SIZE - self.write_out(indent, - self.root_level_comment['symbol_comments'][ind], file_out) - if 'symbol_doc_comments' in self.root_level_comment and \ - self.root_level_comment['symbol_doc_comments'][ind] != '': - - indent = depth * DEPTH_SIZE - self.write_out(indent, - self.root_level_comment['symbol_doc_comments'][ind], file_out) - - self.write_out(indent=(0 * DEPTH_SIZE), text=symbol, file_out=file_out) - if len(self.pi_comments) > 1: - indent = DEPTH_SIZE * depth - # The first comment is top level copy-right doc string - for comment in self.pi_comments[1:]: - self.write_out(indent, self.comvert_to_ymal_comment(indent, comment), file_out) - if self.root_level_definition: - # Soring NXname for writting end of the definition attributes - nx_name = '' - for defs in self.root_level_definition: - if 'NX' in defs and defs[-1] == ':': - nx_name = defs - continue - if defs in ("category: application", "category: base"): - continue - self.write_out(indent=0 * DEPTH_SIZE, text=defs, file_out=file_out) - self.write_out(indent=0 * DEPTH_SIZE, text=nx_name, file_out=file_out) - self.found_definition = False - - def handle_exists(self, exists_dict, key, val): - """ - Create exist 
component as folows: - - {'min' : value for min, - 'max' : value for max, - 'optional' : value for optional} - - This is created separately so that the keys stays in order. - """ - if not val: - val = '' - else: - val = str(val) - if 'minOccurs' == key: - exists_dict['minOccurs'] = ['min', val] - if 'maxOccurs' == key: - exists_dict['maxOccurs'] = ['max', val] - if 'optional' == key: - exists_dict['optional'] = ['optional', val] - if 'recommended' == key: - exists_dict['recommended'] = ['recommended', val] - if 'required' == key: - exists_dict['required'] = ['required', val] - - # pylint: disable=too-many-branches, consider-using-f-string - def handle_group_or_field(self, depth, node, file_out): - """Handle all the possible attributes that come along a field or group""" - - allowed_attr = ['optional', 'recommended', 'name', 'type', 'axes', 'axis', 'data_offset', - 'interpretation', 'long_name', 'maxOccurs', 'minOccurs', 'nameType', - 'optional', 'primary', 'signal', 'stride', 'units', 'required', - 'deprecated', 'exists'] - - name_type = "" - node_attr = node.attrib - rm_key_list = [] - # Maintain order: name and type in form name(type) or (type)name that come first - for key, val in node_attr.items(): - if key == 'name': - name_type = name_type + val - rm_key_list.append(key) - if key == 'type': - name_type = name_type + "(%s)" % val - rm_key_list.append(key) - if not name_type: - raise ValueError(f"No 'name' or 'type' hase been found. But, 'group' or 'field' " - f"must have at list a nme.We got attributes: {node_attr}") - file_out.write('{indent}{name_type}:\n'.format( - indent=depth * DEPTH_SIZE, - name_type=name_type)) - - for key in rm_key_list: - del node_attr[key] - - # tmp_dict intended to persevere order of attribnutes - tmp_dict = {} - exists_dict = {} - for key, val in node_attr.items(): - # As both 'minOccurs', 'maxOccurs' and optionality move to the 'exists' - if key in ['minOccurs', 'maxOccurs', 'optional', 'recommended', 'required']: - if 'exists' not in tmp_dict: - tmp_dict['exists'] = [] - self.handle_exists(exists_dict, key, val) - elif key == 'units': - tmp_dict['unit'] = str(val) - else: - tmp_dict[key] = str(val) - if key not in allowed_attr: - raise ValueError(f"An attribute ({key}) in 'field' or 'group' has been found " - f"that is not allowed. The allowed attr is {allowed_attr}.") - - if exists_dict: - for key, val in exists_dict.items(): - if key in ['minOccurs', 'maxOccurs']: - tmp_dict['exists'] = tmp_dict['exists'] + val - elif key in ['optional', 'recommended', 'required']: - tmp_dict['exists'] = key - - depth_ = depth + 1 - for key, val in tmp_dict.items(): - # Increase depth size inside handle_map...() for writting text with one - # more indentation. - file_out.write(f'{depth_ * DEPTH_SIZE}{key}: ' - f'{handle_mapping_char(val, depth_ + 1, False)}\n') - - # pylint: disable=too-many-branches, too-many-locals - def handle_dimension(self, depth, node, file_out): - """ - Handle the dimension field. - NOTE: Usually we take care of any xml element in xmlparse(...) and - recursion_in_xml_tree(...) functions. But Here it is a bit different. The doc dimension - and attributes of dim has been handled inside this function here. 
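The occurrence bookkeeping above folds the XML attributes into a single YAML 'exists' key; schematically:

```python
# Assumed attributes on a <field>: minOccurs="1" maxOccurs="2".
# handle_exists() collects {'minOccurs': ['min', '1'], 'maxOccurs': ['max', '2']}
# and handle_group_or_field() concatenates the values, emitting:
#     exists: [min, 1, max, 2]
# while a lone optional="true" collapses to:
#     exists: optional
```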
- """ - # pylint: disable=consider-using-f-string - possible_dim_attrs = ['ref', 'required', - 'incr', 'refindex'] - possible_dimemsion_attrs = ['rank'] - - # taking care of Dimension tag - file_out.write( - '{indent}{tag}:\n'.format( - indent=depth * DEPTH_SIZE, - tag=node.tag.split("}", 1)[1])) - # Taking care of dimension attributes - for attr, value in node.attrib.items(): - if attr in possible_dimemsion_attrs and not isinstance(value, dict): - indent = (depth + 1) * DEPTH_SIZE - file_out.write(f'{indent}{attr}: {value}\n') - else: - raise ValueError(f"Dimension has got an attribute {attr} that is not valid." - f"Current the allowd atributes are {possible_dimemsion_attrs}." - f" Please have a look") - # taking carew of dimension doc - for child in list(node): - tag = remove_namespace_from_tag(child.tag) - if tag == 'doc': - text = self.handle_not_root_level_doc(depth + 1, child.text) - file_out.write(text) - node.remove(child) - - dim_index_value = '' - dim_other_parts = {} - dim_cmnt_node = [] - # taking care of dim and doc childs of dimension - for child in list(node): - tag = remove_namespace_from_tag(child.tag) - child_attrs = child.attrib - # taking care of index and value attributes - if tag == ('dim'): - # taking care of index and value in format [[index, value]] - dim_index_value = dim_index_value + '[{index}, {value}], '.format( - index=child_attrs['index'] if "index" in child_attrs else '', - value=child_attrs['value'] if "value" in child_attrs else '') - if "index" in child_attrs: - del child_attrs["index"] - if "value" in child_attrs: - del child_attrs["value"] - - # Taking care of doc comes as child of dim - for cchild in list(child): - ttag = cchild.tag.split("}", 1)[1] - if ttag == ('doc'): - if ttag not in dim_other_parts: - dim_other_parts[ttag] = [] - text = cchild.text - dim_other_parts[ttag].append(text.strip()) - child.remove(cchild) - continue - # taking care of other attributes except index and value - for attr, value in child_attrs.items(): - if attr in possible_dim_attrs: - if attr not in dim_other_parts: - dim_other_parts[attr] = [] - dim_other_parts[attr].append(value) - if tag == CMNT_TAG and self.include_comment: - # Store and remove node so that comment nodes from dim node so - # that it does not call in xmlparser function - dim_cmnt_node.append(child) - node.remove(child) - - # All 'dim' element comments on top of 'dim' yaml key - if dim_cmnt_node: - for ch_nd in dim_cmnt_node: - self.handel_comment(depth + 1, ch_nd, file_out) - # index and value attributes of dim elements - file_out.write( - '{indent}dim: [{value}]\n'.format( - indent=(depth + 1) * DEPTH_SIZE, - value=dim_index_value[:-2] or '')) - # Write the attributes, except index and value, and doc of dim as child of dim_parameter. - # But tthe doc or attributes for each dim come inside list according to the order of dim. - if dim_other_parts: - file_out.write( - '{indent}dim_parameters:\n'.format( - indent=(depth + 1) * DEPTH_SIZE)) - # depth = depth + 2 dim_paramerter has child such as doc of dim - indent = (depth + 2) * DEPTH_SIZE - for key, value in dim_other_parts.items(): - if key == 'doc': - value = self.handle_not_root_level_doc(depth + 2, str(value), key, file_out) - else: - # Increase depth size inside handle_map...() for writting text with one - # more indentation. - file_out.write(f"{indent}{key}: " - f"{handle_mapping_char(value, depth + 3, False)}\n") - - def handle_enumeration(self, depth, node, file_out): - """ - Handle the enumeration field parsed from the xml file. 
- - If the enumeration items contain a doc field, the yaml file will contain items as child - fields of the enumeration field. - - If no doc are inherited in the enumeration items, a list of the items is given for the - enumeration list. - - """ - # pylint: disable=consider-using-f-string - - check_doc = [] - for child in list(node): - if list(child): - check_doc.append(list(child)) - # pylint: disable=too-many-nested-blocks - if check_doc: - file_out.write( - '{indent}{tag}: \n'.format( - indent=depth * DEPTH_SIZE, - tag=node.tag.split("}", 1)[1])) - for child in list(node): - tag = remove_namespace_from_tag(child.tag) - itm_depth = depth + 1 - if tag == ('item'): - file_out.write( - '{indent}{value}: \n'.format( - indent=(itm_depth) * DEPTH_SIZE, - value=child.attrib['value'])) - - if list(child): - for item_doc in list(child): - if remove_namespace_from_tag(item_doc.tag) == 'doc': - item_doc_depth = itm_depth + 1 - self.handle_not_root_level_doc(item_doc_depth, item_doc.text, - item_doc.tag, file_out) - if (remove_namespace_from_tag(item_doc.tag) == CMNT_TAG - and self.include_comment): - self.handel_comment(itm_depth + 1, item_doc, file_out) - if tag == CMNT_TAG and self.include_comment: - self.handel_comment(itm_depth + 1, child, file_out) - else: - enum_list = '' - remove_nodes = [] - for item_child in list(node): - tag = remove_namespace_from_tag(item_child.tag) - if tag == ('item'): - enum_list = enum_list + '{value}, '.format( - value=item_child.attrib['value']) - if tag == CMNT_TAG and self.include_comment: - self.handel_comment(depth, item_child, file_out) - remove_nodes.append(item_child) - for ch_node in remove_nodes: - node.remove(ch_node) - - file_out.write( - '{indent}{tag}: [{enum_list}]\n'.format( - indent=depth * DEPTH_SIZE, - tag=remove_namespace_from_tag(node.tag), - enum_list=enum_list[:-2] or '')) - - def handle_attributes(self, depth, node, file_out): - """Handle the attributes parsed from the xml file""" - - allowed_attr = ['name', 'type', 'units', 'nameType', 'recommended', 'optional', - 'minOccurs', 'maxOccurs', 'deprecated'] - - name = "" - node_attr = node.attrib - if 'name' in node_attr: - pass - else: - raise ValueError("Attribute must have an name key.") - rm_key_list = [] - # Maintain order: name and type in form name(type) or (type)name that come first - for key, val in node_attr.items(): - if key == 'name': - name = val - rm_key_list.append(key) - - for key in rm_key_list: - del node_attr[key] - - file_out.write('{indent}{escapesymbol}{name}:\n'.format( - indent=depth * DEPTH_SIZE, - escapesymbol=r'\@', - name=name)) - - tmp_dict = {} - exists_dict = {} - for key, val in node_attr.items(): - # As both 'minOccurs', 'maxOccurs' and optionality move to the 'exists' - if key in ['minOccurs', 'maxOccurs', 'optional', 'recommended', 'required']: - if 'exists' not in tmp_dict: - tmp_dict['exists'] = [] - self.handle_exists(exists_dict, key, val) - elif key == 'units': - tmp_dict['unit'] = val - else: - tmp_dict[key] = val - if key not in allowed_attr: - raise ValueError(f"An attribute ({key}) has been found that is not allowed." 
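The two renderings produced by the enumeration handler above, sketched on a hypothetical pair of items:

```python
# Items without docs are flattened into a list:
#     enumeration: [opt_a, opt_b]
# Items carrying a doc become child fields instead:
#     enumeration:
#       opt_a:
#         doc: |
#           Explanation of opt_a.
```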
- f"The allowed attr is {allowed_attr}.") - - has_min_max = False - has_opt_reco_requ = False - if exists_dict: - for key, val in exists_dict.items(): - if key in ['minOccurs', 'maxOccurs']: - tmp_dict['exists'] = tmp_dict['exists'] + val - has_min_max = True - elif key in ['optional', 'recommended', 'required']: - tmp_dict['exists'] = key - has_opt_reco_requ = True - if has_min_max and has_opt_reco_requ: - raise ValueError("Optionality 'exists' can take only either from ['minOccurs'," - " 'maxOccurs'] or from ['optional', 'recommended', 'required']" - ". But not from both of the groups together. Please check in" - " attributes") - - depth_ = depth + 1 - for key, val in tmp_dict.items(): - # Increase depth size inside handle_map...() for writting text with one - # more indentation. - file_out.write(f'{depth_ * DEPTH_SIZE}{key}: ' - f'{handle_mapping_char(val, depth_ + 1, False)}\n') - - def handel_link(self, depth, node, file_out): - """ - Handle link elements of nxdl - """ - - possible_link_attrs = ['name', 'target', 'napimount'] - node_attr = node.attrib - # Handle special cases - if 'name' in node_attr: - file_out.write('{indent}{name}(link):\n'.format( - indent=depth * DEPTH_SIZE, - name=node_attr['name'] or '')) - del node_attr['name'] - - depth_ = depth + 1 - # Handle general cases - for attr_key, val in node_attr.items(): - if attr_key in possible_link_attrs: - file_out.write('{indent}{attr}: {value}\n'.format( - indent=depth_ * DEPTH_SIZE, - attr=attr_key, - value=val)) - else: - raise ValueError(f"An anexpected attribute '{attr_key}' of link has found." - f"At this moment the alloed keys are {possible_link_attrs}") - - def handel_choice(self, depth, node, file_out): - """ - Handle choice element which is a parent node of group. - """ - - possible_attr = [] - - node_attr = node.attrib - # Handle special casees - if 'name' in node_attr: - file_out.write('{indent}{attr}(choice): \n'.format( - indent=depth * DEPTH_SIZE, - attr=node_attr['name'])) - del node_attr['name'] - - depth_ = depth + 1 - # Taking care of general attrinutes. Though, still no attrinutes have found, - # but could be used for future - for attr in node_attr.items(): - if attr in possible_attr: - file_out.write('{indent}{attr}: {value}\n'.format( - indent=depth_ * DEPTH_SIZE, - attr=attr, - value=node_attr[attr])) - else: - raise ValueError(f"An unexpected attribute '{attr}' of 'choice' has been found." - f"At this moment attributes for choice {possible_attr}") - - def handel_comment(self, depth, node, file_out): - """ - Collect comment element and pass to write_out function - """ - indent = depth * DEPTH_SIZE - if self.is_last_element_comment: - text = self.comvert_to_ymal_comment(indent, node.text) - self.write_out(indent, text, file_out) - else: - text = self.comvert_to_ymal_comment(indent, node.text) - self.write_out(indent, text, file_out) - self.is_last_element_comment = True - - def recursion_in_xml_tree(self, depth, xml_tree, output_yml, verbose): - """ - Descend lower level in xml tree. If we are in the symbols branch, the recursive - behaviour is not triggered as we already handled the symbols' childs. - """ - - tree = xml_tree['tree'] - node = xml_tree['node'] - for child in list(node): - xml_tree_children = {'tree': tree, 'node': child} - self.xmlparse(output_yml, xml_tree_children, depth, verbose) - - # pylint: disable=too-many-branches, too-many-statements - def xmlparse(self, output_yml, xml_tree, depth, verbose): - """ - Main of the nxdl2yaml converter. 
- It parses XML tree, then prints recursively each level of the tree - """ - tree = xml_tree['tree'] - node = xml_tree['node'] - if verbose: - sys.stdout.write(f'Node tag: {remove_namespace_from_tag(node.tag)}\n') - sys.stdout.write(f'Attributes: {node.attrib}\n') - with open(output_yml, "a", encoding="utf-8") as file_out: - tag = remove_namespace_from_tag(node.tag) - if tag == 'definition': - self.found_definition = True - self.handle_definition(node) - # Taking care of root level doc and symbols - remove_cmnt_n = None - last_comment = '' - for child in list(node): - tag_tmp = remove_namespace_from_tag(child.tag) - if tag_tmp == CMNT_TAG and self.include_comment: - last_comment = self.comvert_to_ymal_comment(depth * DEPTH_SIZE, child.text) - remove_cmnt_n = child - if tag_tmp == 'doc': - self.store_root_level_comments('root_doc', last_comment) - last_comment = '' - self.handle_root_level_doc(child) - node.remove(child) - if remove_cmnt_n is not None: - node.remove(remove_cmnt_n) - remove_cmnt_n = None - if tag_tmp == 'symbols': - self.store_root_level_comments('symbols', last_comment) - last_comment = '' - self.handle_symbols(depth, child) - node.remove(child) - if remove_cmnt_n is not None: - node.remove(remove_cmnt_n) - remove_cmnt_n = None - - if tag == ('doc') and depth != 1: - parent = get_node_parent_info(tree, node)[0] - doc_parent = remove_namespace_from_tag(parent.tag) - if doc_parent != 'item': - self.handle_not_root_level_doc(depth, text=node.text, - tag=node.tag, - file_out=file_out) - - if self.found_definition is True and self.root_level_doc: - self.print_root_level_info(depth, file_out) - # End of print root-level definitions in file - if tag in ('field', 'group') and depth != 0: - self.handle_group_or_field(depth, node, file_out) - if tag == ('enumeration'): - self.handle_enumeration(depth, node, file_out) - if tag == ('attribute'): - self.handle_attributes(depth, node, file_out) - if tag == ('dimensions'): - self.handle_dimension(depth, node, file_out) - if tag == ('link'): - self.handel_link(depth, node, file_out) - if tag == ('choice'): - self.handel_choice(depth, node, file_out) - if tag == CMNT_TAG and self.include_comment: - self.handel_comment(depth, node, file_out) - depth += 1 - # Write nested nodes - self.recursion_in_xml_tree(depth, xml_tree, output_yml, verbose) - - -def compare_niac_and_my(tree, tree2, verbose, node, root_no_duplicates): - """This function creates two trees with Niac XML file and My XML file. -The main aim is to compare the two trees and create a new one that is the -union of the two initial trees. - -""" - root = tree.getroot() - root2 = tree2.getroot() - attrs_list_niac = [] - for nodo in root.iter(node): - attrs_list_niac.append(nodo.attrib) - if verbose: - sys.stdout.write('Attributes found in Niac file: \n') - sys.stdout.write(str(attrs_list_niac) + '\n') - sys.stdout.write(' \n') - sys.stdout.write('Started merging of Niac and My file... 
\n') - for elem in root.iter(node): - if verbose: - sys.stdout.write('- Niac element inserted: \n') - sys.stdout.write(str(elem.attrib) + '\n') - index = get_node_parent_info(tree, elem)[1] - root_no_duplicates.insert(index, elem) - - for elem2 in root2.iter(node): - index = get_node_parent_info(tree2, elem2)[1] - if elem2.attrib not in attrs_list_niac: - if verbose: - sys.stdout.write('- My element inserted: \n') - sys.stdout.write(str(elem2.attrib) + '\n') - root_no_duplicates.insert(index, elem2) - - if verbose: - sys.stdout.write(' \n') - return root_no_duplicates diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py deleted file mode 100644 index db4d4c464..000000000 --- a/pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py +++ /dev/null @@ -1,1161 +0,0 @@ -#!/usr/bin/env python3 -"""Creates an instantiated NXDL schema XML tree by walking the dictionary nest - -""" -# -*- coding: utf-8 -*- -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import sys -import xml.etree.ElementTree as ET -from xml.dom import minidom -import os -import textwrap - -import yaml - -from pynxtools.nexus import nexus -from pynxtools.nyaml2nxdl.comment_collector import CommentCollector -from pynxtools.dataconverter.helpers import remove_namespace_from_tag -from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_yaml_escape_char_reverter_dict, - nx_name_type_resolving, - cleaning_empty_lines, LineLoader) - - -# pylint: disable=too-many-lines, global-statement, invalid-name -DOM_COMMENT = ("\n" - "# NeXus - Neutron and X-ray Common Data Format\n" - "# \n" - "# Copyright (C) 2014-2022 NeXus International Advisory Committee (NIAC)\n" - "# \n" - "# This library is free software; you can redistribute it and/or\n" - "# modify it under the terms of the GNU Lesser General Public\n" - "# License as published by the Free Software Foundation; either\n" - "# version 3 of the License, or (at your option) any later version.\n" - "#\n" - "# This library is distributed in the hope that it will be useful,\n" - "# but WITHOUT ANY WARRANTY; without even the implied warranty of\n" - "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n" - "# Lesser General Public License for more details.\n" - "#\n" - "# You should have received a copy of the GNU Lesser General Public\n" - "# License along with this library; if not, write to the Free Software\n" - "# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n" - "#\n" - "# For further information, see http://www.nexusformat.org\n") -NX_CLSS = nexus.get_nx_classes() -NX_NEW_DEFINED_CLASSES = ['NX_COMPLEX'] -NX_TYPE_KEYS = nexus.get_nx_attribute_type() -NX_ATTR_IDNT = '\\@' -NX_UNIT_IDNT = 'unit' -DEPTH_SIZE = " " -NX_UNIT_TYPES = nexus.get_nx_units() -COMMENT_BLOCKS: CommentCollector -CATEGORY = '' # Definition would be either 'base' or 'application' - - -def check_for_dom_comment_in_yaml(): - """Check the yaml file has dom comment or dom comment needed to be hard coded. - """ - dignature_keyword_list = ['NeXus', - 'GNU Lesser General Public', - 'Free Software Foundation', - 'Copyright (C)', - 'WITHOUT ANY WARRANTY'] - - # Check for dom comments in first three comments - dom_comment = '' - dom_comment_ind = 1 - for ind, comnt in enumerate(COMMENT_BLOCKS[0:5]): - cmnt_list = comnt.get_comment_text() - if len(cmnt_list) == 1: - text = cmnt_list[0] - else: - continue - dom_comment = text - dom_comment_ind = ind - for keyword in dignature_keyword_list: - if keyword not in text: - dom_comment = '' - break - if dom_comment: - break - - # deactivate the root dom_comment, So that the corresponding comment would not be - # considered as comment for definition xml element. - if dom_comment: - COMMENT_BLOCKS.remove_comment(dom_comment_ind) - - return dom_comment - - -def yml_reader(inputfile): - """ - This function launches the LineLoader class. - It parses the yaml in a dict and extends it with line tag keys for each key of the dict. - """ - global COMMENT_BLOCKS - with open(inputfile, "r", encoding="utf-8") as plain_text_yaml: - loader = LineLoader(plain_text_yaml) - loaded_yaml = loader.get_single_data() - COMMENT_BLOCKS = CommentCollector(inputfile, loaded_yaml) - COMMENT_BLOCKS.extract_all_comment_blocks() - dom_cmnt_frm_yaml = check_for_dom_comment_in_yaml() - global DOM_COMMENT - if dom_cmnt_frm_yaml: - DOM_COMMENT = dom_cmnt_frm_yaml - - if 'category' not in loaded_yaml.keys(): - raise ValueError("All definitions should be either 'base' or 'application' category. " - "No category has been found.") - global CATEGORY - CATEGORY = loaded_yaml['category'] - return loaded_yaml - - -def check_for_default_attribute_and_value(xml_element): - """NeXus Groups, fields and attributes might have xml default attributes and valuesthat must - come. For example: 'optional' which is 'true' by default for base class and false otherwise. 
- """ - - # base:Default attributes and value for all elements of base class except dimension element - base_attr_to_val = {'optional': 'true'} - - # application: Default attributes and value for all elements of application class except - # dimension element - application_attr_to_val = {'optional': 'false'} - - # Default attributes and value for dimension element - base_dim_attr_to_val = {'required': 'false'} - application_dim_attr_to_val = {'required': 'true'} - - # Eligible tag for default attr and value - elegible_tag = ['group', 'field', 'attribute'] - - def set_default_attribute(xml_elem, default_attr_to_val): - for deflt_attr, deflt_val in default_attr_to_val.items(): - if deflt_attr not in xml_elem.attrib \ - and 'maxOccurs' not in xml_elem.attrib \ - and 'minOccurs' not in xml_elem.attrib \ - and 'recommended' not in xml_elem.attrib: - xml_elem.set(deflt_attr, deflt_val) - - for child in list(xml_element): - # skiping comment 'function' that mainly collect comment from yaml file. - if not isinstance(child.tag, str): - continue - tag = remove_namespace_from_tag(child.tag) - - if tag == 'dim' and CATEGORY == 'base': - set_default_attribute(child, base_dim_attr_to_val) - if tag == 'dim' and CATEGORY == 'application': - set_default_attribute(child, application_dim_attr_to_val) - if tag in elegible_tag and CATEGORY == 'base': - set_default_attribute(child, base_attr_to_val) - if tag in elegible_tag and CATEGORY == 'application': - - set_default_attribute(child, application_attr_to_val) - check_for_default_attribute_and_value(child) - - -def yml_reader_nolinetag(inputfile): - """ - pyyaml based parsing of yaml file in python dict - """ - with open(inputfile, 'r', encoding="utf-8") as stream: - parsed_yaml = yaml.safe_load(stream) - return parsed_yaml - - -def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=False): - """ - Check for any attributes have been skipped or not. - NOTE: We should keep in mind about 'doc' - """ - block_tag = ['enumeration'] - if value: - for attr, val in value.items(): - if attr in ['doc']: - continue - if '__line__' in attr or attr in block_tag: - continue - line_number = f'__line__{attr}' - if verbose: - print(f"__line__ : {value[line_number]}") - if not isinstance(val, dict) \ - and '\\@' not in attr\ - and attr not in allowed_attr\ - and 'NX' not in attr and val: - - raise ValueError(f"An attribute '{attr}' in part '{component}' has been found" - f". Please check arround line '{value[line_number]}. At this " - f"moment. The allowed attrbutes are {allowed_attr}") - - -def format_nxdl_doc(string): - """NeXus format for doc string - """ - string = check_for_mapping_char_other(string) - formatted_doc = '' - if "\n" not in string: - if len(string) > 80: - wrapped = textwrap.TextWrapper(width=80, - break_long_words=False, - replace_whitespace=False) - string = '\n'.join(wrapped.wrap(string)) - formatted_doc = '\n' + f"{string}" - else: - text_lines = string.split('\n') - text_lines = cleaning_empty_lines(text_lines) - formatted_doc += "\n" + "\n".join(text_lines) - if not formatted_doc.endswith("\n"): - formatted_doc += "\n" - return formatted_doc - - -def check_for_mapping_char_other(text): - """ - Check for mapping char \':\' which does not be passed through yaml library. - Then replace it by ':'. - """ - if not text: - text = '' - text = str(text) - if text == 'True': - text = 'true' - if text == 'False': - text = 'false' - # Some escape char is not valid in yaml libray which is written while writting - # yaml file. 
In the time of writting nxdl revert to that escape char. - escape_reverter = get_yaml_escape_char_reverter_dict() - for key, val in escape_reverter.items(): - if key in text: - text = text.replace(key, val) - return str(text).strip() - - -def xml_handle_doc(obj, value: str, - line_number=None, line_loc=None): - """This function creates a 'doc' element instance, and appends it to an existing element - - """ - # global comment_bolcks - doc_elemt = ET.SubElement(obj, 'doc') - text = format_nxdl_doc(check_for_mapping_char_other(value)).strip() - # To keep the doc middle of doc tag. - doc_elemt.text = f"\n{text}\n" - if line_loc is not None and line_number is not None: - xml_handle_comment(obj, line_number, - line_loc, doc_elemt) - - -def xml_handle_units(obj, value): - """This function creates a 'units' element instance, and appends it to an existing element - - """ - obj.set('units', str(value)) - - -# pylint: disable=too-many-branches -def xml_handle_exists(dct, obj, keyword, value): - """ - This function creates an 'exists' element instance, and appends it to an existing element - """ - line_number = f'__line__{keyword}' - assert value is not None, f'Line {dct[line_number]}: exists argument must not be None !' - if isinstance(value, list): - if len(value) == 4 and value[0] == 'min' and value[2] == 'max': - obj.set('minOccurs', str(value[1])) - if str(value[3]) != 'infty': - obj.set('maxOccurs', str(value[3])) - else: - obj.set('maxOccurs', 'unbounded') - elif len(value) == 2 and value[0] == 'min': - obj.set('minOccurs', str(value[1])) - elif len(value) == 2 and value[0] == 'max': - obj.set('maxOccurs', str(value[1])) - elif len(value) == 4 and value[0] == 'max' and value[2] == 'min': - obj.set('minOccurs', str(value[3])) - if str(value[1]) != 'infty': - obj.set('maxOccurs', str(value[3])) - else: - obj.set('maxOccurs', 'unbounded') - elif len(value) == 4 and (value[0] != 'min' or value[2] != 'max'): - raise ValueError(f'Line {dct[line_number]}: exists keyword' - f'needs to go either with an optional [recommended] list with two ' - f'entries either [min, ] or [max, ], or a list of four ' - f'entries [min, , max, ] !') - else: - raise ValueError(f'Line {dct[line_number]}: exists keyword ' - f'needs to go either with optional, recommended, a list with two ' - f'entries either [min, ] or [max, ], or a list of four ' - f'entries [min, , max, ] !') - else: - # This clause take optional in all concept except dimension where 'required' key is allowed - # not the 'optional' key. - if value == 'optional': - obj.set('optional', 'true') - elif value == 'recommended': - obj.set('recommended', 'true') - elif value == 'required': - obj.set('optional', 'false') - else: - obj.set('minOccurs', '0') - - -# pylint: disable=too-many-branches, too-many-locals, too-many-statements -def xml_handle_group(dct, obj, keyword, value, verbose=False): - """ - The function deals with group instances - """ - line_number = f'__line__{keyword}' - line_loc = dct[line_number] - xml_handle_comment(obj, line_number, line_loc) - list_of_attr = ['name', 'type', 'nameType', 'deprecated', 'optional', 'recommended', - 'exists', 'unit'] - l_bracket = -1 - r_bracket = -1 - if keyword.count('(') == 1: - l_bracket = keyword.index('(') - if keyword.count(')') == 1: - r_bracket = keyword.index(')') - - keyword_name, keyword_type = nx_name_type_resolving(keyword) - if not keyword_name and not keyword_type: - raise ValueError("A group must have both value and name. 
Check for group.") - grp = ET.SubElement(obj, 'group') - - if l_bracket == 0 and r_bracket > 0: - grp.set('type', keyword_type) - if keyword_name: - grp.set('name', keyword_name) - elif l_bracket > 0: - grp.set('name', keyword_name) - if keyword_type: - grp.set('type', keyword_type) - else: - grp.set('name', keyword_name) - - if value: - rm_key_list = [] - for attr, vval in value.items(): - if '__line__' in attr: - continue - line_number = f"__line__{attr}" - line_loc = value[line_number] - if attr == 'doc': - xml_handle_doc(grp, vval, line_number, line_loc) - rm_key_list.append(attr) - rm_key_list.append(line_number) - elif attr == 'exists' and vval: - xml_handle_exists(value, grp, attr, vval) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, - line_number, line_loc, grp) - elif attr == 'unit': - xml_handle_units(grp, vval) - xml_handle_comment(obj, line_number, line_loc, grp) - elif attr in list_of_attr and not isinstance(vval, dict) and vval: - validate_field_attribute_and_value(attr, vval, list_of_attr, value) - grp.set(attr, check_for_mapping_char_other(vval)) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, grp) - - for key in rm_key_list: - del value[key] - # Check for skipped attrinutes - check_for_skiped_attributes('group', value, list_of_attr, verbose) - if isinstance(value, dict) and value != {}: - recursive_build(grp, value, verbose) - - -def xml_handle_dimensions(dct, obj, keyword, value: dict): - """ - This function creates a 'dimensions' element instance, and appends it to an existing element - - NOTE: we could create xml_handle_dim() function. - But, the dim elements in yaml file is defined as 'dim =[[index, value]]' - but dim has other attributes such as 'ref' and also might have doc as chlid. - so in that sense 'dim' should have come as dict keeping attributes and child as members of - dict. - Regarding this situation all the attributes of 'dimensions' and child 'doc' has been - included here. - - Other attributes, except 'index' and 'value', of 'dim' comes under nested dict named - 'dim_parameter: - incr:[...]' - """ - - possible_dimension_attrs = ['rank'] # nxdl attributes - line_number = f'__line__{keyword}' - line_loc = dct[line_number] - assert 'dim' in value.keys(), (f"Line {line_loc}: No dim as child of dimension has " - f"been found.") - xml_handle_comment(obj, line_number, line_loc) - dims = ET.SubElement(obj, 'dimensions') - # Consider all the childs under dimension is dim element and - # its attributes - - rm_key_list = [] - rank = '' - for key, val in value.items(): - if '__line__' in key: - continue - line_number = f"__line__{key}" - line_loc = value[line_number] - if key == 'rank': - rank = val or '' - if isinstance(rank, int) and rank < 0: - raise ValueError(f"Dimension must have some info about rank which is not " - f"available. 
Please check arround Line: {dct[line_number]}") - dims.set(key, str(val)) - rm_key_list.append(key) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, dims) - # Check dimension doc and handle it - elif key == 'doc' and isinstance(val, str): - xml_handle_doc(dims, val, line_number, line_loc) - rm_key_list.append(key) - rm_key_list.append(line_number) - elif key in possible_dimension_attrs and not isinstance(val, dict): - dims.set(key, str(val)) - rm_key_list.append(key) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, dims) - - for key in rm_key_list: - del value[key] - - xml_handle_dim_from_dimension_dict(dct, dims, keyword, value, rank=False) - - if isinstance(value, dict) and value != {}: - recursive_build(dims, value, verbose=None) - - -# pylint: disable=too-many-locals, too-many-arguments -def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verbose=False): - """ - Handling dim element. - NOTE: The inputs 'keyword' and 'value' are as input for xml_handle_dimensions - function. please also read note in xml_handle_dimensions. - """ - - possible_dim_attrs = ['ref', 'incr', 'refindex', 'required'] - - # Some attributes might have equivalent name e.g. 'required' is correct one and - # 'optional' could be another name. Then change attribute to the correct one. - wrong_to_correct_attr = [('optional', 'required')] - header_line_number = f"__line__{keyword}" - dim_list = [] - rm_key_list = [] - # NOTE: dim doc and other attributes except 'index' and 'value' will come as list of value - # under dim_parameters - if not value: - return - rank = '' - # pylint: disable=too-many-nested-blocks - for attr, vvalue in value.items(): - if '__line__' in attr: - continue - line_number = f"__line__{attr}" - line_loc = value[line_number] - # dim comes in precedence - if attr == 'dim': - # dim consists of list of [index, value] - llist_ind_value = vvalue - assert isinstance(llist_ind_value, list), (f'Line {value[line_number]}: dim' - f'argument not a list !') - xml_handle_comment(dims_obj, line_number, line_loc) - if isinstance(rank, int) and rank > 0: - assert rank == len(llist_ind_value), ( - f"Wrong dimension rank check around Line {dct[header_line_number]}.\n" - f"Line {[dct[header_line_number]]} rank value {rank} " - f"is not the same as dim array = " - f"{len(llist_ind_value)}.") - # Taking care of ind and value that comes as list of list - for dim_ind_val in llist_ind_value: - dim = ET.SubElement(dims_obj, 'dim') - - # Taking care of multidimensions or rank - if len(dim_ind_val) >= 1 and dim_ind_val[0]: - dim.set('index', str(dim_ind_val[0])) - if len(dim_ind_val) == 2 and dim_ind_val[1]: - dim.set('value', str(dim_ind_val[1])) - dim_list.append(dim) - rm_key_list.append(attr) - rm_key_list.append(line_number) - elif attr == 'dim_parameters' and isinstance(vvalue, dict): - xml_handle_comment(dims_obj, line_number, line_loc) - for kkkey, vvval in vvalue.items(): - if '__line__' in kkkey: - continue - cmnt_number = f'__line__{kkkey}' - cmnt_loc = vvalue[cmnt_number] - # Check whether any optional attributes added - for tuple_wng_crt in wrong_to_correct_attr: - if kkkey == tuple_wng_crt[0]: - raise ValueError(f"{cmnt_loc}: Attribute '{kkkey}' is prohibited, use " - f"'{tuple_wng_crt[1]}") - if kkkey == 'doc' and dim_list: - # doc comes as list of doc - for i, dim in enumerate(dim_list): - if isinstance(vvval, list) and i < len(vvval): - tmp_val = vvval[i] - xml_handle_doc(dim, vvval[i], cmnt_number, cmnt_loc) - # Check 
all the dim have doc if not skip - elif isinstance(vvval, list) and i >= len(vvval): - pass - else: - for i, dim in enumerate(dim_list): - # all atribute of dims comes as list - if isinstance(vvval, list) and i < len(vvval): - tmp_val = vvval[i] - dim.set(kkkey, str(tmp_val)) - - # Check all the dim have doc if not skip - elif isinstance(vvval, list) and i >= len(vvval): - pass - # All dim might have the same value for the same attribute - elif not isinstance(vvval, list): - tmp_val = value - dim.set(kkkey, str(tmp_val)) - rm_key_list.append(attr) - rm_key_list.append(line_number) - else: - raise ValueError(f"Got unexpected block except 'dim' and 'dim_parameters'." - f"Please check arround line {line_number}") - - for key in rm_key_list: - del value[key] - - check_for_skiped_attributes('dim', value, possible_dim_attrs, verbose) - - -def xml_handle_enumeration(dct, obj, keyword, value, verbose): - """This function creates an 'enumeration' element instance. - - Two cases are handled: - 1) the items are in a list - 2) the items are dictionaries and may contain a nested doc - """ - line_number = f'__line__{keyword}' - line_loc = dct[line_number] - xml_handle_comment(obj, line_number, line_loc) - enum = ET.SubElement(obj, 'enumeration') - - assert value is not None, f'Line {line_loc}: enumeration must \ -bear at least an argument !' - assert len( - value) >= 1, f'Line {dct[line_number]}: enumeration must not be an empty list!' - if isinstance(value, list): - for element in value: - itm = ET.SubElement(enum, 'item') - itm.set('value', str(element)) - if isinstance(value, dict) and value != {}: - for element in value.keys(): - if '__line__' not in element: - itm = ET.SubElement(enum, 'item') - itm.set('value', str(element)) - if isinstance(value[element], dict): - recursive_build(itm, value[element], verbose) - - -# pylint: disable=unused-argument -def xml_handle_link(dct, obj, keyword, value, verbose): - """ - If we have an NXDL link we decode the name attribute from (link)[:-6] - """ - - line_number = f"__line__{keyword}" - line_loc = dct[line_number] - xml_handle_comment(obj, line_number, line_loc) - possible_attrs = ['name', 'target', 'napimount'] - name = keyword[:-6] - link_obj = ET.SubElement(obj, 'link') - link_obj.set('name', str(name)) - - if value: - rm_key_list = [] - for attr, vval in value.items(): - if '__line__' in attr: - continue - line_number = f"__line__{attr}" - line_loc = value[line_number] - if attr == 'doc': - xml_handle_doc(link_obj, vval, line_number, line_loc) - rm_key_list.append(attr) - rm_key_list.append(line_number) - elif attr in possible_attrs and not isinstance(vval, dict): - if vval: - link_obj.set(attr, str(vval)) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, link_obj) - - for key in rm_key_list: - del value[key] - # Check for skipped attrinutes - check_for_skiped_attributes('link', value, possible_attrs, verbose) - - if isinstance(value, dict) and value != {}: - recursive_build(link_obj, value, verbose=None) - - -def xml_handle_choice(dct, obj, keyword, value, verbose=False): - """ - Build choice xml elements. That consists of groups. - """ - line_number = f'__line__{keyword}' - line_loc = dct[line_number] - xml_handle_comment(obj, line_number, line_loc) - # Add attributes in possible if new attributs have been added nexus definition. 
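The docstring above names two accepted YAML spellings for enumerations: a plain list of items, and a mapping whose items carry a nested doc. A minimal, self-contained sketch of how both forms end up as `<enumeration>`/`<item>` elements (the helper name `build_enumeration` is illustrative only, not part of the converter):

```python
import xml.etree.ElementTree as ET


def build_enumeration(parent, value):
    """Sketch of xml_handle_enumeration for the two supported YAML forms."""
    enum = ET.SubElement(parent, 'enumeration')
    if isinstance(value, list):  # case 1: plain list of items
        for element in value:
            ET.SubElement(enum, 'item').set('value', str(element))
    elif isinstance(value, dict):  # case 2: items with a nested doc
        for name, body in value.items():
            itm = ET.SubElement(enum, 'item')
            itm.set('value', str(name))
            if isinstance(body, dict) and 'doc' in body:
                ET.SubElement(itm, 'doc').text = body['doc']
    return enum


root = ET.Element('field', name='detector_type')
build_enumeration(root, ['PMT', 'photodiode', 'other'])
print(ET.tostring(root, encoding='unicode'))
```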
- possible_attr = [] - choice_obj = ET.SubElement(obj, 'choice') - # take care of special attributes - name = keyword[:-8] - choice_obj.set('name', name) - - if value: - rm_key_list = [] - for attr, vval in value.items(): - if '__line__' in attr: - continue - line_number = f"__line__{attr}" - line_loc = value[line_number] - if attr == 'doc': - xml_handle_doc(choice_obj, vval, line_number, line_loc) - rm_key_list.append(attr) - rm_key_list.append(line_number) - elif attr in possible_attr and not isinstance(vval, dict): - if vval: - choice_obj.set(attr, str(vval)) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, choice_obj) - - for key in rm_key_list: - del value[key] - # Check for skipped attrinutes - check_for_skiped_attributes('choice', value, possible_attr, verbose) - - if isinstance(value, dict) and value != {}: - recursive_build(choice_obj, value, verbose=None) - - -def xml_handle_symbols(dct, obj, keyword, value: dict): - """Handle a set of NXDL symbols as a child to obj - - """ - line_number = f'__line__{keyword}' - line_loc = dct[line_number] - assert len(list(value.keys()) - ) >= 1, f'Line {line_loc}: symbols table must not be empty !' - xml_handle_comment(obj, line_number, line_loc) - syms = ET.SubElement(obj, 'symbols') - if 'doc' in value.keys(): - line_number = '__line__doc' - line_loc = value[line_number] - xml_handle_comment(syms, line_number, line_loc) - doctag = ET.SubElement(syms, 'doc') - doctag.text = '\n' + textwrap.fill(value['doc'], width=70) + '\n' - rm_key_list = [] - for kkeyword, vvalue in value.items(): - if '__line__' in kkeyword: - continue - if kkeyword != 'doc': - line_number = f'__line__{kkeyword}' - line_loc = value[line_number] - xml_handle_comment(syms, line_number, line_loc) - assert vvalue is not None and isinstance( - vvalue, str), f'Line {line_loc}: put a comment in doc string !' 
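For `xml_handle_symbols`, the mapping from a YAML symbols table to the `<symbols>` element can be sketched as follows (a self-contained illustration; the symbol names are borrowed from the test data further below):

```python
import textwrap
import xml.etree.ElementTree as ET

symbols_yaml = {
    'doc': 'Variables used throughout the document',
    'N_wavelength': 'Size of the energy / wavelength vector used',
    'N_angles': 'Number of incident angles used',
}

root = ET.Element('definition')
syms = ET.SubElement(root, 'symbols')
# The table-level doc is wrapped to 70 columns, as in the handler above.
doc = ET.SubElement(syms, 'doc')
doc.text = '\n' + textwrap.fill(symbols_yaml['doc'], width=70) + '\n'
for name, description in symbols_yaml.items():
    if name == 'doc':
        continue
    sym = ET.SubElement(syms, 'symbol')
    sym.set('name', name)
    ET.SubElement(sym, 'doc').text = description

print(ET.tostring(root, encoding='unicode'))
```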
- sym = ET.SubElement(syms, 'symbol') - sym.set('name', str(kkeyword)) - # sym_doc = ET.SubElement(sym, 'doc') - xml_handle_doc(sym, vvalue) - rm_key_list.append(kkeyword) - rm_key_list.append(line_number) - # sym_doc.text = '\n' + textwrap.fill(vvalue, width=70) + '\n' - for key in rm_key_list: - del value[key] - - -def check_keyword_variable(verbose, dct, keyword, value): - """ - Check whether both keyword_name and keyword_type are empty, - and complains if it is the case - """ - keyword_name, keyword_type = nx_name_type_resolving(keyword) - if verbose: - sys.stdout.write( - f'{keyword_name}({keyword_type}): value type is {type(value)}\n') - if keyword_name == '' and keyword_type == '': - line_number = f'__line__{keyword}' - raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !') - - -def helper_keyword_type(kkeyword_type): - """ - This function is returning a value of keyword_type if it belong to NX_TYPE_KEYS - """ - if kkeyword_type in NX_TYPE_KEYS: - return kkeyword_type - return None - - -def verbose_flag(verbose, keyword, value): - """ - Verbose stdout printing for nested levels of yaml file, if verbose flag is active - """ - if verbose: - sys.stdout.write(f' key:{keyword}; value type is {type(value)}\n') - - -def xml_handle_attributes(dct, obj, keyword, value, verbose): - """Handle the attributes found connected to attribute field""" - - line_number = f"__line__{keyword}" - line_loc = dct[line_number] - xml_handle_comment(obj, line_number, line_loc) - # list of possible attribute of xml attribute elementsa - attr_attr_list = ['name', 'type', 'unit', 'nameType', - 'optional', 'recommended', 'minOccurs', - 'maxOccurs', 'deprecated', 'exists'] - # as an attribute identifier - keyword_name, keyword_typ = nx_name_type_resolving(keyword) - line_number = f'__line__{keyword}' - if verbose: - print(f"__line__ : {dct[line_number]}") - if keyword_name == '' and keyword_typ == '': - raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !') - elemt_obj = ET.SubElement(obj, 'attribute') - elemt_obj.set('name', keyword_name[2:]) - if keyword_typ: - elemt_obj.set('type', keyword_typ) - - rm_key_list = [] - if value and value: - # taking care of attributes of attributes - for attr, attr_val in value.items(): - if '__line__' in attr: - continue - line_number = f"__line__{attr}" - line_loc = value[line_number] - if attr in ['doc', *attr_attr_list] and not isinstance(attr_val, dict): - if attr == 'unit': - elemt_obj.set(f"{attr}s", str(value[attr])) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, elemt_obj) - elif attr == 'exists' and attr_val: - xml_handle_exists(value, elemt_obj, attr, attr_val) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, elemt_obj) - elif attr == 'doc': - xml_handle_doc(elemt_obj, format_nxdl_doc(attr_val), - line_number, line_loc) - rm_key_list.append(attr) - rm_key_list.append(line_number) - else: - elemt_obj.set(attr, check_for_mapping_char_other(attr_val)) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, line_number, line_loc, elemt_obj) - - for key in rm_key_list: - del value[key] - # Check cor skiped attribute - check_for_skiped_attributes('Attribute', value, attr_attr_list, verbose) - if value: - recursive_build(elemt_obj, value, verbose) - - -def validate_field_attribute_and_value(v_attr, vval, allowed_attribute, value): - """ - Check for any attributes that comes with 
invalid name, - and invalid value. - """ - - # check for empty val - if (not isinstance(vval, dict) - and not str(vval)): # check for empty value - - line_number = f"__line__{v_attr}" - raise ValueError(f"In a field a valid attrbute ('{v_attr}') found that is not stored." - f" Please check arround line {value[line_number]}") - - # The bellow elements might come as child element - skipped_child_name = ['doc', 'dimension', 'enumeration', 'choice', 'exists'] - # check for invalid key or attributes - if (v_attr not in [*skipped_child_name, *allowed_attribute] - and '__line__' not in v_attr - and not isinstance(vval, dict) - and '(' not in v_attr # skip only groups and field that has name and type - and '\\@' not in v_attr): # skip nexus attributes - - line_number = f"__line__{v_attr}" - raise ValueError(f"In a field or group a invalid attribute ('{v_attr}') or child has found." - f" Please check arround line {value[line_number]}.") - - -def xml_handle_fields(obj, keyword, value, line_annot, line_loc, verbose=False): - """ - Handle a field in yaml file. - When a keyword is NOT: - symbol, - NX baseclass member, - attribute (\\@), - doc, - enumerations, - dimension, - exists, - then the not empty keyword_name is a field! - This simple function will define a new node of xml tree - """ - # List of possible attributes of xml elements - allowed_attr = ['name', 'type', 'nameType', 'unit', 'minOccurs', 'long_name', - 'axis', 'signal', 'deprecated', 'axes', 'exists', - 'data_offset', 'interpretation', 'maxOccurs', - 'primary', 'recommended', 'optional', 'stride'] - - xml_handle_comment(obj, line_annot, line_loc) - l_bracket = -1 - r_bracket = -1 - if keyword.count('(') == 1: - l_bracket = keyword.index('(') - if keyword.count(')') == 1: - r_bracket = keyword.index(')') - - keyword_name, keyword_type = nx_name_type_resolving(keyword) - if not keyword_type and not keyword_name: - raise ValueError("Check for name or type in field.") - elemt_obj = ET.SubElement(obj, 'field') - - # type come first - if l_bracket == 0 and r_bracket > 0: - elemt_obj.set('type', keyword_type) - if keyword_name: - elemt_obj.set('name', keyword_name) - elif l_bracket > 0: - elemt_obj.set('name', keyword_name) - if keyword_type: - elemt_obj.set('type', keyword_type) - else: - elemt_obj.set('name', keyword_name) - - if value: - rm_key_list = [] - # In each each if clause apply xml_handle_comment(), to collect - # comments on that yaml line. 
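The bracket bookkeeping in `xml_handle_fields` (and `xml_handle_group`) rests on `nx_name_type_resolving` from `nyaml2nxdl_helper.py` further below. A trimmed re-implementation for illustration only (`split_name_type` is a demo helper, not library code):

```python
def split_name_type(keyword):
    """Demo of the name(type) / (type)name resolution used for fields."""
    if keyword.count('(') == 1 and keyword.count(')') == 1:
        start = keyword.index('(')
        end = keyword.index(')', start + 1)
        typ = keyword[start + 1:end]
        return keyword.replace(f'({typ})', ''), typ
    return keyword, ''


print(split_name_type('calibration_data(NX_NUMBER)'))  # ('calibration_data', 'NX_NUMBER')
print(split_name_type('(NX_DATE_TIME)start_time'))     # ('start_time', 'NX_DATE_TIME')
print(split_name_type('experiment_identifier'))        # ('experiment_identifier', '')
```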
- for attr, vval in value.items(): - if '__line__' in attr: - continue - line_number = f"__line__{attr}" - line_loc = value[line_number] - if attr == 'doc': - xml_handle_doc(elemt_obj, vval, line_number, line_loc,) - rm_key_list.append(attr) - rm_key_list.append(line_number) - elif attr == 'exists' and vval: - xml_handle_exists(value, elemt_obj, attr, vval) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, - line_number, - line_loc, elemt_obj) - elif attr == 'unit': - xml_handle_units(elemt_obj, vval) - xml_handle_comment(obj, - line_number, - line_loc, elemt_obj) - elif attr in allowed_attr and not isinstance(vval, dict) and vval: - validate_field_attribute_and_value(attr, vval, allowed_attr, value) - elemt_obj.set(attr, check_for_mapping_char_other(vval)) - rm_key_list.append(attr) - rm_key_list.append(line_number) - xml_handle_comment(obj, - line_number, - line_loc, elemt_obj) - - for key in rm_key_list: - del value[key] - # Check for skipped attrinutes - check_for_skiped_attributes('field', value, allowed_attr, verbose) - - if isinstance(value, dict) and value != {}: - recursive_build(elemt_obj, value, verbose) - - -def xml_handle_comment(obj: ET.Element, - line_annotation: str, - line_loc_no: int, - xml_ele: ET.Element = None, - is_def_cmnt: bool = False): - """ - Add xml comment: check for comments that has the same 'line_annotation' - (e.g. __line__data) and the same line_loc_no (e.g. 30). After that, i - does of three tasks: - 1. Returns list of comments texts (multiple members if element has multiple comments) - 2. Rearrange comment element and xml_ele where comment comes first. - 3. Append comment element when no xml_ele will no be provided. - """ - - line_info = (line_annotation, int(line_loc_no)) - if line_info in COMMENT_BLOCKS: - cmnt = COMMENT_BLOCKS.get_coment_by_line_info(line_info) - cmnt_text = cmnt.get_comment_text() - - if is_def_cmnt: - return cmnt_text - if xml_ele is not None: - obj.remove(xml_ele) - for string in cmnt_text: - si_comnt = ET.Comment(string) - obj.append(si_comnt) - obj.append(xml_ele) - elif not is_def_cmnt and xml_ele is None: - for string in cmnt_text: - si_comnt = ET.Comment(string) - obj.append(si_comnt) - else: - raise ValueError("Provied correct parameter values.") - return '' - - -def recursive_build(obj, dct, verbose): - """obj is the current node of the XML tree where we want to append to, - dct is a dictionary object which represents the content of a child to obj - dct may contain further dictionary nests, representing NXDL groups, - which trigger recursive processing - NXDL fields may contain attributes but trigger no recursion so attributes are leafs. 
-
-    """
-    for keyword, value in iter(dct.items()):
-        if '__line__' in keyword:
-            continue
-        line_number = f"__line__{keyword}"
-        line_loc = dct[line_number]
-        keyword_name, keyword_type = nx_name_type_resolving(keyword)
-        check_keyword_variable(verbose, dct, keyword, value)
-        if verbose:
-            sys.stdout.write(
-                f'keyword_name:{keyword_name} keyword_type {keyword_type}\n')
-
-        if keyword[-6:] == '(link)':
-            xml_handle_link(dct, obj, keyword, value, verbose)
-        elif keyword[-8:] == '(choice)':
-            xml_handle_choice(dct, obj, keyword, value)
-        # The below xml_handle_symbols clause is for symbols that come under field
-        # or attributes. Root-level symbols are handled inside nyaml2nxdl().
-        elif keyword_type == '' and keyword_name == 'symbols':
-            xml_handle_symbols(dct, obj, keyword, value)
-
-        elif ((keyword_type in NX_CLSS) or (keyword_type not in
-                                            [*NX_TYPE_KEYS, '', *NX_NEW_DEFINED_CLASSES])):
-            # we can be sure we need to instantiate a new group
-            xml_handle_group(dct, obj, keyword, value, verbose)
-
-        elif keyword_name[0:2] == NX_ATTR_IDNT:  # check if obj qualifies
-            xml_handle_attributes(dct, obj, keyword, value, verbose)
-        elif keyword == 'doc':
-            xml_handle_doc(obj, value, line_number, line_loc)
-        elif keyword == NX_UNIT_IDNT:
-            xml_handle_units(obj, value)
-        elif keyword == 'enumeration':
-            xml_handle_enumeration(dct, obj, keyword, value, verbose)
-
-        elif keyword == 'dimensions':
-            xml_handle_dimensions(dct, obj, keyword, value)
-
-        elif keyword == 'exists':
-            xml_handle_exists(dct, obj, keyword, value)
-        # Handles fields, e.g. AXISNAME
-        elif keyword_name != '' and '__line__' not in keyword_name:
-            xml_handle_fields(obj, keyword,
-                              value, line_number,
-                              line_loc, verbose)
-        else:
-            raise ValueError(f"An unfamiliar type of element {keyword} has been found "
-                             f"which could not be resolved. Check around line "
-                             f"{dct[line_number]}")
-
-
-def pretty_print_xml(xml_root, output_xml, def_comments=None):
-    """
-    Print a better human-readable, indented and formatted xml file using
-    built-in libraries and a preceding XML processing instruction
-    """
-    dom = minidom.parseString(ET.tostring(
-        xml_root, encoding='utf-8', method='xml'))
-    proc_instruction = dom.createProcessingInstruction(
-        'xml-stylesheet', 'type="text/xsl" href="nxdlformat.xsl"')
-    dom_comment = dom.createComment(DOM_COMMENT)
-    root = dom.firstChild
-    dom.insertBefore(proc_instruction, root)
-    dom.insertBefore(dom_comment, root)
-
-    if def_comments:
-        for string in def_comments:
-            def_comt_ele = dom.createComment(string)
-            dom.insertBefore(def_comt_ele, root)
-
-    xml_string = dom.toprettyxml(indent=1 * DEPTH_SIZE, newl='\n', encoding='UTF-8')
-    with open('tmp.xml', "wb") as file_tmp:
-        file_tmp.write(xml_string)
-    flag = False
-    with open('tmp.xml', "r", encoding="utf-8") as file_out:
-        with open(output_xml, "w", encoding="utf-8") as file_out_mod:
-            for i in file_out.readlines():
-                # Re-indent lines that fall inside a multi-line <doc> block.
-                if '<doc>' not in i and '</doc>' not in i and flag is False:
-                    file_out_mod.write(i)
-                elif '<doc>' in i and '</doc>' in i:
-                    file_out_mod.write(i)
-                elif '<doc>' in i and '</doc>' not in i:
-                    flag = True
-                    white_spaces = len(i) - len(i.lstrip())
-                    file_out_mod.write(i)
-                elif '<doc>' not in i and '</doc>' not in i and flag is True:
-                    file_out_mod.write((white_spaces + 5) * ' ' + i)
-                elif '<doc>' not in i and '</doc>' in i and flag is True:
-                    file_out_mod.write(white_spaces * ' ' + i)
-                    flag = False
-    os.remove('tmp.xml')
-
-
-# pylint: disable=too-many-statements
-def nyaml2nxdl(input_file: str, out_file, verbose: bool):
-    """
-    Main of the nyaml2nxdl converter, creates XML tree, namespace and
-    schema, definitions then evaluates a dictionary nest of groups recursively and
-    fields or (their) attributes as children of the groups
-    """
-
-    def_attributes = ['deprecated', 'ignoreExtraGroups', 'category', 'type',
-                      'ignoreExtraFields', 'ignoreExtraAttributes', 'restricts']
-    yml_appdef = yml_reader(input_file)
-    def_cmnt_text = []
-    if verbose:
-        sys.stdout.write(f'input-file: {input_file}\n')
-        sys.stdout.write('application/base contains the following root-level entries:\n')
-        sys.stdout.write(str(yml_appdef.keys()))
-    xml_root = ET.Element('definition', {})
-    assert 'category' in yml_appdef.keys(
-    ), 'Required root-level keyword category is missing!'
-    assert yml_appdef['category'] in ['application', 'base'], 'Only \
-application and base are valid categories!'
-    assert 'doc' in yml_appdef.keys(), 'Required root-level keyword doc is missing!'
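A minimal root-level document that satisfies these assertions might look as follows (the definition name and file content are hypothetical):

```python
import yaml

minimal_yaml = """
category: application
doc: A minimal application definition used only for illustration.
NXmy_test(NXobject):
  (NXentry):
    doc: The single entry of this toy definition.
"""

yml_appdef = yaml.safe_load(minimal_yaml)
assert 'category' in yml_appdef and yml_appdef['category'] in ['application', 'base']
assert isinstance(yml_appdef['doc'], str) and yml_appdef['doc'] != ''
print(list(yml_appdef.keys()))  # ['category', 'doc', 'NXmy_test(NXobject)']
```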
- - name_extends = '' - yml_appdef_copy = yml_appdef.copy() - for kkey, vvalue in yml_appdef_copy.items(): - if '__line__' in kkey: - continue - line_number = f"__line__{kkey}" - line_loc_no = yml_appdef[line_number] - if not isinstance(vvalue, dict) and kkey in def_attributes: - xml_root.set(kkey, str(vvalue) or '') - cmnt_text = xml_handle_comment(xml_root, - line_number, line_loc_no, - is_def_cmnt=True) - def_cmnt_text += cmnt_text if cmnt_text else [] - - del yml_appdef[line_number] - del yml_appdef[kkey] - # Taking care or name and extends - elif 'NX' in kkey: - # Tacking the attribute order but the correct value will be stored later - # check for name first or type first if (NXobject)NXname then type first - l_bracket_ind = kkey.rfind('(') - r_bracket_ind = kkey.rfind(')') - if l_bracket_ind == 0: - extend = kkey[1:r_bracket_ind] - name = kkey[r_bracket_ind + 1:] - xml_root.set('extends', extend) - xml_root.set('name', name) - elif l_bracket_ind > 0: - name = kkey[0:l_bracket_ind] - extend = kkey[l_bracket_ind + 1: r_bracket_ind] - xml_root.set('name', name) - xml_root.set('extends', extend) - else: - name = kkey - xml_root.set('name', name) - xml_root.set('extends', 'NXobject') - cmnt_text = xml_handle_comment(xml_root, - line_number, line_loc_no, - is_def_cmnt=True) - def_cmnt_text += cmnt_text if cmnt_text else [] - - name_extends = kkey - - if 'type' not in xml_root.attrib: - xml_root.set('type', "group") - # Taking care of namespaces - namespaces = {'xmlns': 'http://definition.nexusformat.org/nxdl/3.1', - 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation': 'http://definition.nexusformat.org/nxdl/3.1 ../nxdl.xsd'} - for key, ns_ in namespaces.items(): - xml_root.attrib[key] = ns_ - # Taking care of Symbols elements - if 'symbols' in yml_appdef.keys(): - xml_handle_symbols(yml_appdef, - xml_root, - 'symbols', - yml_appdef['symbols']) - - del yml_appdef['symbols'] - del yml_appdef["__line__symbols"] - - assert isinstance(yml_appdef['doc'], str) and yml_appdef['doc'] != '', 'Doc \ -has to be a non-empty string!' - - line_number = '__line__doc' - line_loc_no = yml_appdef[line_number] - xml_handle_doc(xml_root, yml_appdef['doc'], line_number, line_loc_no) - - del yml_appdef['doc'] - - root_keys = 0 - for key in yml_appdef.keys(): - if '__line__' not in key: - root_keys += 1 - extra_key = key - - assert root_keys == 1, (f"Accepting at most keywords: category, doc, symbols, and NX... " - f"at root-level! check key at root level {extra_key}") - - assert ('NX' in name_extends and len(name_extends) > 2), 'NX \ -keyword has an invalid pattern, or is too short!' - # Taking care if definition has empty content - if yml_appdef[name_extends]: - recursive_build(xml_root, yml_appdef[name_extends], verbose) - # Taking care of comments that comes at the end of file that is might not be intended for - # any nxdl elements. - if COMMENT_BLOCKS[-1].has_post_comment: - post_comment = COMMENT_BLOCKS[-1] - (lin_annot, line_loc) = post_comment.get_line_info() - xml_handle_comment(xml_root, lin_annot, line_loc) - - # Note: Just to keep the functionality if we need this functionality later. 
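For context, a sketch of what the disabled `default_attr` branch below would delegate to `check_for_default_attribute_and_value` (simplified, with illustrative element names): base-class elements default to `optional="true"`, application definitions to `optional="false"`, unless an occurrence attribute is already set.

```python
import xml.etree.ElementTree as ET

CATEGORY = 'base'  # set from the root-level 'category' keyword by yml_reader()

root = ET.fromstring(
    '<definition><group name="entry"><field name="title"/></group></definition>')


def apply_defaults(elem):
    """Simplified default-attribute pass over groups, fields and attributes."""
    for child in elem:
        if child.tag in ('group', 'field', 'attribute') \
                and not set(child.attrib) & {'optional', 'recommended',
                                             'minOccurs', 'maxOccurs'}:
            child.set('optional', 'true' if CATEGORY == 'base' else 'false')
        apply_defaults(child)


apply_defaults(root)
print(ET.tostring(root, encoding='unicode'))
```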
- default_attr = False - if default_attr: - check_for_default_attribute_and_value(xml_root) - pretty_print_xml(xml_root, out_file, def_cmnt_text) - if verbose: - sys.stdout.write('Parsed YAML to NXDL successfully\n') diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py b/pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py deleted file mode 100644 index 9583b375d..000000000 --- a/pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python3 -"""Main file of yaml2nxdl tool. -Users create NeXus instances by writing a YAML file -which details a hierarchy of data/metadata elements - -""" -# -*- coding: utf-8 -*- -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -# Yaml library does not except the keys (escapechar "\t" and yaml separator ":") -# So the corresponding value is to skip them and -# and also carefull about this order -import hashlib -from yaml.composer import Composer -from yaml.constructor import Constructor - -from yaml.nodes import ScalarNode -from yaml.resolver import BaseResolver -from yaml.loader import Loader - -# NOTE: If any one change one of the bellow dict please change it for both -ESCAPE_CHAR_DICT_IN_YAML = {"\t": " ", - "\':\'": ":"} - -ESCAPE_CHAR_DICT_IN_XML = {" ": "\t", - "\':\'": ":"} - - -class LineLoader(Loader): # pylint: disable=too-many-ancestors - """ - LineLoader parses a yaml into a python dictionary extended with extra items. - The new items have as keys __line__ and as values the yaml file line number - """ - - def compose_node(self, parent, index): - # the line number where the previous token has ended (plus empty lines) - node = Composer.compose_node(self, parent, index) - node.__line__ = self.line + 1 - return node - - def construct_mapping(self, node, deep=False): - node_pair_lst = node.value - node_pair_lst_for_appending = [] - - for key_node in node_pair_lst: - shadow_key_node = ScalarNode( - tag=BaseResolver.DEFAULT_SCALAR_TAG, value='__line__' + key_node[0].value) - shadow_value_node = ScalarNode( - tag=BaseResolver.DEFAULT_SCALAR_TAG, value=key_node[0].__line__) - node_pair_lst_for_appending.append( - (shadow_key_node, shadow_value_node)) - - node.value = node_pair_lst + node_pair_lst_for_appending - return Constructor.construct_mapping(self, node, deep=deep) - - -def get_yaml_escape_char_dict(): - """Get escape char and the way to skip them in yaml.""" - return ESCAPE_CHAR_DICT_IN_YAML - - -def get_yaml_escape_char_reverter_dict(): - """To revert yaml escape char in xml constructor from yaml.""" - - return ESCAPE_CHAR_DICT_IN_XML - - -def type_check(nx_type): - """ - Check for nexus type if type is NX_CHAR get '' or get as it is. 
- """ - - if nx_type in ['NX_CHAR', '']: - nx_type = '' - else: - nx_type = f"({nx_type})" - return nx_type - - -def get_node_parent_info(tree, node): - """ - Return tuple of (parent, index) where: - parent = node of parent within tree - index = index of node under parent - """ - - parent_map = {c: p for p in tree.iter() for c in p} - parent = parent_map[node] - return parent, list(parent).index(node) - - -def cleaning_empty_lines(line_list): - """ - Cleaning up empty lines on top and bottom. - """ - if not isinstance(line_list, list): - line_list = line_list.split('\n') if '\n' in line_list else [''] - - # Clining up top empty lines - while True: - if line_list[0].strip(): - break - line_list = line_list[1:] - if len(line_list) == 0: - line_list.append('') - return line_list - - # Clining bottom empty lines - while True: - if line_list[-1].strip(): - break - line_list = line_list[0:-1] - if len(line_list) == 0: - line_list.append('') - return line_list - - return line_list - - -def nx_name_type_resolving(tmp): - """ - extracts the eventually custom name {optional_string} - and type {nexus_type} from a YML section string. - YML section string syntax: optional_string(nexus_type) - """ - if tmp.count('(') == 1 and tmp.count(')') == 1: - # we can safely assume that every valid YML key resolves - # either an nx_ (type, base, candidate) class contains only 1 '(' and ')' - index_start = tmp.index('(') - index_end = tmp.index(')', index_start + 1) - typ = tmp[index_start + 1:index_end] - nam = tmp.replace('(' + typ + ')', '') - return nam, typ - - # or a name for a member - typ = '' - nam = tmp - return nam, typ - - -def get_sha256_hash(file_name): - """Generate a sha256_hash for a given file. - """ - sha_hash = hashlib.sha256() - - with open(file=file_name, mode='rb',) as file_obj: - # Update hash for each 4k block of bytes - for b_line in iter(lambda: file_obj.read(4096), b""): - sha_hash.update(b_line) - return sha_hash.hexdigest() - - -def extend_yamlfile_with_comment(yaml_file, - file_to_be_appended, - top_lines_list=None): - """Extend yaml file by the file_to_be_appended as comment. - """ - - with open(yaml_file, mode='a+', encoding='utf-8') as f1_obj: - if top_lines_list: - for line in top_lines_list: - f1_obj.write(line) - - with open(file_to_be_appended, mode='r', encoding='utf-8') as f2_obj: - lines = f2_obj.readlines() - for line in lines: - f1_obj.write(f"# {line}") - - -def separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, sep_xml): - """Separate the provided yaml file into yaml, nxdl and hash if yaml was extended with - nxdl at the end of yaml by - '\n# ++++++++++++++++++++++++++++++++++ SHA HASH \ - ++++++++++++++++++++++++++++++++++\n' - # ' - """ - sha_hash = '' - with open(yaml_file, 'r', encoding='utf-8') as inp_file: - lines = inp_file.readlines() - # file to write yaml part - with open(sep_yaml, 'w', encoding='utf-8') as yml_f_ob, \ - open(sep_xml, 'w', encoding='utf-8') as xml_f_ob: - - last_line = '' - write_on_yaml = True - for ind, line in enumerate(lines): - if ind == 0: - last_line = line - # Write in file when ensured that the nest line is not with '++ SHA HASH ++' - elif '++ SHA HASH ++' not in line and write_on_yaml: - yml_f_ob.write(last_line) - last_line = line - elif '++ SHA HASH ++' in line: - write_on_yaml = False - last_line = '' - elif not write_on_yaml and not last_line: - # The first line of xml file has been found. Onward write lines directly - # into xml file. 
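A usage sketch for the two helpers above, showing how the SHA-hash banner that `separate_hash_yaml_and_nxdl` scans for gets appended in the first place (file names are placeholders, and the import assumes the helper module is available):

```python
from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (extend_yamlfile_with_comment,
                                                    get_sha256_hash)

yaml_file = 'NXmy_test.yaml'       # hypothetical nyaml source
nxdl_file = 'NXmy_test.nxdl.xml'   # hypothetical converted output

sha_hash = get_sha256_hash(nxdl_file)
extend_yamlfile_with_comment(
    yaml_file,
    nxdl_file,
    top_lines_list=[
        '\n# ++++++++++++++++++++++++++++++++++ SHA HASH'
        ' ++++++++++++++++++++++++++++++++++\n',
        f'# {sha_hash}\n',
    ])
```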
- if not sha_hash: - sha_hash = line.split('# ', 1)[-1].strip() - else: - xml_f_ob.write(line[2:]) - # If the yaml fiile does not contain any hash for nxdl then we may have last line. - if last_line: - yml_f_ob.write(last_line) - - return sha_hash diff --git a/tests/data/nyaml2nxdl/NXattributes.yaml b/tests/data/nyaml2nxdl/NXattributes.yaml deleted file mode 100644 index f8ae54335..000000000 --- a/tests/data/nyaml2nxdl/NXattributes.yaml +++ /dev/null @@ -1,41 +0,0 @@ -doc: documentation no. 0 -symbols: - doc: documentation no. 1 - testnamesymbol: test description of symbol -category: application -NXellipsometry_base_draft(my_test_extends): - (NXentry): - \@entry: - doc: attribute documentation - doc: documentation no. 2 - experiment_identifier: - exists: ['min', 3, 'max', 100] - doc: documentation no. 3 - experiment_description: - exists: required - start_time(NX_DATE_TIME): - exists: required - unit: NX_TIME - program_name: - doc: documentation no. 4 - program_version: - exists: ['min', 5] - doc: documentation no. 5 - time_zone(NX_DATE_TIME): - exists: required - doc: documentation no. 6 - definition_local: - exists: ['max', 5] - doc: documentation no. 7 - \@version: - calibration_data(NX_NUMBER): - unit: NX_UNITLESS - doc: | - Calibration is performed on a reference surface (usually silicon wafer with well - defined oxide layer) at a number of angles, then in a straight through mode - (transmission in air). - dimensions: - rank: 3 - dim: [[3, N_calibration_angles+1], [2, N_variables], [1, N_calibration_wavelength]] - dim_parameters: - required: ['true', 'true', 'true'] diff --git a/tests/data/nyaml2nxdl/NXcomment_yaml2nxdl.yaml b/tests/data/nyaml2nxdl/NXcomment_yaml2nxdl.yaml deleted file mode 100644 index e08505f39..000000000 --- a/tests/data/nyaml2nxdl/NXcomment_yaml2nxdl.yaml +++ /dev/null @@ -1,68 +0,0 @@ - -category: application - -# 1: Pincelli, Rettig, Arora at fhi-berlin.mpg.de, Dobener at hu-berlin.de, 06/2022 -#Draft version of a NeXus application definition for photoemission, -#It is designed to be extended by other application definitions -#with higher granularity in the data description. - -doc: This is the most general application definition for multidimensional photoelectron spectroscopy. -# 2: symbols comments: comments here -symbols: -# 3: symbols doc comments - doc: | - symbols doc -# 4: symbol comments: comments here - n_different_temperatures: "Number of different temperature setpoints used in the experiment." -# 5: symbol comments: comments here - n_different_voltages: "Number of different voltage setpoints used in the experiment." - -# 6: NXmpes: Test -- documentation -# NXmpes: Test documentation -NXmpes: - # 7: NXmpes: Test documentation - # NXmpes: Test documentation - # 8: exists: comment - - (NXentry): - exists: recommended - # 9: Title comment - title: - # 10: Group comment - start_time(NX_DATE_TIME): - doc: "Datetime of the start of the measurement." 
- definition: - # 11: version_attribute: comments hrere - \@version: - enumeration: ["NXmpes"] - # 12: Scond comment for Comment NXdata(data) - - # 13: comment nxdata(data): comments - # comment nxdata(data): comments - - # 14: Third comment for Comment NXdata(data) - (NXdata)data: - # 15: comment (energy(link)): - energy(link): - target: /entry/instrument/fluorescence/energy - # 16: comment (data(link)): - data(link): - target: /entry/instrument/fluorescence/data - region_origin(NX_INT): - doc: | - origin of rectangular region selected for readout - # 17: dimensions comments: - - dimensions: - # 18: rank comments: comments - rank: 1 - # 19: dim comments: - dim: [[1, 2]] - - # 20: File endgin comments - # 20: File ending comments - # 20: File ending comments - - # 21: File endgin comments - # 21: File ending comments - # 21: File ending comments \ No newline at end of file diff --git a/tests/data/nyaml2nxdl/NXellipsometry-docCheck.yaml b/tests/data/nyaml2nxdl/NXellipsometry-docCheck.yaml deleted file mode 100644 index b9c34da39..000000000 --- a/tests/data/nyaml2nxdl/NXellipsometry-docCheck.yaml +++ /dev/null @@ -1,543 +0,0 @@ -doc: | - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in: - - H. Fujiwara, Spectroscopic ellipsometry: principles and applications, John Wiley & Sons, 2007. - - R. M. A. Azzam and N. M. Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. - - Open acces sources: - - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ -symbols: - doc: | - Variables used throughout the document, e.g. dimensions and important - parameters - - N_wavelength: | - Size of the energy / wavelength vector used - - N_variables: | - How many variables are saved in a measurement (e.g. Psi and Delta, - Mueller matrix) - - N_angles: | - Number of incident angles used - - N_p1: | - Number of sample parameters scanned - - N_time: | - Number of time points measured - -category: application -type: group -(NXobject)NXellipsometry: - (NXentry): - doc: | - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in':' - - H. Fujiwara, Spectroscopic ellipsometry':' principles and applications, John Wiley & Sons, 2007. - - R. M. A. Azzam and N. M. Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. - - Open acces sources':' - - https':'//www.angstromadvanced.com/resource.asp - - https':'//pypolar.readthedocs.io/en/latest/ - definition(NX_CHAR): - doc: | - An application definition for ellipsometry. - \@version: - type: NX_CHAR - doc: | - Version number to identify which definition of this application definition was - used for this entry/data. - \@url: - type: NX_CHAR - doc: | - URL where to find further material (documentation, examples) relevant to the - application definition - enumeration: [NXellipsometry] - experiment_identifier(NX_CHAR): - doc: | - Unique identifier of the experiment, such as a (globally persistent) unique - identifier. i) The identifier is usually defined by the facility or principle - investigator. ii) The identifier enables to link experiments to e.g. proposals. - experiment_description(NX_CHAR): - recommended: true - doc: | - A free-text description of the experiment. 
What is the aim of the experiment? - The general procedure. - start_time(NX_DATE_TIME): - doc: | - Start time of the experiment. UTC offset should be specified. - program(NX_CHAR): - optional: true - doc: | - Commercial or otherwise defined given name to the program that was used to - generate the results file(s) with measured data and metadata (or a link to the - instrument software). - \@version: - type: NX_CHAR - doc: | - Either version with build number, commit hash, or description of a (online) - repository where the source code of the program and build instructions can be - found so that the program can be configured in such a way that result files can - be created ideally in a deterministic manner. - \@url: - type: NX_CHAR - doc: | - Website of the software. - operator(NXuser): - exists: ['min', '1'] - doc: | - Contact information of at least the user of the instrument or the investigator - who performed this experiment. Adding multiple users if relevant is recommended. - name(NX_CHAR): - doc: | - Name of the user. - affiliation(NX_CHAR): - doc: | - Name of the affiliation of the user at the point in time when the experiment was - performed. - address(NX_CHAR): - doc: | - Full address (street, street number, ZIP, city, country) of the user's - affiliation. - email(NX_CHAR): - doc: | - Email address of the user. - orcid(NX_CHAR): - recommended: true - doc: | - Author ID defined by https':'//orcid.org/. - telephone_number(NX_CHAR): - recommended: true - doc: | - Official telephone number of the user. - (NXinstrument): - doc: | - General properties of the ellipsometry equipment - model(NX_CHAR): - doc: | - The name of the instrument - \@version: - type: NX_CHAR - doc: | - The used version of the hardware if available. If not a commercial instrument - use date of completion of the hardware. - company(NX_CHAR): - optional: true - doc: | - Name of the company which build the instrument - construction_year(NX_DATE_TIME): - optional: true - doc: | - ISO8601 date when the instrument was constructed. UTC offset should be - specifiec. - software(NX_CHAR): - doc: | - Name (e.g. commercial) of the software that was used for the measurement - \@version: - type: NX_CHAR - doc: | - Version and build number or commit hash of the software source code - \@url: - type: NX_CHAR - doc: | - Website of the software. - light_source(NX_CHAR): - doc: | - Specify the used light source. Multiple selection possible. - enumeration: [UV light, quartz tungsten halogen lamp, xenon arc lamp, deuterium lamp, silicon carbide globar, other] - other_light_source(NX_CHAR): - optional: true - doc: | - If you specified 'other' as light source type, please write down what it is. - focussing_probes(NX_BOOLEAN): - doc: | - Were focussing probes (lenses) used or not? - data_correction(NX_BOOLEAN): - optional: true - doc: | - Were the recorded data corrected by the window effects of the lenses or not? - angular_spread(NX_NUMBER): - optional: true - unit: NX_ANGLE - doc: | - Specify the angular spread caused by the focussing probes - ellipsometry_type(NX_CHAR): - doc: | - What type of ellipsometry was used? 
See Fujiwara Table 4.2 - enumeration: [rotating analyzer, rotating analyzer with analyzer compensator, rotating analyzer with polarizer compensator, rotating polarizer, rotating compensator on polarizer side, rotating compensator on analyzer side, modulator on polarizer side, modulator on analyzer side, dual compensator, phase modulation, imaging ellipsometry, null ellipsometry] - calibration_status(NX_DATE_TIME): - doc: | - Was a calibration done. If yes, when was it done? - enumeration: [calibration time provided, no calibration, within 1 hour, within 1 day, within 1 week] - calibration(NXsubentry): - recommended: true - doc: | - Ellipsometers require regular calibration to adjust the hardware parameters for - proper zero values and background light compensation. - calibration_time(NX_DATE_TIME): - optional: true - doc: | - If calibtration status is 'calibration time provided', specify the ISO8601 datum - when calibration was last performed before this measurement. UTC offset should - be specified. - calibration_data(NXsubentry): - doc: | - Arrays which provide the measured calibration data. - Multiple sets are possible, e.g. Psi and delta measured on an - e.g. silicon calibration waver, and the straight-through data. - - We - recommend to - - provide data that is measured under the same settings - - as the measurement was performed, that is if Psi and delta are measured - for your data, also provide Psi and delta here. - And use the same wavelenghts as there." - calibration_data_type(NX_CHAR): - doc: | - What data was recorded for the calibration, The number of variables - (N_variables) have to be set to the number of provided data columns accordingly, - e.g. psi/delta -> N_variables= 2, Jones vector':' N_variables = 4, Mueller martix - -> N_variables= 16, etc. - enumeration: [psi/delta, tan(psi)/cos(delta), Jones matrix, Mueller matrix, not provided] - calibration_angle_of_incidence(NX_NUMBER): - unit: NX_ANGLE - doc: | - angle(s) of incidence used during the calibration measurement (excluding - straight through mode) - dimensions: - rank: 1 - dim: [[1, N_calibration_angles]] - calibration_wavelength(NX_NUMBER): - doc: | - The wavelength or equivalent values (which are inter-convertible). The importer should convert all to one unit, and make the others accessible. Historically, energy is used in eV, but for visible spectroscopy wavelength is more common, for IR wave numbers in 1/cm units. - Possibly use the same type of data as for the measurement! - dimensions: - rank: 1 - dim: [[1, N_calibration_wavelength]] - calibration_data(NX_NUMBER): - unit: NX_UNITLESS - doc: | - Calibration is performed on a reference surface (usually silicon wafer with well - defined oxide layer) at a number of angles, then in a straight through mode - (transmission in air). - dimensions: - rank: 3 - dim: [[3, N_calibration_angles+1], [2, N_variables], [1, N_calibration_wavelength]] - calibration_sample(NX_CHAR): - doc: | - Free-text to describe which sample was used for calibration, e.g. silicon wafer - with 25 nm thermal oxide layer. - angle_of_incidence(NX_NUMBER): - unit: NX_ANGLE - doc: | - Incident angle of the beam vs. the normal of the bottom reflective (substrate) - surface in the sample - dimensions: - rank: 1 - dim: [[1, N_angles]] - stage(NXsubentry): - doc: | - Sample stage, holding the sample at a specific position in X,Y,Z (Cartesian) - coordinate system and at an orientation defined by three Euler angles (alpha, - beta, gamma). 
The stage may be motorized or manual, special for liquids or gas - environment. - enumeration: [manual stage, scanning stage, liquid stage, gas cell, cryostat] - description(NX_CHAR): - recommended: true - doc: | - A free-text field to provide information about the stage. - (NXtransformations): - recommended: true - doc: | - The stage coordinate system vs. the incident beam. The Z-axis of the stage is considered to point along the normal of the substrate (bottom reflecting surface) from the stage towards the general direction of the light source. The beam comes with angle of incidence towards this Z-axis, but in opposite direction, thus they are connected with a rotation of 180 - angle of incidence (in degrees). - This transformation brings us from the NEXUS coordinates to the stage coordinates. - Then provide the set of translations (if there are any). These all have a vector defining their relative direction in the current coordinate system. (This current coordinate system changes with every transformation if you set the parameter 'depends' to the name of the previous step.) - Last, provide the rotations of the sample - alternative(NX_CHAR): - optional: true - doc: | - If there is no motorized stage, we should at least qualify where the beam hits - the sample and in what direction the sample stands in a free-text description, - e.g. 'center of sample, long edge parallel to plane of incidence'. - window(NXaperture): - optional: true - doc: | - For environmental measurements, the environment (liquid, vapor, vacuum etc.) is - enclosed in a cell or cryostat, which has windows both in the direction of the - source and the detector (looking from the sample). These windows also add a - phase shift to the light altering the measured signal. This shift has to be - corrected based on measuring a known sample in the environmental cell. - material(NX_CHAR): - doc: | - The material of the window - thickness(NX_NUMBER): - unit: NX_LENGTH - doc: | - Thickness of the window - orientation_angle(NX_NUMBER): - unit: NX_ANGLE - doc: | - Angle of the window normal (outer) vs. the substrate normal (similar to the - angle of incidence). - reference_data(NXsubentry): - doc: | - Recorded data that can be used to calculate the window effect. Typically this is - the substrate (e.g. silicon with thermal oxide layer) in air without window and - in a known medium with the window. - reference_sample(NX_CHAR): - doc: | - What sample was used to estimate the window effect. - reference_wavelength(NX_NUMBER): - unit: NX_LENGTH - doc: | - Use the same wavelengths at which all other measurements are recorded - dimensions: - rank: 1 - dim: [[1, N_wavelength]] - data(NX_NUMBER): - unit: NX_UNITLESS - doc: | - Recorded data of a reference surface with and without window / medium. - dimensions: - rank: 4 - dim: [[4, 2], [3, N_angles], [2, N_variables], [1, N_wavelength]] - (NXdetector): - doc: | - Which type of detector was used, and what is known about it? A detector can be a - photomultiplier (PMT), a CCD in a camera, an array in a spectrometer. If so, the - whole detector unit goes in here. - detector_type(NX_CHAR): - doc: | - What kind of detector module is used, e.g. CCD-spectrometer, CCD camera, PMT, - photodiode, etc. - enumeration: [PMT, photodiode, avalanche diode, CCD camera, CCD spectrometer, other] - other_detector(NX_CHAR): - optional: true - doc: | - If you specified 'other' as detector type, please write down what it is. 
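An aside on the `dimensions`/`dim` notation used throughout the fixture above: each `dim` entry is an `[index, value]` pair whose value may be a symbol such as `N_wavelength`. Below is a minimal sketch of resolving such a spec against a concrete HDF5 dataset; the file name, dataset path, symbol sizes, and the assumption that index 1 names the first array axis are all illustrative, not taken from the converter.

```python
# Minimal sketch, not pynxtools code: resolve an NXDL-style dim spec against
# a real dataset. Paths and symbol values are assumptions for illustration.
import h5py

dim_spec = [[4, "2"], [3, "N_angles"], [2, "N_variables"], [1, "N_wavelength"]]
symbols = {"N_angles": 3, "N_variables": 2, "N_wavelength": 1024}  # assumed sizes

# Sort the (index, value) pairs and substitute symbols; we assume here that
# index 1 corresponds to the first (slowest-varying) array dimension.
expected = tuple(int(symbols.get(v, v)) for _, v in sorted(dim_spec))

with h5py.File("reference.nxs", "r") as f:                        # hypothetical file
    data = f["/entry/instrument/window/reference_data/data"]      # hypothetical path
    assert data.shape == expected, f"{data.shape} != {expected}"
```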
- integration_time(NX_NUMBER): - unit: NX_TIME - doc: | - Integration time for the measurement. Single number or array if it was varied. - revolution(NX_NUMBER): - optional: true - unit: NX_ANY - doc: | - Define how many rotations of the rotating element were taken into account per - spectrum. - rotating_element(NX_CHAR): - doc: | - Define which elements rotates, e.g. polarizer or analyzer. - enumeration: [polarizer (source side), analyzer (detector side), compensator (source side), compensator (detector side)] - fixed_revolution(NX_NUMBER): - optional: true - unit: NX_FREQUENCY - doc: | - rotation rate, if the revolution does not change during the measurement. - variable_revolution(NX_NUMBER): - optional: true - doc: | - Specify maximum and minimum values for the revolution. - dimensions: - rank: 1 - dim: [[1, 2]] - (NXsample): - doc: | - Properties of the sample, its history, the sample environment and experimental - conditions (e.g. surrounding medium, temperature, pressure etc.), along with the - data (data type, wavelength array, measured data). - atom_types(NX_CHAR): - doc: | - Use Hill's system for listing elements of the periodic table which are inside or - attached to the surface of the specimen and thus relevant from a scientific - point. The purpose of this field is to allow materials database to parse the - relevant elements without having to interpret the sample history or other - fields. - sample_name(NX_CHAR): - doc: | - Descriptive name of the sample - sample_history(NX_CHAR): - doc: | - Ideally, a reference to the location or a unique (globally persistent) - identifier (e.g.) of e.g. another file which gives as many as possible details - of the material, its microstructure, and its thermo-chemo-mechanical - processing/preparation history. In the case that such a detailed history of the - sample is not available, use this field as a free-text description to specify - details of the sample and its preparation. - preparation_date(NX_DATE_TIME): - recommended: true - doc: | - ISO 8601 date with time zone specified. UTC offset should be specifiec. - layer_structure(NX_CHAR): - doc: | - Qualitative description of the layer structure for the sample. For example':' - Si/native oxide/thermal oxide/polymer/peptide - data_identifier(NX_NUMBER): - doc: | - An identifier to correlate data to the experimental conditions, if several were - used in this measurement; typically an index of 0 - N - data_type(NX_CHAR): - doc: | - Select which type of data was recorded, for example Psi and Delta (see':' - https':'//en.wikipedia.org/wiki/Ellipsometry#Data_acquisition). It is possible to - have multiple selections. Data types may also be converted to each other, e.g. a - Mueller matrix contains N,C,S data as well. This selection defines how many - columns (N_variables) are stored in the data array. - enumeration: [psi / delta, tan(psi)/cos(delta), Mueller matrix, Jones matrix, N/C/S, raw data] - wavelength(NX_NUMBER): - unit: NX_LENGTH - doc: | - Wavelength value(s) used for the measurement. - An array of 1 or more elements. Length defines N_wavelength - dimensions: - rank: 1 - dim: [[1, N_wavelength]] - measured_data(NX_NUMBER): - doc: | - Resulting data from the measurement, described by data type. - Minimum two columns containing Psi and delta, or for the normalized Mueller matrix, it may be 16 (or 15 if 1,1 is all 1). 
- dimensions: - rank: 5 - dim: [[5, N_time], [4, N_p1], [3, N_angles], [2, N_variables], [1, N_wavelength]] - data_error(NX_NUMBER): - recommended: true - doc: | - Specified uncertainties (errors) of the data described by data type. The - structure is the same as for the measured data. - dimensions: - rank: 5 - dim: [[5, N_time], [4, N_p1], [3, N_angles], [2, N_variables], [1, N_wavelength]] - time_points(NX_NUMBER): - optional: true - unit: NX_TIME - doc: | - An array of relative time points if a time series was recorded - medium(NX_CHAR): - doc: | - Describe what was the medium above or around the sample. The common model is - built up from substrate to the medium on the other side. Both boundaries are - assumed infinite in the model. Here define the name of the material (e.g. water, - air, etc.). - medium_refractive_indices(NX_NUMBER): - optional: true - unit: NX_UNITLESS - doc: | - Array of pairs of complex refractive indices of the medium for every measured - wavelength. Only necessary if the measurement was performed not in air, or - something very well known, e.g. high purity water. Specify the complex - refractive index':' n + ik - dimensions: - rank: 1 - dim: [[1, N_wavelength]] - environment_conditions(NX_CHAR): - optional: true - doc: | - External parameters that have influenced the sample. - number_of_runs(NX_UINT): - optional: true - unit: NX_DIMENSIONLESS - doc: | - How many measurements were done varying the parameters? This forms an extra - dimension beyond incident angle, time points and energy / wavelength (this is - the length of the 4th dimension of the data). Defaults to 1. - varied_parameters(NX_CHAR): - optional: true - doc: | - Indicates which parameter was changed. Its definition must exist below. The - specified variable has to be number_of_runs long, providing the parameters for - each data set. - enumeration: [optical excitation, voltage, temperature, pH, stress, stage positions] - optical_excitation(NXsubentry): - optional: true - doc: | - Was the sample modified using an optical source? Describe in this group the - parameters of the optical excitation used. - excitation_source(NX_CHAR): - doc: | - Specify the source for the external excitation - excitation_wavelength(NX_NUMBER): - unit: NX_LENGTH - doc: | - Wavelength value(s) or the range used for excitation. - In cases of continuous laser radiation a value or a set of values may do but for other illumination types, such as pulsed lasers, or lamps, a range may describe the source better. - broadening(NX_NUMBER): - optional: true - unit: NX_LENGTH - doc: | - Specify the FWHM of the excitation - excitation_type(NX_CHAR): - doc: | - CW or pulsed excitation - enumeration: [cw, pulsed] - pulse_length(NX_NUMBER): - optional: true - unit: NX_TIME - doc: | - Duration of one laser pulse. - repetition_rate(NX_NUMBER): - optional: true - unit: NX_FREQUENCY - doc: | - Repetition rate of the laser. - excitation_duration(NX_TIME): - optional: true - doc: | - How long was the sample excited. - pulse_energy(NX_NUMBER): - optional: true - unit: NX_ENERGY - doc: | - The integrated energy of light pulse. - excitation_power(NX_NUMBER): - optional: true - unit: NX_ENERGY - doc: | - The power of one laser pulse. 
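The rank-5 `dim` table above is exactly the kind of YAML fragment that nyaml2nxdl has to turn into `<dimensions>`/`<dim>` XML. A hedged sketch of that mapping (written for illustration, not the converter's actual implementation) could look like:

```python
# Illustrative sketch only (not the actual nyaml2nxdl code): emit an NXDL
# <dimensions> element from the [index, value] pairs used in these fixtures.
import xml.etree.ElementTree as ET

def dimensions_to_xml(rank, dim_pairs):
    """dim_pairs: [index, value] lists, e.g. [[1, 'N_wavelength'], [2, 'N_variables']]."""
    dims = ET.Element("dimensions", rank=str(rank))
    for index, value in sorted(dim_pairs):
        ET.SubElement(dims, "dim", index=str(index), value=str(value))
    return dims

print(ET.tostring(dimensions_to_xml(2, [[2, "N_variables"], [1, "N_wavelength"]]),
                  encoding="unicode"))
# -> <dimensions rank="2"><dim index="1" value="N_wavelength" />
#    <dim index="2" value="N_variables" /></dimensions>
```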
- voltage(NX_NUMBER): - optional: true - unit: NX_VOLTAGE - doc: | - Specify the voltage if the spectra were taken under bias - temperature(NX_NUMBER): - optional: true - unit: NX_TEMPERATURE - doc: | - Temperature of the sample (sample holder, medium) - pH(NX_NUMBER): - optional: true - unit: NX_UNITLESS - doc: | - pH of medium (measured or set) - pressure(NX_NUMBER): - optional: true - unit: NX_PRESSURE - doc: | - Pressure of the environment of the sample. - derived_parameters(NXprocess): - optional: true - doc: | - What parameters are derived from the above data. - depolarization(NX_NUMBER): - optional: true - unit: NX_UNITLESS - doc: | - Light loss due to depolarization as a value in [0-1]. - plot(NXdata): - optional: true - doc: | - A default view of the data, in this case Psi vs. wavelength and the angles of - incidence. If Psi does not exist, use other Müller matrix elements, such as N, C - and S. - \@axes: - doc: | - We recommend to use wavelength as a default attribute, but it can be replaced in - the case of not full spectral ellipsometry to any suitable parameter along the - X-axis. diff --git a/tests/data/nyaml2nxdl/NXfilelineError1.yaml b/tests/data/nyaml2nxdl/NXfilelineError1.yaml deleted file mode 100644 index d7c3e32dd..000000000 --- a/tests/data/nyaml2nxdl/NXfilelineError1.yaml +++ /dev/null @@ -1,30 +0,0 @@ -#test case for attributes -doc: documentation no. 0 -symbols: - doc: documentation no. 1 - testnamesymbol: test description of symbol -category: application -NXellipsometry_base_draft(my_test_extend): - (NXentry): - \@entry: - doc: attribute documentation - doc: documentation no. 2 - experiment_identifier: - existsss: required - doc: documentation no. 3 - experiment_description: - exists: required - start_time(NX_DATE_TIME): - exists: required - unit: NX_TIME - program_name: - doc: documentation no. 4 - program_version: - doc: documentation no. 5 - time_zone(NX_DATE_TIME): - exists: required - doc: documentation no. 6 - definition_local: - doc: documentation no. 7 - \@version: - # EMPTY ATTRIBUTES diff --git a/tests/data/nyaml2nxdl/NXfilelineError2.yaml b/tests/data/nyaml2nxdl/NXfilelineError2.yaml deleted file mode 100644 index bd446f92a..000000000 --- a/tests/data/nyaml2nxdl/NXfilelineError2.yaml +++ /dev/null @@ -1,30 +0,0 @@ -#test case for attributes -doc: documentation no. 0 -symbols: - doc: documentation no. 1 - testnamesymbol: test description of symbol -category: application -NXellipsometry_base_draft(my_test_extend): - (NXentry): - \@entry: - doc: attribute documentation - doc: documentation no. 2 - experiment_identifier: - exists: required - doc: documentation no. 3 - experiment_description: - exists: required - start_time(NX_DATE_TIME): - exists: required - unit: NX_TIME - program_name: - dochy: documentation no. 4 - program_version: - doc: documentation no. 5 - time_zone(NX_DATE_TIME): - exists: required - doc: documentation no. 6 - definition_local: - doc: documentation no. 7 - \@version: - # EMPTY ATTRIBUTES diff --git a/tests/data/nyaml2nxdl/NXfilelineError3.yaml b/tests/data/nyaml2nxdl/NXfilelineError3.yaml deleted file mode 100644 index 8b681068d..000000000 --- a/tests/data/nyaml2nxdl/NXfilelineError3.yaml +++ /dev/null @@ -1,30 +0,0 @@ -#test case for attributes -doc: documentation no. 0 -symbols: - doc: documentation no. 1 - testnamesymbol: test description of symbol -category: application -NXellipsometry_base_draft(my_test_extend): - (NXentry): - \@entry: - doc: attribute documentation - doc: documentation no. 
2 - experiment_identifier: - exists: required - doc: documentation no. 3 - experiment_description: - exists: required - start_time(NX_DATE_TIME): - exists: required - unit: NX_TIME - program_name: - doc: documentation no. 4 - program_version: - doc: documentation no. 5 - time_zone(NX_DATE_TIME): - exists: - doc: documentation no. 6 - definition_local: - doc: documentation no. 7 - \@version: - # EMPTY ATTRIBUTES diff --git a/tests/data/nyaml2nxdl/NXmytests.yaml b/tests/data/nyaml2nxdl/NXmytests.yaml deleted file mode 100644 index b1ba78d03..000000000 --- a/tests/data/nyaml2nxdl/NXmytests.yaml +++ /dev/null @@ -1,39 +0,0 @@ -category: base -doc: "This is a test file for checking the correct parsing of several fields and attributes in nxdl files" -symbols: - doc: "symbolic array lengths to be coordinated between various fields" - n_Temp: "number of temperatures" - n_eField: "number of values in applied electric field" - n_mField: "number of values in applied magnetic field" - n_pField: "number of values in applied pressure field" - n_sField: "number of values in applied stress field" -NXbeam: - distance(NX_FLOAT): - unit: NX_LENGTH - doc: "Distance from sample" - incident_energy(NX_FLOAT): - unit: NX_ENERGY - doc: "Energy on entering beamline component" - dimensions: - rank: 1 - dim: [[1, i]] - mode: - doc: "source operating mode" - enumeration: - Single Bunch: - doc: "for storage rings" - Multi Bunch: - doc: "for storage rings" - electric_field(NX_FLOAT): - unit: NX_VOLTAGE - doc: "Applied electric field" - dimensions: - dim: [[1, n_eField]] - \@direction: - enumeration: [x, y, z] - temperature(NX_FLOAT): - unit: NX_TEMPERATURE - doc: "Sample temperature. This could be a scanned variable" - dimensions: - rank: anyRank - dim: [[1, n_Temp]] diff --git a/tests/data/nyaml2nxdl/NXnested_symbols.yaml b/tests/data/nyaml2nxdl/NXnested_symbols.yaml deleted file mode 100644 index 33257b20d..000000000 --- a/tests/data/nyaml2nxdl/NXnested_symbols.yaml +++ /dev/null @@ -1,19 +0,0 @@ -category: base -doc: Test case for verifying handling of symbols inside a nexus class in nested layers of the hierarchy -symbols: - doc: teststring - nfa: Number of fast axes (acquired simutaneously) e.g. emission angle, kinetic energy - nsa: Number of slow axes (acquired scanning a physical quantity) e.g. lens voltage, photon energy or temperature - nx: Number of points in the first angular direction - ne: Number of points in the energy dispersion direction -NXentry(NXobject): - (NXsample): - symbols: - doc: teststring - n_comp: number of compositions - n_Temp: number of temperatures - (NXprocess): - symbols: - doc: another nest - x: parameter1 - y: parameter2 diff --git a/tests/data/nyaml2nxdl/NXtest_links.yaml b/tests/data/nyaml2nxdl/NXtest_links.yaml deleted file mode 100644 index 61f9e4d1b..000000000 --- a/tests/data/nyaml2nxdl/NXtest_links.yaml +++ /dev/null @@ -1,8 +0,0 @@ -category: base -doc: Test case for verifying that the parser can handle links correctly. -NXentry: - (NXdata): - polar_angle(link): - target: here1 - target_angle(link): - target: here2 diff --git a/tests/data/nyaml2nxdl/Ref_NXattributes.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXattributes.nxdl.xml deleted file mode 100644 index c429391c9..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXattributes.nxdl.xml +++ /dev/null @@ -1,88 +0,0 @@ - - - - - - - documentation no. 1 - - - - test description of symbol - - - - - documentation no. 0 - - - - documentation no. 2 - - - - attribute documentation - - - - - documentation no. 3 - - - - - - - documentation no. 
4 - - - - - documentation no. 5 - - - - - documentation no. 6 - - - - - documentation no. 7 - - - - - - Calibration is performed on a reference surface (usually silicon wafer with well - defined oxide layer) at a number of angles, then in a straight through mode - (transmission in air). - - - - - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXcomment.yaml b/tests/data/nyaml2nxdl/Ref_NXcomment.yaml deleted file mode 100644 index 025a97930..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXcomment.yaml +++ /dev/null @@ -1,68 +0,0 @@ - -category: application - -# 1: Pincelli, Rettig, Arora at fhi-berlin.mpg.de, Dobener at hu-berlin.de, 06/2022 -#Draft version of a NeXus application definition for photoemission, -#It is designed to be extended by other application definitions -#with higher granularity in the data description. - -doc: This is the most general application definition for multidimensional photoelectron spectroscopy. -# 2: symbols comments: comments here -symbols: -# 3: symbols doc comments - doc: | - symbols doc -# 4: symbol comments: comments here - n_different_temperatures: "Number of different temperature setpoints used in the experiment." -# 5: symbol comments: comments here - n_different_voltages: "Number of different voltage setpoints used in the experiment." - -# 6: NXmpes: Test -- documentation -# NXmpes: Test documentation -NXmpes: - # 7: NXmpes: Test documentation - # NXmpes: Test documentation - - # 8: exists: comment - (NXentry): - exists: recommended - # 9: Title comment - title: - # 10: Group comment - start_time(NX_DATE_TIME): - doc: "Datetime of the start of the measurement." - definition: - # 11: version_attribute: comments hrere - \@version: - enumeration: ["NXmpes"] - # 12: Scond comment for Comment NXdata(data) - - # 13: comment nxdata(data): comments - # comment nxdata(data): comments - - # 14: Third comment for Comment NXdata(data) - (NXdata)data: - # 15: comment (energy(link)): - energy(link): - target: /entry/instrument/fluorescence/energy - # 16: comment (data(link)): - data(link): - target: /entry/instrument/fluorescence/data - region_origin(NX_INT): - doc: | - origin of rectangular region selected for readout - # 17: dimensions comments: - - # 18: rank comments: comments - dimensions: - rank: 1 - # 19: dim comments: - dim: [[1, 2]] - - # 20: File endgin comments - # 20: File ending comments - # 20: File ending comments - - # 21: File endgin comments - # 21: File ending comments - # 21: File ending comments \ No newline at end of file diff --git a/tests/data/nyaml2nxdl/Ref_NXcomment_yaml2nxdl.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXcomment_yaml2nxdl.nxdl.xml deleted file mode 100644 index a59bdb69d..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXcomment_yaml2nxdl.nxdl.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - - - - - - - symbols doc - - - - - Number of different temperature setpoints used in the experiment. - - - - - - Number of different voltage setpoints used in the experiment. - - - - - - This is the most general application definition for multidimensional - photoelectron spectroscopy. - - - - - - - - - Datetime of the start of the measurement. 
- - - - - - - - - - - - - - - - - - - - origin of rectangular region selected for readout - - - - - - - - - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXellips.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXellips.nxdl.xml deleted file mode 100644 index a3621b088..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXellips.nxdl.xml +++ /dev/null @@ -1,586 +0,0 @@ - - - - - draft application definition for ellipsometry measurements, including complex systems up - to variable angle spectroscopic ellipsometry. - - - - Variables used throughout the document, e.g. dimensions and important - parameters - - - - The angle of incidence to the surface normal (stage normal) of the - sample - - - - - Size of the energy / wavelength vector used - - - - - How many variables are saved in a measurement (e.g. Psi and delta, - Mueller matrix) - - - - - Number of incident angles used - - - - - Number of first sample parameters scanned - - - - - Number of time points measured - - - - - - - NeXus convention is to use entry1, entry2, for analysis software to locate each entry. - - - - to be defined - - - - Unique identifier of the experiment, such as a (globally persistent) unique identifier. - The identifier is usually defined by the facility or principle investigator. The - identifier enables to link experiments to e.g. proposals. - - - - - - - Commercial or otherwise defined given name to the program that was used to generate the - results file(s) with measured data and metadata. - - - - - Either version with build number, commit hash, or description of a (online) repository - where the source code of the program and build instructions can be found so that the - program can be configured in such a way that result files can be created ideally in a - deterministic manner. - - - - - ISO 8601 time_zone offset from UTC. - - - - - FAIRmat-specific candidate proposal for an application definition exemplifying - ellipsometry. - - - - Ideally version with build number are commit hash of the application definition. If not - available a free-text description. - - - - - URL where to find further material (documentation, examples) relevant to the application - definition - - - - - - Contact information of at least the user of the instrument or the principal investigator - who performed this experiment. Adding multiple users if relevant is recommended. - - - - - Name of the affiliation of the user at the point in time when the experiment was - performed. - - - - - - - - - - - General properties of the ellipsometry equipment - - - - The name of the instrument - - - - - Name of the company - - - - - ISO8601 date when the instrument was constructed - - - - - The used version of the hardware if available - - - - - Name (e.g. commercial) of the software that was used for the measurement - - - - - Version and build number or commit hash of the software source code - - - - - Specify the bandwidth of the light - - - - - Specify the used light source - - - - - Were focussing probes (lenses) used or not? - - - - - Were the recorded data corrected by the window effects of the lenses or not? - - - - - Specify the angular spread caused by the focussing probes - - - - - What type of ellipsometry was used? See Fujiwara Table 4.2. - - - - - - - - - - - - - - - - - - - ellipsometers require regular calibration to adjust the hardware parameters for proper - zero values and background light compensation - - - - ISO8601 datum when calibration was last performed before this measurement - - - - - Are the measured data provided? 
- - - - - Arrays which provide the measured calibration data. Multiple sets are possible, e.g. Psi - and delta measured on an e.g. silicon calibration waver, and the straight-through data. - - - - to be defined - - - - - - - - - - - angle(s) of incidence used during the calibration measurement - - - - - The wavelength or equivalent values (, which are inter-convertible). The importer should - convert all to one unit, and make the others accessible. Historically, energy is used in - eV, but for visible spectroscopy wavelength is more common, for IR wave numbers in 1/cm - units. - - - - - to be defined - - - - - Free-text to describe which sample was used for calibration, e.g. silicon wafer with 25 nm - thermal oxide layer - - - - - - the incident angle of the beam vs. the normal of the sample surface - - - - - - - - - - Where and how is the sample mounted - - - - - - - - - - - For environmental measurements, if a window is between the sample and the optics of the - ellipsometer, describe its properties. - - - - Thickness of the window - - - - - Angle in the plane of incidence - - - - - to be defined - - - - - to be defined - - - - - to be defined - - - - - Which sample was used to calibrate the window effect? - - - - - - Which type of detector was used, and what is known about it? A detector can be a - photomultiplier (PMT), a CCD in a camera, an array in a spectrometer. If so, the whole - unit goes in here. - - - - What kind of detector module is used, e.g. CCD-spectrometer, CCD camera, PMT, photodiode, - etc. - - - - - Integration time for the measurement. Single number or array if it was varied. - - - - - Define how many rotations of the rotating element were taken into account for one spectra. - - - - - Define which elements rotates - - - - - - - - - - - if the revolution does not change during the measurement. - - - - - Specify maximum and minimum values for the revolution. - - - - - - - - - - - Use Hill's system for listing elements of the periodic table which are inside or attached - to the surface of the specimen and thus relevant from a scientific point. The purpose of - this field is to allow materials database to parse the relevant elements without having to - interpret the sample history or other fields. - - - - - - Ideally, a reference to the location or a unique (globally persistent) identifier (e.g.) - of e.g. another file which gives as many as possible details of the material, its - microstructure, and its thermo-chemo-mechanical processing/preparation history. In the - case that such a detailed history of the sample is not available, use this field as a - free-text description to specify details of the sample and its preparation. - - - - - - ISO 8601 time_zone offset from UTC. The time zone can be different to the time zone of - this experiment description because maybe the sample was prepared by one international - group and is then measured in a different time zone. - - - - - Specimen/sample preparation and previous processing steps is the history which the sample - carries when it is mounted in the electron microscope. Therefore, preparation details and - other points of this history should be stored in sample_history. - - - - - Qualitative description of the layer structure for the sample in cases where a detailed - geometrical description is not available or desired/required. - - - - - Euler angles of stress relative to sample - - - - - - - - Specifiy the position (e.g. 
in a line scan) with respect to a reference point - - - - - - - - A identifier to correlate data to the experimental conditions, if several were used in - this measurement; typically an index of 0 - N - - - - - to be defined - - - - - - - - - - - - specify the number of variables stored, e.g. psi, delta and their errors are 4 (this can - be also automated, based on the provided data table) - - - - - Range, to be further specified - - - - - Resulting data from the measurement, described by data type. Minimum two columns, if - errors are available twice as many. For a Mueller matrix, it may be nine (1,1 is all 1, - the rest is symmetric). - - - - - - - - - - - - - - A link to the already existing information about sample position. - - - - - The incident angle of the beam vs. the normal of the sample surface. - - - - - An array of relative time points if a time series was recorded - - - - - Describe what was the medium above or around the sample. The common model is built up from - substrate to the medium on the other side. Both boundaries are assumed infinite in the - model. Here define the name of the material (e.g. water, air, etc.). - - - - - Array of pairs of complex refractive indices of the medium for every measured wavelength. - - - - - - - - - External parameters that have influenced the sample. - - - - - How many measurements were done varying the parameters? This forms an extra dimension - beyond incident angle and energy / wavelength. - - - - - this is to indicate which parameter was changed. Its definition must exist below. The - specified variable has to be number_of_runs long, providing the parameters for each data - set. - - - - - - - - - - - - - Provide the number of parameters used, N_p1 - - - - - Describe if the spectra where taken under optical excitation - - - - - Specify the source for the external excitation - - - - - Specify the FWHM of the excitation - - - - - CW or pulsed excitation - - - - - - - - - - - to be define - - - - - - If the spectra were taken under bias - - - - - to be defined - - - - - to be defined, how measured? - - - - - to be defined, only qualitative (atmospheric) pressure or really the applied continuum - stress/strain tensor on the sample? - - - - - What parameters are derived from the above data - - - - to be defined - - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml deleted file mode 100644 index 506c4f401..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml +++ /dev/null @@ -1,777 +0,0 @@ - - - - - - - Variables used throughout the document, e.g. dimensions and important - parameters - - - - Size of the energy / wavelength vector used - - - - - How many variables are saved in a measurement (e.g. Psi and Delta, - Mueller matrix) - - - - - Number of incident angles used - - - - - Number of sample parameters scanned - - - - - Number of time points measured - - - - - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in: - - H. Fujiwara, Spectroscopic ellipsometry: principles and applications, John Wiley & Sons, 2007. - - R. M. A. Azzam and N. M. Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. 
- - Open acces sources: - - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ - - - - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in: - - H. Fujiwara, Spectroscopic ellipsometry: principles and applications, John Wiley & Sons, 2007. - - R. M. A. Azzam and N. M. Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. - - Open acces sources: - - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ - - - - An application definition for ellipsometry. - - - - Version number to identify which definition of this application definition was - used for this entry/data. - - - - - URL where to find further material (documentation, examples) relevant to the - application definition - - - - - - - - - Unique identifier of the experiment, such as a (globally persistent) unique - identifier. i) The identifier is usually defined by the facility or principle - investigator. ii) The identifier enables to link experiments to e.g. proposals. - - - - - A free-text description of the experiment. What is the aim of the experiment? - The general procedure. - - - - - Start time of the experiment. UTC offset should be specified. - - - - - Commercial or otherwise defined given name to the program that was used to - generate the results file(s) with measured data and metadata (or a link to the - instrument software). - - - - Either version with build number, commit hash, or description of a (online) - repository where the source code of the program and build instructions can be - found so that the program can be configured in such a way that result files can - be created ideally in a deterministic manner. - - - - - Website of the software. - - - - - - Contact information of at least the user of the instrument or the investigator - who performed this experiment. Adding multiple users if relevant is recommended. - - - - Name of the user. - - - - - Name of the affiliation of the user at the point in time when the experiment was - performed. - - - - - Full address (street, street number, ZIP, city, country) of the user's - affiliation. - - - - - Email address of the user. - - - - - Author ID defined by https://orcid.org/. - - - - - Official telephone number of the user. - - - - - - General properties of the ellipsometry equipment - - - - The name of the instrument - - - - The used version of the hardware if available. If not a commercial instrument - use date of completion of the hardware. - - - - - - Name of the company which build the instrument - - - - - ISO8601 date when the instrument was constructed. UTC offset should be - specifiec. - - - - - Name (e.g. commercial) of the software that was used for the measurement - - - - Version and build number or commit hash of the software source code - - - - - Website of the software. - - - - - - Specify the used light source. Multiple selection possible. - - - - - - - - - - - - - If you specified 'other' as light source type, please write down what it is. - - - - - Were focussing probes (lenses) used or not? - - - - - Were the recorded data corrected by the window effects of the lenses or not? - - - - - Specify the angular spread caused by the focussing probes - - - - - What type of ellipsometry was used? See Fujiwara Table 4.2 - - - - - - - - - - - - - - - - - - - Was a calibration done. If yes, when was it done? 
- - - - - - - - - - - - Ellipsometers require regular calibration to adjust the hardware parameters for - proper zero values and background light compensation. - - - - If calibtration status is 'calibration time provided', specify the ISO8601 datum - when calibration was last performed before this measurement. UTC offset should - be specified. - - - - - Arrays which provide the measured calibration data. - Multiple sets are possible, e.g. Psi and delta measured on an - e.g. silicon calibration waver, and the straight-through data. - - We - recommend to - - provide data that is measured under the same settings - - as the measurement was performed, that is if Psi and delta are measured - for your data, also provide Psi and delta here. - And use the same wavelenghts as there." - - - - What data was recorded for the calibration, The number of variables - (N_variables) have to be set to the number of provided data columns accordingly, - e.g. psi/delta -> N_variables= 2, Jones vector: N_variables = 4, Mueller martix - -> N_variables= 16, etc. - - - - - - - - - - - - angle(s) of incidence used during the calibration measurement (excluding - straight through mode) - - - - - - - - The wavelength or equivalent values (which are inter-convertible). The importer should convert all to one unit, and make the others accessible. Historically, energy is used in eV, but for visible spectroscopy wavelength is more common, for IR wave numbers in 1/cm units. - Possibly use the same type of data as for the measurement! - - - - - - - - Calibration is performed on a reference surface (usually silicon wafer with well - defined oxide layer) at a number of angles, then in a straight through mode - (transmission in air). - - - - - - - - - - - Free-text to describe which sample was used for calibration, e.g. silicon wafer - with 25 nm thermal oxide layer. - - - - - - Incident angle of the beam vs. the normal of the bottom reflective (substrate) - surface in the sample - - - - - - - - Sample stage, holding the sample at a specific position in X,Y,Z (Cartesian) - coordinate system and at an orientation defined by three Euler angles (alpha, - beta, gamma). The stage may be motorized or manual, special for liquids or gas - environment. - - - - - - - - - - - A free-text field to provide information about the stage. - - - - - The stage coordinate system vs. the incident beam. The Z-axis of the stage is considered to point along the normal of the substrate (bottom reflecting surface) from the stage towards the general direction of the light source. The beam comes with angle of incidence towards this Z-axis, but in opposite direction, thus they are connected with a rotation of 180 - angle of incidence (in degrees). - This transformation brings us from the NEXUS coordinates to the stage coordinates. - Then provide the set of translations (if there are any). These all have a vector defining their relative direction in the current coordinate system. (This current coordinate system changes with every transformation if you set the parameter 'depends' to the name of the previous step.) - Last, provide the rotations of the sample - - - - If there is no motorized stage, we should at least qualify where the beam hits - the sample and in what direction the sample stands in a free-text description, - e.g. 'center of sample, long edge parallel to plane of incidence'. - - - - - - - For environmental measurements, the environment (liquid, vapor, vacuum etc.) 
is - enclosed in a cell or cryostat, which has windows both in the direction of the - source and the detector (looking from the sample). These windows also add a - phase shift to the light altering the measured signal. This shift has to be - corrected based on measuring a known sample in the environmental cell. - - - - The material of the window - - - - - Thickness of the window - - - - - Angle of the window normal (outer) vs. the substrate normal (similar to the - angle of incidence). - - - - - Recorded data that can be used to calculate the window effect. Typically this is - the substrate (e.g. silicon with thermal oxide layer) in air without window and - in a known medium with the window. - - - - What sample was used to estimate the window effect. - - - - - Use the same wavelengths at which all other measurements are recorded - - - - - - - - Recorded data of a reference surface with and without window / medium. - - - - - - - - - - - - - Which type of detector was used, and what is known about it? A detector can be a - photomultiplier (PMT), a CCD in a camera, an array in a spectrometer. If so, the - whole detector unit goes in here. - - - - What kind of detector module is used, e.g. CCD-spectrometer, CCD camera, PMT, - photodiode, etc. - - - - - - - - - - - - - If you specified 'other' as detector type, please write down what it is. - - - - - Integration time for the measurement. Single number or array if it was varied. - - - - - Define how many rotations of the rotating element were taken into account per - spectrum. - - - - - Define which elements rotates, e.g. polarizer or analyzer. - - - - - - - - - - - rotation rate, if the revolution does not change during the measurement. - - - - - Specify maximum and minimum values for the revolution. - - - - - - - - - - Properties of the sample, its history, the sample environment and experimental - conditions (e.g. surrounding medium, temperature, pressure etc.), along with the - data (data type, wavelength array, measured data). - - - - Use Hill's system for listing elements of the periodic table which are inside or - attached to the surface of the specimen and thus relevant from a scientific - point. The purpose of this field is to allow materials database to parse the - relevant elements without having to interpret the sample history or other - fields. - - - - - Descriptive name of the sample - - - - - Ideally, a reference to the location or a unique (globally persistent) - identifier (e.g.) of e.g. another file which gives as many as possible details - of the material, its microstructure, and its thermo-chemo-mechanical - processing/preparation history. In the case that such a detailed history of the - sample is not available, use this field as a free-text description to specify - details of the sample and its preparation. - - - - - ISO 8601 date with time zone specified. UTC offset should be specifiec. - - - - - Qualitative description of the layer structure for the sample. For example: - Si/native oxide/thermal oxide/polymer/peptide - - - - - An identifier to correlate data to the experimental conditions, if several were - used in this measurement; typically an index of 0 - N - - - - - Select which type of data was recorded, for example Psi and Delta (see: - https://en.wikipedia.org/wiki/Ellipsometry#Data_acquisition). It is possible to - have multiple selections. Data types may also be converted to each other, e.g. a - Mueller matrix contains N,C,S data as well. 
This selection defines how many - columns (N_variables) are stored in the data array. - - - - - - - - - - - - - Wavelength value(s) used for the measurement. - An array of 1 or more elements. Length defines N_wavelength - - - - - - - - Resulting data from the measurement, described by data type. - Minimum two columns containing Psi and delta, or for the normalized Mueller matrix, it may be 16 (or 15 if 1,1 is all 1). - - - - - - - - - - - - Specified uncertainties (errors) of the data described by data type. The - structure is the same as for the measured data. - - - - - - - - - - - - An array of relative time points if a time series was recorded - - - - - Describe what was the medium above or around the sample. The common model is - built up from substrate to the medium on the other side. Both boundaries are - assumed infinite in the model. Here define the name of the material (e.g. water, - air, etc.). - - - - - Array of pairs of complex refractive indices of the medium for every measured - wavelength. Only necessary if the measurement was performed not in air, or - something very well known, e.g. high purity water. Specify the complex - refractive index: n + ik - - - - - - - - External parameters that have influenced the sample. - - - - - How many measurements were done varying the parameters? This forms an extra - dimension beyond incident angle, time points and energy / wavelength (this is - the length of the 4th dimension of the data). Defaults to 1. - - - - - Indicates which parameter was changed. Its definition must exist below. The - specified variable has to be number_of_runs long, providing the parameters for - each data set. - - - - - - - - - - - - - Was the sample modified using an optical source? Describe in this group the - parameters of the optical excitation used. - - - - Specify the source for the external excitation - - - - - Wavelength value(s) or the range used for excitation. - In cases of continuous laser radiation a value or a set of values may do but for other illumination types, such as pulsed lasers, or lamps, a range may describe the source better. - - - - - Specify the FWHM of the excitation - - - - - CW or pulsed excitation - - - - - - - - - Duration of one laser pulse. - - - - - Repetition rate of the laser. - - - - - How long was the sample excited. - - - - - The integrated energy of light pulse. - - - - - The power of one laser pulse. - - - - - - Specify the voltage if the spectra were taken under bias - - - - - Temperature of the sample (sample holder, medium) - - - - - pH of medium (measured or set) - - - - - Pressure of the environment of the sample. - - - - - - What parameters are derived from the above data. - - - - Light loss due to depolarization as a value in [0-1]. - - - - - - A default view of the data, in this case Psi vs. wavelength and the angles of - incidence. If Psi does not exist, use other Müller matrix elements, such as N, C - and S. - - - - We recommend to use wavelength as a default attribute, but it can be replaced in - the case of not full spectral ellipsometry to any suitable parameter along the - X-axis. - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXellipsometry.yaml b/tests/data/nyaml2nxdl/Ref_NXellipsometry.yaml deleted file mode 100644 index 99849a24c..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXellipsometry.yaml +++ /dev/null @@ -1,271 +0,0 @@ -category: application -doc: "draft application definition for ellipsometry measurements, including complex systems up to variable angle spectroscopic ellipsometry." 
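The `plot(NXdata)` group at the end of the reference XML above, with its `\@axes` doc string, describes a default view of the data. For orientation, here is a minimal sketch of wiring up such a default plot in an HDF5 file with h5py; the file name, dataset names, and values are illustrative assumptions, not part of the fixture.

```python
# Hedged sketch: a default NXdata plot (Psi vs. wavelength), findable via the
# 'default' attributes. All names and values below are illustrative.
import h5py
import numpy as np

with h5py.File("ellips_demo.nxs", "w") as f:        # hypothetical file
    entry = f.create_group("entry")
    plot = entry.create_group("plot")
    plot.attrs["NX_class"] = "NXdata"
    plot.create_dataset("wavelength", data=np.linspace(300.0, 900.0, 601))
    plot.create_dataset("psi", data=np.zeros(601))  # placeholder data
    plot.attrs["signal"] = "psi"
    plot.attrs["axes"] = "wavelength"  # replaceable, as the doc string notes
    entry.attrs["default"] = "plot"    # lets viewers locate the default plot
    f.attrs["default"] = "entry"
```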
-symbols: - doc: "Variables used throughout the document, e.g. dimensions and important parameters" - angle_of_incidence: "The angle of incidence to the surface normal (stage normal) of the sample" - N_wavelength: "Size of the energy / wavelength vector used" - N_variables: "How many variables are saved in a measurement (e.g. Psi and delta, Mueller matrix)" - N_angles: "Number of incident angles used" - N_p1: "Number of first sample parameters scanned" - N_time: "Number of time points measured" -type: group -NXellipsometry_base_draft(NXobject): - (NXentry): - doc: "to be defined" - exists: required - \@entry: - doc: "NeXus convention is to use entry1, entry2, for analysis software to locate each entry." - experiment_identifier(NX_CHAR): - exists: required - doc: "Unique identifier of the experiment, such as a (globally persistent) unique identifier. The identifier is usually defined by the facility or principle investigator. The identifier enables to link experiments to e.g. proposals." - experiment_description(NX_CHAR): - exists: required - start_time(NX_DATE_TIME): - exists: required - unit: NX_TIME - program_name(NX_CHAR): - doc: "Commercial or otherwise defined given name to the program that was used to generate the results file(s) with measured data and metadata." - program_version(NX_CHAR): - doc: "Either version with build number, commit hash, or description of a (online) repository where the source code of the program and build instructions can be found so that the program can be configured in such a way that result files can be created ideally in a deterministic manner." - time_zone(NX_DATE_TIME): - exists: required - doc: "ISO 8601 time_zone offset from UTC." - definition_local(NX_CHAR): - doc: "FAIRmat-specific candidate proposal for an application definition exemplifying ellipsometry." - \@version: - doc: "Ideally version with build number are commit hash of the application definition. If not available a free-text description." - \@url: - doc: "URL where to find further material (documentation, examples) relevant to the application definition" - operator(NXuser): - exists: [min, 1, max, unbounded] - doc: "Contact information of at least the user of the instrument or the principal investigator who performed this experiment. Adding multiple users if relevant is recommended." - name(NX_CHAR): - exists: required - affiliation(NX_CHAR): - exists: recommended - doc: "Name of the affiliation of the user at the point in time when the experiment was performed." - address(NX_CHAR): - exists: recommended - email(NX_CHAR): - exists: required - orcid(NX_CHAR): - exists: recommended - telephone_number(NX_CHAR): - exists: recommended - (NXmonitor): - instrument(NXinstrument): - exists: required - doc: "General properties of the ellipsometry equipment" - model(NX_CHAR): - doc: "The name of the instrument" - company(NX_CHAR): - doc: "Name of the company" - construction_year(NX_DATE_TIME): - unit: NX_TIME - doc: "ISO8601 date when the instrument was constructed" - hardware_version(NX_CHAR): - doc: "The used version of the hardware if available" - software_name(NX_CHAR): - doc: "Name (e.g. commercial) of the software that was used for the measurement" - software_version(NX_CHAR): - doc: "Version and build number or commit hash of the software source code" - bandwidth(NX_NUMBER): - unit: NX_WAVELENGTH - doc: "Specify the bandwidth of the light" - light_source(NX_CHAR): - doc: "Specify the used light source" - focussing_probes(NX_BOOLEAN): - doc: "Were focussing probes (lenses) used or not?" 
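The `exists:` shorthand appearing in this fixture (`required`, `recommended`, and list forms such as `[min, 1, max, unbounded]`) corresponds to occurrence constraints in NXDL. One plausible mapping, spelled out for illustration only and not copied from the converter:

```python
# Sketch of how 'exists' keywords could map onto NXDL occurrence attributes;
# this mapping is an assumption for illustration, not the converter's logic.
def exists_to_attrs(exists):
    if isinstance(exists, list):  # e.g. ['min', '1', 'max', 'unbounded']
        pairs = dict(zip(exists[0::2], exists[1::2]))
        return {f"{key}Occurs": str(val) for key, val in pairs.items()}
    return {
        "required": {},  # the default in application definitions
        "recommended": {"recommended": "true"},
        "optional": {"optional": "true"},
    }[exists]

print(exists_to_attrs(["min", "1", "max", "unbounded"]))
# {'minOccurs': '1', 'maxOccurs': 'unbounded'}
print(exists_to_attrs("recommended"))
# {'recommended': 'true'}
```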
- data_correction(NX_BOOLEAN): - doc: "Were the recorded data corrected by the window effects of the lenses or not?" - angular_spread(NX_NUMBER): - unit: NX_ANGLE - doc: "Specify the angular spread caused by the focussing probes" - ellipsometry_type(NX_CHAR): - doc: "What type of ellipsometry was used? See Fujiwara Table 4.2." - enumeration: [rotating analyzer, rotating analyzer with analyzer compensator, rotating analyzer with polarizer compensator, rotating polarizer, rotating compensator on polarizer side, rotating compensator on analyzer side, modulator on polarizer side, modulator on analyzer side, dual compensator, phase modulation, imaging ellipsometry, null ellipsometry] - calibration(NXprocess): - doc: "ellipsometers require regular calibration to adjust the hardware parameters for proper zero values and background light compensation" - calibration_time(NX_DATE_TIME): - doc: "ISO8601 datum when calibration was last performed before this measurement" - calibration_provided(NX_BOOLEAN): - doc: "Are the measured data provided?" - calibration_data(NXdata): - doc: "Arrays which provide the measured calibration data. Multiple sets are possible, e.g. Psi and delta measured on an e.g. silicon calibration waver, and the straight-through data." - data(NX_CHAR): - doc: "to be defined" - enumeration: [psi/delta, tan(psi)/cos(delta), Jones matrix, Mueller matrix] - angle_of_incidence(NX_NUMBER): - unit: NX_ANGLE - doc: "angle(s) of incidence used during the calibration measurement" - wavelength(NX_NUMBER): - unit: NX_LENGTH - doc: "The wavelength or equivalent values (, which are inter-convertible). The importer should convert all to one unit, and make the others accessible. Historically, energy is used in eV, but for visible spectroscopy wavelength is more common, for IR wave numbers in 1/cm units." - calibration_data(NX_NUMBER): - unit: NX_UNITLESS - doc: "to be defined" - calibration_sample(NX_CHAR): - doc: "Free-text to describe which sample was used for calibration, e.g. silicon wafer with 25 nm thermal oxide layer" - angle_of_incidence(NX_NUMBER): - unit: NX_ANGLE - doc: "the incident angle of the beam vs. the normal of the sample surface" - \@target: - dimensions: - rank: 1 - dim: [[1, N_angles]] - stage(NXstage): - exists: required - doc: "Where and how is the sample mounted" - enumeration: [manual stage, scanning stage, liquid stage, gas cell] - window(NXcollection): - doc: "For environmental measurements, if a window is between the sample and the optics of the ellipsometer, describe its properties." - thickness(NX_NUMBER): - unit: NX_LENGTH - doc: "Thickness of the window" - orientation_angle(NX_NUMBER): - unit: NX_ANGLE - doc: "Angle in the plane of incidence" - calibration_data(NXdata): - doc: "to be defined" - wavelength(NX_NUMBER): - unit: NX_LENGTH - doc: "to be defined" - data array(NX_NUMBER): - unit: NX_UNITLESS - doc: "to be defined" - calibration_sample(NX_CHAR): - doc: "Which sample was used to calibrate the window effect?" - detector(NXdetector): - doc: "Which type of detector was used, and what is known about it? A detector can be a photomultiplier (PMT), a CCD in a camera, an array in a spectrometer. If so, the whole unit goes in here." - detector_type(NX_CHAR): - exists: required - doc: "What kind of detector module is used, e.g. CCD-spectrometer, CCD camera, PMT, photodiode, etc." - duration(NX_NUMBER): - unit: NX_TIME - doc: "Integration time for the measurement. Single number or array if it was varied." 
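Enumerations such as the `ellipsometry_type` list in this fixture constrain a field to a closed vocabulary. A small hedged sketch of such a membership check (the helper is hypothetical, and the list is abbreviated to the entries shown in the fixture):

```python
# Hedged sketch: reject values that are not in an NXDL enumeration, e.g. the
# ellipsometry_type list above (abbreviated here).
ELLIPSOMETRY_TYPES = [
    "rotating analyzer",
    "rotating polarizer",
    "dual compensator",
    "imaging ellipsometry",
    "null ellipsometry",
    # ... remaining entries as enumerated in the fixture
]

def check_enum(value, allowed, field="ellipsometry_type"):
    if value not in allowed:
        raise ValueError(f"{field}={value!r} not in enumeration {allowed}")
    return value

check_enum("rotating analyzer", ELLIPSOMETRY_TYPES)
```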
- revolution(NX_NUMBER): - unit: NX_ANY - doc: "Define how many rotations of the rotating element were taken into account for one spectra." - rotating_element(NX_CHAR): - doc: "Define which elements rotates" - enumeration: [polarizer (source side), polarizer (detector side), compensator (source side), ccompensator (detector side)] - fixed_revolution(NX_NUMBER): - unit: NX_PER_TIME - doc: "if the revolution does not change during the measurement." - variable revolution(NX_NUMBER): - doc: "Specify maximum and minimum values for the revolution." - dimensions: - rank: 1 - dim: [[1, 2]] - sample(NXsample): - exists: required - atom_types(NX_CHAR): - exists: required - doc: "Use Hill's system for listing elements of the periodic table which are inside or attached to the surface of the specimen and thus relevant from a scientific point. The purpose of this field is to allow materials database to parse the relevant elements without having to interpret the sample history or other fields." - name(NX_CHAR): - exists: required - sample_history(NX_CHAR): - exists: required - doc: "Ideally, a reference to the location or a unique (globally persistent) identifier (e.g.) of e.g. another file which gives as many as possible details of the material, its microstructure, and its thermo-chemo-mechanical processing/preparation history. In the case that such a detailed history of the sample is not available, use this field as a free-text description to specify details of the sample and its preparation." - preparation_date(NX_DATE_TIME): - exists: required - unit: NX_TIME - preparation_time_zone(NX_DATE_TIME): - exists: required - unit: NX_TIME - doc: "ISO 8601 time_zone offset from UTC. The time zone can be different to the time zone of this experiment description because maybe the sample was prepared by one international group and is then measured in a different time zone." - description(NX_CHAR): - doc: "Specimen/sample preparation and previous processing steps is the history which the sample carries when it is mounted in the electron microscope. Therefore, preparation details and other points of this history should be stored in sample_history." - layer structure(NX_CHAR): - doc: "Qualitative description of the layer structure for the sample in cases where a detailed geometrical description is not available or desired/required." - orientation(NX_NUMBER): - unit: NX_ANGLE - doc: "Euler angles of stress relative to sample" - dimensions: - rank: 1 - dim: [[1, 3]] - position(NX_NUMBER): - unit: NX_LENGTH - doc: "Specifiy the position (e.g. in a line scan) with respect to a reference point" - dimensions: - rank: 1 - dim: [[1, 3]] - data_identifier(NX_NUMBER): - doc: "A identifier to correlate data to the experimental conditions, if several were used in this measurement; typically an index of 0 - N" - data_type(NX_CHAR): - exists: required - doc: "to be defined" - enumeration: [psi / delta, tan(psi)/cos(delta), Mueller matrix, Jones matrix, raw data] - number_of_variables(NX_INT): - doc: "specify the number of variables stored, e.g. psi, delta and their errors are 4 (this can be also automated, based on the provided data table)" - wavelength(NX_NUMBER): - unit: NX_LENGTH - doc: "Range, to be further specified" - (NXdata): - doc: "Resulting data from the measurement, described by data type. Minimum two columns, if errors are available twice as many. For a Mueller matrix, it may be nine (1,1 is all 1, the rest is symmetric)." 
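To make the rank-5 data layout in this fixture concrete: the measured data spans time points, scanned parameters, incidence angles, data-type variables, and wavelengths. A purely illustrative numpy construction follows; the symbol sizes and the slowest-to-fastest axis order are assumptions, not prescribed by the fixture.

```python
# Illustrative only: a measured_data array with the rank-5 layout used here;
# sizes and the axis order (N_time, N_p1, N_angles, N_variables, N_wavelength)
# are assumptions for the sake of the example.
import numpy as np

N_time, N_p1, N_angles, N_variables, N_wavelength = 1, 1, 3, 2, 1024
measured_data = np.zeros((N_time, N_p1, N_angles, N_variables, N_wavelength))

# e.g. Psi (variable 0) at the second angle of incidence, all wavelengths:
psi = measured_data[0, 0, 1, 0, :]
```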
- data(NX_NUMBER): - dimensions: - rank: 5 - dim: [[5, N_time], [4, N_p1], [3, N_angles], [2, N_variables], [1, N_wavelength]] - stage(NX_CHAR): - doc: "A link to the already existing information about sample position." - angle_of_incidence(NX_CHAR): - doc: "The incident angle of the beam vs. the normal of the sample surface." - time_points(NX_NUMBER): - unit: NX_TIME - doc: "An array of relative time points if a time series was recorded" - medium(NX_CHAR): - exists: required - doc: "Describe what was the medium above or around the sample. The common model is built up from substrate to the medium on the other side. Both boundaries are assumed infinite in the model. Here define the name of the material (e.g. water, air, etc.)." - alternative(NX_NUMBER): - unit: NX_UNITLESS - doc: "Array of pairs of complex refractive indices of the medium for every measured wavelength." - dimensions: - rank: 2 - dim: [[1, N_wavelength], [2, 2]] - environment_conditions(NX_CHAR): - doc: "External parameters that have influenced the sample." - number_of_runs(NX_UINT): - doc: "How many measurements were done varying the parameters? This forms an extra dimension beyond incident angle and energy / wavelength." - varied_parameters(NX_CHAR): - doc: "this is to indicate which parameter was changed. Its definition must exist below. The specified variable has to be number_of_runs long, providing the parameters for each data set." - enumeration: [optical excitation, voltage, temperature, pH, stress, stage positions] - length_of_runs(NX_UINT): - unit: NX_DIMENSIONLESS - doc: "Provide the number of parameters used, N_p1" - optical_excitation(NX_BOOLEAN): - doc: "Describe if the spectra where taken under optical excitation" - excitation_source(NX_CHAR): - doc: "Specify the source for the external excitation" - broadening(NX_NUMBER): - unit: NX_LENGTH - doc: "Specify the FWHM of the excitation" - excitation_type(NX_CHAR): - doc: "CW or pulsed excitation" - enumeration: [cw, pulsed] - pulse_length(NX_NUMBER): - unit: NX_TIME - repetition_rate(NX_NUMBER): - unit: NX_FREQUENCY - pulse_energy(NX_NUMBER): - unit: NX_ENERGY - doc: "to be define" - excitation power(NX_NUMBER): - unit: NX_ENERGY - voltage(NX_NUMBER): - unit: NX_VOLTAGE - doc: "If the spectra were taken under bias" - temperature(NX_NUMBER): - unit: nx_temperature - doc: "to be defined" - ph(NX_NUMBER): - unit: NX_UNITLESS - doc: "to be defined, how measured?" - stress(NX_NUMBER): - unit: NX_PRESSURE - doc: "to be defined, only qualitative (atmospheric) pressure or really the applied continuum stress/strain tensor on the sample?" - derived_parameters(NXcollection): - doc: "What parameters are derived from the above data" - depolarization(NX_NUMBER): - unit: NX_UNITLESS - doc: "to be defined" diff --git a/tests/data/nyaml2nxdl/Ref_NXentry.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXentry.nxdl.xml deleted file mode 100644 index 7f547bcd3..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXentry.nxdl.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - oki - - - - my nice doc string in root level. - my nice doc string in root level, line 2. - - - my nice doc string. - my nice doc string, line 2. - - - - - my nice single line doc string - - - Extended title for entry - - - My not very proper doc string, it is supported though - - - - - My not very proper doc string, it is supported though - Point-1: - my not very proper doc string, line2 - my not very proper doc string, line3 - - - My single line doc string. 
- - - - My single line doc string, with doc tags in different lines - - - - - Yet another doc string not very proper but supported - - - - Yet another doc string not very proper but supported - Yet another doc string not very proper but supported, line2 - Version of UUID used - - - - Trailing line doc stringy. Trailing lines are removed - - - - - - - - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXentry.yaml b/tests/data/nyaml2nxdl/Ref_NXentry.yaml deleted file mode 100644 index 0f1a2f1a3..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXentry.yaml +++ /dev/null @@ -1,136 +0,0 @@ -category: base -doc: | - my nice doc string in root level. - my nice doc string in root level, line 2. -type: group -NXentry(NXobject): - \@default: - doc: | - oki - (NXdata): - doc: | - my nice doc string. - my nice doc string, line 2. - \@IDF_Version: - - # as ratified at NIAC2010 - doc: | - my nice single line doc string - title: - doc: | - Extended title for entry - collection_description: - doc: | - My not very proper doc string, it is supported though - experiment_identifier: - doc: | - My not very proper doc string, it is supported though - Point-1: - my not very proper doc string, line2 - my not very proper doc string, line3 - experiment_description: - doc: | - My single line doc string. - (NXnote)experiment_documentation: - doc: | - My single line doc string, with doc tags in different lines - collection_identifier: - doc: | - Yet another doc string not very proper but supported - entry_identifier_uuid: - doc: | - Yet another doc string not very proper but supported - Yet another doc string not very proper but supported, line2 - \@version: - doc: | - Version of UUID used - entry_identifier: - doc: | - Trailing line doc stringy. Trailing lines are removed - (NXuser): - (NXsample): - (NXinstrument): - (NXcollection): - (NXmonitor): - (NXparameters): - (NXprocess): - (NXsubentry): - -# ++++++++++++++++++++++++++++++++++ SHA HASH ++++++++++++++++++++++++++++++++++ -# 6e5f16c6d106f3b59aa4df6a9f254e1ba2041ed235e1f4377d7788adcb8f01a9 -# -# -# -# -# -# -# oki -# -# -# -# my nice doc string in root level. -# my nice doc string in root level, line 2. -# -# -# my nice doc string. -# my nice doc string, line 2. -# -# -# -# -# my nice single line doc string -# -# -# Extended title for entry -# -# -# My not very proper doc string, it is supported though -# -# -# -# -# My not very proper doc string, it is supported though -# Point-1: -# my not very proper doc string, line2 -# my not very proper doc string, line3 -# -# -# My single line doc string. -# -# -# -# My single line doc string, with doc tags in different lines -# -# -# -# -# Yet another doc string not very proper but supported -# -# -# -# Yet another doc string not very proper but supported -# Yet another doc string not very proper but supported, line2 -# Version of UUID used -# -# -# -# Trailing line doc stringy. 
Trailing lines are removed -# -# -# -# -# -# -# -# -# -# -# -# diff --git a/tests/data/nyaml2nxdl/Ref_NXmytests.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXmytests.nxdl.xml deleted file mode 100644 index e39391585..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXmytests.nxdl.xml +++ /dev/null @@ -1,112 +0,0 @@ - - - - - - - symbolic array lengths to be coordinated between various fields - - - - number of temperatures - - - - - number of values in applied electric field - - - - - number of values in applied magnetic field - - - - - number of values in applied pressure field - - - - - number of values in applied stress field - - - - - This is a test file for checking the correct parsing of several fields and - attributes in nxdl files - - - - Distance from sample - - - - - Energy on entering beamline component - - - - - - - - source operating mode - - - - - for storage rings - - - - - for storage rings - - - - - - - Applied electric field - - - - - - - - - - - - - - - Sample temperature. This could be a scanned variable - - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXnested_symbols.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXnested_symbols.nxdl.xml deleted file mode 100644 index 6447a397f..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXnested_symbols.nxdl.xml +++ /dev/null @@ -1,89 +0,0 @@ - - - - - - - teststring - - - - Number of fast axes (acquired simutaneously) e.g. emission angle, kinetic energy - - - - - Number of slow axes (acquired scanning a physical quantity) e.g. lens voltage, - photon energy or temperature - - - - - Number of points in the first angular direction - - - - - Number of points in the energy dispersion direction - - - - - Test case for verifying handling of symbols inside a nexus class in nested - layers of the hierarchy - - - - - teststring - - - - number of compositions - - - - - number of temperatures - - - - - - - another nest - - - - parameter1 - - - - - parameter2 - - - - - - diff --git a/tests/data/nyaml2nxdl/Ref_NXtest_links.nxdl.xml b/tests/data/nyaml2nxdl/Ref_NXtest_links.nxdl.xml deleted file mode 100644 index 40140816d..000000000 --- a/tests/data/nyaml2nxdl/Ref_NXtest_links.nxdl.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - Test case for verifying that the parser can handle links correctly. - - - - - - diff --git a/tests/nyaml2nxdl/README.md b/tests/nyaml2nxdl/README.md deleted file mode 100644 index 7a7198269..000000000 --- a/tests/nyaml2nxdl/README.md +++ /dev/null @@ -1,5 +0,0 @@ -This is the place for storing code for tests of the yaml2nxdl and nxdl2yaml NeXus schema translation routines. - -## Contact person in FAIRmat for these tests -1. Rubel Mozumder -2. Andrea Albino \ No newline at end of file diff --git a/tests/nyaml2nxdl/test_nyaml2nxdl.py b/tests/nyaml2nxdl/test_nyaml2nxdl.py deleted file mode 100755 index d0c9f875a..000000000 --- a/tests/nyaml2nxdl/test_nyaml2nxdl.py +++ /dev/null @@ -1,372 +0,0 @@ -#!/usr/bin/env python3 -"""This tool accomplishes some tests for the yaml2nxdl parser - -""" -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import sys -import filecmp -from datetime import datetime -from pathlib import Path -import xml.etree.ElementTree as ET -import pytest -from click.testing import CliRunner -import pynxtools.nyaml2nxdl.nyaml2nxdl as nyml2nxdl -from pynxtools.nyaml2nxdl import nyaml2nxdl_forward_tools - - -def delete_duplicates(list_of_matching_string): - """ - Delete duplicate from lists - """ - return list(dict.fromkeys(list_of_matching_string)) - - -def check_file_fresh_baked(test_file): - """ - Get sure that the test file is generated by the converter - """ - path = Path(test_file) - timestamp = datetime.fromtimestamp(path.stat().st_mtime).strftime("%d/%m/%Y %H:%M") - now = datetime.now().strftime("%d/%m/%Y %H:%M") - assert timestamp == now, 'xml file not generated' - - -def find_matches(xml_file, desired_matches): - """ - Read xml file and find desired matches. Return a list of two lists in the form: - [[matching_line],[matching_line_index]] - """ - with open(xml_file, 'r') as file: - xml_reference = file.readlines() - lines = [] - lines_index = [] - found_matches = [] - for i, line in enumerate(xml_reference): - for desired_match in desired_matches: - if str(desired_match) in str(line): - lines.append(line) - lines_index.append(i) - found_matches.append(desired_match) - # ascertain that all the desired matches were found in file - found_matches_clean = delete_duplicates(found_matches) - assert len(found_matches_clean) == len(desired_matches), 'some desired_matches were \ -not found in file' - return [lines, lines_index] - - -def compare_matches(ref_xml_file, test_yml_file, test_xml_file, desired_matches): - """ - Check if a new xml file is generated - and if test xml file is equal to reference xml file - """ - # Reference file is read - ref_matches = find_matches(ref_xml_file, desired_matches) - # Test file is generated - runner = CliRunner() - result = runner.invoke(nyml2nxdl.launch_tool, ['--input-file', test_yml_file]) - assert result.exit_code == 0 - check_file_fresh_baked(test_xml_file) - # Test file is read - test_matches = find_matches(test_xml_file, desired_matches) - assert test_matches == ref_matches - - -def test_links(): - """ - Check the correct parsing of links - """ - data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '../data/nyaml2nxdl') - ref_xml_link_file = 'tests/data/nyaml2nxdl/Ref_NXtest_links.nxdl.xml' - test_yml_link_file = 'tests/data/nyaml2nxdl/NXtest_links.yaml' - test_xml_link_file = 'tests/data/nyaml2nxdl/NXtest_links.nxdl.xml' - # ref_xml_link_file = os.path.abspath(data_path + '/Ref_NXtest_links.nxdl.xml') - # test_yml_link_file = os.path.abspath(data_path + '/NXtest_links.yaml') - # test_xml_link_file = os.path.abspath(data_path + '/NXtest_links.nxdl.xml') - desired_matches = [''] - compare_matches( - ref_xml_link_file, - test_yml_link_file, - test_xml_link_file, - desired_matches) - os.remove('tests/data/nyaml2nxdl/NXtest_links.nxdl.xml') - sys.stdout.write('Test on links okay.\n') - - -def test_docs(): - """In this test an xml file in converted to yml and then back to xml. - The xml trees of the two files are then compared. 
- """ - ref_xml_file = 'tests/data/nyaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml' - test_yml_file = 'tests/data/nyaml2nxdl/NXellipsometry-docCheck.yaml' - test_xml_file = 'tests/data/nyaml2nxdl/NXellipsometry-docCheck.nxdl.xml' - desired_matches = [''] - compare_matches( - ref_xml_file, - test_yml_file, - test_xml_file, - desired_matches) - os.remove('tests/data/nyaml2nxdl/NXellipsometry-docCheck.nxdl.xml') - sys.stdout.write('Test on documentation formatting okay.\n') - - -def test_nxdl2yaml_doc_format_and_nxdl_part_as_comment(): - """ - This test for two reason: - 1. In test-1 an nxdl file with all kind of doc formats are translated - to yaml to check if they are correct. - 2. In test-2: Check the nxdl that comes at the end of yaml file as comment. - """ - ref_xml_file = 'tests/data/nyaml2nxdl/Ref_NXentry.nxdl.xml' - ref_yml_file = 'tests/data/nyaml2nxdl/Ref_NXentry.yaml' - test_yml_file = 'tests/data/nyaml2nxdl/Ref_NXentry_parsed.yaml' - result = CliRunner().invoke(nyml2nxdl.launch_tool, ['--input-file', ref_xml_file]) - assert result.exit_code == 0 - check_file_fresh_baked(test_yml_file) - - result = filecmp.cmp(ref_yml_file, test_yml_file, shallow=False) - assert result, 'Ref YML and parsed YML\ -has not the same structure!!' - os.remove(test_yml_file) - sys.stdout.write('Test on xml -> yml doc formatting okay.\n') - - -def test_fileline_error(): - """ - In this test the yaml fileline in the error message is tested. - """ - test_yml_file = 'tests/data/nyaml2nxdl/NXfilelineError1.yaml' - out_nxdl = 'tests/data/nyaml2nxdl/NXfilelineError1.nxdl.xml' - out_yaml = 'tests/data/nyaml2nxdl/temp_NXfilelineError1.yaml' - result = CliRunner().invoke(nyml2nxdl.launch_tool, ['--input-file', test_yml_file]) - assert result.exit_code == 1 - assert '13' in str(result.exception) - os.remove(out_nxdl) - os.remove(out_yaml) - - test_yml_file = 'tests/data/nyaml2nxdl/NXfilelineError2.yaml' - out_nxdl = 'tests/data/nyaml2nxdl/NXfilelineError2.nxdl.xml' - out_yaml = 'tests/data/nyaml2nxdl/temp_NXfilelineError2.yaml' - result = CliRunner().invoke(nyml2nxdl.launch_tool, ['--input-file', test_yml_file]) - assert result.exit_code == 1 - assert '21' in str(result.exception) - os.remove(out_nxdl) - os.remove(out_yaml) - - test_yml_file = 'tests/data/nyaml2nxdl/NXfilelineError3.yaml' - out_nxdl = 'tests/data/nyaml2nxdl/NXfilelineError3.nxdl.xml' - out_yaml = 'tests/data/nyaml2nxdl/temp_NXfilelineError3.yaml' - result = CliRunner().invoke(nyml2nxdl.launch_tool, ['--input-file', test_yml_file]) - assert result.exit_code == 1 - assert '25' in str(result.exception) - os.remove(out_nxdl) - os.remove(out_yaml) - - sys.stdout.write('Test on xml -> yml fileline error handling okay.\n') - - -def test_symbols(): - """ - Check the correct parsing of symbols - """ - ref_xml_symbol_file = 'tests/data/nyaml2nxdl/Ref_NXnested_symbols.nxdl.xml' - test_yml_symbol_file = 'tests/data/nyaml2nxdl/NXnested_symbols.yaml' - test_xml_symbol_file = 'tests/data/nyaml2nxdl/NXnested_symbols.nxdl.xml' - desired_matches = ['', '', '', '', '', - '', ''] - compare_matches( - ref_xml_attribute_file, - test_yml_attribute_file, - test_xml_attribute_file, - desired_matches) - os.remove('tests/data/nyaml2nxdl/NXattributes.nxdl.xml') - sys.stdout.write('Test on attributes okay.\n') - - -def test_extends(): - """ - Check the correct handling of extends keyword - """ - ref_xml_attribute_file = 'tests/data/nyaml2nxdl/Ref_NXattributes.nxdl.xml' - test_yml_attribute_file = 'tests/data/nyaml2nxdl/NXattributes.yaml' - test_xml_attribute_file = 
'tests/data/nyaml2nxdl/NXattributes.nxdl.xml' - runner = CliRunner() - result = runner.invoke(nyml2nxdl.launch_tool, ['--input-file', test_yml_attribute_file]) - assert result.exit_code == 0 - ref_root_node = ET.parse(ref_xml_attribute_file).getroot() - test_root_node = ET.parse(test_xml_attribute_file).getroot() - assert ref_root_node.attrib == test_root_node.attrib - os.remove('tests/data/nyaml2nxdl/NXattributes.nxdl.xml') - sys.stdout.write('Test on extends keyword okay.\n') - - -def test_symbols_and_enum_docs(): - """ - Check the correct handling of empty attributes - or attributes fields, e.g. doc - """ - ref_xml_file = 'tests/data/nyaml2nxdl/Ref_NXmytests.nxdl.xml' - test_yml_file = 'tests/data/nyaml2nxdl/NXmytests.yaml' - test_xml_file = 'tests/data/nyaml2nxdl/NXmytests.nxdl.xml' - desired_matches = ['', '', '', - '', '', '', ' yml -> xml okay.\n') - - -def test_yml_parsing(): - """In this test an xml file in converted to yml and then back to xml. - The xml trees of the two files are then compared. - """ - ref_yml_file = 'tests/data/nyaml2nxdl/Ref_NXellipsometry.yaml' - test_xml_file = 'tests/data/nyaml2nxdl/Ref_NXellipsometry.nxdl.xml' - test_yml_file = 'tests/data/nyaml2nxdl/Ref_NXellipsometry_parsed.yaml' - result = CliRunner().invoke(nyml2nxdl.launch_tool, ['--input-file', ref_yml_file]) - assert result.exit_code == 0 - check_file_fresh_baked(test_xml_file) - result = CliRunner().invoke(nyml2nxdl.launch_tool, ['--input-file', test_xml_file]) - assert result.exit_code == 0 - check_file_fresh_baked(test_yml_file) - - test_yml_tree = nyaml2nxdl_forward_tools.yml_reader(test_yml_file) - - ref_yml_tree = nyaml2nxdl_forward_tools.yml_reader(ref_yml_file) - - assert list(test_yml_tree) == list(ref_yml_tree), 'Ref YML and parsed YML \ -has not the same root entries!!' - os.remove('tests/data/nyaml2nxdl/Ref_NXellipsometry_parsed.yaml') - os.remove('tests/data/nyaml2nxdl/Ref_NXellipsometry.nxdl.xml') - sys.stdout.write('Test on yml -> xml -> yml okay.\n') - - -def test_yml_consistency_comment_parsing(): - """Test comments parsing from yaml. Convert 'yaml' input file to '.nxdl.xml' and - '.nxdl.xml' to '.yaml' - """ - from pynxtools.nyaml2nxdl.comment_collector import CommentCollector - from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import LineLoader - - ref_yml_file = 'tests/data/nyaml2nxdl/Ref_NXcomment.yaml' - test_yml_file = 'tests/data/nyaml2nxdl/Ref_NXcomment_consistency.yaml' - - result = CliRunner().invoke(nyml2nxdl.launch_tool, - ['--input-file', ref_yml_file, - '--check-consistency']) - assert result.exit_code == 0, (f'Exception: {result.exception}, \nExecution Info:' - '{result.exc_info}') - with open(ref_yml_file, 'r', encoding='utf-8') as ref_yml: - loader = LineLoader(ref_yml) - ref_loaded_yaml = loader.get_single_data() - ref_comment_blocks = CommentCollector(ref_yml_file, ref_loaded_yaml) - ref_comment_blocks.extract_all_comment_blocks() - - with open(test_yml_file, 'r', encoding='utf-8') as test_yml: - loader = LineLoader(test_yml) - test_loaded_yaml = loader.get_single_data() - test_comment_blocks = CommentCollector(test_yml_file, test_loaded_yaml) - test_comment_blocks.extract_all_comment_blocks() - - for ref_cmnt, test_cmnt in zip(ref_comment_blocks, test_comment_blocks): - assert ref_cmnt == test_cmnt, 'Comment is not consistent.' - - os.remove(test_yml_file) - - -def test_yml2xml_comment_parsing(): - """To test comment that written in xml for element attributes, e.g. - attribute 'rank' for 'dimension' element and attribute 'exists' for - 'NXentry' group element. 
- """ - input_yml = 'tests/data/nyaml2nxdl/NXcomment_yaml2nxdl.yaml' - ref_xml = 'tests/data/nyaml2nxdl/Ref_NXcomment_yaml2nxdl.nxdl.xml' - test_xml = 'tests/data/nyaml2nxdl/NXcomment_yaml2nxdl.nxdl.xml' - - result = CliRunner().invoke(nyml2nxdl.launch_tool, - ['--input-file', input_yml]) - assert result.exit_code == 0 - - ref_root = ET.parse(ref_xml).getroot() - test_root = ET.parse(test_xml).getroot() - - def recursive_compare(ref_root, test_root): - assert ref_root.attrib.items() == test_root.attrib.items(), ("Got different xml element" - "Atribute.") - if ref_root.text and test_root.text: - assert ref_root.text.strip() == test_root.text.strip(), ("Got differen element text.") - if len(ref_root) > 0 and len(test_root) > 0: - for x, y in zip(ref_root, test_root): - recursive_compare(x, y) - - recursive_compare(ref_root, test_root) - - os.remove(test_xml) From 8540700b16d27962af445ffdf18a4ede205c5fca Mon Sep 17 00:00:00 2001 From: atomprobe-tc Date: Wed, 6 Dec 2023 10:50:30 +0100 Subject: [PATCH 31/84] Rename nxs_hfive parser to signify that the difference with the nxs_ subparsers is not in that input comes from HDF5 files but that the backend code to compute harmonized quantities is pyxem or mtex, therefore, there should also be a nxs_hyperspy parser e.g. for spectra (EDS/EDXS) --- .../readers/em/subparsers/{nxs_hfive.py => nxs_pyxem.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pynxtools/dataconverter/readers/em/subparsers/{nxs_hfive.py => nxs_pyxem.py} (100%) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py similarity index 100% rename from pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py rename to pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py From 71a6a56ecb8af4e3ecf8393722af8696338bb104 Mon Sep 17 00:00:00 2001 From: atomprobe-tc Date: Wed, 6 Dec 2023 13:09:41 +0100 Subject: [PATCH 32/84] Testing pyxem workflow and the large 3D EBSD dataset which takes about 48GB RAM peak --- pynxtools/dataconverter/readers/em/reader.py | 6 +-- .../readers/em/subparsers/nxs_pyxem.py | 2 +- pyxem.batch.sh | 41 +++++++++++++++++++ test.ebsd2d_hdf5.sh | 24 ----------- test.ebsd3d_hdf5.sh | 24 ----------- 5 files changed, 45 insertions(+), 52 deletions(-) create mode 100755 pyxem.batch.sh delete mode 100755 test.ebsd2d_hdf5.sh delete mode 100755 test.ebsd3d_hdf5.sh diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index f53c3ec83..d4f648aae 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -27,7 +27,7 @@ from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser -from pynxtools.dataconverter.readers.em.subparsers.nxs_hfive import NxEmNxsHfiveSubParser +from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver @@ -127,8 +127,8 @@ def read(self, # add further with resolving cases # if file_path is an HDF5 will use hfive parser - sub_parser = "nxs_hfive" - subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0]) + sub_parser = "nxs_pyxem" + subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) subparser.parse(template) # exit(1) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 7e08bebd4..1709cd865 100644 --- 
a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py
@@ -89,7 +89,7 @@ def get_ipfdir_legend(ipf_key):
     return img


-class NxEmNxsHfiveSubParser:
+class NxEmNxsPyxemSubParser:
    """Map content from different types of *.h5 files on an instance of NXem."""

    def __init__(self, entry_id: int = 1, input_file_name: str = ""):
diff --git a/pyxem.batch.sh b/pyxem.batch.sh
new file mode 100755
index 000000000..f26a124a2
--- /dev/null
+++ b/pyxem.batch.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_ebsd_pyxem/"
+
+# 177_0007
+# skip
+# 177_0007 as it is one of the weird examples where the h5py library cannot traverse the content... let's not follow up on this rabbit hole right now
+# 177_0004 has only vertices
+# 177_0005 has only edges
+# 177_0006 has only surface facets
+# 177_0008 out because old 6.0 format which does not store DIMENSIONS, ORIGIN, SHAPE under _SIMPL yet
+# 177_0009 follows the new structure but has no EulerAngles, only Phases; thus, without following the source for the
+# respective filter with yet another logic we have no chance to find the orientation data
+# 226_0010 and _0011 are out because they have only plain images (backscattered electron likely)
+# 226_0013 is out because it has only plain optical image data, no EBSD
+# 244_0014 is out because it does not have any quantity from which to generate a band contrast, confidence index, or MAD for a default plot
+
+# 026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 delmic hdf5 have no ebsd data
+# 173_0056.h5oina has only eds data
+
+# HDF5 files, 2D EBSD
+examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5"
+# dream3d files, 3D EBSD
+# examples="067_0003.dream3d 177_0004.dream3d 177_0005.dream3d 177_0006.dream3d 177_0008.dream3d 177_0009.dream3d 226_0010.dream3d 226_0011.dream3d 226_0012.dream3d 226_0013.dream3d 244_0014.dream3d SmallIN100_Final.dream3d"
+
+# specific examples for testing purposes
+examples="207_2081.edaxh5"
+# examples="173_0057.h5oina"
+# oxford, bruker, britton, edax old noncali, edax old calib, apex
+# examples="173_0057.h5oina 130_0003.h5 088_0009.h5 116_0014.h5 229_2097.oh5 207_2081.edaxh5"
+# examples="229_2096.oh5" # this is the largest EBSD map, a composite
+# examples="229_2097.oh5"
+# examples="067_0003.dream3d SmallIN100_Final.dream3d 244_0014.dream3d"
+# examples="244_0014.dream3d"
+# examples="SmallIN100_Final.dream3d"
+# examples="067_0003.dream3d" # very large 3D EBSD takes ~40GB RAM for processing
+
+for example in $examples; do
+    echo $example
+    dataconverter --reader em --nxdl NXroot --input-file
$datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt +done diff --git a/test.ebsd2d_hdf5.sh b/test.ebsd2d_hdf5.sh deleted file mode 100755 index 977b7a82f..000000000 --- a/test.ebsd2d_hdf5.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - - -# dataconverter --reader em --nxdl NXroot --input-file 130_0003.h5 --output debug.bruker.nxs 1>stdout.bruker.txt 2>stderr.bruker.txt -# dataconverter --reader em --nxdl NXroot --input-file 207_2081.edaxh5 --output debug.apex.nxs 1>stdout.apex.txt 2>stderr.apex.txt -# dataconverter --reader em --nxdl NXroot --input-file 229_2097.oh5 --output debug.edax.nxs 1>stdout.edax.txt 2>stderr.edax.txt -# dataconverter --reader em --nxdl NXroot --input-file 088_0009.h5 --output debug.britton.nxs 1>stdout.britton.txt 2>stderr.britton.txt - -# 026_0007.h5 026_0027.h5 026_0029.h5 026_0030.h5 026_0033.h5 026_0039.h5 026_0041.h5 delmic hdf5 have no ebsd data -# 173_0056.h5oina has only eds data - -Examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" - -# Examples="207_2081.edaxh5" -# Examples="173_0057.h5oina" -# oxford, bruker, britton, edax old noncali, edax old calib, apex -# Examples="173_0057.h5oina 130_0003.h5 088_0009.h5 116_0014.h5 229_2097.oh5 207_2081.edaxh5" - -# Examples="229_2096.oh5" # this is the largest EBSD map, a composite -# Examples="229_2097.oh5" -for example in $Examples; do - echo $example - dataconverter --reader em --nxdl NXroot --input-file $example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt -done diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh deleted file mode 100755 index 5964514e6..000000000 --- a/test.ebsd3d_hdf5.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -Examples="067_0003 177_0004 177_0005 177_0006 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final" -Examples="067_0003 SmallIN100_Final 244_0014" -# 177_0007 -# skip -# 177_0007 as it is one of the weird examples where the h5py library cannot traverse the content... 
let's not follow-up on this rabbit hole right now
-# 177_0004 has only vertices
-# 177_0005 has only edges
-# 177_0006 has only surface facets
-# 177_0008 out because old 6.0 format which does not store DIMENSIONS, ORIGIN, SHAPE under _SIMPL yet
-# 177_0009 follows the new structure but has no EulerAngles only Phases thus without following with yet another logic the source for the
-# respective filter we have no chance to find the orientation data
-# 226_0010 and _0011 are out because they do have only plain images (backscattered electron likely)
-# 226_0013 is out because it has only plain optical image data no EBSD
-# 244_0014 is out because it does not have any quantity whereby to generate a band contrast, confidence index, or mad on to generate a default plot
-
-
-# Examples="SmallIN100_Final"
-# Examples="244_0014"
-for example in $Examples; do
-    echo $example
-    dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt
-done

From 97fcaa1674b172625c25e763dc4b4a44a34e0ca6 Mon Sep 17 00:00:00 2001
From: atomprobe-tc
Date: Wed, 6 Dec 2023 18:27:06 +0100
Subject: [PATCH 33/84] Started the refactoring to always discretize all point
 cloud data which are not collected on a square grid smaller than the maximum
 possible extent supported by h5web; tested with use case 207_2081.edaxh5:
 the resulting ROI map is a square, likely due to improper handling of
 HexGrid; next steps: i) fix this bug for 207_2081, ii) replace xmap in ebsd
 map twod by the discretized grid, iii) test with examples from all other
 tech partners, iv) run against all datasets

---
 .../readers/em/examples/ebsd_database.py      |   3 +
 .../readers/em/subparsers/hfive_apex.py       |  61 ++++++--
 .../readers/em/subparsers/hfive_bruker.py     |  11 +-
 .../em/subparsers/hfive_dreamthreed.py        |   8 +-
 .../readers/em/subparsers/hfive_ebsd.py       |  25 ++-
 .../readers/em/subparsers/hfive_edax.py       |  21 +--
 .../readers/em/subparsers/hfive_oxford.py     |   8 +
 .../readers/em/subparsers/nxs_pyxem.py        |  95 ++++++++----
 .../readers/em/utils/get_scan_points.py       |  80 ++++++++++
 .../readers/em/utils/get_sqr_grid.py          | 145 ++++++++++++++++++
 .../readers/em/utils/hfive_utils.py           |   5 +
 11 files changed, 391 insertions(+), 71 deletions(-)
 create mode 100644 pynxtools/dataconverter/readers/em/utils/get_scan_points.py
 create mode 100644 pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py

diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py
index 620284f23..34e79debd 100644
--- a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py
+++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py
@@ -28,6 +28,9 @@
 # is recoverable when there is no common agreement about the phases used and their
 # exact atomic configuration

+HEXAGONAL_GRID = "hexagonal_grid"
+SQUARE_GRID = "square_grid"
+
 FreeTextToUniquePhase = {"Actinolite": "Actinolite",
                          "al": "Al",
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
index 26008e21b..47c339a91 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -37,7 +37,9 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     read_strings_from_dataset
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    ASSUME_PHASE_NAME_TO_SPACE_GROUP
+    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID
+from pynxtools.dataconverter.readers.em.utils.get_scan_points import \
+    get_scan_point_coords


 class HdfFiveEdaxApexReader(HdfFiveBaseParser):
@@ -106,7 +108,6 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
         if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp:
             raise ValueError(f"Unable to parse {self.prfx}/EBSD/ANG/DATA/DATA !")

-        grid_type = None
         # for a regular tiling of R^2 with perfect hexagons
         n_pts = 0
         # their vertical center of mass distance is smaller than the horizontal
@@ -118,10 +119,14 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
             if f"{self.prfx}/Sample/{req_field}" not in fp:
                 raise ValueError(f"Unable to parse {self.prfx}/Sample/{req_field} !")

+        self.tmp[ckey]["dimensionality"] = 2
         grid_type = read_strings_from_dataset(fp[f"{self.prfx}/Sample/Grid Type"][()])
-        if grid_type not in ["HexGrid", "SqrGrid"]:
-            raise ValueError(f"Grid Type {grid_type} is currently not supported !")
-        self.tmp[ckey]["grid_type"] = grid_type
+        if grid_type == "HexGrid":
+            self.tmp[ckey]["grid_type"] = HEXAGONAL_GRID
+        elif grid_type == "SqrGrid":
+            self.tmp[ckey]["grid_type"] = SQUARE_GRID
+        else:
+            raise ValueError(f"Unable to parse {self.prfx}/Sample/Grid Type !")
         self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0]
         self.tmp[ckey]["s_unit"] = "um"  # "µm"  # TODO::always micron?
         self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0]
@@ -226,12 +231,40 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         # TODO::currently assuming s_x and s_y are already the correct center of mass
         # distances for hexagonal or square tiling of R^2
         # self.tmp[ckey]["grid_type"] in ["HexGrid", "SqrGrid"]:
-        self.tmp[ckey]["scan_point_x"] = np.asarray(
-            np.linspace(0, self.tmp[ckey]["n_x"] - 1,
-                        num=self.tmp[ckey]["n_x"],
-                        endpoint=True) * self.tmp[ckey]["s_x"], np.float32)
-
-        self.tmp[ckey]["scan_point_y"] = np.asarray(
-            np.linspace(0, self.tmp[ckey]["n_y"] - 1,
-                        num=self.tmp[ckey]["n_y"],
-                        endpoint=True) * self.tmp[ckey]["s_y"], np.float32)
+        # for a plain SQUARE_GRID there is no point in explicitly computing the
+        # scan_point coordinates here (for every subparser), especially not when the
+        # respective quantity from the tech partner is just a pixel index, i.e. the
+        # zeroth, first, ... px
+        # however, ideally the tech partners would use the scan_point fields to report
+        # calibrated absolute scan point positions in the local reference frame of the
+        # sample surface, in which case these would not just be scaled positions with
+        # the correct x and y spacing but eventually even the absolute coordinates
+        # where the scan was performed on the sample surface, whereby one could conclude
+        # more precisely where the scanned area was located; in practice this precision
+        # is usually not needed because scientists assume that the ROI is representative
+        # of the material, which they typically never scan completely (time, interest,
+        # costs, instrument availability)!
+        if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
+            print(f"WARNING: {self.tmp[ckey]['grid_type']}: check carefully the " \
+                  f"correct interpretation of scan_point coords!")
+        # the case of EDAX APEX shows the key problem with implicit assumptions:
+        # an edaxh5 file does not necessarily store the scan_point_{dim} positions,
+        # therefore the following code is deprecated as the axes coordinates anyway
+        # have to be recomputed based on whether results are rediscretized on a coarser
+        # grid or not !
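As an aside to the grid-type caveats in these comments, a minimal sketch of how snake-scan coordinates could be generated for both normalized grid types; the helper name gen_snake_scan_coords and the convention that every odd row of a hexagonal grid is offset by half a horizontal step are assumptions of this sketch, not the pynxtools implementation:

import numpy as np

def gen_snake_scan_coords(n_x: int, n_y: int, s_x: float, s_y: float,
                          grid_type: str = "square_grid"):
    """Scan point (x, y) arrays for a top-left to bottom-right line scan."""
    # pixel indices scaled by the step size, row by row
    x = np.tile(np.linspace(0., n_x - 1., num=n_x, endpoint=True) * s_x, n_y)
    y = np.repeat(np.linspace(0., n_y - 1., num=n_y, endpoint=True) * s_y, n_x)
    if grid_type == "hexagonal_grid":
        # assumed convention: odd rows are shifted by half the horizontal step
        odd_rows = (np.repeat(np.arange(n_y), n_x) % 2) == 1
        x[odd_rows] += 0.5 * s_x
    return np.asarray(x, np.float32), np.asarray(y, np.float32)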
+        # mind also that the code below anyway would give only the NeXus dim axis but
+        # not the array of pairs of x, y coordinates for each scan point
+        # TODO::also keep in mind that the order in which the scan points are stored,
+        # i.e. which index on self.tmp[ckey]["euler"] belongs to which scan point,
+        # depends not only on the scan grid but also on the flight plan, i.e. how the
+        # grid gets visited
+        # a snake-like scan (first a line along +x, then stepping along +y, meandering
+        # from the top-left to the bottom-right corner) is JUST an assumption which
+        # people seem to accept in most cases; for a random or dynamically adaptive
+        # scan strategy the scan positions have to be reported anyway
+        # TODO::tech partners should be convinced to export scaled and calibrated scan
+        # positions, as these are not redundant information that can be stripped to
+        # improve the performance of their commercial product; we typically talk about
+        # <5k patterns per second, demanding to store 5k * 2 * 8B; indeed, this is the
+        # non-harmonized content one is facing in the field of EBSD despite almost two
+        # decades of commercialization of the technique now
+        get_scan_point_coords(self.tmp[ckey])
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
index 4af1cd5e0..9457ec46d 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
@@ -39,7 +39,7 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    ASSUME_PHASE_NAME_TO_SPACE_GROUP
+    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID


 class HdfFiveBrukerEspritReader(HdfFiveBaseParser):
@@ -107,6 +107,12 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
         if f"{grp_name}" not in fp:
             raise ValueError(f"Unable to parse {grp_name} !")

+        self.tmp[ckey]["dimensionality"] = 2  # TODO::QUBE can also yield 3D datasets
+        if read_strings_from_dataset(fp[f"{grp_name}/Grid Type"]) == "isometric":
+            self.tmp[ckey]["grid_type"] = SQUARE_GRID
+        else:
+            raise ValueError(f"Unable to parse {grp_name}/Grid Type !")
+
         req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"]
         for req_field in req_fields:
             if f"{grp_name}/{req_field}" not in fp:
@@ -221,6 +227,9 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         # there is X SAMPLE and Y SAMPLE but these are not defined somewhere instead
         # here adding x and y assuming that we scan first lines along positive x and then
         # moving downwards along +y
+        # TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID
+        if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
+            print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
         self.tmp[ckey]["scan_point_x"] \
             = np.asarray(np.tile(np.linspace(0.,
                                              self.tmp[ckey]["n_x"] - 1.,
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py
index bdb739ff0..248be5452 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py
@@ -38,7 +38,7 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     EBSD_MAP_SPACEGROUP, read_strings_from_dataset,
all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID # DREAM3D implements essentially a data analysis workflow with individual steps # in the DREAM3D jargon each step is referred to as a filter, filters have well-defined @@ -312,6 +312,10 @@ def parse_and_normalize_ebsd_header(self, ckey: str): spc = h5r[f"{self.path_registry['group_geometry']}" \ f"/_SIMPL_GEOMETRY/SPACING"][:].flatten() idx = 0 + + # TODO::is it correct an assumption that DREAM3D regrids using square voxel + self.tmp[ckey]["dimensionality"] = 3 + self.tmp[ckey]["grid_type"] = SQUARE_GRID for dim in ["x", "y", "z"]: self.tmp[ckey][f"n_{dim}"] = dims[idx] self.tmp[ckey][f"s_{dim}"] = spc[idx] @@ -388,6 +392,8 @@ def parse_and_normalize_ebsd_data(self, ckey: str): # in effect, the phase_id == 0 rightly so marks position indexed with the null-model # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere + if self.tmp[ckey]["grid_type"] != SQUARE_GRID: + print(f"WARNING: Check carefully correct interpretation of scan_point coords!") for dim in ["x", "y", "z"]: self.tmp[ckey][f"scan_point_{dim}"] \ = np.asarray(np.linspace(0, self.tmp[ckey][f"n_{dim}"] - 1, diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 3a11eddec..8bb2bbeb1 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -38,7 +38,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID class HdfFiveCommunityReader(HdfFiveBaseParser): @@ -108,6 +108,12 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): if f"{grp_name}" not in fp: raise ValueError(f"Unable to parse {grp_name} !") + self.tmp[ckey]["dimensionality"] = 2 + if read_strings_from_dataset(fp[f"{grp_name}/Grid Type"][()]) == "isometric": + self.tmp[ckey]["grid_type"] = SQUARE_GRID + else: + raise ValueError(f"Unable to parse {grp_name}/Grid Type !") + req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"] for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: @@ -223,7 +229,10 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # X and Y # there exist X SAMPLE and Y SAMPLE which give indeed calibrated coordinates # relative to the sample coordinate system, ignore this for now an - # and TOD::just calibrate on image dimension + # and TODO::just calibrate on image dimension + # TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID + if self.tmp[ckey]["grid_type"] != SQUARE_GRID: + print(f"WARNING: Check carefully correct interpretation of scan_point coords!") self.tmp[ckey]["scan_point_x"] \ = np.asarray(np.tile(np.linspace(0., self.tmp[ckey]["n_x"] - 1., @@ -236,17 +245,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): num=self.tmp[ckey]["n_y"], endpoint=True) * self.tmp[ckey]["s_y"], self.tmp[ckey]["n_x"]), np.float32) - - # if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts: - # self.tmp[ckey]["scan_point_x"] \ - # = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32) - # 
else: - # raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !") - # if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts: - # self.tmp[ckey]["scan_point_y"] \ - # = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32) - # else: - # raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !") + # X SAMPLE and Y SAMPLE seem to be something different! # Band Contrast is not stored in Bruker but Radon Quality or MAD # but this is s.th. different as it is the mean angular deviation between diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 586179a51..8e3fc1164 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -39,7 +39,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import EULER_SPACE_SYMMETRY, \ read_strings_from_dataset, read_first_scalar, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser): @@ -110,17 +110,20 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): if f"{grp_name}" not in fp: raise ValueError(f"Unable to parse {grp_name} !") - grid_type = None n_pts = 0 req_fields = ["Grid Type", "Step X", "Step Y", "nColumns", "nRows"] for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: raise ValueError(f"Unable to parse {grp_name}/{req_field} !") + self.tmp[ckey]["dimensionality"] = 2 grid_type = read_strings_from_dataset(fp[f"{grp_name}/Grid Type"][()]) - if grid_type not in ["HexGrid", "SqrGrid"]: - raise ValueError(f"Grid Type {grid_type} is currently not supported !") - self.tmp[ckey]["grid_type"] = grid_type + if grid_type == "HexGrid": + self.tmp[ckey]["grid_type"] = HEXAGONAL_GRID + elif grid_type == "SqrGrid": + self.tmp[ckey]["grid_type"] = SQUARE_GRID + else: + raise ValueError(f"Unable to parse {grp_name}/Grid Type !") self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"]) self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"]) @@ -248,17 +251,17 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # as the step size has already been accounted for by the tech partner when writing! 
if self.version["schema_version"] in ["OIM Analysis 8.5.1002 x64 [07-17-20]"]: print(f"{self.version['schema_version']}, tech partner accounted for calibration") + if self.tmp[ckey]["grid_type"] != SQUARE_GRID: + print(f"WARNING: Check carefully correct interpretation of scan_point coords!") self.tmp[ckey]["scan_point_x"] \ = np.asarray(fp[f"{grp_name}/X Position"][:], np.float32) self.tmp[ckey]["scan_point_y"] \ = np.asarray(fp[f"{grp_name}/Y Position"][:], np.float32) else: print(f"{self.version['schema_version']}, parser has to do the calibration") + if self.tmp[ckey]["grid_type"] != SQUARE_GRID: + print(f"WARNING: Check carefully correct interpretation of scan_point coords!") self.tmp[ckey]["scan_point_x"] = np.asarray( fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32) self.tmp[ckey]["scan_point_y"] = np.asarray( fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32) - print(f"xmin {np.min(self.tmp[ckey]['scan_point_x'])}," \ - f"xmax {np.max(self.tmp[ckey]['scan_point_x'])}," \ - f"ymin {np.min(self.tmp[ckey]['scan_point_y'])}," \ - f"ymax {np.max(self.tmp[ckey]['scan_point_y'])}") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index e05d7f4d6..2f6d6d3d7 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -38,6 +38,8 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ read_strings_from_dataset, format_euler_parameterization +from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ + HEXAGONAL_GRID, SQUARE_GRID class HdfFiveOxfordReader(HdfFiveBaseParser): @@ -118,6 +120,10 @@ def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): if f"{grp_name}" not in fp: raise ValueError(f"Unable to parse {grp_name} !") + # TODO::check if Oxford always uses SquareGrid like assumed here + self.tmp[ckey]["dimensionality"] = 2 + self.tmp[ckey]["grid_type"] = SQUARE_GRID + req_fields = ["X Cells", "Y Cells", "X Step", "Y Step"] for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: @@ -231,6 +237,8 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): # expected is order on x is first all possible x values while y == 0 # followed by as many copies of this linear sequence for each y increment # no action needed Oxford reports already the pixel coordinate multiplied by step + if self.tmp[ckey]["grid_type"] != SQUARE_GRID: + print(f"WARNING: Check carefully correct interpretation of scan_point coords!") # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner) self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32) # inconsistency f32 in file although specification states float diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 1709cd865..d9139e59a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -48,7 +48,6 @@ from orix.quaternion import Rotation from orix.quaternion.symmetry import get_point_group from orix.vector import Vector3d -from scipy.spatial import KDTree import matplotlib.pyplot as plt @@ -58,13 +57,16 @@ from pynxtools.dataconverter.readers.em.utils.hfive_web_utils \ import 
hfive_web_decorate_nxdata from pynxtools.dataconverter.readers.em.utils.image_processing import thumbnail +from pynxtools.dataconverter.readers.em.utils.get_sqr_grid import \ + get_scan_points_with_mark_data_discretized_on_sqr_grid +from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ + get_scan_point_axis_values, get_scan_point_coords PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] PROJECTION_DIRECTIONS = [("X", Vector3d.xvector().data.flatten()), ("Y", Vector3d.yvector().data.flatten()), ("Z", Vector3d.zvector().data.flatten())] - from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader @@ -114,17 +116,33 @@ def parse(self, template: dict) -> dict: # copying over all data and content within tech partner files into NeXus makes # not much sense as the data exists and we would like to motivate that # tech partners and community members write NeXus content directly - # therefore currently in this example we carry over the EBSD map and some - # metadata to motivate that there is indeed value wrt to interoperability - # when such data are harmonized exactly this is the point we would like to - # make with this example for NeXus and NOMAD OASIS within the FAIRmat project + # therefore, in this example we carry over the EBSD map and some metadata + # to motivate that there is indeed value wrt to interoperability when such data + # are harmonized upon injection in the RDMS - exactly this is the point + # we would like to make with this comprehensive example of data harmonization + # within the field of EBSD as one method in the field of electron diffraction + # we use NeXus, NOMAD OASIS within the FAIRmat project # it is practically beyond our resources to implement a mapping for all cases - # and corner cases of the vendor files + # and corner cases of vendor files # ideally concept mapping would be applied to just point to pieces of information - # in the HDF5 file that is written by the tech partners however because of the - # fact that currently these pieces of information are formatted very differently - # it is non-trivial to establish this mapping and only because of this we - # map over manually + # in (HDF5) files based on which semantically understood pieces of information + # are then interpreted and injected into the RDMS + # currently the fact that the documentation by tech partners is incomplete + # and the fact that conceptually similar or even the same concepts as instances + # with their pieces of information are formatted very differently, it is + # non-trivial to establish this mapping and only because of this we + # map over using hardcoding of concept names and symbols + + # a collection of different tech-partner-specific subparser follows + # these subparsers already extract specific information and perform a first + # step of harmonization. The subparsers specifically store e.g. 
+
+        # a collection of tech-partner-specific subparsers follows
+        # these subparsers already extract specific information and perform a first
+        # step of harmonization; specifically, they store e.g. EBSD maps in a
+        # tmp dictionary
+        # TODO: scan point positions (irrespective of which grid type (sqr, hex)
+        # these were probed on); in some cases the grid may have too large an extent
+        # along a dim so that sub-sampling is performed, here only for the purpose of
+        # using h5web to show the IPF color maps, dealing with the fact that h5web has
+        # so far not been designed to deal with images as large as several thousand
+        # pixels along either dimension
         if hfive_parser_type == "oxford":
             oina = HdfFiveOxfordReader(self.file_path)
             oina.parse_and_normalize()
@@ -199,6 +217,8 @@ def process_into_template(self, inp: dict, template: dict) -> dict:
         return template

     def get_named_axis(self, inp: dict, dim_name: str):
+        """Return scaled but not offset-calibrated scan point coordinates along dim."""
+        # TODO::remove!
         return np.asarray(np.linspace(0,
                                       inp[f"n_{dim_name}"] - 1,
                                       num=inp[f"n_{dim_name}"],
                                       endpoint=True) * inp[f"s_{dim_name}"], np.float32)
@@ -217,38 +237,47 @@ def process_roi_overview_ebsd_based(self, roi_id: str, template: dict) -> dict:
         print("Parse ROI default plot...")
+        # tech-partner-specific subparsers have just extracted the per scan point
+        # information in the sequence in which it was stored (which is often the order
+        # in which the points were scanned)
+        # however, that can be a square, a hexagonal, or some random grid
+        # a consuming visualization tool (like h5web) may not be able to represent
+        # the data as a point cloud but only visualize a grid of square pixels
+        # therefore, in general the scan_point_x and scan_point_y arrays and their
+        # associated data arrays such as euler should always be interpolated on a
+        # specific grid: here, the square grid supported by h5web with a specific
+        # maximum extent, which may be a downsampled representation of the actual ROI
+        # only when the grid is indeed a square grid that does not exceed the
+        # technical limitation of h5web is this interpolation obsolete
+        # TODO::implement rediscretization using a kdtree, take n_x, n_y, and n_z as guides
+
+        trg_grid \
+            = get_scan_points_with_mark_data_discretized_on_sqr_grid(inp,
                                                                      HFIVE_WEB_MAXIMUM_ROI)
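The helper get_scan_points_with_mark_data_discretized_on_sqr_grid is added as a new utility at the end of this patch; a minimal sketch of the kdtree idea named in the TODO above could look as follows (the function name, the pixel-center convention, and the restriction to one scalar mark array are assumptions of this sketch, not the actual pynxtools implementation):

import numpy as np
from scipy.spatial import cKDTree

def discretize_on_sqr_grid(xy: np.ndarray, marks: np.ndarray, n_px: int) -> np.ndarray:
    """Nearest-neighbor resampling of scattered scan points xy, shape (n, 2),
    with per-point scalar mark data, shape (n,), onto an n_px x n_px square grid."""
    x_min, y_min = xy.min(axis=0)
    x_max, y_max = xy.max(axis=0)
    s = max(x_max - x_min, y_max - y_min) / n_px  # square pixel edge length
    # pixel centers of the target grid
    gx, gy = np.meshgrid(x_min + (np.arange(n_px) + 0.5) * s,
                         y_min + (np.arange(n_px) + 0.5) * s, indexing="xy")
    tree = cKDTree(xy)
    _, nearest = tree.query(np.column_stack((gx.ravel(), gy.ravel())), k=1)
    return marks[nearest].reshape(n_px, n_px)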
+ template[f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/@NX_class"] = "NXprocess" + # TODO::writer should decorate automatically! trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing/DATA[roi]" template[f"{trg}/title"] = f"Region-of-interest overview image" template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! template[f"{trg}/@signal"] = "data" dims = ["x", "y"] - if is_threed is True: + if trg_grid["dimensionality"] == 3: dims.append("z") idx = 0 for dim in dims: @@ -258,22 +287,22 @@ def process_roi_overview_ebsd_based(self, for dim in dims[::-1]: template[f"{trg}/@axes"].append(f"axis_{dim}") - if is_threed is True: - template[f"{trg}/data"] = {"compress": np.squeeze(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]], axis=None) * 255.), np.uint32), np.uint8), axis=3), "strength": 1} + if trg_grid["dimensionality"] == 3: + template[f"{trg}/data"] = {"compress": np.squeeze(np.asarray(np.asarray((trg_grid[contrast_mode[0]] / np.max(trg_grid[contrast_mode[0]], axis=None) * 255.), np.uint32), np.uint8), axis=3), "strength": 1} else: - template[f"{trg}/data"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1} + template[f"{trg}/data"] = {"compress": np.reshape(np.asarray(np.asarray((trg_grid[contrast_mode[0]] / np.max(trg_grid[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (trg_grid["n_y"], trg_grid["n_x"]), order="C"), "strength": 1} template[f"{trg}/descriptor"] = contrast_mode[1] # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d template[f"{trg}/data/@long_name"] = f"Signal" hfive_web_decorate_nxdata(f"{trg}/data", template) - scan_unit = inp["s_unit"] + scan_unit = trg_grid["s_unit"] if scan_unit == "um": scan_unit = "µm" for dim in dims: template[f"{trg}/AXISNAME[axis_{dim}]"] \ - = {"compress": self.get_named_axis(inp, dim), "strength": 1} + = {"compress": self.get_named_axis(trg_grid, dim), "strength": 1} template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \ = f"Coordinate along {dim}-axis ({scan_unit})" template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" @@ -287,7 +316,7 @@ def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: if "n_z" not in inp[ckey].keys(): self.prepare_roi_ipfs_phases_twod(inp[ckey], roi_id, template) self.process_roi_ipfs_phases_twod(inp[ckey], roi_id, template) - # self.onthefly_process_roi_ipfs_phases_threed(inp[ckey], roi_id, template) + # self.onthefly_process_roi_ipfs_phases_two(inp[ckey], roi_id, template) else: self.onthefly_process_roi_ipfs_phases_threed(inp[ckey], roi_id, template) return template @@ -612,7 +641,7 @@ def process_roi_phase_ipfs_threed(self, inp: dict, roi_id: int, pyxem_phase_id: # TODO: I have not seen any dataset yet where is limit is exhausted, the largest # dataset is a 3D SEM/FIB study from a UK project this is likely because to # get an EBSD map as large one already scans quite long for one section as making - # a ompromise is required and thus such hypothetical large serial-sectioning + # a compromise is required and thus such hypothetical large serial-sectioning # studies would block the microscope for a very long time # however I have seen examples from Hadi Pirgazi with L. 
Kestens from Leuven
+       # where indeed large but thin 3d slabs were characterized
diff --git a/pynxtools/dataconverter/readers/em/utils/get_scan_points.py b/pynxtools/dataconverter/readers/em/utils/get_scan_points.py
new file mode 100644
index 000000000..9bfbe5ab1
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/get_scan_points.py
@@ -0,0 +1,80 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Identify likely scan_point_positions for specific EBSD grid types."""
+
+# pylint: disable=no-member
+
+import numpy as np
+
+from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
+    HEXAGONAL_GRID, SQUARE_GRID
+
+
+def get_scan_point_axis_values(inp: dict, dim_name: str):
+    is_threed = False
+    if "dimensionality" in inp.keys():
+        if inp["dimensionality"] == 3:
+            is_threed = True
+    req_keys = ["grid_type", f"n_{dim_name}", f"s_{dim_name}"]
+    for key in req_keys:
+        if key not in inp.keys():
+            raise ValueError(f"Unable to find required key {key} in inp !")
+
+    if inp["grid_type"] in [HEXAGONAL_GRID, SQUARE_GRID]:
+        return np.asarray(np.linspace(0,
+                                      inp[f"n_{dim_name}"] - 1,
+                                      num=inp[f"n_{dim_name}"],
+                                      endpoint=True) * inp[f"s_{dim_name}"], np.float32)
+    else:
+        return None
+
+
+def get_scan_point_coords(inp: dict) -> dict:
+    """Add scan_point_dim array assuming top-left to bottom-right snake style scanning."""
+    is_threed = False
+    if "dimensionality" in inp.keys():
+        if inp["dimensionality"] == 3:
+            is_threed = True
+
+    req_keys = ["grid_type"]
+    dims = ["x", "y"]
+    if is_threed is True:
+        dims.append("z")
+    for dim in dims:
+        req_keys.append(f"n_{dim}")
+        req_keys.append(f"s_{dim}")
+
+    for key in req_keys:
+        if key not in inp.keys():
+            raise ValueError(f"Unable to find required key {key} in inp !")
+
+    if is_threed is False:
+        if inp["grid_type"] in [SQUARE_GRID, HEXAGONAL_GRID]:
+            # TODO::check that below code is correct as well for hexagonal grid !
+            for dim in dims:
+                if f"scan_point_{dim}" in inp.keys():
+                    print(f"WARNING::Overwriting scan_point_{dim} !")
+            inp["scan_point_x"] = np.tile(
+                np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) * inp["s_x"], inp["n_y"])
+            inp["scan_point_y"] = np.repeat(
+                np.linspace(0, inp["n_y"] - 1, num=inp["n_y"], endpoint=True) * inp["s_y"], inp["n_x"])
+        else:
+            print(f"WARNING::{__name__} facing an unknown scan strategy !")
+    else:
+        print(f"WARNING::{__name__} not implemented for 3D case !")
+    return inp
diff --git a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py
new file mode 100644
index 000000000..ec9a78eb3
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py
@@ -0,0 +1,145 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Discretize point cloud in R^d (d=2, 3) with mark data to square/cube voxel grid."""
+
+# pylint: disable=no-member
+
+import numpy as np
+from scipy.spatial import KDTree
+
+from pynxtools.dataconverter.readers.em.examples.ebsd_database import SQUARE_GRID
+
+
+def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict,
+                                                           max_edge_length: int) -> dict:
+    """Inspect grid_type, dimensionality, and point locations, then map the mark data of src_grid onto a square grid."""
+    is_threed = False
+    if "dimensionality" in src_grid.keys():
+        if src_grid["dimensionality"] == 3:
+            is_threed = True
+
+    req_keys = ["grid_type"]
+    dims = ["x", "y"]
+    if is_threed is True:
+        dims.append("z")
+    for dim in dims:
+        req_keys.append(f"scan_point_{dim}")
+        req_keys.append(f"n_{dim}")
+        req_keys.append(f"s_{dim}")
+
+    trg_grid = {}
+    for key in req_keys:
+        if key not in src_grid.keys():
+            raise ValueError(f"Unable to find required key {key} in src_grid !")
+
+    # take the discretization of the source grid as a guide for the target grid
+    # optimization possible if square grid and matching maximum_extent
+
+    max_extent = None
+    if is_threed is False:
+        max_extent = np.max((src_grid["n_x"], src_grid["n_y"]))
+    else:
+        max_extent = np.max((src_grid["n_x"], src_grid["n_y"], src_grid["n_z"]))
+
+    if src_grid["grid_type"] == SQUARE_GRID:
+        if max_extent <= max_edge_length:
+            return src_grid
+        else:
+            max_extent = max_edge_length  # cap the maximum extent
+
+    # all non-square grids or too large square grids will be
+    # discretized onto a regular grid with square or cubic pixel/voxel
+    aabb = []
+    for dim in dims:
+        aabb.append(np.min(src_grid[f"scan_point_{dim}"]))  # - 0.5 * src_grid[f"s_{dim}"]))
+        aabb.append(np.max(src_grid[f"scan_point_{dim}"]))  # + 0.5 * src_grid[f"s_{dim}"]))
+    print(f"{aabb}")
+
+    if is_threed is False:
+        if aabb[1] - aabb[0] >= aabb[3] - aabb[2]:
+            sxy = (aabb[1] - aabb[0]) / max_extent
+            nxy = [max_extent, int(np.ceil((aabb[3] - aabb[2]) / sxy))]
+        else:
+            sxy = (aabb[3] - aabb[2]) / max_extent
+            nxy = [int(np.ceil((aabb[1] - aabb[0]) / sxy)), max_extent]
+        print(f"H5Web default plot generation, scaling nxy0 {[src_grid['n_x'], src_grid['n_y']]}, nxy {nxy}")
+        # the above estimate is not exactly correct (may create a slight real space shift)
+        # of the EBSD map TODO:: regrid the real world axis-aligned bounding box aabb with
+        # a regular tiling of squares or hexagons
+        # https://stackoverflow.com/questions/18982650/differences-between-matlab-and-numpy-and-pythons-round-function
+        # MTex/Matlab round is not exactly the same as numpy round but reasonably close
+
+        # scan point positions were normalized by the tech partner subparsers such that
+        # they always build on pixel coordinates calibrated for step size, not on absolute
+        # positions in the sample surface frame of reference, as the latter is typically
+        # not yet consistently documented; because we assume in addition that we always
+        # start at the top left corner, the zeroth/first coordinate is always 0., 0. !
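# Editor's note (illustrative, not part of the original patch): the tile/repeat
# construction below enumerates the target grid row-major, x varying fastest;
# e.g. for a hypothetical 3 x 2 grid with unit step the pairs become
# (0,0), (1,0), (2,0), (0,1), (1,1), (2,1):
import numpy as np  # illustration only
assert (np.tile(np.linspace(0, 2, num=3), 2) == [0, 1, 2, 0, 1, 2]).all()    # x
assert (np.repeat(np.linspace(0, 1, num=2), 3) == [0, 0, 0, 1, 1, 1]).all()  # y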
+        xy = np.column_stack(
+            (np.tile(np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sxy, nxy[1]),
+             np.repeat(np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sxy, nxy[0])))
+        # TODO:: if scan_point_{dim} are calibrated this approach
+        # here would shift the origin to 0, 0 implicitly which may not be desired
+        print(f"xy {xy}, shape {np.shape(xy)}")
+        tree = KDTree(np.column_stack((src_grid["scan_point_x"], src_grid["scan_point_y"])))
+        d, idx = tree.query(xy, k=1)
+        if np.sum(idx == tree.n) > 0:
+            raise ValueError(f"kdtree query left some query points without a neighbor!")
+        del d
+        del tree
+
+        # build the trg_grid container with only the relevant mark data selected from src_grid
+        for key in src_grid.keys():
+            if key == "euler":
+                trg_grid[key] = np.zeros((np.shape(xy)[0], 3), np.float32)
+                trg_grid[key] = np.nan
+                trg_grid[key] = src_grid["euler"][idx, :]
+                if np.isnan(trg_grid[key]).any() is True:
+                    raise ValueError(f"Downsampling of the point cloud left " \
+                                     f"pixels without mark data {key} !")
+                print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
+            elif key == "phase_id" or key == "bc":
+                trg_grid[key] = np.zeros((np.shape(xy)[0],), np.int32) - 2
+                # pyxem_id is at least -1, bc is typically positive
+                trg_grid[key] = src_grid[key][idx]
+                if np.sum(trg_grid[key] == -2) > 0:
+                    raise ValueError(f"Downsampling of the point cloud left " \
+                                     f"pixels without mark data {key} !")
+                print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
+            elif key == "ci" or key == "mad":
+                trg_grid[key] = np.zeros((np.shape(xy)[0],), np.float32)
+                trg_grid[key] = np.nan
+                trg_grid[key] = src_grid[key][idx]
+                print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
+                if np.isnan(trg_grid[key]).any() is True:
+                    raise ValueError(f"Downsampling of the point cloud left " \
+                                     f"pixels without mark data {key} !")
+            elif key not in ["n_x", "n_y", "n_z", "s_x", "s_y", "s_z"]:
+                print(f"WARNING:: src_grid[{key}] is mapped as is on trg_grid[{key}] !")
+                trg_grid[key] = src_grid[key]
+                print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
+            else:
+                print(f"WARNING:: src_grid[{key}] is not yet mapped on trg_grid[{key}] !")
+        trg_grid["n_x"] = nxy[0]
+        trg_grid["n_y"] = nxy[1]
+        trg_grid["s_x"] = sxy
+        trg_grid["s_y"] = sxy
+        # TODO::need to update scan_point_{dim}
+        return trg_grid
+    else:
+        raise ValueError(f"The 3D discretization is currently not implemented because " \
+                         f"we do not know of any large enough dataset to test it !")
diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
index bf1e7af10..3b2320c4e 100644
--- a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
+++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
@@ -101,3 +101,8 @@ def read_first_scalar(obj):
 def all_equal(iterable):
     g = groupby(iterable)
     return next(g, True) and not next(g, False)
+
+
+# for dim in ["x", "y"]:
+#     print(f"{dim}min {np.min(self.tmp[ckey][f'scan_point_{dim}'])}")
+#     print(f"{dim}max {np.max(self.tmp[ckey][f'scan_point_{dim}'])}")
From 19dd5549ce0c34aada2c973953dace3d9804130b Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Thu, 7 Dec 2023 10:07:50 +0100
Subject: [PATCH 34/84] Fixed bug for 207_2081

---
 .../readers/em/examples/ebsd_database.py | 11 ++-
 .../readers/em/subparsers/hfive_apex.py  |  5 +-
 .../readers/em/subparsers/nxs_pyxem.py   | 17 +++--
 .../readers/em/utils/get_scan_points.py  | 45 +++++++---
 .../readers/em/utils/get_sqr_grid.py     | 69 +++++++++++--------
 5 files
changed, 100 insertions(+), 47 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py index 34e79debd..369b3dcc9 100644 --- a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py +++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py @@ -28,8 +28,15 @@ # is recoverable when there is no common agreement about the phases used and their # exact atomic configuration -HEXAGONAL_GRID = "hexagonal_grid" -SQUARE_GRID = "square_grid" +# typical scanning schemes used for EBSD +# which lattice symmetry +HEXAGONAL_GRID = "hexagonal_grid" # typically assuming a tiling with regular hexagons +SQUARE_GRID = "square_grid" # a tiling with squares +REGULAR_TILING = "regular_tiling" +# most frequently this is the sequence of set scan positions with actual positions +# based on grid type and spacing based on tiling +FLIGHT_PLAN = "start_top_left_stack_x_left_to_right_stack_x_line_along_end_bottom_right" + FreeTextToUniquePhase = {"Actinolite": "Actinolite", diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 47c339a91..f45ea009c 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -37,7 +37,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ read_strings_from_dataset from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ get_scan_point_coords @@ -127,6 +127,9 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): self.tmp[ckey]["grid_type"] = SQUARE_GRID else: raise ValueError(f"Unable to parse {self.prfx}/Sample/Grid Type !") + # the next two lines encode the typical assumption that is not reported in tech partner file! + self.tmp[ckey]["tiling"] = REGULAR_TILING + self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0] self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0] diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index d9139e59a..2a2cf1cfb 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -60,7 +60,7 @@ from pynxtools.dataconverter.readers.em.utils.get_sqr_grid import \ get_scan_points_with_mark_data_discretized_on_sqr_grid from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ - get_scan_point_axis_values, get_scan_point_coords + get_scan_point_axis_values, get_scan_point_coords, square_grid, hexagonal_grid PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] PROJECTION_DIRECTIONS = [("X", Vector3d.xvector().data.flatten()), @@ -217,12 +217,17 @@ def process_into_template(self, inp: dict, template: dict) -> dict: return template def get_named_axis(self, inp: dict, dim_name: str): - """"Return scaled but not offset-calibrated scan point coordinates along dim.""" + # Return scaled but not offset-calibrated scan point coordinates along dim. # TODO::remove! 
-        return np.asarray(np.linspace(0,
-                                      inp[f"n_{dim_name}"] - 1,
-                                      num=inp[f"n_{dim_name}"],
-                                      endpoint=True) * inp[f"s_{dim_name}"], np.float32)
+        if square_grid(inp) is True or hexagonal_grid(inp) is True:
+            # TODO::this code does not work for scaled and origin-offset scan point positions!
+            # TODO::below formula is only the same for sqr and hex grid if
+            # s_{dim_name} already accounts for the fact that typically s_y = sqrt(3)/2 s_x !
+            return np.asarray(np.linspace(0,
+                                          inp[f"n_{dim_name}"] - 1,
+                                          num=inp[f"n_{dim_name}"],
+                                          endpoint=True) * inp[f"s_{dim_name}"], np.float32)
+        return None

    def process_roi_overview(self, inp: dict, template: dict) -> dict:
        for ckey in inp.keys():
diff --git a/pynxtools/dataconverter/readers/em/utils/get_scan_points.py b/pynxtools/dataconverter/readers/em/utils/get_scan_points.py
index 9bfbe5ab1..117a3777d 100644
--- a/pynxtools/dataconverter/readers/em/utils/get_scan_points.py
+++ b/pynxtools/dataconverter/readers/em/utils/get_scan_points.py
@@ -22,7 +22,7 @@
 import numpy as np

 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    HEXAGONAL_GRID, SQUARE_GRID
+    HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN


 def get_scan_point_axis_values(inp: dict, dim_name: str):
@@ -44,14 +44,33 @@ def get_scan_point_axis_values(inp: dict, dim_name: str):
         return None


-def get_scan_point_coords(inp: dict) -> dict:
-    """Add scan_point_dim array assuming top-left to bottom-right snake style scanning."""
-    is_threed = False
+def threed(inp: dict):
+    """Identify if 3D (tribool: True, False, or None if unknown)."""
     if "dimensionality" in inp.keys():
         if inp["dimensionality"] == 3:
-            is_threed = True
+            return True
+        return False
+    return None
+
+
+def square_grid(inp: dict):
+    """Identify if square grid with specific assumptions."""
+    if inp["grid_type"] == SQUARE_GRID and inp["tiling"] == REGULAR_TILING and inp["flight_plan"] == FLIGHT_PLAN:
+        return True
+    return False
+
+
+def hexagonal_grid(inp: dict):
+    """Identify if hexagonal grid with specific assumptions."""
+    if inp["grid_type"] == HEXAGONAL_GRID and inp["tiling"] == REGULAR_TILING and inp["flight_plan"] == FLIGHT_PLAN:
+        return True
+    return False

-    req_keys = ["grid_type"]
+
+def get_scan_point_coords(inp: dict) -> dict:
+    """Add scan_point_dim array assuming top-left to bottom-right snake style scanning."""
+    is_threed = threed(inp)
+    req_keys = ["grid_type", "tiling", "flight_plan"]
     dims = ["x", "y"]
     if is_threed is True:
         dims.append("z")
@@ -64,11 +83,21 @@ def get_scan_point_coords(inp: dict) -> dict:
         raise ValueError(f"Unable to find required key {key} in inp !")

     if is_threed is False:
-        if inp["grid_type"] in [SQUARE_GRID, HEXAGONAL_GRID]:
-            # TODO::check that below code is correct as well for hexagonal grid !
+        if square_grid(inp) is True:
+            for dim in dims:
+                if f"scan_point_{dim}" in inp.keys():
+                    print(f"WARNING::Overwriting scan_point_{dim} !")
+            inp["scan_point_x"] = np.tile(
+                np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) * inp["s_x"], inp["n_y"])
+            inp["scan_point_y"] = np.repeat(
+                np.linspace(0, inp["n_y"] - 1, num=inp["n_y"], endpoint=True) * inp["s_y"], inp["n_x"])
+        elif hexagonal_grid(inp) is True:
             for dim in dims:
                 if f"scan_point_{dim}" in inp.keys():
                     print(f"WARNING::Overwriting scan_point_{dim} !")
+            # the following code is only the same as for the square grid because
+            # typically the tech partners already take into account and export scan step
+            # values such that for a hexagonal grid one s_{dim} (typically s_y) is sqrt(3)/2*s_{other_dim} !
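# Editor's aside (assumption made explicit, not part of the original patch):
# for a regular hexagonal tiling the row pitch is sqrt(3)/2 times the in-row
# step, so a vendor exporting s_x = 1.0 um is expected to export s_y ~= 0.866 um:
import math  # illustration only
assert abs(math.sqrt(3.0) / 2.0 - 0.8660254) < 1e-6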
inp["scan_point_x"] = np.tile( np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) * inp["s_x"], inp["n_y"]) inp["scan_point_y"] = np.repeat( diff --git a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py index ec9a78eb3..99886c66c 100644 --- a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py +++ b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py @@ -23,17 +23,15 @@ from scipy.spatial import KDTree from pynxtools.dataconverter.readers.em.examples.ebsd_database import SQUARE_GRID +from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ + threed, square_grid, hexagonal_grid def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, max_edge_length: int) -> dict: """Inspect grid_type, dimensionality, point locations, and mark src_grida, map then.""" - is_threed = False - if "dimensionality" in src_grid.keys(): - if src_grid["dimensionality"] == 3: - is_threed = True - - req_keys = ["grid_type"] + is_threed = threed(src_grid) + req_keys = ["grid_type", "tiling", "flight_plan"] dims = ["x", "y"] if is_threed is True: dims.append("z") @@ -56,28 +54,36 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, else: max_extent = np.max((src_grid["n_x"], src_grid["n_y"], src_grid["n_z"])) - if src_grid["grid_type"] == SQUARE_GRID: + if square_grid(src_grid) is True: if max_extent <= max_edge_length: return src_grid else: - max_extent = max_edge_length # cap the maximum extent + # too large square grid has to be discretized and capped + # cap to the maximum extent to comply with h5web technical constraints + max_extent = max_edge_length + elif hexagonal_grid(src_grid) is True: + if max_extent > max_edge_length: + max_extent = max_edge_length + else: + raise ValueError(f"Facing an unsupported grid type !") # all non-square grids or too large square grids will be # discretized onto a regular grid with square or cubic pixel/voxel aabb = [] for dim in dims: - aabb.append(np.min(src_grid[f"scan_point_{dim}"])) # - 0.5 * src_grid[f"s_{dim}"])) - aabb.append(np.max(src_grid[f"scan_point_{dim}"])) # + 0.5 * src_grid[f"s_{dim}"])) + aabb.append(np.min(src_grid[f"scan_point_{dim}"] - 0.5 * src_grid[f"s_{dim}"])) + aabb.append(np.max(src_grid[f"scan_point_{dim}"] + 0.5 * src_grid[f"s_{dim}"])) print(f"{aabb}") if is_threed is False: if aabb[1] - aabb[0] >= aabb[3] - aabb[2]: - sxy = (aabb[1] - aabb[0]) / max_extent - nxy = [max_extent, int(np.ceil((aabb[3] - aabb[2]) / sxy))] + trg_sxy = (aabb[1] - aabb[0]) / max_extent + trg_nxy = [max_extent, int(np.ceil((aabb[3] - aabb[2]) / trg_sxy))] else: - sxy = (aabb[3] - aabb[2]) / max_extent - nxy = [int(np.ceil((aabb[1] - aabb[0]) / sxy)), max_extent] - print(f"H5Web default plot generation, scaling nxy0 {[src_grid['n_x'], src_grid['n_y']]}, nxy {nxy}") + trg_sxy = (aabb[3] - aabb[2]) / max_extent + trg_nxy = [int(np.ceil((aabb[1] - aabb[0]) / trg_sxy)), max_extent] + print(f"H5Web default plot generation, scaling src_nxy " \ + f"{[src_grid['n_x'], src_grid['n_y']]}, trg_nxy {trg_nxy}") # the above estimate is not exactly correct (may create a slight real space shift) # of the EBSD map TODO:: regrid the real world axis-aligned bounding box aabb with # a regular tiling of squares or hexagons @@ -89,14 +95,13 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, # in the sample surface frame of reference as this is typically not yet consistently documented # because we assume in addition that we always 
start at the top left corner the zeroth/first # coordinate is always 0., 0. ! - xy = np.column_stack( - (np.tile(np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sxy, nxy[1]), - np.repeat(np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sxy, nxy[0]))) + trg_xy = np.column_stack((np.tile(np.linspace(0, trg_nxy[0] - 1, num=trg_nxy[0], endpoint=True) * trg_sxy, trg_nxy[1]), + np.repeat(np.linspace(0, trg_nxy[1] - 1, num=trg_nxy[1], endpoint=True) * trg_sxy, trg_nxy[0]))) # TODO:: if scan_point_{dim} are calibrated this approach # here would shift the origin to 0, 0 implicitly which may not be desired - print(f"xy {xy}, shape {np.shape(xy)}") + print(f"trg_xy {trg_xy}, shape {np.shape(trg_xy)}") tree = KDTree(np.column_stack((src_grid["scan_point_x"], src_grid["scan_point_y"]))) - d, idx = tree.query(xy, k=1) + d, idx = tree.query(trg_xy, k=1) if np.sum(idx == tree.n) > 0: raise ValueError(f"kdtree query left some query points without a neighbor!") del d @@ -105,7 +110,7 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, # rebuild src_grid container with only the relevant src_grida selected from src_grid for key in src_grid.keys(): if key == "euler": - trg_grid[key] = np.zeros((np.shape(xy)[0], 3), np.float32) + trg_grid[key] = np.zeros((np.shape(trg_xy)[0], 3), np.float32) trg_grid[key] = np.nan trg_grid[key] = src_grid["euler"][idx, :] if np.isnan(trg_grid[key]).any() is True: @@ -113,7 +118,7 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, f"pixels without mark data {key} !") print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") elif key == "phase_id" or key == "bc": - trg_grid[key] = np.zeros((np.shape(xy)[0],), np.int32) - 2 + trg_grid[key] = np.zeros((np.shape(trg_xy)[0],), np.int32) - 2 # pyxem_id is at least -1, bc is typically positive trg_grid[key] = src_grid[key][idx] if np.sum(trg_grid[key] == -2) > 0: @@ -121,23 +126,27 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, f"pixels without mark data {key} !") print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") elif key == "ci" or key == "mad": - trg_grid[key] = np.zeros((np.shape(xy)[0],), np.float32) + trg_grid[key] = np.zeros((np.shape(trg_xy)[0],), np.float32) trg_grid[key] = np.nan trg_grid[key] = src_grid[key][idx] print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") if np.isnan(trg_grid[key]).any() is True: raise ValueError(f"Downsampling of the point cloud left " \ f"pixels without mark data {key} !") - elif key not in ["n_x", "n_y", "n_z", "s_x", "s_y", "s_z"]: - print(f"WARNING:: src_grid[{key}] is mapped as is on trg_grid[{key}] !") + elif key not in ["n_x", "n_y", "n_z", + "s_x", "s_y", "s_z", + "scan_point_x", "scan_point_y", "scan_point_z"]: trg_grid[key] = src_grid[key] - print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") + # print(f"WARNING:: src_grid[{key}] is mapped as is on trg_grid[{key}] !") + # print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") else: print(f"WARNING:: src_grid[{key}] is not yet mapped on trg_grid[{key}] !") - trg_grid["n_x"] = nxy[0] - trg_grid["n_y"] = nxy[1] - trg_grid["s_x"] = sxy - trg_grid["s_y"] = sxy + trg_grid["n_x"] = trg_nxy[0] + trg_grid["n_y"] = trg_nxy[1] + trg_grid["s_x"] = trg_sxy + trg_grid["s_y"] = trg_sxy + trg_grid["scan_point_x"] = trg_xy[0] + trg_grid["scan_point_y"] = trg_xy[1] # TODO::need to update scan_point_{dim} return trg_grid else: From 23c28d433c6e27899c4cbccd1bfa281810ef23f7 Mon Sep 17 
00:00:00 2001
From: mkuehbach
Date: Thu, 7 Dec 2023 11:08:52 +0100
Subject: [PATCH 35/84] Implemented xmap-free twod IPF rendering function,
 tested successfully on 207_2081

---
 .../readers/em/subparsers/hfive_apex.py |   5 +-
 .../readers/em/subparsers/nxs_pyxem.py  | 483 ++++++++++++------
 2 files changed, 324 insertions(+), 164 deletions(-)

diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
index f45ea009c..886eb787a 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -222,8 +222,9 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
             oris = Orientation.from_matrix([np.reshape(dat[i][0], (3, 3))])
             self.tmp[ckey]["euler"][i, :] = oris.to_euler(degrees=False)
             self.tmp[ckey]["ci"][i] = dat[i][2]
-            self.tmp[ckey]["phase_id"][i] = dat[i][3] + 1  # APEX seems to define
-            # notIndexed as -1 and the first valid phase id 0
+            self.tmp[ckey]["phase_id"][i] = dat[i][3] + 1  # adding +1 because
+            # EDAX/APEX seems to define notIndexed as -1 and the first valid phase_id is then 0
+            # for NXem however we assume that notIndexed is 0 and the first valid phase_id is 1
         if np.isnan(self.tmp[ckey]["euler"]).any():
             raise ValueError(f"Conversion of om2eu unexpectedly resulted in NaN !")
         # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions !
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py
index 2a2cf1cfb..9ae711ccf 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py
@@ -60,7 +60,7 @@
 from pynxtools.dataconverter.readers.em.utils.get_sqr_grid import \
     get_scan_points_with_mark_data_discretized_on_sqr_grid
 from pynxtools.dataconverter.readers.em.utils.get_scan_points import \
-    get_scan_point_axis_values, get_scan_point_coords, square_grid, hexagonal_grid
+    get_scan_point_axis_values, get_scan_point_coords, square_grid, hexagonal_grid, threed

 PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()]
 PROJECTION_DIRECTIONS = [("X", Vector3d.xvector().data.flatten()),
@@ -318,16 +318,324 @@ def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict:
             if ckey.startswith("ebsd") and inp[ckey] != {}:
                 if ckey.replace("ebsd", "").isdigit():
                     roi_id = int(ckey.replace("ebsd", ""))
-                    if "n_z" not in inp[ckey].keys():
-                        self.prepare_roi_ipfs_phases_twod(inp[ckey], roi_id, template)
-                        self.process_roi_ipfs_phases_twod(inp[ckey], roi_id, template)
-                        # self.onthefly_process_roi_ipfs_phases_two(inp[ckey], roi_id, template)
+                    if threed(inp[ckey]) is False:
+                        self.onthefly_process_roi_ipfs_phases_twod(inp[ckey], roi_id, template)
                     else:
                         self.onthefly_process_roi_ipfs_phases_threed(inp[ckey], roi_id, template)
         return template

+    def onthefly_process_roi_ipfs_phases_twod(self,
+                                              inp: dict,
+                                              roi_id: int,
+                                              template: dict) -> dict:
+        print("Parse crystal_structure_models aka phases (no xmap) 2D version...")
+        nxem_phase_id = 0
+        prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing"
+        # bookkeeping is always reported for the original grid
+        # because the eventual discretization for h5web is solely
+        # for the purpose of showing users a readily consumable default plot
+        # to judge for each possible dataset in the same way whether the
+        # dataset is worthwhile and potentially valuable for one's own research
+        n_pts =
inp["n_x"] * inp["n_y"] + n_pts_indexed = np.sum(inp["phase_id"] != 0) + print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}") + template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts) + template[f"{prfx}/indexing_rate"] = np.float64(100. * n_pts_indexed / n_pts) + template[f"{prfx}/indexing_rate/@units"] = f"%" + grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{nxem_phase_id}]" + template[f"{grp_name}/number_of_scan_points"] \ + = np.uint32(np.sum(inp["phase_id"] == 0)) + template[f"{grp_name}/phase_identifier"] = np.uint32(nxem_phase_id) + template[f"{grp_name}/phase_name"] = f"notIndexed" + + print(f"----unique inp phase_id--->{np.unique(inp['phase_id'])}") + for nxem_phase_id in np.arange(1, np.max(np.unique(inp["phase_id"])) + 1): + # starting here at ID 1 because the subpasrsers have already normalized the + # tech partner specific phase_id convention to follow NXem NeXus convention + print(f"inp[phases].keys(): {inp['phases'].keys()}") + if nxem_phase_id not in inp["phases"].keys(): + raise ValueError(f"{nxem_phase_id} is not a key in inp['phases'] !") + trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{nxem_phase_id}]" + template[f"{trg}/number_of_scan_points"] \ + = np.uint32(np.sum(inp["phase_id"] == nxem_phase_id)) + template[f"{trg}/phase_identifier"] = np.uint32(nxem_phase_id) + template[f"{trg}/phase_name"] \ + = f"{inp['phases'][nxem_phase_id]['phase_name']}" + # internally the following function may discretize a coarser IPF + # if the input grid inp is too large for h5web to display + # this remove fine details in the EBSD maps but keep in mind + # that the purpose of the default plot is to guide the user + # of the potential usefulness of the dataset when searching in + # a RDMS like NOMAD OASIS, the purpose is NOT to take the coarse-grained + # discretization and use this for scientific data analysis! + self.process_roi_phase_ipfs_twod(inp, + roi_id, + nxem_phase_id, + inp["phases"][nxem_phase_id]["phase_name"], + inp["phases"][nxem_phase_id]["space_group"], + template) + return template + + def process_roi_phase_ipfs_twod(self, + inp: dict, + roi_id: int, + nxem_phase_id: int, + phase_name: str, + space_group: int, + template: dict) -> dict: + print(f"Generate 2D IPF maps for {nxem_phase_id}, {phase_name}...") + trg_grid \ + = get_scan_points_with_mark_data_discretized_on_sqr_grid(inp, HFIVE_WEB_MAXIMUM_RGB) + + rotations = Rotation.from_euler( + euler=trg_grid["euler"][trg_grid["phase_id"] == nxem_phase_id], + direction='lab2crystal', degrees=False) + print(f"shape rotations -----> {np.shape(rotations)}") + + for idx in np.arange(0, len(PROJECTION_VECTORS)): + point_group = get_point_group(space_group, proper=False) + ipf_key = plot.IPFColorKeyTSL( + point_group.laue, direction=PROJECTION_VECTORS[idx]) + img = get_ipfdir_legend(ipf_key) + + rgb_px_with_phase_id = np.asarray(np.asarray( + ipf_key.orientation2color(rotations) * 255., np.uint32), np.uint8) + print(f"shape rgb_px_with_phase_id -----> {np.shape(rgb_px_with_phase_id)}") + + ipf_rgb_map = np.asarray(np.asarray( + np.zeros((trg_grid["n_y"] * trg_grid["n_x"], 3)) * 255., np.uint32), np.uint8) + # background is black instead of white (which would be more pleasing) + # but IPF color maps have a whitepoint which encodes in fact an orientation + # and because of that we may have a map from a single crystal characterization + # whose orientation could be close to the whitepoint which becomes a fully white + # seemingly "empty" image, therefore we use black as empty, i.e. 
white reports an + # orientation + ipf_rgb_map[trg_grid["phase_id"] == nxem_phase_id, :] = rgb_px_with_phase_id + ipf_rgb_map = np.reshape(ipf_rgb_map, (trg_grid["n_y"], trg_grid["n_x"], 3), order="C") + # 0 is y, 1 is x, only valid for REGULAR_TILING and FLIGHT_PLAN ! + + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ + f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{nxem_phase_id}]" \ + f"/MS_IPF[ipf{idx + 1}]" + template[f"{trg}/projection_direction"] \ + = np.asarray(PROJECTION_VECTORS[idx].data.flatten(), np.float32) + + # add the IPF color map + mpp = f"{trg}/DATA[map]" + template[f"{mpp}/title"] \ + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" + template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! + template[f"{mpp}/@signal"] = "data" + dims = ["x", "y"] + template[f"{mpp}/@axes"] = [] + for dim in dims[::-1]: + template[f"{mpp}/@axes"].append(f"axis_{dim}") + enum = 0 + for dim in dims: + template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 + template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} + hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) + + scan_unit = trg_grid["s_unit"] # TODO::this is not necessarily correct + # could be a scale-invariant synthetic microstructure whose simulation + # would work on multiple length-scales as atoms are not resolved directly! + if scan_unit == "um": + scan_unit = "µm" + for dim in dims: + template[f"{mpp}/AXISNAME[axis_{dim}]"] \ + = {"compress": self.get_named_axis(trg_grid, f"{dim}"), "strength": 1} + template[f"{mpp}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit})" + template[f"{mpp}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + + # add the IPF color map legend/key + lgd = f"{trg}/DATA[legend]" + template[f"{lgd}/title"] \ + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" + # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" + template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
+ template[f"{lgd}/@signal"] = "data" + template[f"{lgd}/@axes"] = [] + dims = ["x", "y"] + for dim in dims[::-1]: + template[f"{lgd}/@axes"].append(f"axis_{dim}") + enum = 0 + for dim in dims: + template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 + template[f"{lgd}/data"] = {"compress": img, "strength": 1} + hfive_web_decorate_nxdata(f"{lgd}/data", template) + + dims = [("x", 1), ("y", 0)] + for dim in dims: + template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ + = {"compress": np.asarray(np.linspace(0, + np.shape(img)[dim[1]] - 1, + num=np.shape(img)[dim[1]], + endpoint=True), np.uint32), + "strength": 1} + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + = f"Pixel along {dim[0]}-axis" + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" + return template + + def onthefly_process_roi_ipfs_phases_threed(self, + inp: dict, + roi_id: int, + template: dict) -> dict: + # this function is almost the same as its twod version we keep it for + # now an own function until the rediscretization also works for the 3D grid + print("Parse crystal_structure_models aka phases (no xmap) 3D version...") + # see comments in twod version of this function + nxem_phase_id = 0 + prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" + n_pts = inp["n_x"] * inp["n_y"] * inp["n_z"] + n_pts_indexed = np.sum(inp["phase_id"] != 0) + print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}") + template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts) + template[f"{prfx}/indexing_rate"] = np.float64(100. * n_pts_indexed / n_pts) + template[f"{prfx}/indexing_rate/@units"] = f"%" + grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{nxem_phase_id}]" + template[f"{grp_name}/number_of_scan_points"] \ + = np.uint32(np.sum(inp["phase_id"] == 0)) + template[f"{grp_name}/phase_identifier"] = np.uint32(nxem_phase_id) + template[f"{grp_name}/phase_name"] = f"notIndexed" + + print(f"----unique inp phase_id--->{np.unique(inp['phase_id'])}") + for nxem_phase_id in np.arange(1, np.max(np.unique(inp["phase_id"])) + 1): + print(f"inp[phases].keys(): {inp['phases'].keys()}") + if nxem_phase_id not in inp["phases"].keys(): + raise ValueError(f"{nxem_phase_id} is not a key in inp['phases'] !") + trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{nxem_phase_id}]" + template[f"{trg}/number_of_scan_points"] \ + = np.uint32(np.sum(inp["phase_id"] == nxem_phase_id)) + template[f"{trg}/phase_identifier"] = np.uint32(nxem_phase_id) + template[f"{trg}/phase_name"] \ + = f"{inp['phases'][nxem_phase_id]['phase_name']}" + + self.process_roi_phase_ipfs_threed(inp, + roi_id, + nxem_phase_id, + inp["phases"][nxem_phase_id]["phase_name"], + inp["phases"][nxem_phase_id]["space_group"], + template) + return template + + def process_roi_phase_ipfs_threed(self, + inp: dict, + roi_id: int, + nxem_phase_id: int, + phase_name: str, + space_group: int, + template: dict) -> dict: + """Generate inverse pole figures (IPF) for 3D mappings for specific phase.""" + # equivalent to the case in twod, one needs at if required regridding/downsampling + # code here when any of the ROI's number of pixels along an edge > HFIVE_WEB_MAXIMUM_RGB + # TODO: I have not seen any dataset yet where is limit is exhausted, the largest + # dataset is a 3D SEM/FIB study from a UK project this is likely because to + # get an EBSD map as large one already scans quite long for one section as making + # a compromise is required and thus such hypothetical large serial-sectioning + # studies would block the microscope for a 
very long time + # however I have seen examples from Hadi Pirgazi with L. Kestens from Leuven + # where indeed large but thin 3d slabs were characterized + print(f"Generate 3D IPF map for {nxem_phase_id}, {phase_name}...") + rotations = Rotation.from_euler( + euler=inp["euler"][inp["phase_id"] == nxem_phase_id], + direction='lab2crystal', degrees=False) + print(f"shape rotations -----> {np.shape(rotations)}") + + for idx in np.arange(0, len(PROJECTION_VECTORS)): + point_group = get_point_group(space_group, proper=False) + ipf_key = plot.IPFColorKeyTSL( + point_group.laue, direction=PROJECTION_VECTORS[idx]) + img = get_ipfdir_legend(ipf_key) + + rgb_px_with_phase_id = np.asarray(np.asarray( + ipf_key.orientation2color(rotations) * 255., np.uint32), np.uint8) + print(f"shape rgb_px_with_phase_id -----> {np.shape(rgb_px_with_phase_id)}") + + ipf_rgb_map = np.asarray(np.asarray( + np.zeros((inp["n_z"] * inp["n_y"] * inp["n_x"], 3)) * 255., np.uint32), np.uint8) + # background is black instead of white (which would be more pleasing) + # but IPF color maps have a whitepoint which encodes in fact an orientation + # and because of that we may have a single crystal with an orientation + # close to the whitepoint which become a fully white seemingly "empty" image + ipf_rgb_map[inp["phase_id"] == nxem_phase_id, :] = rgb_px_with_phase_id + ipf_rgb_map = np.reshape( + ipf_rgb_map, (inp["n_z"], inp["n_y"], inp["n_x"], 3), order="C") + # 0 is z, 1 is y, while 2 is x ! + + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ + f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{nxem_phase_id}]" \ + f"/MS_IPF[ipf{idx + 1}]" + template[f"{trg}/projection_direction"] \ + = np.asarray(PROJECTION_VECTORS[idx].data.flatten(), np.float32) + + # add the IPF color map + mpp = f"{trg}/DATA[map]" + template[f"{mpp}/title"] \ + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" + template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! + template[f"{mpp}/@signal"] = "data" + dims = ["x", "y", "z"] + template[f"{mpp}/@axes"] = [] + for dim in dims[::-1]: + template[f"{mpp}/@axes"].append(f"axis_{dim}") + enum = 0 + for dim in dims: + template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 + template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} + hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) + + scan_unit = inp["s_unit"] # TODO::this is not necessarily correct + # could be a scale-invariant synthetic microstructure whose simulation + # would work on multiple length-scales as atoms are not resolved directly! + if scan_unit == "um": + scan_unit = "µm" + for dim in dims: + template[f"{mpp}/AXISNAME[axis_{dim}]"] \ + = {"compress": self.get_named_axis(inp, f"{dim}"), "strength": 1} + template[f"{mpp}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit})" + template[f"{mpp}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + + # add the IPF color map legend/key + lgd = f"{trg}/DATA[legend]" + template[f"{lgd}/title"] \ + = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" + # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" + template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
+ template[f"{lgd}/@signal"] = "data" + template[f"{lgd}/@axes"] = [] + dims = ["x", "y"] + for dim in dims[::-1]: + template[f"{lgd}/@axes"].append(f"axis_{dim}") + enum = 0 + for dim in dims: + template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) + enum += 1 + template[f"{lgd}/data"] = {"compress": img, "strength": 1} + hfive_web_decorate_nxdata(f"{lgd}/data", template) + + dims = [("x", 1), ("y", 0)] + for dim in dims: + template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ + = {"compress": np.asarray(np.linspace(0, + np.shape(img)[dim[1]] - 1, + num=np.shape(img)[dim[1]], + endpoint=True), np.uint32), + "strength": 1} + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + = f"Pixel along {dim[0]}-axis" + template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" + return template + + + """ def prepare_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: - """Process crystal orientation map from normalized orientation data.""" + # Process crystal orientation map from normalized orientation data. # for NeXus to create a default representation of the EBSD map to explore # get rid of this xmap at some point it is really not needed in my option # one can work with passing the set of EulerAngles to the IPF mapper directly @@ -449,7 +757,10 @@ def prepare_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) - print(self.xmap) return template - def process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: + def process_roi_ipfs_phases_twod(self, + inp: dict, + roi_id: int, + template: dict) -> dict: print("Parse crystal_structure_models aka phases (use xmap)...") phase_id = 0 prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" @@ -496,13 +807,9 @@ def process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) - self.process_roi_phase_ipfs_twod(roi_id, pyxem_phase_id, template) return template - - def onthefly_process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: - # TODO: ##### - return template - + def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: - """Parse inverse pole figures (IPF) mappings for specific phase.""" + # Parse inverse pole figures (IPF) mappings for specific phase. phase_name = self.xmap.phases[pyxem_phase_id].name print(f"Generate 2D IPF map for {pyxem_phase_id}, {phase_name}...") for idx in np.arange(0, len(PROJECTION_VECTORS)): @@ -593,152 +900,4 @@ def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template # call process_roi_ipf_color_key return template - - def onthefly_process_roi_ipfs_phases_threed(self, inp: dict, roi_id: int, template: dict) -> dict: - print("Parse crystal_structure_models aka phases (no xmap)...") - phase_id = 0 - prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" - n_pts = inp["n_x"] * inp["n_y"] * inp["n_z"] - n_pts_indexed = np.sum(inp["phase_id"] != 0) - print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}") - template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts) - template[f"{prfx}/indexing_rate"] = np.float64(100. 
* n_pts_indexed / n_pts) - template[f"{prfx}/indexing_rate/@units"] = f"%" - grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" - template[f"{grp_name}/number_of_scan_points"] \ - = np.uint32(np.sum(inp["phase_id"] == 0)) - template[f"{grp_name}/phase_identifier"] = np.uint32(phase_id) - template[f"{grp_name}/phase_name"] = f"notIndexed" - - print(f"----unique inp phase_id--->{np.unique(inp['phase_id'])}") - for phase_id in np.arange(1, np.max(np.unique(inp["phase_id"])) + 1): - # starting here at ID 1 because TODO::currently the only supported 3D case - # is from DREAM3D and here phase_ids start at 0 but this marks in DREAM3D jargon - # the 999 i.e. null-model of the notIndexed phase ! - print(f"inp[phases].keys(): {inp['phases'].keys()}") - if phase_id not in inp["phases"].keys(): - raise ValueError(f"{phase_id} is not a key in inp['phases'] !") - # pyxem_phase_id for notIndexed is -1, while for NeXus it is 0 so add + 1 in naming schemes - trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" - - # TODO::dealing with unexpected phase_identifier should not be an issue - # with DREAM3D because that software is more restrictive on this - template[f"{trg}/number_of_scan_points"] \ - = np.uint32(np.sum(inp["phase_id"] == phase_id)) - template[f"{trg}/phase_identifier"] = np.uint32(phase_id) - template[f"{trg}/phase_name"] \ - = f"{inp['phases'][phase_id]['phase_name']}" - - # mind to pass phase_id - 1 from the perspective of pyxem because - # in that software the id of the null-model is -1 and not 0 like in NeXus or DREAM3D! - self.process_roi_phase_ipfs_threed(inp, - roi_id, - phase_id, - inp["phases"][phase_id]["phase_name"], - inp["phases"][phase_id]["space_group"], - template) - return template - - def process_roi_phase_ipfs_threed(self, inp: dict, roi_id: int, pyxem_phase_id: int, phase_name: str, space_group: int, template: dict) -> dict: - """Generate inverse pole figures (IPF) for 3D mappings for specific phase.""" - # equivalent to the case in twod, one needs at if required regridding/downsampling - # code here when any of the ROI's number of pixels along an edge > HFIVE_WEB_MAXIMUM_RGB - # TODO: I have not seen any dataset yet where is limit is exhausted, the largest - # dataset is a 3D SEM/FIB study from a UK project this is likely because to - # get an EBSD map as large one already scans quite long for one section as making - # a compromise is required and thus such hypothetical large serial-sectioning - # studies would block the microscope for a very long time - # however I have seen examples from Hadi Pirgazi with L. 
Kestens from Leuven - # where indeed large but thin 3d slabs were characterized - print(f"Generate 3D IPF map for {pyxem_phase_id}, {phase_name}...") - rotations = Rotation.from_euler( - euler=inp["euler"][inp["phase_id"] == pyxem_phase_id], - direction='lab2crystal', degrees=False) - print(f"shape rotations -----> {np.shape(rotations)}") - - for idx in np.arange(0, len(PROJECTION_VECTORS)): - point_group = get_point_group(space_group, proper=False) - ipf_key = plot.IPFColorKeyTSL( - point_group.laue, direction=PROJECTION_VECTORS[idx]) - img = get_ipfdir_legend(ipf_key) - - rgb_px_with_phase_id = np.asarray(np.asarray( - ipf_key.orientation2color(rotations) * 255., np.uint32), np.uint8) - print(f"shape rgb_px_with_phase_id -----> {np.shape(rgb_px_with_phase_id)}") - - ipf_rgb_map = np.asarray(np.asarray( - np.zeros((inp["n_z"] * inp["n_y"] * inp["n_x"], 3)) * 255., np.uint32), np.uint8) - # background is black instead of white (which would be more pleasing) - # but IPF color maps have a whitepoint which encodes in fact an orientation - # and because of that we may have a single crystal with an orientation - # close to the whitepoint which become a fully white seemingly "empty" image - ipf_rgb_map[inp["phase_id"] == pyxem_phase_id, :] = rgb_px_with_phase_id - ipf_rgb_map = np.reshape( - ipf_rgb_map, (inp["n_z"], inp["n_y"], inp["n_x"], 3), order="C") - # 0 is z, 1 is y, while 2 is x ! - - trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \ - f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id}]" \ - f"/MS_IPF[ipf{idx + 1}]" - template[f"{trg}/projection_direction"] \ - = np.asarray(PROJECTION_VECTORS[idx].data.flatten(), np.float32) - - # add the IPF color map - mpp = f"{trg}/DATA[map]" - template[f"{mpp}/title"] \ - = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" - template[f"{mpp}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! - template[f"{mpp}/@signal"] = "data" - dims = ["x", "y", "z"] - template[f"{mpp}/@axes"] = [] - for dim in dims[::-1]: - template[f"{mpp}/@axes"].append(f"axis_{dim}") - enum = 0 - for dim in dims: - template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) - enum += 1 - template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1} - hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template) - - scan_unit = inp["s_unit"] # TODO::this is not necessarily correct - # could be a scale-invariant synthetic microstructure whose simulation - # would work on multiple length-scales as atoms are not resolved directly! - if scan_unit == "um": - scan_unit = "µm" - for dim in dims: - template[f"{mpp}/AXISNAME[axis_{dim}]"] \ - = {"compress": self.get_named_axis(inp, f"{dim}"), "strength": 1} - template[f"{mpp}/AXISNAME[axis_{dim}]/@long_name"] \ - = f"Coordinate along {dim}-axis ({scan_unit})" - template[f"{mpp}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" - - # add the IPF color map legend/key - lgd = f"{trg}/DATA[legend]" - template[f"{lgd}/title"] \ - = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}" - # template[f"{trg}/title"] = f"Inverse pole figure color key with SST" - template[f"{lgd}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
- template[f"{lgd}/@signal"] = "data" - template[f"{lgd}/@axes"] = [] - dims = ["x", "y"] - for dim in dims[::-1]: - template[f"{lgd}/@axes"].append(f"axis_{dim}") - enum = 0 - for dim in dims: - template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) - enum += 1 - template[f"{lgd}/data"] = {"compress": img, "strength": 1} - hfive_web_decorate_nxdata(f"{lgd}/data", template) - - dims = [("x", 1), ("y", 0)] - for dim in dims: - template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ - = {"compress": np.asarray(np.linspace(0, - np.shape(img)[dim[1]] - 1, - num=np.shape(img)[dim[1]], - endpoint=True), np.uint32), - "strength": 1} - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ - = f"Pixel along {dim[0]}-axis" - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" - return template + """ From c2ce3a04c6085a10c5267c73bbbb661a8967c9a0 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 11:12:30 +0100 Subject: [PATCH 36/84] Remove code using now obsolete xmap --- .../readers/em/subparsers/nxs_pyxem.py | 277 +----------------- 1 file changed, 4 insertions(+), 273 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 9ae711ccf..7d92e1fca 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -102,7 +102,6 @@ def __init__(self, entry_id: int = 1, input_file_name: str = ""): self.entry_id = 1 self.file_path = input_file_name self.cache = {"is_filled": False} - self.xmap = None def parse(self, template: dict) -> dict: hfive_parser_type = self.identify_hfive_type() @@ -328,7 +327,8 @@ def onthefly_process_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: - print("Parse crystal_structure_models aka phases (no xmap) 2D version...") + dimensionality = inp["dimensionality"] + print(f"Parse crystal_structure_models aka phases {dimensionality}D version...") nxem_phase_id = 0 prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" # bookkeeping is always reported for the original grid @@ -486,7 +486,8 @@ def onthefly_process_roi_ipfs_phases_threed(self, template: dict) -> dict: # this function is almost the same as its twod version we keep it for # now an own function until the rediscretization also works for the 3D grid - print("Parse crystal_structure_models aka phases (no xmap) 3D version...") + dimensionality = inp["dimensionality"] + print(f"Parse crystal_structure_models aka phases {dimensionality}D version...") # see comments in twod version of this function nxem_phase_id = 0 prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" @@ -631,273 +632,3 @@ def process_roi_phase_ipfs_threed(self, = f"Pixel along {dim[0]}-axis" template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" return template - - - """ - def prepare_roi_ipfs_phases_twod(self, inp: dict, roi_id: int, template: dict) -> dict: - # Process crystal orientation map from normalized orientation data. 
- # for NeXus to create a default representation of the EBSD map to explore - # get rid of this xmap at some point it is really not needed in my option - # one can work with passing the set of EulerAngles to the IPF mapper directly - # the order of the individual per scan point results arrays anyway are assumed - # to have the same sequence of scan points and thus the same len along the scan axes - self.xmap = None - self.axis_x = None - self.axis_y = None - - print(f"Unique phase_identifier {np.unique(inp['phase_id'])}") - min_phase_id = np.min(np.unique(inp["phase_id"])) - - if np.max((inp["n_x"], inp["n_y"])) > HFIVE_WEB_MAXIMUM_RGB: - # assume center of mass of the scan points - # TODO::check if mapping correct for hexagonal and square grid - aabb = [np.min(inp["scan_point_x"]) - 0.5 * inp["s_x"], - np.max(inp["scan_point_x"]) + 0.5 * inp["s_x"], - np.min(inp["scan_point_y"]) - 0.5 * inp["s_y"], - np.max(inp["scan_point_y"]) + 0.5 * inp["s_y"]] - print(f"{aabb}") - if aabb[1] - aabb[0] >= aabb[3] - aabb[2]: - sqr_step_size = (aabb[1] - aabb[0]) / HFIVE_WEB_MAXIMUM_RGB - nxy = [HFIVE_WEB_MAXIMUM_RGB, - int(np.ceil((aabb[3] - aabb[2]) / sqr_step_size))] - else: - sqr_step_size = (aabb[3] - aabb[2]) / HFIVE_WEB_MAXIMUM_RGB - nxy = [int(np.ceil((aabb[1] - aabb[0]) / sqr_step_size)), - HFIVE_WEB_MAXIMUM_RGB] - print(f"H5Web default plot generation, scaling nxy0 {[inp['n_x'], inp['n_y']]}, nxy {nxy}") - # the above estimate is not exactly correct (may create a slight real space shift) - # of the EBSD map TODO:: regrid the real world axis-aligned bounding box aabb with - # a regular tiling of squares or hexagons - # https://stackoverflow.com/questions/18982650/differences-between-matlab-and-numpy-and-pythons-round-function - # MTex/Matlab round not exactly the same as numpy round but reasonably close - - # scan point positions were normalized by tech partner subparsers such that they - # always build on pixel coordinates calibrated for step size not by giving absolute positions - # in the sample surface frame of reference as this is typically not yet consistently documented - # because we assume in addition that we always start at the top left corner the zeroth/first - # coordinate is always 0., 0. ! 
-            xy = np.column_stack(
-                (np.tile(np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sqr_step_size, nxy[1]),
-                 np.repeat(np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sqr_step_size, nxy[0])))
-            print(f"xy {xy}, shape {np.shape(xy)}")
-            tree = KDTree(np.column_stack((inp["scan_point_x"], inp["scan_point_y"])))
-            d, idx = tree.query(xy, k=1)
-            if np.sum(idx == tree.n) > 0:
-                raise ValueError(f"kdtree query left some query points without a neighbor!")
-            del d
-            del tree
-            pyxem_euler = np.zeros((np.shape(xy)[0], 3), np.float32)
-            pyxem_euler = np.nan
-            pyxem_euler = inp["euler"][idx, :]
-            if np.isnan(pyxem_euler).any() is True:
-                raise ValueError(f"Downsampling of the EBSD map left pixels without euler!")
-            phase_new = np.zeros((np.shape(xy)[0],), np.int32) - 2
-            phase_new = inp["phase_id"][idx]
-            if np.sum(phase_new == -2) > 0:
-                raise ValueError(f"Downsampling of the EBSD map left pixels without phase_id!")
-            del xy
-
-            if min_phase_id > 0:
-                pyxem_phase_id = phase_new - min_phase_id
-            elif min_phase_id == 0:
-                pyxem_phase_id = phase_new - 1
-            else:
-                raise ValueError(f"Unable to deal with unexpected phase_identifier!")
-            del phase_new
-
-            coordinates, _ = create_coordinate_arrays(
-                (nxy[1], nxy[0]), (sqr_step_size, sqr_step_size))
-            xaxis = coordinates["x"]
-            yaxis = coordinates["y"]
-            print(f"coordinates: " \
-                  f"xmi {np.min(xaxis)}, xmx {np.max(xaxis)}, " \
-                  f"ymi {np.min(yaxis)}, ymx {np.max(yaxis)}")
-            del coordinates
-            self.axis_x = np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sqr_step_size
-            self.axis_y = np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sqr_step_size
-        else:
-            # can use the map discretization as is
-            coordinates, _ = create_coordinate_arrays(
-                (inp["n_y"], inp["n_x"]), (inp["s_y"], inp["s_x"]))
-            xaxis = coordinates["x"]
-            yaxis = coordinates["y"]
-            print(f"xmi {np.min(xaxis)}, xmx {np.max(xaxis)}, " \
-                  f"ymi {np.min(yaxis)}, ymx {np.max(yaxis)}")
-            del coordinates
-            self.axis_x = self.get_named_axis(inp, "x")
-            self.axis_y = self.get_named_axis(inp, "y")
-
-            pyxem_euler = inp["euler"]
-            # TODO::there was one example 093_0060.h5oina
-            # where HitRate was 75% but no pixel was left unidentified ??
-            if min_phase_id > 0:
-                pyxem_phase_id = inp["phase_id"] - min_phase_id
-            elif min_phase_id == 0:
-                pyxem_phase_id = inp["phase_id"] - 1
-            else:
-                raise ValueError(f"Unable to deal with unexpected phase_identifier!")
-
-            # inp["phase_id"] - (np.min(inp["phase_id"]) - (-1))
-            # for pyxem the non-indexed case has to be -1 instead of 0, which is what NeXus uses;
-            # -1 always, because the content of inp["phase_id"] is normalized
-            # to the NeXus NXem_ebsd_crystal_structure concept already!
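The convention gap described in the comments above is just a constant offset; a minimal sketch with made-up phase identifiers:

import numpy as np

# NeXus NXem convention: phase_identifier 0 means notIndexed, real phases start at 1
nexus_phase_id = np.asarray([0, 1, 1, 2, 0, 2], np.int32)

# pyxem/orix convention: -1 means notIndexed, real phases start at 0
pyxem_phase_id = nexus_phase_id - 1
print(np.unique(pyxem_phase_id))  # prints: [-1  0  1]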
- print(f"Unique pyxem_phase_id {np.unique(pyxem_phase_id)}") - self.xmap = CrystalMap(rotations=Rotation.from_euler(euler=pyxem_euler, - direction='lab2crystal', - degrees=False), - x=xaxis, y=yaxis, - phase_id=pyxem_phase_id, - phase_list=PhaseList(space_groups=inp["space_group"], - structures=inp["phase"]), - prop={}, - scan_unit=inp["s_unit"]) - del xaxis - del yaxis - # "bc": inp["band_contrast"]}, scan_unit=inp["s_unit"]) - print(self.xmap) - return template - - def process_roi_ipfs_phases_twod(self, - inp: dict, - roi_id: int, - template: dict) -> dict: - print("Parse crystal_structure_models aka phases (use xmap)...") - phase_id = 0 - prfx = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" - n_pts = inp["n_x"] * inp["n_y"] - n_pts_indexed = np.sum(inp["phase_id"] != 0) - print(f"n_pts {n_pts}, n_pts_indexed {n_pts_indexed}") - template[f"{prfx}/number_of_scan_points"] = np.uint32(n_pts) - template[f"{prfx}/indexing_rate"] = np.float64(100. * n_pts_indexed / n_pts) - template[f"{prfx}/indexing_rate/@units"] = f"%" - grp_name = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{phase_id}]" - template[f"{grp_name}/number_of_scan_points"] = np.uint32(0) - template[f"{grp_name}/phase_identifier"] = np.uint32(phase_id) - template[f"{grp_name}/phase_name"] = f"notIndexed" - - for pyxem_phase_id in np.arange(0, np.max(self.xmap.phase_id) + 1): - # this loop is implicitly ignored as when xmap is None - print(f"inp[phases].keys(): {inp['phases'].keys()}") - if (pyxem_phase_id + 1) not in inp["phases"].keys(): - raise ValueError(f"{pyxem_phase_id + 1} is not a key in inp['phases'] !") - # phase_id of pyxem notIndexed is -1 while for NeXus - # it is 0 so add + 1 in naming schemes - trg = f"{prfx}/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id + 1}]" - - min_phase_id = np.min(np.unique(inp["phase_id"])) - if min_phase_id > 0: - pyx_phase_id = inp["phase_id"] - min_phase_id - elif min_phase_id == 0: - pyx_phase_id = inp["phase_id"] - 1 - else: - raise ValueError(f"Unable how to deal with unexpected phase_identifier!") - del min_phase_id - - template[f"{trg}/number_of_scan_points"] \ - = np.uint32(np.sum(pyx_phase_id == pyxem_phase_id)) - del pyx_phase_id - # not self.xmap.phase_id because in NeXus the number_of_scan_points is always - # accounting for the original map size and not the potentially downscaled version - # of the map as the purpose of the later one is exclusively to show a plot at all - # because of a technical limitation of H5Web if there would be a tool that - # could show larger RGB plots we would not need to downscale the EBSD map resolution! - template[f"{trg}/phase_identifier"] = np.uint32(pyxem_phase_id + 1) - template[f"{trg}/phase_name"] \ - = f"{inp['phases'][pyxem_phase_id + 1]['phase_name']}" - - self.process_roi_phase_ipfs_twod(roi_id, pyxem_phase_id, template) - return template - - def process_roi_phase_ipfs_twod(self, roi_id: int, pyxem_phase_id: int, template: dict) -> dict: - # Parse inverse pole figures (IPF) mappings for specific phase. 
-        phase_name = self.xmap.phases[pyxem_phase_id].name
-        print(f"Generate 2D IPF map for {pyxem_phase_id}, {phase_name}...")
-        for idx in np.arange(0, len(PROJECTION_VECTORS)):
-            ipf_key = plot.IPFColorKeyTSL(
-                self.xmap.phases[pyxem_phase_id].point_group.laue,
-                direction=PROJECTION_VECTORS[idx])
-            img = get_ipfdir_legend(ipf_key)
-
-            rgb_px_with_phase_id = np.asarray(
-                np.asarray(ipf_key.orientation2color(
-                    self.xmap[phase_name].rotations) * 255., np.uint32), np.uint8)
-
-            print(f"idx {idx}, phase_name {phase_name}, shape {self.xmap.shape}")
-            ipf_rgb_map = np.asarray(
-                np.uint8(np.zeros((self.xmap.shape[0] * self.xmap.shape[1], 3)) * 255.))
-            # background is black instead of white (which would be more pleasing),
-            # but IPF color maps have a whitepoint which in fact encodes an orientation,
-            # and because of that we may have a single crystal with an orientation
-            # close to the whitepoint which becomes a fully white, seemingly "empty" image
-            ipf_rgb_map[self.xmap.phase_id == pyxem_phase_id, :] = rgb_px_with_phase_id
-            ipf_rgb_map = np.reshape(
-                ipf_rgb_map, (self.xmap.shape[0], self.xmap.shape[1], 3), order="C")
-            # 0 is y while 1 is x !
-
-            trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{roi_id}]/ebsd/indexing" \
-                  f"/EM_EBSD_CRYSTAL_STRUCTURE_MODEL[phase{pyxem_phase_id + 1}]" \
-                  f"/MS_IPF[ipf{idx + 1}]"
-            template[f"{trg}/projection_direction"] \
-                = np.asarray(PROJECTION_VECTORS[idx].data.flatten(), np.float32)
-
-            # add the IPF color map
-            mpp = f"{trg}/DATA[map]"
-            template[f"{mpp}/title"] \
-                = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}"
-            template[f"{mpp}/@NX_class"] = f"NXdata"  # TODO::writer should decorate automatically!
-            template[f"{mpp}/@signal"] = "data"
-            dims = ["x", "y"]
-            template[f"{mpp}/@axes"] = []
-            for dim in dims[::-1]:
-                template[f"{mpp}/@axes"].append(f"axis_{dim}")
-            enum = 0
-            for dim in dims:
-                template[f"{mpp}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum)
-                enum += 1
-            template[f"{mpp}/DATA[data]"] = {"compress": ipf_rgb_map, "strength": 1}
-            hfive_web_decorate_nxdata(f"{mpp}/DATA[data]", template)
-
-            scan_unit = self.xmap.scan_unit
-            if scan_unit == "um":
-                scan_unit = "µm"
-            template[f"{mpp}/AXISNAME[axis_x]"] = {"compress": self.axis_x, "strength": 1}
-            template[f"{mpp}/AXISNAME[axis_x]/@long_name"] \
-                = f"Coordinate along x-axis ({scan_unit})"
-            template[f"{mpp}/AXISNAME[axis_x]/@units"] = f"{scan_unit}"
-            template[f"{mpp}/AXISNAME[axis_y]"] = {"compress": self.axis_y, "strength": 1}
-            template[f"{mpp}/AXISNAME[axis_y]/@long_name"] \
-                = f"Coordinate along y-axis ({scan_unit})"
-            template[f"{mpp}/AXISNAME[axis_y]/@units"] = f"{scan_unit}"
-
-            # add the IPF color map legend/key
-            lgd = f"{trg}/DATA[legend]"
-            template[f"{lgd}/title"] \
-                = f"Inverse pole figure {PROJECTION_DIRECTIONS[idx][0]} {phase_name}"
-            # template[f"{trg}/title"] = f"Inverse pole figure color key with SST"
-            template[f"{lgd}/@NX_class"] = f"NXdata"  # TODO::writer should decorate automatically!
- template[f"{lgd}/@signal"] = "data" - template[f"{lgd}/@axes"] = [] - for dim in dims[::-1]: - template[f"{lgd}/@axes"].append(f"axis_{dim}") - enum = 0 - for dim in dims: - template[f"{lgd}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(enum) - enum += 1 - template[f"{lgd}/data"] = {"compress": img, "strength": 1} - hfive_web_decorate_nxdata(f"{lgd}/data", template) - - dims = [("x", 1), ("y", 0)] - for dim in dims: - template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ - = {"compress": np.asarray(np.linspace(0, - np.shape(img)[dim[1]] - 1, - num=np.shape(img)[dim[1]], - endpoint=True), np.uint32), - "strength": 1} - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ - = f"Pixel along {dim[0]}-axis" - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" - - # call process_roi_ipf_color_key - return template - """ From 1824163778365e440705178cb1033866f79909f5 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 11:50:06 +0100 Subject: [PATCH 37/84] Proliferate tiling and phase_id, scan_point consistency for other parsers --- .../readers/em/subparsers/hfive_bruker.py | 32 ++++++++++------- .../em/subparsers/hfive_dreamthreed.py | 8 ++++- .../readers/em/subparsers/hfive_ebsd.py | 35 ++++++++++++------- .../readers/em/subparsers/hfive_edax.py | 12 +++++-- .../readers/em/subparsers/hfive_oxford.py | 8 +++-- pyxem.batch.sh | 2 +- 6 files changed, 65 insertions(+), 32 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index 9457ec46d..343ae3b6a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -39,7 +39,9 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN +from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ + get_scan_point_coords class HdfFiveBrukerEspritReader(HdfFiveBaseParser): @@ -112,6 +114,9 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): self.tmp[ckey]["grid_type"] = SQUARE_GRID else: raise ValueError(f"Unable to parse {grp_name}/Grid Type !") + # the next two lines encode the typical assumption that is not reported in tech partner file! 
+ self.tmp[ckey]["tiling"] = REGULAR_TILING + self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"] for req_field in req_fields: @@ -230,18 +235,19 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID if self.tmp[ckey]["grid_type"] != SQUARE_GRID: print(f"WARNING: Check carefully correct interpretation of scan_point coords!") - self.tmp[ckey]["scan_point_x"] \ - = np.asarray(np.tile(np.linspace(0., - self.tmp[ckey]["n_x"] - 1., - num=self.tmp[ckey]["n_x"], - endpoint=True) * self.tmp[ckey]["s_x"], - self.tmp[ckey]["n_y"]), np.float32) - self.tmp[ckey]["scan_point_y"] \ - = np.asarray(np.repeat(np.linspace(0., - self.tmp[ckey]["n_y"] - 1., - num=self.tmp[ckey]["n_y"], - endpoint=True) * self.tmp[ckey]["s_y"], - self.tmp[ckey]["n_x"]), np.float32) + # self.tmp[ckey]["scan_point_x"] \ + # = np.asarray(np.tile(np.linspace(0., + # self.tmp[ckey]["n_x"] - 1., + # num=self.tmp[ckey]["n_x"], + # endpoint=True) * self.tmp[ckey]["s_x"], + # self.tmp[ckey]["n_y"]), np.float32) + # self.tmp[ckey]["scan_point_y"] \ + # = np.asarray(np.repeat(np.linspace(0., + # self.tmp[ckey]["n_y"] - 1., + # num=self.tmp[ckey]["n_y"], + # endpoint=True) * self.tmp[ckey]["s_y"], + # self.tmp[ckey]["n_x"]), np.float32) + get_scan_point_coords(self.tmp[ckey]) # Band Contrast is not stored in Bruker but Radon Quality or MAD # but this is s.th. different as it is the mean angular deviation between diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 248be5452..48e4ee420 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -38,7 +38,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN # DREAM3D implements essentially a data analysis workflow with individual steps # in the DREAM3D jargon each step is referred to as a filter, filters have well-defined @@ -316,6 +316,9 @@ def parse_and_normalize_ebsd_header(self, ckey: str): # TODO::is it correct an assumption that DREAM3D regrids using square voxel self.tmp[ckey]["dimensionality"] = 3 self.tmp[ckey]["grid_type"] = SQUARE_GRID + # the next two lines encode the typical assumption that is not reported in tech partner file! + self.tmp[ckey]["tiling"] = REGULAR_TILING + self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN for dim in ["x", "y", "z"]: self.tmp[ckey][f"n_{dim}"] = dims[idx] self.tmp[ckey][f"s_{dim}"] = spc[idx] @@ -394,6 +397,9 @@ def parse_and_normalize_ebsd_data(self, ckey: str): # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere if self.tmp[ckey]["grid_type"] != SQUARE_GRID: print(f"WARNING: Check carefully correct interpretation of scan_point coords!") + # TODO::all other hfive parsers normalize scan_point_{dim} arrays into + # tiled and repeated coordinate tuples and not like below + # only the dimension scale axes values! 
for dim in ["x", "y", "z"]: self.tmp[ckey][f"scan_point_{dim}"] \ = np.asarray(np.linspace(0, self.tmp[ckey][f"n_{dim}"] - 1, diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 8bb2bbeb1..07546d2d7 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -38,7 +38,9 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN +from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ + get_scan_point_coords class HdfFiveCommunityReader(HdfFiveBaseParser): @@ -113,6 +115,9 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): self.tmp[ckey]["grid_type"] = SQUARE_GRID else: raise ValueError(f"Unable to parse {grp_name}/Grid Type !") + # the next two lines encode the typical assumption that is not reported in tech partner file! + self.tmp[ckey]["tiling"] = REGULAR_TILING + self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"] for req_field in req_fields: @@ -233,19 +238,20 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID if self.tmp[ckey]["grid_type"] != SQUARE_GRID: print(f"WARNING: Check carefully correct interpretation of scan_point coords!") - self.tmp[ckey]["scan_point_x"] \ - = np.asarray(np.tile(np.linspace(0., - self.tmp[ckey]["n_x"] - 1., - num=self.tmp[ckey]["n_x"], - endpoint=True) * self.tmp[ckey]["s_x"], - self.tmp[ckey]["n_y"]), np.float32) - self.tmp[ckey]["scan_point_y"] \ - = np.asarray(np.repeat(np.linspace(0., - self.tmp[ckey]["n_y"] - 1., - num=self.tmp[ckey]["n_y"], - endpoint=True) * self.tmp[ckey]["s_y"], - self.tmp[ckey]["n_x"]), np.float32) + # self.tmp[ckey]["scan_point_x"] \ + # = np.asarray(np.tile(np.linspace(0., + # self.tmp[ckey]["n_x"] - 1., + # num=self.tmp[ckey]["n_x"], + # endpoint=True) * self.tmp[ckey]["s_x"], + # self.tmp[ckey]["n_y"]), np.float32) + # self.tmp[ckey]["scan_point_y"] \ + # = np.asarray(np.repeat(np.linspace(0., + # self.tmp[ckey]["n_y"] - 1., + # num=self.tmp[ckey]["n_y"], + # endpoint=True) * self.tmp[ckey]["s_y"], + # self.tmp[ckey]["n_x"]), np.float32) # X SAMPLE and Y SAMPLE seem to be something different! + get_scan_point_coords(self.tmp[ckey]) # Band Contrast is not stored in Bruker but Radon Quality or MAD # but this is s.th. 
different as it is the mean angular deviation between @@ -254,3 +260,6 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32) else: raise ValueError(f"{grp_name}/MAD has unexpected shape !") + + + diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 8e3fc1164..5f552b01e 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -39,7 +39,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import EULER_SPACE_SYMMETRY, \ read_strings_from_dataset, read_first_scalar, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID + ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser): @@ -124,6 +124,10 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): self.tmp[ckey]["grid_type"] = SQUARE_GRID else: raise ValueError(f"Unable to parse {grp_name}/Grid Type !") + # the next two lines encode the typical assumption that is not reported in tech partner file! + self.tmp[ckey]["tiling"] = REGULAR_TILING + self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN + self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"]) self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron? self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"]) @@ -227,7 +231,9 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # given no official EDAX OimAnalysis spec we cannot define for sure if # phase_id == 0 means just all was indexed with the first/zeroth phase or nothing - # was indexed, TODO::assuming it means all indexed with first phase: + # was indexed, here we assume it means all indexed with first phase + # and we assume EDAX uses -1 for notIndexed, this assumption is also + # substantiated by the situation in the hfive_apex parser if np.all(fp[f"{grp_name}/Phase"][:] == 0): self.tmp[ckey]["phase_id"] = np.zeros(n_pts, np.int32) + 1 else: @@ -265,3 +271,5 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32) self.tmp[ckey]["scan_point_y"] = np.asarray( fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32) + # despite differences in reported calibrations the scan_point_{dim} arrays are + # already provided by the tech partner as tile and repeat coordinates diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index 2f6d6d3d7..3c9f19b3d 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -39,7 +39,7 @@ from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ read_strings_from_dataset, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - HEXAGONAL_GRID, SQUARE_GRID + HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN class HdfFiveOxfordReader(HdfFiveBaseParser): @@ -123,6 +123,9 @@ def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): # TODO::check if Oxford always uses SquareGrid like assumed here self.tmp[ckey]["dimensionality"] = 2 self.tmp[ckey]["grid_type"] = SQUARE_GRID + # 
the next two lines encode the typical assumption that is not reported in tech partner file! + self.tmp[ckey]["tiling"] = REGULAR_TILING + self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN req_fields = ["X Cells", "Y Cells", "X Step", "Y Step"] for req_field in req_fields: @@ -230,7 +233,7 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed - # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 + # no normalization needed, also in NXem the null model notIndexed is phase_identifier 0 self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32) # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere @@ -240,6 +243,7 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): if self.tmp[ckey]["grid_type"] != SQUARE_GRID: print(f"WARNING: Check carefully correct interpretation of scan_point coords!") # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner) + # for Oxford instrument this is already the required tile and repeated array of shape (size,1) self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32) # inconsistency f32 in file although specification states float diff --git a/pyxem.batch.sh b/pyxem.batch.sh index f26a124a2..f9f9e246d 100755 --- a/pyxem.batch.sh +++ b/pyxem.batch.sh @@ -32,7 +32,7 @@ examples="207_2081.edaxh5" # examples="229_2097.oh5" # examples="067_0003.dream3d SmallIN100_Final.dream3d 244_0014.dream3d" # examples="244_0014.dream3d" -# examples="SmallIN100_Final.dream3d" +examples="SmallIN100_Final.dream3d" # examples="067_0003.dream3d" # very large 3D EBSD takes ~40GB RAM for processing for example in $examples; do From 2090418ca08a196afb365f14c83c99ed0a52266a Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 12:40:11 +0100 Subject: [PATCH 38/84] Fixed one of two failing tests, mpes test still fails, pycodestyling --- .../readers/em/concepts/nexus_concepts.py | 4 +- .../readers/em/examples/ebsd_database.py | 1 - pynxtools/dataconverter/readers/em/reader.py | 24 +++---- .../readers/em/subparsers/hfive_apex.py | 26 ++++---- .../readers/em/subparsers/hfive_base.py | 65 ++++++++++--------- .../readers/em/subparsers/hfive_bruker.py | 15 +++-- .../em/subparsers/hfive_dreamthreed.py | 17 ++--- .../readers/em/subparsers/hfive_ebsd.py | 16 ++--- .../readers/em/subparsers/hfive_edax.py | 14 ++-- .../readers/em/subparsers/hfive_oxford.py | 14 ++-- .../readers/em/subparsers/nxs_mtex.py | 20 +++--- .../readers/em/subparsers/nxs_pyxem.py | 25 ++++--- .../readers/em/utils/get_sqr_grid.py | 10 +-- .../readers/em/utils/hfive_utils.py | 10 ++- pyxem.batch.sh | 8 ++- tests/dataconverter/test_readers.py | 2 +- 16 files changed, 143 insertions(+), 128 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py index 8617052a6..e00bcf807 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py +++ b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py @@ -19,6 +19,7 @@ # pylint: disable=no-member +from typing import List from pynxtools.dataconverter.readers.em.concepts.concept_mapper \ import variadic_path_to_specific_path, apply_modifier @@ -46,7 +47,8 @@ class NxEmAppDef(): def __init__(self): pass - def parse(self, template: 
dict, entry_id: int = 1, cmd_line_args = []) -> dict: + def parse(self, template: dict, entry_id: int = 1, cmd_line_args: List = []) -> dict: + """Parse application definition.""" for nx_path, modifier in NxEmRoot.items(): if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"): trg = variadic_path_to_specific_path(nx_path, [entry_id]) diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py index 369b3dcc9..e140b570c 100644 --- a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py +++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py @@ -38,7 +38,6 @@ FLIGHT_PLAN = "start_top_left_stack_x_left_to_right_stack_x_line_along_end_bottom_right" - FreeTextToUniquePhase = {"Actinolite": "Actinolite", "al": "Al", "Al2 O3": "Al2O3", diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index d4f648aae..75fad265d 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -135,18 +135,18 @@ def read(self, # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue - # elif case.dat_parser_type == "zip": - # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) - # zip_parser.parse(template) - # elif case.dat_parser_type == "dream3d": - # dream_parser = NxEmOmDreamThreedEbsdParser(case.dat[0], entry_id) - # dream_parser.parse(template) - # elif case.dat_parser_type == "kikuchipy": - # elif case.dat_parser_type == "pyxem": - # elif case.dat_parser_type == "score": - # elif case.dat_parser_type == "qube": - # elif case.dat_parser_type == "paradis": - # elif case.dat_parser_type == "brinckmann": + # # elif case.dat_parser_type == "zip": + # # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) + # # zip_parser.parse(template) + # # elif case.dat_parser_type == "dream3d": + # # dream_parser = NxEmOmDreamThreedEbsdParser(case.dat[0], entry_id) + # # dream_parser.parse(template) + # # elif case.dat_parser_type == "kikuchipy": + # # elif case.dat_parser_type == "pyxem": + # # elif case.dat_parser_type == "score": + # # elif case.dat_parser_type == "qube": + # # elif case.dat_parser_type == "paradis": + # # elif case.dat_parser_type == "brinckmann": # at this point the data for the default plots should already exist # we only need to decorate the template to point to the mandatory ROI overview # print("Create NeXus default plottable data...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 886eb787a..1f2076394 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -167,11 +167,11 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if f"{sub_grp_name}/Lattice Constant {req_field}" not in fp: raise ValueError(f"Unable to parse ../Lattice Constant {req_field} !") a_b_c = [fp[f"{sub_grp_name}/Lattice Constant A"][0], - fp[f"{sub_grp_name}/Lattice Constant B"][0], - fp[f"{sub_grp_name}/Lattice Constant C"][0]] + fp[f"{sub_grp_name}/Lattice Constant B"][0], + fp[f"{sub_grp_name}/Lattice Constant C"][0]] angles = [fp[f"{sub_grp_name}/Lattice Constant Alpha"][0], - fp[f"{sub_grp_name}/Lattice Constant Beta"][0], - fp[f"{sub_grp_name}/Lattice Constant Gamma"][0]] + fp[f"{sub_grp_name}/Lattice Constant Beta"][0], + fp[f"{sub_grp_name}/Lattice 
Constant Gamma"][0]] # TODO::available examples support reporting in angstroem and degree self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \ = np.asarray(a_b_c, np.float32) * 0.1 @@ -194,14 +194,16 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if len(self.tmp[ckey]["phase"]) > 0: self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) + Structure(title=phase_name, + atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) else: - self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] + self.tmp[ckey]["phase"] = [ + Structure(title=phase_name, + atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): grp_name = f"{self.prfx}/EBSD/ANG/DATA/DATA" @@ -248,7 +250,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # the material which they typically never scan (time, interest, costs, instrument # availability) completely! if self.tmp[ckey]["grid_type"] != SQUARE_GRID: - print(f"WARNING: {self.tmp[ckey]['grid_type']}: check carefully the " \ + print(f"WARNING: {self.tmp[ckey]['grid_type']}: check carefully the " f"correct interpretation of scan_point coords!") # the case of EDAX APEX shows the key problem with implicit assumptions # edaxh5 file not necessarily store the scan_point_{dim} positions diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py index 33a836fb5..a84be5897 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py @@ -17,7 +17,23 @@ # """(Sub-)parser mapping concepts and content from EDAX/AMETEK *.edaxh5 (APEX) files on NXem.""" -"""HDF5 base parser to inherit from for tech-partner-specific HDF5 subparsers.""" +import os +import glob +import re +import sys +from typing import Dict, Any, List +import numpy as np +import h5py +import yaml +import json +# import imageio.v3 as iio +from PIL import Image as pil + +from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import \ + IS_GROUP, IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, \ + IS_FIELD_IN_COMPOUND_DATASET, Concept + +# HDF5 base parser to inherit from for tech-partner-specific HDF5 subparsers.""" # the base parser implements the processing of standardized orientation maps via # the pyxem software package from the electron microscopy community @@ -34,19 +50,6 @@ # task for the community and instead focus here on showing a more diverse example # towards more interoperability between the different tools in the community -import os, glob, re, sys -from typing import Dict, Any, List -import numpy as np -import h5py -import yaml, json -# import imageio.v3 as iio -from PIL import Image as pil - - -from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import \ - IS_GROUP, IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, \ - IS_FIELD_IN_COMPOUND_DATASET, Concept - class HdfFiveBaseParser: def __init__(self, file_path: str = ""): @@ -101,7 +104,7 @@ def close(self): def __call__(self, node_name, h5obj): # only h5py datasets have dtype attribute, so we can search on this if isinstance(h5obj, h5py.Dataset): - if not node_name in self.datasets.keys(): + if 
node_name not in self.datasets.keys(): if hasattr(h5obj, "dtype"): if hasattr(h5obj.dtype, "fields") and hasattr(h5obj.dtype, "names"): if h5obj.dtype.names is not None: @@ -153,7 +156,7 @@ def __call__(self, node_name, h5obj): None, hdf_type="regular_dataset") elif n_dims == 1: - if not 0 in np.shape(h5obj): + if 0 not in np.shape(h5obj): self.datasets[node_name] \ = ("IS_REGULAR_DATASET", type(h5obj), @@ -225,12 +228,12 @@ def __call__(self, node_name, h5obj): hdf_type="regular_dataset") else: raise ValueError( - f"hasattr(h5obj.dtype, 'fields') and hasattr(" \ + f"hasattr(h5obj.dtype, 'fields') and hasattr(" f"h5obj.dtype, 'names') failed, inspect {node_name} !") else: raise ValueError(f"hasattr(h5obj, dtype) failed, inspect {node_name} !") else: - if not node_name in self.groups.keys(): + if node_name not in self.groups.keys(): self.groups[node_name] = ("IS_GROUP") self.instances[node_name] \ = Concept(node_name, @@ -246,7 +249,7 @@ def __call__(self, node_name, h5obj): def get_attribute_data_structure(self, prefix, src_dct): # trg_dct is self.attributes for key, val in src_dct.items(): - if not f"{prefix}/@{key}" in self.attributes.keys(): + if f"{prefix}/@{key}" not in self.attributes.keys(): if isinstance(val, str): self.attributes[f"{prefix}/@{key}"] \ = ("IS_ATTRIBUTE", type(val), np.shape(val), str, val) @@ -280,8 +283,8 @@ def get_content(self): """Walk recursively through the file to get content.""" if self.h5r is not None: # if self.file_path is not None: # with h5py.File(self.file_path, "r") as h5r: - # first step visit all groups and datasets recursively - # get their full path within the HDF5 file + # first step visit all groups and datasets recursively + # get their full path within the HDF5 file self.h5r.visititems(self) # second step visit all these and get their attributes for h5path, h5ifo in self.groups.items(): @@ -315,29 +318,29 @@ def store_report(self, store_instances_templatized=True, store_templates=False): if store_instances is True: - print(f"Storing analysis results in " \ - f"{self.file_path[self.file_path.rfind('/')+1:]}." \ + print(f"Storing analysis results in " + f"{self.file_path[self.file_path.rfind('/')+1:]}." 
f"EbsdHdfFileInstanceNames.txt...") with open(f"{self.file_path}.EbsdHdfFileInstanceNames.txt", "w") as txt: for instance_name, concept in self.instances.items(): - txt.write(f"/{instance_name}, hdf: {concept.hdf}, " \ + txt.write(f"/{instance_name}, hdf: {concept.hdf}, " f"type: {concept.dtype}, shape: {concept.shape}\n") if store_instances_templatized is True: - print(f"Storing analysis results in " \ - f"{self.file_path[self.file_path.rfind('/')+1:]}" \ + print(f"Storing analysis results in " + f"{self.file_path[self.file_path.rfind('/')+1:]}" f".EbsdHdfFileInstanceNamesTemplatized.txt...") with open(f"{self.file_path}.EbsdHdfFileInstanceNamesTemplatized.txt", "w") as txt: for instance_name, concept in self.instances.items(): txt.write(f"/{instance_name}, hdf: {concept.hdf}\n") if store_templates is True: - print(f"Storing analysis results in "\ - f"{self.file_path[self.file_path.rfind('/')+1:]}" \ + print(f"Storing analysis results in " + f"{self.file_path[self.file_path.rfind('/')+1:]}" f".EbsdHdfFileTemplateNames.txt...") with open(f"{self.file_path}.EbsdHdfFileTemplateNames.txt", "w") as txt: for template_name, concept in self.templates.items(): - txt.write(f"{template_name}, hdf: {concept.hdf}, "\ + txt.write(f"{template_name}, hdf: {concept.hdf}, " f"type: {concept.dtype}, shape: {concept.shape}\n") def get_attribute_value(self, h5path): @@ -366,7 +369,7 @@ def get_dataset_value(self, h5path): if h5path.count("#") == 1: # with (self.file_path, "r") as h5r: obj = self.h5r[h5path[0:h5path.rfind("#")]] - return obj.fields(h5path[h5path.rfind("#")+1:])[:] + return obj.fields(h5path[h5path.rfind("#") + 1:])[:] return None def get_value(self, h5path): @@ -463,4 +466,4 @@ def get_value(self, h5path): # based on its file format ending (mime type, magic cookie) etc # although interesting this is exactly what the magic cookie # (the initial few bytes to the beginning of the byte stream of a file) -# were originally conceptualized for \ No newline at end of file +# were originally conceptualized for diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index 343ae3b6a..3e6594c75 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -167,7 +167,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. # The attribute Symbol contains the string representation, for example P m -3 m. 
- spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()]) + spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()]) if spc_grp in EBSD_MAP_SPACEGROUP.keys(): space_group = EBSD_MAP_SPACEGROUP[spc_grp] self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group @@ -186,14 +186,15 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if len(self.tmp[ckey]["phase"]) > 0: self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, + Structure(title=phase_name, + atoms=None, lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) + angles[0], angles[1], angles[2]))) else: - self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] + self.tmp[ckey]["phase"] = [ + Structure(title=phase_name, atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # no official documentation yet from Bruker but seems inspired by H5EBSD diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 48e4ee420..009508ed2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -265,7 +265,6 @@ def search_normalizable_ebsd_content(self): found = 0 for req_field in ["CrystalStructures", "LatticeConstants", "MaterialName"]: if f"{group_phases}/{req_field}" in self.datasets.keys(): - # (which should also have specific shape) found += 1 if found != 3: return False @@ -305,11 +304,11 @@ def parse_and_normalize(self): def parse_and_normalize_ebsd_header(self, ckey: str): with h5py.File(self.file_path, "r") as h5r: - dims = h5r[f"{self.path_registry['group_geometry']}" \ + dims = h5r[f"{self.path_registry['group_geometry']}" f"/_SIMPL_GEOMETRY/DIMENSIONS"][:].flatten() - org = h5r[f"{self.path_registry['group_geometry']}" \ + org = h5r[f"{self.path_registry['group_geometry']}" f"/_SIMPL_GEOMETRY/ORIGIN"][:].flatten() - spc = h5r[f"{self.path_registry['group_geometry']}" \ + spc = h5r[f"{self.path_registry['group_geometry']}" f"/_SIMPL_GEOMETRY/SPACING"][:].flatten() idx = 0 @@ -401,13 +400,9 @@ def parse_and_normalize_ebsd_data(self, ckey: str): # tiled and repeated coordinate tuples and not like below # only the dimension scale axes values! 
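For reference, the reformatted expression below evaluates to voxel-center positions; with hypothetical numbers:

import numpy as np

n, s = 4, 0.25  # hypothetical voxel count and spacing along one axis
centers = np.linspace(0, n - 1, num=n, endpoint=True) * s + 0.5 * s
print(centers)  # prints: [0.125 0.375 0.625 0.875], i.e. (np.arange(n) + 0.5) * s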
for dim in ["x", "y", "z"]: - self.tmp[ckey][f"scan_point_{dim}"] \ - = np.asarray(np.linspace(0, self.tmp[ckey][f"n_{dim}"] - 1, - num=self.tmp[ckey][f"n_{dim}"], - endpoint=True) \ - * self.tmp[ckey][f"s_{dim}"] \ - + 0.5 * self.tmp[ckey][f"s_{dim}"], - np.float32) + self.tmp[ckey][f"scan_point_{dim}"] = np.asarray(np.linspace( + 0, self.tmp[ckey][f"n_{dim}"] - 1, num=self.tmp[ckey][f"n_{dim}"], endpoint=True) + * self.tmp[ckey][f"s_{dim}"] + 0.5 * self.tmp[ckey][f"s_{dim}"], np.float32) # ROI overviewed rendered from either bc, ci, or mad if isinstance(self.path_registry["roi_info"], tuple) and len(self.path_registry["roi_info"]) == 2: if isinstance(self.path_registry["roi_info"][0], str) is True and isinstance(self.path_registry["roi_info"][1], str) is True: diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 07546d2d7..ffd8bf73c 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -171,7 +171,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): # The attribute Symbol contains the string representation, for example P m -3 m. # formatting is a nightmare F m#ovl3m for F m 3bar m... but IT i.e. # international table of crystallography identifier - spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()]) + spc_grp = read_strings_from_dataset(fp[f"{sub_grp_name}/SpaceGroup"][()]) if spc_grp in EBSD_MAP_SPACEGROUP.keys(): space_group = EBSD_MAP_SPACEGROUP[spc_grp] self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group @@ -181,7 +181,6 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): else: raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !") - if len(self.tmp[ckey]["space_group"]) > 0: self.tmp[ckey]["space_group"].append(space_group) else: @@ -189,14 +188,16 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if len(self.tmp[ckey]["phase"]) > 0: self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, + Structure(title=phase_name, + atoms=None, lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) + angles[0], angles[1], angles[2]))) else: self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, + = [Structure(title=phase_name, + atoms=None, lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] + angles[0], angles[1], angles[2]))] def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # no official documentation yet from Bruker but seems inspired by H5EBSD @@ -260,6 +261,3 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32) else: raise ValueError(f"{grp_name}/MAD has unexpected shape !") - - - diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 5f552b01e..7e756776f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -194,14 +194,16 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): if len(self.tmp[ckey]["phase"]) > 0: self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, + Structure(title=phase_name, + atoms=None, lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) + angles[0], angles[1], 
angles[2]))) else: self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, + = [Structure(title=phase_name, + atoms=None, lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] + angles[0], angles[1], angles[2]))] else: raise ValueError(f"Unable to parse {grp_name} !") @@ -268,8 +270,8 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): if self.tmp[ckey]["grid_type"] != SQUARE_GRID: print(f"WARNING: Check carefully correct interpretation of scan_point coords!") self.tmp[ckey]["scan_point_x"] = np.asarray( - fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32) + fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32) self.tmp[ckey]["scan_point_y"] = np.asarray( - fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32) + fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32) # despite differences in reported calibrations the scan_point_{dim} arrays are # already provided by the tech partner as tile and repeat coordinates diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index 3c9f19b3d..e64929e4d 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -205,14 +205,16 @@ def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str): if len(self.tmp[ckey]["phase"]) > 0: self.tmp[ckey]["phase"].append( - Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))) + Structure(title=phase_name, + atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))) else: self.tmp[ckey]["phase"] \ - = [Structure(title=phase_name, atoms=None, - lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], - angles[0], angles[1], angles[2]))] + = [Structure(title=phase_name, + atoms=None, + lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2], + angles[0], angles[1], angles[2]))] def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): # https://github.com/oinanoanalysis/h5oina/blob/master/H5OINAFile.md diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py index ba96f8977..48f6dffc1 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py @@ -17,6 +17,16 @@ # """(Sub-)parser mapping concepts and content from *.nxs.mtex files on NXem.""" +import re +from typing import Any +from typing_extensions import SupportsIndex +import h5py + +from ase.data import chemical_symbols + +from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ + FreeTextToUniquePhase, UniquePhaseToAtomTypes, ProjectIdToCitation + """ README.md *.nxs.mtex is a specific HDF5-based data processing report format for users of @@ -41,16 +51,6 @@ of the sub-parsers. """ -import re -from typing import Any -from typing_extensions import SupportsIndex -import h5py - -from ase.data import chemical_symbols - -from pynxtools.dataconverter.readers.em.examples.ebsd_database \ - import FreeTextToUniquePhase, UniquePhaseToAtomTypes, ProjectIdToCitation - class NxEmNxsMTexSubParser(): """Map content from *.nxs.mtex files on an instance of NXem. 
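All of these subparsers share one pattern: read and normalize content from the source file, then write it into the flat template dictionary under NeXus-style keys. A minimal sketch of that pattern, in which the file name and dataset path are hypothetical:

import h5py
import numpy as np

template = {}  # flat dict the dataconverter fills before writing the NeXus/HDF5 artifact
entry_id = 1

with h5py.File("example.nxs.mtex", "r") as h5r:  # hypothetical input file
    # hypothetical dataset path; real subparsers resolve format-specific paths
    n_pts = np.uint32(h5r["/mtex/ebsd/indexing/number_of_scan_points"][()])

template[f"/ENTRY[entry{entry_id}]/ROI[roi1]/ebsd/indexing/number_of_scan_points"] = n_pts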
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 7d92e1fca..c226478a2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -32,11 +32,15 @@ # task for the community and instead focus here on showing a more diverse example # towards more interoperability between the different tools in the community -import os, glob, re, sys +import os +import glob +import re +import sys from typing import Dict, Any, List import numpy as np import h5py -import yaml, json +import yaml +import json # import imageio.v3 as iio from PIL import Image as pil @@ -62,11 +66,6 @@ from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ get_scan_point_axis_values, get_scan_point_coords, square_grid, hexagonal_grid, threed -PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] -PROJECTION_DIRECTIONS = [("X", Vector3d.xvector().data.flatten()), - ("Y", Vector3d.yvector().data.flatten()), - ("Z", Vector3d.zvector().data.flatten())] - from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader from pynxtools.dataconverter.readers.em.subparsers.hfive_edax import HdfFiveEdaxOimAnalysisReader @@ -76,6 +75,12 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_dreamthreed import HdfFiveDreamThreedReader +PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] +PROJECTION_DIRECTIONS = [("X", Vector3d.xvector().data.flatten()), + ("Y", Vector3d.yvector().data.flatten()), + ("Z", Vector3d.zvector().data.flatten())] + + def get_ipfdir_legend(ipf_key): """Generate IPF color map key for a specific ipf_key.""" img = None @@ -84,7 +89,7 @@ def get_ipfdir_legend(ipf_key): orientation='landscape', format='png', transparent=False, bbox_inches='tight', pad_inches=0.1, metadata=None) img = np.asarray(thumbnail(pil.open("temporary.png", "r", ["png"]), - size=HFIVE_WEB_MAXIMUM_RGB), np.uint8) # no flipping + size=HFIVE_WEB_MAXIMUM_RGB), np.uint8) # no flipping img = img[:, :, 0:3] # discard alpha channel if os.path.exists("temporary.png"): os.remove("temporary.png") @@ -375,7 +380,7 @@ def onthefly_process_roi_ipfs_phases_twod(self, inp["phases"][nxem_phase_id]["space_group"], template) return template - + def process_roi_phase_ipfs_twod(self, inp: dict, roi_id: int, @@ -386,7 +391,7 @@ def process_roi_phase_ipfs_twod(self, print(f"Generate 2D IPF maps for {nxem_phase_id}, {phase_name}...") trg_grid \ = get_scan_points_with_mark_data_discretized_on_sqr_grid(inp, HFIVE_WEB_MAXIMUM_RGB) - + rotations = Rotation.from_euler( euler=trg_grid["euler"][trg_grid["phase_id"] == nxem_phase_id], direction='lab2crystal', degrees=False) diff --git a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py index 99886c66c..573962e8a 100644 --- a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py +++ b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py @@ -82,7 +82,7 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, else: trg_sxy = (aabb[3] - aabb[2]) / max_extent trg_nxy = [int(np.ceil((aabb[1] - aabb[0]) / trg_sxy)), max_extent] - print(f"H5Web default plot generation, scaling src_nxy " \ + print(f"H5Web default plot generation, scaling src_nxy " f"{[src_grid['n_x'], 
src_grid['n_y']]}, trg_nxy {trg_nxy}")
     # the above estimate is not exactly correct (may create a slight real space shift)
     # of the EBSD map TODO:: regrid the real world axis-aligned bounding box aabb with
@@ -114,7 +114,7 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict,
                 trg_grid[key] = np.nan
                 trg_grid[key] = src_grid["euler"][idx, :]
                 if np.isnan(trg_grid[key]).any() is True:
-                    raise ValueError(f"Downsampling of the point cloud left " \
+                    raise ValueError(f"Downsampling of the point cloud left "
                                      f"pixels without mark data {key} !")
                 print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
             elif key == "phase_id" or key == "bc":
@@ -122,7 +122,7 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict,
                 # pyxem_id is at least -1, bc is typically positive
                 trg_grid[key] = src_grid[key][idx]
                 if np.sum(trg_grid[key] == -2) > 0:
-                    raise ValueError(f"Downsampling of the point cloud left " \
+                    raise ValueError(f"Downsampling of the point cloud left "
                                      f"pixels without mark data {key} !")
                 print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
             elif key == "ci" or key == "mad":
@@ -131,7 +131,7 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict,
                 trg_grid[key] = src_grid[key][idx]
                 print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}")
                 if np.isnan(trg_grid[key]).any() is True:
-                    raise ValueError(f"Downsampling of the point cloud left " \
+                    raise ValueError(f"Downsampling of the point cloud left "
                                      f"pixels without mark data {key} !")
             elif key not in ["n_x", "n_y", "n_z",
                              "s_x", "s_y", "s_z",
@@ -150,5 +150,5 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict,
         # TODO::need to update scan_point_{dim}
         return trg_grid
     else:
-        raise ValueError(f"The 3D discretization is currently not implemented because " \
+        raise ValueError(f"The 3D discretization is currently not implemented because "
                          f"we do not know of any large enough dataset to test it !")
diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
index 3b2320c4e..29e36f25e 100644
--- a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
+++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py
@@ -17,11 +17,14 @@
 #
 """Utility functions when working with parsing HDF5."""
 
-import numpy as np
-import os, glob, re, sys
+import os
+import glob
+import re
+import sys
 import h5py
 import yaml
 import json
+import numpy as np
 
 from itertools import groupby
 
@@ -36,9 +39,9 @@
 # see here for typical examples http://img.chem.ucl.ac.uk/sgp/large/186az1.htm
 DIRTY_FIX_SPACEGROUP = {}
-
 EULER_SPACE_SYMMETRY = [2. * np.pi, np.pi, 2. 
* np.pi] + def format_euler_parameterization(triplet_set): """Transform degrees to radiant and apply orientation space symmetry""" # it is not robust in general to judge just from the collection of euler angles @@ -55,6 +58,7 @@ def format_euler_parameterization(triplet_set): = EULER_SPACE_SYMMETRY[column_id] + triplet_set[here, column_id] return triplet_set + def read_strings_from_dataset(obj): # print(f"type {type(obj)}, np.shape {np.shape(obj)}, obj {obj}") # if hasattr(obj, "dtype"): diff --git a/pyxem.batch.sh b/pyxem.batch.sh index f9f9e246d..32046ab8f 100755 --- a/pyxem.batch.sh +++ b/pyxem.batch.sh @@ -19,12 +19,14 @@ datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/pro # 173_0056.h5oina has only eds data # HDF5 files, 2D ESBD -examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" +# examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5" # dream3d files 3D ESBD # examples="067_0003.dream3d 177_0004.dream3d 177_0005.dream3d 177_0006.dream3d 177_0008.dream3d 177_0009.dream3d 226_0010.dream3d 226_0011.dream3d 226_0012.dream3d 226_0013.dream3d 244_0014.dream3d SmallIN100_Final.dream3d" +# all of them +examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_0013.h5 066_0015.h5 066_0016.h5 066_0023.h5 066_0025.h5 066_0034.h5 078_0004.h5 087_0021.h5 088_0009.h5 093_0045.h5oina 093_0047.h5oina 093_0048.h5oina 093_0051.h5oina 093_0053.h5oina 093_0054.h5oina 093_0055.h5oina 093_0058.h5oina 093_0059.h5oina 093_0060.h5oina 093_0062.h5oina 093_0063.h5oina 101_0040.h5 110_0012.h5 114_0017.h5 116_0008.h5 116_0014.h5 116_0018.h5 116_0019.h5 116_0020.h5 116_0022.h5 116_0037.h5 116_0042.h5 124_0002.h5 124_0036.h5 125_0006.h5 126_0038.h5 130_0003.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2085.h5 130_2086.h5 130_2087.h5 130_2088.h5 130_2089.h5 130_2090.h5 130_2091.h5 130_2092.h5 130_2093.h5 130_2094.h5 132_0005.h5 144_0043.h5 173_0056.h5oina 173_0057.h5oina 
174_0031.h5 207_2081.edaxh5 208_0061.h5oina 212_2095.h5oina 229_2096.oh5 229_2097.oh5 067_0003.dream3d 177_0004.dream3d 177_0005.dream3d 177_0006.dream3d 177_0008.dream3d 177_0009.dream3d 226_0010.dream3d 226_0011.dream3d 226_0012.dream3d 226_0013.dream3d 244_0014.dream3d SmallIN100_Final.dream3d" # specific examples for testing purposes -examples="207_2081.edaxh5" +# examples="207_2081.edaxh5" # examples="173_0057.h5oina" # oxford, bruker, britton, edax old noncali, edax old calib, apex # examples="173_0057.h5oina 130_0003.h5 088_0009.h5 116_0014.h5 229_2097.oh5 207_2081.edaxh5" @@ -32,7 +34,7 @@ examples="207_2081.edaxh5" # examples="229_2097.oh5" # examples="067_0003.dream3d SmallIN100_Final.dream3d 244_0014.dream3d" # examples="244_0014.dream3d" -examples="SmallIN100_Final.dream3d" +# examples="SmallIN100_Final.dream3d" # examples="067_0003.dream3d" # very large 3D EBSD takes ~40GB RAM for processing for example in $examples; do diff --git a/tests/dataconverter/test_readers.py b/tests/dataconverter/test_readers.py index d75344541..0ef32320f 100644 --- a/tests/dataconverter/test_readers.py +++ b/tests/dataconverter/test_readers.py @@ -53,7 +53,7 @@ def get_all_readers() -> List[ParameterSet]: # Explicitly removing ApmReader and EmNionReader because we need to add test data for reader in [get_reader(x) for x in get_names_of_all_readers()]: - if reader.__name__ in ("ApmReader", "EmOmReader", "EmSpctrscpyReader", "EmNionReader"): + if reader.__name__ in ("ApmReader", "EmReader", "EmOmReader", "EmSpctrscpyReader", "EmNionReader"): readers.append(pytest.param(reader, marks=pytest.mark.skip(reason="Missing test data.") )) From ff5cfa055f089dfd030beb9b307953c447745502 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 13:09:16 +0100 Subject: [PATCH 39/84] pylinting, mypy, removal of unnecessary imports --- pynxtools/dataconverter/readers/em/reader.py | 38 +++++++-------- .../readers/em/subparsers/hfive_apex.py | 18 ++----- .../readers/em/subparsers/hfive_base.py | 30 +++++------- .../readers/em/subparsers/hfive_bruker.py | 23 ++------- .../readers/em/subparsers/hfive_concept.py | 8 ++-- .../em/subparsers/hfive_dreamthreed.py | 25 +++------- .../readers/em/subparsers/hfive_ebsd.py | 20 ++------ .../readers/em/subparsers/hfive_edax.py | 19 ++------ .../readers/em/subparsers/hfive_emsoft.py | 20 ++------ .../readers/em/subparsers/hfive_oxford.py | 21 ++------ .../readers/em/subparsers/nxs_mtex.py | 5 +- .../readers/em/subparsers/nxs_pyxem.py | 48 +++++++------------ .../readers/em/utils/hfive_utils.py | 5 +- tests/dataconverter/test_readers.py | 3 +- 14 files changed, 85 insertions(+), 198 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 75fad265d..a8aff8d4b 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -25,33 +25,27 @@ from pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef -from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser +# from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver -from pynxtools.dataconverter.readers.em.geometry.convention_mapper \ - import NxEmConventionMapper +# from pynxtools.dataconverter.readers.em.geometry.convention_mapper import 
NxEmConventionMapper -""" -from pynxtools.dataconverter.readers.em_om.utils.generic_eln_io \ - import NxEmOmGenericElnSchemaParser - -from pynxtools.dataconverter.readers.em_om.utils.orix_ebsd_parser \ - import NxEmOmOrixEbsdParser - -from pynxtools.dataconverter.readers.em_om.utils.mtex_ebsd_parser \ - import NxEmOmMtexEbsdParser - -from pynxtools.dataconverter.readers.em_om.utils.zip_ebsd_parser \ - import NxEmOmZipEbsdParser - -from pynxtools.dataconverter.readers.em_om.utils.dream3d_ebsd_parser \ - import NxEmOmDreamThreedEbsdParser - -from pynxtools.dataconverter.readers.em_om.utils.em_nexus_plots \ - import em_om_default_plot_generator""" +# remaining subparsers to be implemented and merged into this one +# from pynxtools.dataconverter.readers.em_om.utils.generic_eln_io \ +# import NxEmOmGenericElnSchemaParser +# from pynxtools.dataconverter.readers.em_om.utils.orix_ebsd_parser \ +# import NxEmOmOrixEbsdParser +# from pynxtools.dataconverter.readers.em_om.utils.mtex_ebsd_parser \ +# import NxEmOmMtexEbsdParser +# from pynxtools.dataconverter.readers.em_om.utils.zip_ebsd_parser \ +# import NxEmOmZipEbsdParser +# from pynxtools.dataconverter.readers.em_om.utils.dream3d_ebsd_parser \ +# import NxEmOmDreamThreedEbsdParser +# from pynxtools.dataconverter.readers.em_om.utils.em_nexus_plots \ +# import em_om_default_plot_generator class EmReader(BaseReader): @@ -127,7 +121,7 @@ def read(self, # add further with resolving cases # if file_path is an HDF5 will use hfive parser - sub_parser = "nxs_pyxem" + # sub_parser = "nxs_pyxem" subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) subparser.parse(template) # exit(1) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 1f2076394..e487c5287 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -19,19 +19,9 @@ import numpy as np import h5py -from itertools import groupby -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix +from typing import Dict from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation, Orientation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt +from orix.quaternion import Orientation from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ @@ -48,8 +38,8 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} - self.supported_version = {} - self.version = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py index a84be5897..eeebb88d8 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py @@ -17,17 +17,9 @@ # """(Sub-)parser mapping concepts and content from EDAX/AMETEK *.edaxh5 (APEX) files on NXem.""" -import os -import glob -import re -import sys -from typing import Dict, Any, List import numpy as np import h5py -import yaml -import json -# import imageio.v3 as iio -from PIL import Image as pil +from typing import Dict, 
List from pynxtools.dataconverter.readers.em.subparsers.hfive_concept import \ IS_GROUP, IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, \ @@ -64,24 +56,24 @@ def __init__(self, file_path: str = ""): # an instance of a file whose schema belongs to the H5OINA family of HDF5 container formats # specifically using version 5 self.prfx = None - self.tmp = {} + self.tmp: Dict = {} self.source = None self.file_path = None # collection of instance path - self.groups = {} - self.datasets = {} - self.attributes = {} - self.instances = {} + self.groups: Dict = {} + self.datasets: Dict = {} + self.attributes: Dict = {} + self.instances: Dict = {} # collection of template - self.template_groups = [] - self.template_datasets = [] - self.template_attributes = [] - self.templates = {} + self.template_groups: List = [] + self.template_datasets: List = [] + self.template_attributes: List = [] + self.templates: Dict = {} self.h5r = None if file_path is not None and file_path != "": self.file_path = file_path else: - raise ValueError(f"{__class__.__name__} needs proper instantiation !") + raise ValueError(f"{__name__} needs proper instantiation !") def init_named_cache(self, ckey: str): """Init a new cache for normalized EBSD data if not existent.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index 3e6594c75..2c8f5a8b3 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -17,29 +17,16 @@ # """(Sub-)parser mapping concepts and content from Bruker *.h5 files on NXem.""" -import os -from typing import Dict, Any, List import numpy as np import h5py -from itertools import groupby -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix +from typing import Dict from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN + ASSUME_PHASE_NAME_TO_SPACE_GROUP, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN # HEXAGONAL_GRID from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ get_scan_point_coords @@ -49,9 +36,9 @@ class HdfFiveBrukerEspritReader(HdfFiveBaseParser): def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None - self.tmp = {} - self.supported_version = {} - self.version = {} + self.tmp: Dict = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py index 55e8714c0..b7262974c 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_concept.py @@ -17,14 +17,16 @@ # """Constants and utilities used when parsing concepts from HDF5 files.""" +from typing import Dict + 
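# Editor's note (illustration, not part of the patch): the IS_* codes defined
# next tag what kind of node an entry in an HDF5 tree is. A minimal, hedged
# sketch of how such codes can be assigned while walking a file with h5py --
# the file name and the classify() helper are hypothetical:
#
#     import h5py
#
#     def classify(node):
#         """Map an h5py node to one of the IS_* codes; attributes are
#         enumerated separately via node.attrs."""
#         if isinstance(node, h5py.Group):
#             return IS_GROUP
#         # compound datasets expose named fields via dtype.names
#         if node.dtype.names:
#             return IS_COMPOUND_DATASET
#         return IS_REGULAR_DATASET
#
#     with h5py.File("example.h5", "r") as h5r:
#         h5r.visititems(lambda name, node: print(name, classify(node)))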
IS_GROUP = 0 IS_REGULAR_DATASET = 1 IS_COMPOUND_DATASET = 2 IS_FIELD_IN_COMPOUND_DATASET = 3 IS_ATTRIBUTE = 4 -VERSION_MANAGEMENT = {"tech_partner": [], - "schema_name": [], "schema_version": [], - "writer_name": [], "writer_version": []} +VERSION_MANAGEMENT: Dict = {"tech_partner": [], + "schema_name": [], "schema_version": [], + "writer_name": [], "writer_version": []} class Concept(): diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 009508ed2..0502c6519 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -17,28 +17,15 @@ # """(Sub-)parser mapping concepts and content from community *.dream3d files on NXem.""" -import os -from typing import Dict, Any, List import numpy as np import h5py -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix -from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt +from typing import Dict from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ - EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization + read_strings_from_dataset # EBSD_MAP_SPACEGROUP from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN + SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN # ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID # DREAM3D implements essentially a data analysis workflow with individual steps # in the DREAM3D jargon each step is referred to as a filter, filters have well-defined @@ -97,9 +84,9 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} - self.path_registry = {} - self.supported_version = {} - self.version = {} + self.path_registry: Dict = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index ffd8bf73c..0de0c79d7 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -17,28 +17,16 @@ # """(Sub-)parser mapping concepts and content from community *.h5/*.h5ebsd files on NXem.""" -import os -from typing import Dict, Any, List import numpy as np import h5py -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix +from typing import Dict from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - 
ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN + ASSUME_PHASE_NAME_TO_SPACE_GROUP, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN # HEXAGONAL_GRID from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ get_scan_point_coords @@ -49,8 +37,8 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} - self.supported_version = {} - self.version = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 7e756776f..157b8ce75 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -17,23 +17,10 @@ # """(Sub-)parser mapping concepts and content from EDAX/AMETEK *.oh5/*.h5 (OIM Analysis) files on NXem.""" -import os -from typing import Dict, Any, List import numpy as np import h5py -from itertools import groupby -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix +from typing import Dict from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import EULER_SPACE_SYMMETRY, \ @@ -48,8 +35,8 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} - self.supported_version = {} - self.version = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py index 5aa5e8f0c..49405197b 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py @@ -17,23 +17,9 @@ # """(Sub-)parser mapping concepts and content from Marc deGraeff's EMsoft *.h5 files on NXem.""" -import os -from typing import Dict, Any, List import numpy as np import h5py -from itertools import groupby -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix -from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt +from typing import Dict from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset @@ -45,8 +31,8 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} - self.supported_version = {} - self.version = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index e64929e4d..04db7b896 100644 --- 
a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -17,29 +17,16 @@ # """(Sub-)parser mapping concepts and content from Oxford Instruments *.h5oina files on NXem.""" -import os -from typing import Dict, Any, List import numpy as np import h5py -from itertools import groupby -# import imageio.v3 as iio -from PIL import Image as pil - -import diffsims -import orix +from typing import Dict from diffpy.structure import Lattice, Structure -from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList -from orix.quaternion import Rotation -from orix.vector import Vector3d - -import matplotlib.pyplot as plt from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ read_strings_from_dataset, format_euler_parameterization from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ - HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN + SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN # HEXAGONAL_GRID class HdfFiveOxfordReader(HdfFiveBaseParser): @@ -53,8 +40,8 @@ def __init__(self, file_path: str = ""): # which perform plotting and data processing functionalities # this design effectively avoids that different specialized hfive readers need to # duplicate the code of the base hfive parser for generating NeXus default plots - self.supported_version = {} - self.version = {} + self.supported_version: Dict = {} + self.version: Dict = {} self.init_support() self.supported = False self.check_if_supported() diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py index 48f6dffc1..834c6f268 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py @@ -18,10 +18,9 @@ """(Sub-)parser mapping concepts and content from *.nxs.mtex files on NXem.""" import re -from typing import Any -from typing_extensions import SupportsIndex import h5py - +# from typing_extensions import SupportsIndex +# from typing import Any from ase.data import chemical_symbols from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index c226478a2..90e417c0f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -33,28 +33,14 @@ # towards more interoperability between the different tools in the community import os -import glob -import re -import sys -from typing import Dict, Any, List import numpy as np -import h5py -import yaml -import json -# import imageio.v3 as iio +# from typing import Dict, Any, List from PIL import Image as pil - -import diffsims -import orix -from diffpy.structure import Lattice, Structure from orix import plot -from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList from orix.quaternion import Rotation from orix.quaternion.symmetry import get_point_group from orix.vector import Vector3d -import matplotlib.pyplot as plt - from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset from pynxtools.dataconverter.readers.em.utils.hfive_web_constants \ import HFIVE_WEB_MAXIMUM_ROI, HFIVE_WEB_MAXIMUM_RGB @@ -64,7 +50,7 @@ from 
pynxtools.dataconverter.readers.em.utils.get_sqr_grid import \ get_scan_points_with_mark_data_discretized_on_sqr_grid from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ - get_scan_point_axis_values, get_scan_point_coords, square_grid, hexagonal_grid, threed + square_grid, hexagonal_grid, threed, get_scan_point_axis_values, get_scan_point_coords from pynxtools.dataconverter.readers.em.subparsers.hfive_oxford import HdfFiveOxfordReader from pynxtools.dataconverter.readers.em.subparsers.hfive_bruker import HdfFiveBrukerEspritReader @@ -472,17 +458,17 @@ def process_roi_phase_ipfs_twod(self, template[f"{lgd}/data"] = {"compress": img, "strength": 1} hfive_web_decorate_nxdata(f"{lgd}/data", template) - dims = [("x", 1), ("y", 0)] - for dim in dims: - template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ + dims_idxs = {"x": 1, "y": 0} + for dim, idx in dims_idxs.items(): + template[f"{lgd}/AXISNAME[axis_{dim}]"] \ = {"compress": np.asarray(np.linspace(0, - np.shape(img)[dim[1]] - 1, - num=np.shape(img)[dim[1]], + np.shape(img)[idx] - 1, + num=np.shape(img)[idx], endpoint=True), np.uint32), "strength": 1} - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + template[f"{lgd}/AXISNAME[axis_{dim}]/@long_name"] \ = f"Pixel along {dim[0]}-axis" - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" + template[f"{lgd}/AXISNAME[axis_{dim}]/@units"] = "px" return template def onthefly_process_roi_ipfs_phases_threed(self, @@ -625,15 +611,15 @@ def process_roi_phase_ipfs_threed(self, template[f"{lgd}/data"] = {"compress": img, "strength": 1} hfive_web_decorate_nxdata(f"{lgd}/data", template) - dims = [("x", 1), ("y", 0)] - for dim in dims: - template[f"{lgd}/AXISNAME[axis_{dim[0]}]"] \ + dims_idxs = {"x": 1, "y": 0} + for dim, idx in dims_idxs.items(): + template[f"{lgd}/AXISNAME[axis_{dim}]"] \ = {"compress": np.asarray(np.linspace(0, - np.shape(img)[dim[1]] - 1, - num=np.shape(img)[dim[1]], + np.shape(img)[idx] - 1, + num=np.shape(img)[idx], endpoint=True), np.uint32), "strength": 1} - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@long_name"] \ - = f"Pixel along {dim[0]}-axis" - template[f"{lgd}/AXISNAME[axis_{dim[0]}]/@units"] = "px" + template[f"{lgd}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Pixel along {dim}-axis" + template[f"{lgd}/AXISNAME[axis_{dim}]/@units"] = "px" return template diff --git a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py index 29e36f25e..685306bc8 100644 --- a/pynxtools/dataconverter/readers/em/utils/hfive_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/hfive_utils.py @@ -26,6 +26,7 @@ import json import numpy as np from itertools import groupby +from typing import List, Dict EBSD_MAP_SPACEGROUP = {"P 6#sub3mc": 186, @@ -38,8 +39,8 @@ "I m#ovl3m": 229} # see here for typical examples http://img.chem.ucl.ac.uk/sgp/large/186az1.htm -DIRTY_FIX_SPACEGROUP = {} -EULER_SPACE_SYMMETRY = [2. * np.pi, np.pi, 2. * np.pi] +DIRTY_FIX_SPACEGROUP: Dict = {} +EULER_SPACE_SYMMETRY: List = [2. * np.pi, np.pi, 2. 
* np.pi] def format_euler_parameterization(triplet_set): diff --git a/tests/dataconverter/test_readers.py b/tests/dataconverter/test_readers.py index 0ef32320f..9d8d5093a 100644 --- a/tests/dataconverter/test_readers.py +++ b/tests/dataconverter/test_readers.py @@ -53,7 +53,8 @@ def get_all_readers() -> List[ParameterSet]: # Explicitly removing ApmReader and EmNionReader because we need to add test data for reader in [get_reader(x) for x in get_names_of_all_readers()]: - if reader.__name__ in ("ApmReader", "EmReader", "EmOmReader", "EmSpctrscpyReader", "EmNionReader"): + if reader.__name__ in ("ApmReader", + "EmReader", "EmOmReader", "EmSpctrscpyReader", "EmNionReader"): readers.append(pytest.param(reader, marks=pytest.mark.skip(reason="Missing test data.") )) From 6cde0e94e283632e2f88b7750b4f48cfa573a8d4 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 13:36:11 +0100 Subject: [PATCH 40/84] Passing pycodestyle, pylinting, mypy now locally using py3.10.13 conda --- .../readers/em/concepts/nexus_concepts.py | 4 ++-- .../readers/em/geometry/convention_mapper.py | 4 ++-- .../readers/em/subparsers/hfive_bruker.py | 13 +++++++------ .../readers/em/subparsers/hfive_ebsd.py | 12 ++++++------ .../dataconverter/readers/em/utils/get_sqr_grid.py | 11 ++++++----- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py index e00bcf807..15ae33019 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py +++ b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py @@ -49,10 +49,10 @@ def __init__(self): def parse(self, template: dict, entry_id: int = 1, cmd_line_args: List = []) -> dict: """Parse application definition.""" - for nx_path, modifier in NxEmRoot.items(): + for nx_path, value in NxEmRoot.items(): if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"): trg = variadic_path_to_specific_path(nx_path, [entry_id]) - res = apply_modifier(modifier, modifier) + res = value if res is not None: template[trg] = res if cmd_line_args != [] and all(isinstance(item, str) for item in cmd_line_args): diff --git a/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py b/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py index 7e08427c5..9ce968bda 100644 --- a/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py +++ b/pynxtools/dataconverter/readers/em/geometry/convention_mapper.py @@ -68,10 +68,10 @@ def __init__(self, file_name: str, entry_id: int = 1): # , pattern_simulation: def parse(self, template: dict) -> dict: """Extract metadata from generic ELN text file to respective NeXus objects.""" print("Parsing conventions...") - for nx_path, modifier in NxEmConventions.items(): + for nx_path, value in NxEmConventions.items(): if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"): trg = variadic_path_to_specific_path(nx_path, [self.entry_id]) - res = apply_modifier(modifier, modifier) + res = value if res is not None: template[trg] = res return template diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index 2c8f5a8b3..291bdcfef 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -195,11 +195,12 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): raise ValueError(f"Unable to parse {grp_name}/{req_field} !") # Euler - 
n_pts = (np.shape(fp[f"{grp_name}/phi1"][:])[0], - np.shape(fp[f"{grp_name}/PHI"][:])[0], - np.shape(fp[f"{grp_name}/phi2"][:])[0]) - if all_equal(n_pts) is True and n_pts[0] == (self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]): - self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32) + n_pts_probe = (np.shape(fp[f"{grp_name}/phi1"][:])[0], + np.shape(fp[f"{grp_name}/PHI"][:])[0], + np.shape(fp[f"{grp_name}/phi2"][:])[0]) + n_pts = None + if all_equal(n_pts_probe) is True and n_pts_probe[0] == (self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]): + self.tmp[ckey]["euler"] = np.zeros((n_pts_probe[0], 3), np.float32) column_id = 0 for angle in ["phi1", "PHI", "phi2"]: # TODO::available examples support that Bruker reports Euler triplets in degree @@ -207,7 +208,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) / 180. * np.pi column_id += 1 self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) - n_pts = n_pts[0] + n_pts = n_pts_probe[0] # index of phase, 0 if not indexed # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 0de0c79d7..173ce7ad4 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -199,11 +199,11 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): raise ValueError(f"Unable to parse {grp_name}/{req_field} !") # Euler - n_pts = (np.shape(fp[f"{grp_name}/phi1"][:])[0], - np.shape(fp[f"{grp_name}/PHI"][:])[0], - np.shape(fp[f"{grp_name}/phi2"][:])[0]) - if all_equal(n_pts) is True and n_pts[0] == (self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]): - self.tmp[ckey]["euler"] = np.zeros((n_pts[0], 3), np.float32) + n_pts_probe = (np.shape(fp[f"{grp_name}/phi1"][:])[0], + np.shape(fp[f"{grp_name}/PHI"][:])[0], + np.shape(fp[f"{grp_name}/phi2"][:])[0]) + if all_equal(n_pts_probe) is True and n_pts_probe[0] == (self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]): + self.tmp[ckey]["euler"] = np.zeros((n_pts_probe[0], 3), np.float32) column_id = 0 for angle in ["phi1", "PHI", "phi2"]: # TODO::available examples support that community H5EBSD reports Euler triplets in degree @@ -211,7 +211,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): = np.asarray(fp[f"{grp_name}/{angle}"][:], np.float32) / 180. 
* np.pi column_id += 1 self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"]) - n_pts = n_pts[0] + n_pts = n_pts_probe[0] # index of phase, 0 if not indexed # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0 diff --git a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py index 573962e8a..5370aae1e 100644 --- a/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py +++ b/pynxtools/dataconverter/readers/em/utils/get_sqr_grid.py @@ -110,15 +110,16 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, # rebuild src_grid container with only the relevant src_grida selected from src_grid for key in src_grid.keys(): if key == "euler": - trg_grid[key] = np.zeros((np.shape(trg_xy)[0], 3), np.float32) - trg_grid[key] = np.nan + trg_grid[key] = np.empty((np.shape(trg_xy)[0], 3), np.float32) + trg_grid[key].fill(np.nan) trg_grid[key] = src_grid["euler"][idx, :] if np.isnan(trg_grid[key]).any() is True: raise ValueError(f"Downsampling of the point cloud left " f"pixels without mark data {key} !") print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") elif key == "phase_id" or key == "bc": - trg_grid[key] = np.zeros((np.shape(trg_xy)[0],), np.int32) - 2 + trg_grid[key] = np.empty((np.shape(trg_xy)[0],), np.int32) + trg_grid[key].fill(np.int32(-2)) # pyxem_id is at least -1, bc is typically positive trg_grid[key] = src_grid[key][idx] if np.sum(trg_grid[key] == -2) > 0: @@ -126,8 +127,8 @@ def get_scan_points_with_mark_data_discretized_on_sqr_grid(src_grid: dict, f"pixels without mark data {key} !") print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") elif key == "ci" or key == "mad": - trg_grid[key] = np.zeros((np.shape(trg_xy)[0],), np.float32) - trg_grid[key] = np.nan + trg_grid[key] = np.empty((np.shape(trg_xy)[0],), np.float32) + trg_grid[key].fill(np.nan) trg_grid[key] = src_grid[key][idx] print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") if np.isnan(trg_grid[key]).any() is True: From 013c59c972b718e07281ce5faaafd011aa4d5178 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 14:05:57 +0100 Subject: [PATCH 41/84] Successful run-through of all pyxem-processed examples (2D/3D) none of them failing, maximum RAM peaking at 2.7GB for largest 2D example 229_2096.oh5 with 20.3mio scan points and 23GB for largest 3D map 067_0003.dream3d with 48.9mio scan points --- pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py | 2 +- pyxem.batch.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index 291bdcfef..ad448ebe3 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -97,7 +97,7 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): raise ValueError(f"Unable to parse {grp_name} !") self.tmp[ckey]["dimensionality"] = 2 # TODO::QUBE can also yield 3D datasets - if read_strings_from_dataset(fp[f"{grp_name}/Grid Type"]) == "isometric": + if read_strings_from_dataset(fp[f"{grp_name}/Grid Type"][()]) == "isometric": self.tmp[ckey]["grid_type"] = SQUARE_GRID else: raise ValueError(f"Unable to parse {grp_name}/Grid Type !") diff --git a/pyxem.batch.sh b/pyxem.batch.sh index 32046ab8f..f1f821925 100755 --- a/pyxem.batch.sh +++ b/pyxem.batch.sh @@ -36,6 
+36,7 @@ examples="026_0046.h5oina 026_0049.h5oina 026_0050.h5oina 026_0052.h5oina 066_00 # examples="244_0014.dream3d" # examples="SmallIN100_Final.dream3d" # examples="067_0003.dream3d" # very large 3D EBSD takes ~40GB RAM for processing +# examples="174_0031.h5 130_2085.h5 130_2092.h5 130_2093.h5 130_2089.h5 130_2087.h5 130_2088.h5 130_2090.h5 130_2091.h5 124_0036.h5 130_2082.h5 130_2083.h5 130_2084.h5 130_2094.h5 130_0003.h5 130_2086.h5 124_0002.h5 144_0043.h5 066_0013.h5 066_0034.h5 066_0016.h5 066_0023.h5 066_0025.h5" for example in $examples; do echo $example From cb17fde172fe32988a3900f7836535a011806ae9 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 15:11:29 +0100 Subject: [PATCH 42/84] Manual rebase against master 0c69581b014d0ef7a65e54e9cc8a2e25916c26c8 to assure converter and nexus code is the same as 0c69581b01 master, it now is --- pynxtools/dataconverter/convert.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py index 46c9af7eb..59af45eac 100644 --- a/pynxtools/dataconverter/convert.py +++ b/pynxtools/dataconverter/convert.py @@ -37,7 +37,15 @@ if sys.version_info >= (3, 10): from importlib.metadata import entry_points else: - from importlib_metadata import entry_points + try: + from importlib_metadata import entry_points + except ImportError: + # If importlib_metadata is not present + # we provide a dummy function just returning an empty list. + # pylint: disable=W0613 + def entry_points(group): + """Dummy function for importlib_metadata""" + return [] logger = logging.getLogger(__name__) # pylint: disable=C0103 @@ -59,7 +67,7 @@ def get_reader(reader_name) -> BaseReader: importlib_module = entry_points(group='pynxtools.reader') if ( importlib_module - and reader_name in map(lambda ep: ep.name, entry_points(group='pynxtools.reader')) + and reader_name in map(lambda ep: ep.name, importlib_module) ): return importlib_module[reader_name].load() raise ValueError(f"The reader, {reader_name}, was not found.") from exc From f949e3bed1a96581bb922f708c50b0b88471ffc4 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 7 Dec 2023 17:38:08 +0100 Subject: [PATCH 43/84] =?UTF-8?q?Initial=20steps=20generic=20reader=20for?= =?UTF-8?q?=20images=20(TIF)=20to=20start=20with=20for=20IKZ=20(Albrecht,?= =?UTF-8?q?=20Br=C3=BCckner)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- imgs.batch.bash | 10 +++ .../readers/em/subparsers/hfive_base.py | 4 +- .../readers/em/subparsers/image_base.py | 46 ++++++++++++++ .../readers/em/subparsers/image_tiff.py | 62 +++++++++++++++++++ 4 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 imgs.batch.bash create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_base.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_tiff.py diff --git a/imgs.batch.bash b/imgs.batch.bash new file mode 100644 index 000000000..90a0cf7cf --- /dev/null +++ b/imgs.batch.bash @@ -0,0 +1,10 @@ +#!/bin/bash + +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/" + +examples="ALN_baoh_021.tif FeMoOx_AntiA_04_1k5x_CN.tif" + +for example in $examples; do + echo $example + dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt +done diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py 
b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py index eeebb88d8..6c3534bd2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -"""(Sub-)parser mapping concepts and content from EDAX/AMETEK *.edaxh5 (APEX) files on NXem.""" +"""Parent class for all tech partner-specific HDF5 parsers for mapping on NXem.""" import numpy as np import h5py @@ -25,8 +25,6 @@ IS_GROUP, IS_REGULAR_DATASET, IS_COMPOUND_DATASET, IS_ATTRIBUTE, \ IS_FIELD_IN_COMPOUND_DATASET, Concept -# HDF5 base parser to inherit from for tech-partner-specific HDF5 subparsers.""" - # the base parser implements the processing of standardized orientation maps via # the pyxem software package from the electron microscopy community # specifically so-called NeXus default plots are generated to add RDMS-relevant diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_base.py b/pynxtools/dataconverter/readers/em/subparsers/image_base.py new file mode 100644 index 000000000..1516ff3ae --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_base.py @@ -0,0 +1,46 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Parent class for all tech partner-specific image parsers for mapping on NXem.""" + +import numpy as np +from typing import Dict, List + + +class ImgsBaseParser: + def __init__(self, file_path: str = ""): + # self.supported_version = VERSION_MANAGEMENT + # self.version = VERSION_MANAGEMENT + # tech_partner the company which designed this format + # schema_name the specific name of the family of schemas supported by this reader + # schema_version the specific version(s) supported by this reader + # writer_name the specific name of the tech_partner's (typically proprietary) software + self.prfx = None + self.tmp: Dict = {} + if file_path is not None and file_path != "": + self.file_path = file_path + else: + raise ValueError(f"{__name__} needs proper instantiation !") + + def init_named_cache(self, ckey: str): + """Init a new cache for normalized image data if not existent.""" + # purpose of the cache is to hold normalized information + if ckey not in self.tmp.keys(): + self.tmp[ckey] = {} + return ckey + else: + raise ValueError(f"Existent named cache {ckey} must not be overwritten !") diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py new file mode 100644 index 000000000..ecbac4569 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py @@ -0,0 +1,62 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Derived image class to derive every tech-partner-specific TIFF subparser from.""" + +import mmap +import numpy as np +from typing import Dict +from PIL import Image +from PIL.TiffTags import TAGS + +from pynxtools.dataconverter.readers.em.subparsers.image_base import ImgsBaseParser + + +class TiffReader(ImgsBaseParser): + """Read Bruker Esprit H5""" + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp: Dict = {} + self.supported_version: Dict = {} + self.version: Dict = {} + self.supported = False + self.tags: Dict = {} + self.check_if_tiff() + if self.supported is True: + self.get_tags() + + def check_if_tiff(self): + """Check if instance can at all be likely a TaggedImageFormat file via magic number.""" + self.supported = 0 # voting-based + with open(self.file_path, 'rb', 0) as file: + s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + magic = s.read(4) + print(magic) + # TODO::add magic number https://en.wikipedia.org/wiki/TIFF + self.supported += 1 + if self.supported == 1: + self.supported = True + else: + self.supported = False + + def get_tags(self): + """Extract tags if present.""" + with Image.open(self.file_path, mode="r") as fp: + self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2} + for key, val in self.tags.items(): + print(f"{key}, {val}") From 00f4556e332541c7a074d9198cb603ba82bb56d3 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Fri, 8 Dec 2023 13:44:52 +0100 Subject: [PATCH 44/84] Decode tech partner magic --- imgs.batch.bash => imgs.batch.sh | 0 imgs.dev.ipynb | 236 ++++++++++++++++++ pynxtools/dataconverter/readers/em/reader.py | 16 +- .../readers/em/subparsers/hfive_apex.py | 5 +- .../readers/em/subparsers/hfive_base.py | 2 + .../readers/em/subparsers/hfive_bruker.py | 5 +- .../em/subparsers/hfive_dreamthreed.py | 5 +- .../readers/em/subparsers/hfive_ebsd.py | 5 +- .../readers/em/subparsers/hfive_edax.py | 5 +- .../readers/em/subparsers/hfive_emsoft.py | 5 +- .../readers/em/subparsers/hfive_oxford.py | 5 +- .../readers/em/subparsers/image_tiff.py | 65 ++++- .../readers/em/subparsers/nxs_imgs.py | 74 ++++++ pyxem.dev.ipynb | 85 +++++++ 14 files changed, 479 insertions(+), 34 deletions(-) rename imgs.batch.bash => imgs.batch.sh (100%) mode change 100644 => 100755 create mode 100755 imgs.dev.ipynb create mode 100644 pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py create mode 100644 pyxem.dev.ipynb diff --git a/imgs.batch.bash b/imgs.batch.sh old mode 100644 new mode 100755 similarity index 100% rename from imgs.batch.bash rename to imgs.batch.sh diff --git a/imgs.dev.ipynb b/imgs.dev.ipynb new file mode 100755 index 000000000..deee2d480 --- /dev/null +++ b/imgs.dev.ipynb @@ -0,0 +1,236 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", + "metadata": {}, + "outputs": [], + "source": [ + "fnm = 
\"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/ALN_baoh_021.tif\"\n", + "fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/FeMoOx_AntiA_04_1k5x_CN.tif\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "from PIL.TiffTags import TAGS\n", + "# print(TAGS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1963afb6-6e48-4628-a0e8-d2da0874701e", + "metadata": {}, + "outputs": [], + "source": [ + "with Image.open(fnm, mode=\"r\") as fp:\n", + " for key in fp.tag_v2:\n", + " if key in [34118, 34119]:\n", + " print(type(fp.tag[key]))\n", + " print(len(fp.tag[key])) \n", + " # print(f\"{key}, {fp.tag[key]}\")\n", + " if key not in TAGS.keys():\n", + " print(f\"--->tag {key}, is not in PIL.TiffTAGS !\")\n", + " # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2}\n", + " # for key, val in self.tags.items():\n", + " # print(f\"{key}, {val}\")\n", + " nparr = np.array(fp)\n", + " print(f\"{type(nparr)}\")\n", + " print(f\"{nparr.dtype}\")\n", + " print(f\"{np.shape(nparr)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9ef2a35-a260-4a54-9b83-eae1d588966f", + "metadata": {}, + "outputs": [], + "source": [ + "with Image.open(fnm, mode=\"r\") as fp:\n", + " czi_keys = [34118, 34119]\n", + " for czi_key in czi_keys:\n", + " if czi_key in fp.tag_v2:\n", + " utf = fp.tag[czi_key]\n", + " print(type(utf))\n", + " if len(utf) == 1:\n", + " print(utf[0])\n", + " exit(1)\n", + " tfs_keys = [34682]\n", + " for tfs_key in tfs_keys:\n", + " if tfs_key in fp.tag_v2:\n", + " utf = fp.tag[tfs_key]\n", + " print(type(utf))\n", + " if len(utf) == 1:\n", + " print(utf[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8ada062-e308-4288-8f00-b3e620f3c890", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/\n", + "def sort_tuple(tup):\n", + " # convert the list of tuples to a numpy array with data type (object, int)\n", + " arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n", + " # get the indices that would sort the array based on the second column\n", + " indices = np.argsort(arr['col2'])\n", + " # use the resulting indices to sort the array\n", + " sorted_arr = arr[indices]\n", + " # convert the sorted numpy array back to a list of tuples\n", + " sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n", + " return sorted_tup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d27df293-626c-4d37-80df-96c182d4f401", + "metadata": {}, + "outputs": [], + "source": [ + "def if_str_represents_float(s):\n", + " try:\n", + " float(s)\n", + " return str(float(s)) == s\n", + " except ValueError:\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb", + "metadata": {}, + "outputs": [], + "source": [ + "# TFS sections based on IKZ ALN_baoh_021.tif example\n", + "import mmap\n", + "\n", + "tfs_section_names = [\"[User]\",\n", + " \"[System]\",\n", + " \"[Beam]\",\n", + " \"[EBeam]\", \n", + " \"[GIS]\",\n", + " \"[Scan]\",\n", + " \"[EScan]\",\n", + " \"[Stage]\",\n", + " \"[Image]\",\n", + " \"[Vacuum]\",\n", + " \"[Specimen]\",\n", + " \"[Detectors]\",\n", + " \"[T2]\",\n", + 
" \"[Accessories]\",\n", + " \"[EBeamDeceleration]\",\n", + " \"[CompoundLensFilter]\",\n", + " \"[PrivateFei]\",\n", + " \"[HiResIllumination]\",\n", + " \"[EasyLift]\",\n", + " \"[HotStageMEMS]\",\n", + " \"[HotStage]\",\n", + " \"[HotStageHVHS]\",\n", + " \"[ColdStage]\"]\n", + "\n", + "tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n", + " \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n", + "tfs_section_offsets = {}\n", + "\n", + "with open(fnm, 'rb', 0) as file:\n", + " s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n", + " for section_name in tfs_section_names:\n", + " pos = s.find(bytes(section_name, \"utf8\")) # != -1\n", + " tfs_section_offsets[section_name] = pos\n", + " print(tfs_section_offsets)\n", + "\n", + " # define search offsets\n", + " tpl = []\n", + " for key, value in tfs_section_offsets.items():\n", + " tpl.append((key, value))\n", + " # print(tpl)\n", + " tpl = sort_tuple(tpl)\n", + " print(tpl)\n", + " # if section_name == \"[System]\":\n", + " pos_s = None\n", + " pos_e = None\n", + " for idx in np.arange(0, len(tpl)):\n", + " if tpl[idx][0] != \"[System]\":\n", + " continue\n", + " else:\n", + " pos_s = tpl[idx][1]\n", + " if idx <= len(tpl) - 1:\n", + " pos_e = tpl[idx + 1][1]\n", + " break\n", + " print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n", + " # fish metadata of e.g. the system section\n", + " section_metadata = {}\n", + " for term in tfs_section_details[\"[System]\"]:\n", + " \n", + " s.seek(pos_s, 0)\n", + " pos = s.find(bytes(term, \"utf8\"))\n", + " if pos < pos_e: # check if pos_e is None\n", + " s.seek(pos, 0)\n", + " section_metadata[f\"{term}\"] = f\"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}\"\n", + " if if_str_represents_float(section_metadata[f\"{term}\"]) is True:\n", + " section_metadata[f\"{term}\"] = np.float64(section_metadata[f\"{term}\"])\n", + " elif section_metadata[f\"{term}\"].isdigit() is True:\n", + " section_metadata[f\"{term}\"] = np.int64(section_metadata[f\"{term}\"])\n", + " else:\n", + " pass\n", + " # print(f\"{term}, {pos}, {pos + len(term) + 1}\")\n", + " # tfs_section_offswr\n", + " # file.seek(pos, 0) #\n", + " print(section_metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f3eb287-8f55-424c-a016-a07fc59f068a", + "metadata": {}, + "outputs": [], + "source": [ + "'2'.isdigit()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1341e30-fcce-4a3d-a099-d342b8bbe318", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index a8aff8d4b..ae21ad233 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -22,15 +22,11 @@ from typing import Tuple, Any from pynxtools.dataconverter.readers.base.reader import BaseReader - from 
pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef - # from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser - from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser - +from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver - # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper # remaining subparsers to be implemented and merged into this one @@ -118,13 +114,19 @@ def read(self, # sub_parser = "nxs_mtex" # subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0]) # subparser.parse(template) + # TODO::check correct loop through! # add further with resolving cases # if file_path is an HDF5 will use hfive parser # sub_parser = "nxs_pyxem" - subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) + # subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) + # subparser.parse(template) + # TODO::check correct loop through! + + # sub_parser = "image_tiff" + subparser = NxEmImagesSubParser(entry_id, file_paths[0]) subparser.parse(template) - # exit(1) + exit(1) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index e487c5287..fbc7b91da 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -40,9 +40,10 @@ def __init__(self, file_path: str = ""): self.tmp = {} self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py index 6c3534bd2..d365d1d34 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py @@ -68,8 +68,10 @@ def __init__(self, file_path: str = ""): self.template_attributes: List = [] self.templates: Dict = {} self.h5r = None + self.is_hdf = True if file_path is not None and file_path != "": self.file_path = file_path + # TODO::check if HDF5 file using magic cookie else: raise ValueError(f"{__name__} needs proper instantiation !") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py index ad448ebe3..3ebd0aace 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py @@ -39,9 +39,10 @@ def __init__(self, file_path: str = ""): self.tmp: Dict = {} self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 0502c6519..0816e0466 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py 
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -87,9 +87,10 @@ def __init__(self, file_path: str = ""): self.path_registry: Dict = {} self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py index 173ce7ad4..e246bdef8 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py @@ -39,9 +39,10 @@ def __init__(self, file_path: str = ""): self.tmp = {} self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py index 157b8ce75..8d7db4a74 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py @@ -37,9 +37,10 @@ def __init__(self, file_path: str = ""): self.tmp = {} self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py index 49405197b..acb75b67f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py @@ -33,9 +33,10 @@ def __init__(self, file_path: str = ""): self.tmp = {} self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py index 04db7b896..8818c93f3 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py @@ -42,9 +42,10 @@ def __init__(self, file_path: str = ""): # duplicate the code of the base hfive parser for generating NeXus default plots self.supported_version: Dict = {} self.version: Dict = {} - self.init_support() self.supported = False - self.check_if_supported() + if self.is_hdf is True: + self.init_support() + self.check_if_supported() def init_support(self): """Init supported versions.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py index ecbac4569..c7969aaf1 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff.py @@ -26,29 +26,56 @@ from pynxtools.dataconverter.readers.em.subparsers.image_base import ImgsBaseParser -class TiffReader(ImgsBaseParser): - """Read Bruker Esprit H5""" +class 
TiffSubParser(ImgsBaseParser): + """Read Tagged Image File Format TIF/TIFF.""" def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp: Dict = {} self.supported_version: Dict = {} self.version: Dict = {} - self.supported = False self.tags: Dict = {} + self.supported = False self.check_if_tiff() def check_if_tiff(self): - """Check if instance can at all be likely a TaggedImageFormat file via magic number.""" + """Check if resource behind self.file_path is a TaggedImageFormat file.""" self.supported = 0 # voting-based + # different tech partners may all generate tiff files but internally + # report completely different pieces of information + # the situation is the same as for HDF5 files + # for this reason specific parsers for specific tech partner content are required + # checking just the file extension is in most cases not sufficient ! + # checking the magic number is useful as it at least narrows down that one + # has a tiff container, but like with every container you can pack into it + # almost whatever you want, and unfortunately this is how tiff is currently + # used in the field of electron microscopy + + # it is common practice to export single images from a microscope session + # using common image formats like png, jpg, or tiff, often with a scale bar + # hard-coded into the image + # although this is usual practice, we argue it is not best practice at all; + # it is better to use tech partner file formats and to speak up at + # conferences and meetings to convince the tech partners to offer + # documentation of the content of these file formats (ideally using + # semantic web technology) + # the more this happens and the more users articulate this need, the easier + # it becomes to write software that supports scientists with reading directly + # and more completely from the tech partner files. In effect, there is then + # less and less of a reason to manually export files and share them ad hoc + # like single tiff images. + # Rather try to think about a mindset change and ask yourself: + # Can I not just show this content to my colleagues in the research + # data management system directly, instead of copying over files that, in the + # process of manually exporting them, get cut off from their contextualization + # unless one is super careful and spends time on writing super rich metadata ! + # Most tech partners by now have file formats with indeed very rich metadata. + # Our conviction is that these should be used and explored more frequently.
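# Editor's note (aside, not part of the patch): the magic-number test in the
# hunk continuing below only accepts the little-endian byte-order mark
# b'II*\x00'; the TIFF specification also permits big-endian files. A hedged
# sketch of a check that would accept both variants:
#
#     magic = s.read(4)
#     if magic in (b'II*\x00', b'MM\x00*'):  # little- or big-endian TIFF
#         self.supported += 1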
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py
new file mode 100644
index 000000000..6b2c2f479
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py
@@ -0,0 +1,74 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# +"""Parser mapping content of specific image files on NeXus.""" + +import numpy as np +# from typing import Dict, Any, List + +from pynxtools.dataconverter.readers.em.subparsers.image_tiff import TiffSubParser + + +class NxEmImagesSubParser: + """Map content from different type of image files on an instance of NXem.""" + + def __init__(self, entry_id: int = 1, input_file_name: str = ""): + """Overwrite constructor of the generic reader.""" + if entry_id > 0: + self.entry_id = entry_id + else: + self.entry_id = 1 + self.file_path = input_file_name + self.cache = {"is_filled": False} + + def identify_image_type(self): + """Identify if image matches known mime type and has content for which subparser exists.""" + # tech partner formats used for measurement + img = TiffSubParser(f"{self.file_path}") + if img.supported is True: + return "tiff" + return None + + def parse(self, template: dict) -> dict: + image_parser_type = self.identify_image_type() + if image_parser_type is None: + print(f"{self.file_path} does not match any of the supported image formats") + return template + print(f"Parsing via {image_parser_type}...") + # see also comments for respective nxs_pyxem parser + # and its interaction with tech-partner-specific hfive_* subparsers + + if image_parser_type == "tiff": + tiff = TiffSubParser(self.file_path) + tiff.parse_and_normalize() + self.process_into_template(tiff.tmp, template) + else: # none or something unsupported + return template + return template + + def process_into_template(self, inp: dict, template: dict) -> dict: + debugging = False + if debugging is True: + for key, val in inp.items(): + if isinstance(val, dict): + for ckey, cval in val.items(): + print(f"{ckey}, {cval}") + else: + print(f"{key}, {val}") + # TODO:: implement actual mapping on template + # self.process_roi_overview(inp, template) + return template diff --git a/pyxem.dev.ipynb b/pyxem.dev.ipynb new file mode 100644 index 000000000..8813ff68f --- /dev/null +++ b/pyxem.dev.ipynb @@ -0,0 +1,85 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "685b6ead-42e8-43f1-81c5-d354fee63935", + "metadata": {}, + "outputs": [], + "source": [ + "from jupyterlab_h5web import H5Web\n", + "datasource=\"../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_ebsd_pyxem\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29a18629-3ab7-4353-955d-4a2c943f6dee", + "metadata": {}, + "outputs": [], + "source": [ + "H5Web(f\"{datasource}/244_0014.dream3d\") # dream3d, synthetic\n", + "H5Web(f\"{datasource}/SmallIN100_Final.dream3d\") # dream3d, exp\n", + "H5Web(f\"{datasource}/173_0057.h5oina\") # oxford\n", + "H5Web(f\"{datasource}/130_0003.h5\") # bruker\n", + "H5Web(f\"{datasource}/088_0009.h5\") # britton\n", + "H5Web(f\"{datasource}/116_0014.h5\") # edax new, where X Position and Y Position are calibrated by step size\n", + "# H5Web(f\"{datasource}/229_2097.oh5\") # edax old, where X Position and Y Position is not yet calibrated by step size\n", + "# H5Web(f\"{datasource}/207_2081.edaxh5\") # apex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "807b8d48-ee35-4742-be3e-d43063eeefc6", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ad9cef4-a9c1-4c62-b56c-4391e18309a5", + "metadata": {}, + "outputs": [], + "source": [ + "nxy = [2, 3]\n", + "sxy = [0.3, 0.25]\n", + "\n", + "print(np.tile(np.linspace(0, nxy[0] - 1, num=nxy[0], 
endpoint=True) * sxy[0], nxy[1]))\n",
+    "print(np.repeat(np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sxy[1], nxy[0]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62a0e099-10c8-425d-bc85-a46b55b12cfa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
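For reference, the two expressions in the cell above generate the flattened x and y pixel-center coordinates of a rectangular scan grid: tile repeats the row of x positions once per scan line, while repeat stretches each y position across one full line. With the values from the cell they evaluate to:

    import numpy as np

    nxy = [2, 3]       # number of pixels along x and y
    sxy = [0.3, 0.25]  # step size along x and y

    print(np.tile(np.linspace(0, nxy[0] - 1, num=nxy[0], endpoint=True) * sxy[0], nxy[1]))
    # [0.  0.3 0.  0.3 0.  0.3]
    print(np.repeat(np.linspace(0, nxy[1] - 1, num=nxy[1], endpoint=True) * sxy[1], nxy[0]))
    # [0.   0.   0.25 0.25 0.5  0.5 ]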
From 020dea06adb64cd3124b84bee55c938437f0f104 Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Fri, 8 Dec 2023 17:09:29 +0100
Subject: [PATCH 45/84] Added working skeleton code for the TFS-specific TIFF
 parser, which fishes successfully metadata and converts into standardized
 python dict, ods document added to collect IKZ feedback how to map specific
 TFS using assumptions onto specific concepts in NXem to consume in e.g. OASIS

---
 image_tiff_tfs_to_nexus.ods                 | Bin 0 -> 17720 bytes
 imgs.batch.sh                               |   2 +-
 imgs.dev.ipynb                              |   7 +-
 .../readers/em/subparsers/image_tiff_tfs.py | 130 ++++++++++++++++++
 .../em/subparsers/image_tiff_tfs_cfg.py     |  62 +++++++++
 .../readers/em/subparsers/nxs_imgs.py       |  10 +-
 .../readers/em/utils/image_utils.py         |  40 ++++++
 7 files changed, 243 insertions(+), 8 deletions(-)
 create mode 100644 image_tiff_tfs_to_nexus.ods
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py
 create mode 100644 pynxtools/dataconverter/readers/em/utils/image_utils.py

diff --git a/image_tiff_tfs_to_nexus.ods b/image_tiff_tfs_to_nexus.ods
new file mode 100644
index 0000000000000000000000000000000000000000..f3fc491b4a8bac27c6a828b0b0de8a3732fc185e
GIT binary patch
literal 17720
[17720 bytes of base85-encoded ODS spreadsheet payload omitted]
literal 0
HcmV?d00001

diff --git a/imgs.batch.sh b/imgs.batch.sh
index 90a0cf7cf..a764c63d4 100755
--- a/imgs.batch.sh
+++ b/imgs.batch.sh
@@ -2,7 +2,7 @@
 
 datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/"
 
-examples="ALN_baoh_021.tif FeMoOx_AntiA_04_1k5x_CN.tif"
FeMoOx_AntiA_04_1k5x_CN.tif" +examples="ALN_baoh_021.tif" # FeMoOx_AntiA_04_1k5x_CN.tif" for example in $examples; do echo $example diff --git a/imgs.dev.ipynb b/imgs.dev.ipynb index deee2d480..82317d0a6 100755 --- a/imgs.dev.ipynb +++ b/imgs.dev.ipynb @@ -8,7 +8,7 @@ "outputs": [], "source": [ "fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/ALN_baoh_021.tif\"\n", - "fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/FeMoOx_AntiA_04_1k5x_CN.tif\"" + "# fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/FeMoOx_AntiA_04_1k5x_CN.tif\"" ] }, { @@ -18,6 +18,7 @@ "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", "from PIL import Image\n", "from PIL.TiffTags import TAGS\n", "# print(TAGS)" @@ -58,14 +59,16 @@ " czi_keys = [34118, 34119]\n", " for czi_key in czi_keys:\n", " if czi_key in fp.tag_v2:\n", + " print(f\"Found czi_key {tfs_key}...\")\n", " utf = fp.tag[czi_key]\n", " print(type(utf))\n", " if len(utf) == 1:\n", " print(utf[0])\n", - " exit(1)\n", + " # exit(1)\n", " tfs_keys = [34682]\n", " for tfs_key in tfs_keys:\n", " if tfs_key in fp.tag_v2:\n", + " print(f\"Found tfs_key {tfs_key}...\")\n", " utf = fp.tag[tfs_key]\n", " print(type(utf))\n", " if len(utf) == 1:\n", diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py new file mode 100644 index 000000000..6fbd61fba --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py @@ -0,0 +1,130 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Subparser for harmonizing ThermoFisher-specific content in TIFF files.""" + +import mmap +import numpy as np +from typing import Dict +from PIL import Image +from PIL.TiffTags import TAGS + +from pynxtools.dataconverter.readers.em.subparsers.image_tiff import TiffSubParser +from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_cfg import \ + tfs_section_names, tfs_section_details +from pynxtools.dataconverter.readers.em.utils.image_utils import \ + sort_tuple, if_str_represents_float + + +class TfsTiffSubParser(TiffSubParser): + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp: Dict = {} + self.supported_version: Dict = {} + self.version: Dict = {} + self.tags: Dict = {} + self.supported = False + self.check_if_tiff() + self.tfs: Dict = {} + + def check_if_tiff_tfs(self): + """Check if resource behind self.file_path is a TaggedImageFormat file.""" + self.supported = 0 # voting-based + with open(self.file_path, 'rb', 0) as file: + s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + magic = s.read(4) + if magic == b'II*\x00': # https://en.wikipedia.org/wiki/TIFF + self.supported += 1 + + with Image.open(self.fiele_path, mode="r") as fp: + tfs_keys = [34682] + for tfs_key in tfs_keys: + if tfs_key in fp.tag_v2: + if len(fp.tag[tfs_key]) == 1: + self.supported += 1 # found TFS-specific tag + if self.supported == 2: + self.supported = True + else: + self.supported = False + + def get_metadata(self): + """Extract metadata in TFS specific tags if present.""" + print("Reporting the tags found in this TIFF file...") + # for an overview of tags + # https://www.loc.gov/preservation/digital/formats/content/tiff_tags.shtml + # with Image.open(self.file_path, mode="r") as fp: + # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2} + # for key, val in self.tags.items(): + # print(f"{key}, {val}") + tfs_section_offsets = {} + with open(self.file_path, 'rb', 0) as fp: + s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) + for section_name in tfs_section_names: + pos = s.find(bytes(section_name, "utf8")) # != -1 + tfs_section_offsets[section_name] = pos + print(tfs_section_offsets) + + # define search offsets + tpl = [] + for key, value in tfs_section_offsets.items(): + tpl.append((key, value)) + tpl = sort_tuple(tpl) + print(tpl) + + # exemplar parsing of specific TFS section content into a dict + # here for section_name == "[System]": + pos_s = None + pos_e = None + for idx in np.arange(0, len(tpl)): + if tpl[idx][0] != "[System]": + continue + else: + pos_s = tpl[idx][1] + if idx <= len(tpl) - 1: + pos_e = tpl[idx + 1][1] + break + print(f"Search for [System] in between byte offsets {pos_s} and {pos_e}") + if pos_s is None or pos_e is None: + raise ValueError(f"Search for [System] was unsuccessful !") + + # fish metadata of e.g. 
the system section + for term in tfs_section_details["[System]"]: + s.seek(pos_s, 0) + pos = s.find(bytes(term, "utf8")) + if pos < pos_e: # check if pos_e is None + s.seek(pos, 0) + value = f"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}" + if value != "": + if if_str_represents_float(value) is True: + self.tfs[f"system/{term}"] = np.float64(value) + elif value.isdigit() is True: + self.tfs[f"system/{term}"] = np.int64(value) + else: + self.tfs[f"system/{term}"] = None + else: + pass + print(self.tfs) + + def parse_and_normalize(self): + """Perform actual parsing filling cache self.tmp.""" + if self.supported is True: + print(f"Parsing via ThermoFisher-specific metadata...") + self.get_metadata() + else: + print(f"{self.file_path} is not a ThermoFisher-specific " + f"TIFF file that this parser can process !") diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py new file mode 100644 index 000000000..53797ce87 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py @@ -0,0 +1,62 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Configuration of the image_tiff_tfs subparser.""" + + +tfs_section_names = ["[User]", + "[System]", + "[Beam]", + "[EBeam]", + "[GIS]", + "[Scan]", + "[EScan]", + "[Stage]", + "[Image]", + "[Vacuum]", + "[Specimen]", + "[Detectors]", + "[T2]", + "[Accessories]", + "[EBeamDeceleration]", + "[CompoundLensFilter]", + "[PrivateFei]", + "[HiResIllumination]", + "[EasyLift]", + "[HotStageMEMS]", + "[HotStage]", + "[HotStageHVHS]", + "[ColdStage]"] + +tfs_section_details = {"[System]": ["Type", + "Dnumber", + "Software", + "BuildNr", + "Source", + "Column", + "FinalLens", + "Chamber", + "Stage", + "Pump", + "ESEM", + "Aperture", + "Scan", + "Acq", + "EucWD", + "SystemType", + "DisplayWidth", + "DisplayHeight"]} diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py index 6b2c2f479..495cec07b 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py @@ -20,7 +20,7 @@ import numpy as np # from typing import Dict, Any, List -from pynxtools.dataconverter.readers.em.subparsers.image_tiff import TiffSubParser +from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs import TfsTiffSubParser class NxEmImagesSubParser: @@ -38,9 +38,9 @@ def __init__(self, entry_id: int = 1, input_file_name: str = ""): def identify_image_type(self): """Identify if image matches known mime type and has content for which subparser exists.""" # tech partner formats used for measurement - img = TiffSubParser(f"{self.file_path}") + img = TfsTiffSubParser(f"{self.file_path}") if img.supported is True: - return "tiff" + return "tiff_tfs" return None def parse(self, template: dict) -> dict: @@ -52,8 +52,8 @@ def parse(self, template: dict) -> dict: # see also comments for respective nxs_pyxem parser # and its interaction with tech-partner-specific hfive_* subparsers - if image_parser_type == "tiff": - tiff = TiffSubParser(self.file_path) + if image_parser_type == "tiff_tfs": + tiff = TfsTiffSubParser(self.file_path) tiff.parse_and_normalize() self.process_into_template(tiff.tmp, template) else: # none or something unsupported diff --git a/pynxtools/dataconverter/readers/em/utils/image_utils.py b/pynxtools/dataconverter/readers/em/utils/image_utils.py new file mode 100644 index 000000000..342af0bfc --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/image_utils.py @@ -0,0 +1,40 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np + + +# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/ +def sort_tuple(tup): + # convert the list of tuples to a numpy array with data type (object, int) + arr = np.array(tup, dtype=[('col1', object), ('col2', int)]) + # get the indices that would sort the array based on the second column + indices = np.argsort(arr['col2']) + # use the resulting indices to sort the array + sorted_arr = arr[indices] + # convert the sorted numpy array back to a list of tuples + sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr] + return sorted_tup + + +def if_str_represents_float(s): + try: + float(s) + return str(float(s)) == s + except ValueError: + return False From 981e3c2296cb65184d25c2197c58a3b31da2f441 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 11 Dec 2023 11:01:20 +0100 Subject: [PATCH 46/84] Added initial version for the configuration, i.e. concept mapping table to harmonize and map instances of TFS/FEI concepts on NeXus concepts, using a few examples which are possibly of interest to IKZ staff --- .../em/subparsers/image_tiff_tfs_cfg.py | 392 ++++++++++++++++-- 1 file changed, 351 insertions(+), 41 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py index 53797ce87..9aa226c31 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py @@ -18,45 +18,355 @@ """Configuration of the image_tiff_tfs subparser.""" -tfs_section_names = ["[User]", - "[System]", - "[Beam]", - "[EBeam]", - "[GIS]", - "[Scan]", - "[EScan]", - "[Stage]", - "[Image]", - "[Vacuum]", - "[Specimen]", - "[Detectors]", - "[T2]", - "[Accessories]", - "[EBeamDeceleration]", - "[CompoundLensFilter]", - "[PrivateFei]", - "[HiResIllumination]", - "[EasyLift]", - "[HotStageMEMS]", - "[HotStage]", - "[HotStageHVHS]", - "[ColdStage]"] +TiffTfsConcepts = ["User/Date", + "User/Time", + "User/User", + "User/UserText", + "User/UserTextUnicode", + "System/Type", + "System/Dnumber", + "System/Software", + "System/BuildNr", + "System/Source", + "System/Column", + "System/FinalLens", + "System/Chamber", + "System/Stage", + "System/Pump", + "System/ESEM", + "System/Aperture", + "System/Scan", + "System/Acq", + "System/EucWD", + "System/SystemType", + "System/DisplayWidth", + "System/DisplayHeight", + "Beam/HV", + "Beam/Spot", + "Beam/StigmatorX", + "Beam/StigmatorY", + "Beam/BeamShiftX", + "Beam/BeamShiftY", + "Beam/ScanRotation", + "Beam/ImageMode", + "Beam/FineStageBias", + "Beam/Beam", + "Beam/Scan", + "EBeam/Source", + "EBeam/ColumnType", + "EBeam/FinalLens", + "EBeam/Acq", + "EBeam/Aperture", + "EBeam/ApertureDiameter", + "EBeam/HV", + "EBeam/HFW", + "EBeam/VFW", + "EBeam/WD", + "EBeam/BeamCurrent", + "EBeam/TiltCorrectionIsOn", + "EBeam/DynamicFocusIsOn", + "EBeam/DynamicWDIsOn", + "EBeam/ScanRotation", + "EBeam/LensMode", + "EBeam/LensModeA", + "EBeam/ATubeVoltage", + "EBeam/UseCase", + "EBeam/SemOpticalMode", + "EBeam/ImageMode", + "EBeam/SourceTiltX", + "EBeam/SourceTiltY", + "EBeam/StageX", + "EBeam/StageY", + "EBeam/StageZ", + "EBeam/StageR", + "EBeam/StageTa", + "EBeam/StageTb", + "EBeam/StigmatorX", + "EBeam/StigmatorY", + "EBeam/BeamShiftX", + "EBeam/BeamShiftY", + "EBeam/EucWD", + "EBeam/EmissionCurrent", + "EBeam/TiltCorrectionAngle", + "EBeam/PreTilt", + "EBeam/WehneltBias", + "EBeam/BeamMode", + "EBeam/MagnificationCorrection", + "GIS/Number", + 
"Scan/InternalScan", + "Scan/Dwelltime", + "Scan/PixelWidth", + "Scan/PixelHeight", + "Scan/HorFieldsize", + "Scan/VerFieldsize", + "Scan/Average", + "Scan/Integrate", + "Scan/FrameTime", + "EScan/Scan", + "EScan/InternalScan", + "ESCAN/Dwell", + "ESCAN/PixelWidth", + "ESCAN/PixelHeight", + "ESCAN/HorFieldsize", + "ESCAN/VerFieldsize", + "ESCAN/FrameTime", + "ESCAN/LineTime", + "ESCAN/Mainslock", + "ESCAN/LineIntegration", + "ESCAN/ScanInterlacing", + "Stage/StageX", + "Stage/StageY", + "Stage/StageZ", + "Stage/StageR", + "Stage/StageT", + "Stage/StageTb", + "Stage/SpecTilt", + "Stage/WorkingDistance", + "Stage/ActiveStage", + "Image/DigitalContrast", + "Image/DigitalBrightness", + "Image/DigitalGamma", + "Image/Average", + "Image/Integrate", + "Image/ResolutionX", + "Image/ResolutionY", + "Image/DriftCorrected", + "Image/ZoomFactor", + "Image/ZoomPanX", + "Image/ZoomPanY", + "Image/MagCanvasRealWidth", + "Image/MagnificationMode", + "Image/ScreenMagCanvasRealWidth", + "Image/ScreenMagnificationMode", + "Image/PostProcessing", + "Image/Transformation", + "Vacuum/ChPressure", + "Vacuum/Gas", + "Vacuum/UserMode", + "Vacuum/Humidity", + "Specimen/Temperature", + "Specimen/SpecimenCurrent", + "Detectors/Number", + "Detectors/Name", + "Detectors/Mode", + "T2/Contrast", + "T2/Brightness", + "T2/Signal", + "T2/ContrastDB", + "T2/BrightnessDB", + "T2/Setting", + "T2/MinimumDwellTime", + "Accessories/Number", + "EBeamDeceleration/ModeOn", + "EBeamDeceleration/LandingEnergy", + "EBeamDeceleration/ImmersionRatio", + "EBeamDeceleration/StageBias", + "CompoundLensFilter/IsOn", + "CompoundLensFilter/ThresholdEnergy", + "PrivateFei/BitShift", + "PrivateFei/DataBarSelected", + "PrivateFei/DataBarAvailable", + "PrivateFei/TimeOfCreation", + "PrivateFei/DatabarHeight", + "HiResIllumination/BrightFieldIsOn", + "HiResIllumination/BrightFieldValue", + "HiResIllumination/DarkFieldIsOn", + "HiResIllumination/DarkFieldValue", + "EasyLift/Rotation", + "HotStageMEMS/HeatingCurrent", + "HotStageMEMS/HeatingVoltage", + "HotStageMEMS/TargetTemperature", + "HotStageMEMS/ActualTemperature", + "HotStageMEMS/HeatingPower", + "HotStageMEMS/SampleBias", + "HotStageMEMS/SampleResistance", + "HotStage/TargetTemperature", + "HotStage/ActualTemperature", + "HotStage/SampleBias", + "HotStage/ShieldBias", + "HotStageHVHS/TargetTemperature", + "HotStageHVHS/ActualTemperature", + "HotStageHVHS/SampleBias", + "HotStageHVHS/ShieldBias", + "ColdStage/TargetTemperature", + "ColdStage/ActualTemperature", + "ColdStage/Humidity", + "ColdStage/SampleBias"] -tfs_section_details = {"[System]": ["Type", - "Dnumber", - "Software", - "BuildNr", - "Source", - "Column", - "FinalLens", - "Chamber", - "Stage", - "Pump", - "ESEM", - "Aperture", - "Scan", - "Acq", - "EucWD", - "SystemType", - "DisplayWidth", - "DisplayHeight"]} +TiffTfsToNeXusCfg = {"/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/start_time": {"fun": "ikz_berlin_apreo_iso8601", "terms": ["User/Date", "User/Time"]}, + "IGNORE": { "fun": "load_from", "terms": "User/User" }, + "IGNORE": { "fun": "load_from", "terms": "User/UserText" }, + "IGNORE": { "fun": "load_from", "terms": "User/UserTextUnicode" }, + "IGNORE": { "fun": "load_from", "terms": "System/Type" }, + "IGNORE": { "fun": "load_from", "terms": "System/Dnumber" }, + "IGNORE": { "fun": "load_from", "terms": "System/Software" }, + "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier": { "fun": "load_from", "terms": "System/BuildNr" }, + 
"/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type": { "fun": "ikz_berlin_apreo", "terms": "System/Source" }, + "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor": { "fun": "load_from", "terms": "System/Column" }, + "IGNORE": { "fun": "load_from", "terms": "System/FinalLens" }, + "IGNORE": { "fun": "load_from", "terms": "System/Chamber" }, + "IGNORE": { "fun": "load_from", "terms": "System/Stage" }, + "IGNORE": { "fun": "load_from", "terms": "System/Pump" }, + "IGNORE": { "fun": "load_from", "terms": "System/ESEM" }, + "IGNORE": { "fun": "load_from", "terms": "System/Aperture" }, + "IGNORE": { "fun": "load_from", "terms": "System/Scan" }, + "IGNORE": { "fun": "load_from", "terms": "System/Acq" }, + "IGNORE": { "fun": "load_from", "terms": "System/EucWD" }, + "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/model": { "fun": "load_from", "terms": "System/SystemType" }, + "IGNORE": { "fun": "load_from", "terms": "System/DisplayWidth" }, + "IGNORE": { "fun": "load_from", "terms": "System/DisplayHeight" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/HV" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/Spot" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/StigmatorX" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/StigmatorY" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/BeamShiftX" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/BeamShiftY" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/ScanRotation" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/ImageMode" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/FineStageBias" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/Beam" }, + "IGNORE": { "fun": "load_from", "terms": "Beam/Scan" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/Source" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/ColumnType" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/FinalLens" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/Acq" }, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/description": { "fun": "load_from", "terms": "EBeam/Aperture" }, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value": { "fun": "load_from", "terms": "EBeam/ApertureDiameter" }, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value/@units": "m", + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage": { "fun": "load_from", "terms": "EBeam/HV" }, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units": "V", + "IGNORE": { "fun": "load_from", "terms": "EBeam/HFW" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/VFW" }, + "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance": { "fun": "load_from", "terms": "EBeam/WD" }, + "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance/@units": "m", + "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamCurrent" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/TiltCorrectionIsOn" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/DynamicFocusIsOn" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/DynamicWDIsOn" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/ScanRotation" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/LensMode" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/LensModeA" }, + "IGNORE": { "fun": "load_from", "terms": 
"EBeam/ATubeVoltage" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/UseCase" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/SemOpticalMode" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/ImageMode" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/SourceTiltX" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/SourceTiltY" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StageX" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StageY" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StageZ" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StageR" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StageTa" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StageTb" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StigmatorX" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/StigmatorY" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamShiftX" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamShiftY" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/EucWD" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/EmissionCurrent" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/TiltCorrectionAngle" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/PreTilt" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/WehneltBias" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamMode" }, + "IGNORE": { "fun": "load_from", "terms": "EBeam/MagnificationCorrection" }, + "IGNORE": { "fun": "load_from", "terms": "GIS/Number" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/InternalScan" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/Dwelltime" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/PixelWidth" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/PixelHeight" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/HorFieldsize" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/VerFieldsize" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/Average" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/Integrate" }, + "IGNORE": { "fun": "load_from", "terms": "Scan/FrameTime" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/Scan" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/InternalScan" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/Dwell" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/PixelWidth" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/PixelHeight" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/HorFieldsize" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/VerFieldsize" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/FrameTime" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/LineTime" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/Mainslock" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/LineIntegration" }, + "IGNORE": { "fun": "load_from", "terms": "EScan/ScanInterlacing" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/StageX" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/StageY" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/StageZ" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/StageR" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/StageT" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/StageTb" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/SpecTilt" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/WorkingDistance" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ActiveStage" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/[Image]" }, + "IGNORE": { "fun": "load_from", "terms": 
"StageX/DigitalContrast" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/DigitalBrightness" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/DigitalGamma" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/Average" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/Integrate" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ResolutionX" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ResolutionY" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/DriftCorrected" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ZoomFactor" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ZoomPanX" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ZoomPanY" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/MagCanvasRealWidth" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/MagnificationMode" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ScreenMagCanvasRealWidth" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/ScreenMagnificationMode" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/PostProcessing" }, + "IGNORE": { "fun": "load_from", "terms": "StageX/Transformation" }, + "IGNORE": { "fun": "load_from", "terms": "Vacuum/ChPressure" }, + "IGNORE": { "fun": "load_from", "terms": "Vacuum/Gas" }, + "IGNORE": { "fun": "load_from", "terms": "Vacuum/UserMode" }, + "IGNORE": { "fun": "load_from", "terms": "Vacuum/Humidity" }, + "IGNORE": { "fun": "load_from", "terms": "Specimen/Temperature" }, + "IGNORE": { "fun": "load_from", "terms": "Specimen/SpecimenCurrent" }, + "IGNORE": { "fun": "load_from", "terms": "Detectors/Number" }, + "/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]/local_name": { "fun": "load_from", "terms": "Detectors/Name" }, + "IGNORE": { "fun": "load_from", "terms": "Detectors/Mode" }, + "IGNORE": { "fun": "load_from", "terms": "T2/Contrast" }, + "IGNORE": { "fun": "load_from", "terms": "T2/Brightness" }, + "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type": { "fun": "load_from_lower_case", "terms": "T2/Signal" }, + "IGNORE": { "fun": "load_from", "terms": "T2/ContrastDB" }, + "IGNORE": { "fun": "load_from", "terms": "T2/BrightnessDB" }, + "IGNORE": { "fun": "load_from", "terms": "T2/Setting" }, + "IGNORE": { "fun": "load_from", "terms": "T2/MinimumDwellTime" }, + "IGNORE": { "fun": "load_from", "terms": "Accessories/Number" }, + "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/ModeOn" }, + "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/LandingEnergy" }, + "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/ImmersionRatio" }, + "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/StageBias" }, + "IGNORE": { "fun": "load_from", "terms": "CompoundLensFilter/IsOn" }, + "IGNORE": { "fun": "load_from", "terms": "CompoundLensFilter/ThresholdEnergy" }, + "IGNORE": { "fun": "load_from", "terms": "PrivateFei/BitShift" }, + "IGNORE": { "fun": "load_from", "terms": "PrivateFei/DataBarSelected" }, + "IGNORE": { "fun": "load_from", "terms": "PrivateFei/DataBarAvailable" }, + "IGNORE": { "fun": "load_from", "terms": "PrivateFei/TimeOfCreation" }, + "IGNORE": { "fun": "load_from", "terms": "PrivateFei/DatabarHeight" }, + "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/BrightFieldIsOn" }, + "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/BrightFieldValue" }, + "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/DarkFieldIsOn" }, + "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/DarkFieldValue" }, + 
"IGNORE": { "fun": "load_from", "terms": "EasyLift/Rotation" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/HeatingCurrent" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/HeatingVoltage" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/TargetTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/ActualTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/HeatingPower" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/SampleBias" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/SampleResistance" }, + "IGNORE": { "fun": "load_from", "terms": "HotStage/TargetTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "HotStage/ActualTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "HotStage/SampleBias" }, + "IGNORE": { "fun": "load_from", "terms": "HotStage/ShieldBias" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/TargetTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/ActualTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/SampleBias" }, + "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/ShieldBias" }, + "IGNORE": { "fun": "load_from", "terms": "ColdStage/TargetTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "ColdStage/ActualTemperature" }, + "IGNORE": { "fun": "load_from", "terms": "ColdStage/Humidity" }, + "IGNORE": { "fun": "load_from", "terms": "ColdStage/SampleBias" } } From df729565fc4ddc00864817a72813d8ce9ddc88b3 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 11 Dec 2023 12:09:23 +0100 Subject: [PATCH 47/84] image_tiff_tfs: generalized logic to read in all TFS/FEI metadata --- .../readers/em/subparsers/image_tiff_tfs.py | 95 ++++++++++--------- .../em/subparsers/image_tiff_tfs_cfg.py | 42 ++++++-- .../readers/em/utils/image_utils.py | 2 +- 3 files changed, 85 insertions(+), 54 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py index 6fbd61fba..6eded43f9 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py @@ -25,9 +25,9 @@ from pynxtools.dataconverter.readers.em.subparsers.image_tiff import TiffSubParser from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_cfg import \ - tfs_section_names, tfs_section_details + TiffTfsConcepts, TiffTfsToNeXusCfg, get_fei_parent_concepts, get_fei_childs from pynxtools.dataconverter.readers.em.utils.image_utils import \ - sort_tuple, if_str_represents_float + sort_ascendingly_by_second_argument, if_str_represents_float class TfsTiffSubParser(TiffSubParser): @@ -71,54 +71,63 @@ def get_metadata(self): # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2} # for key, val in self.tags.items(): # print(f"{key}, {val}") - tfs_section_offsets = {} + tfs_parent_concepts = get_fei_parent_concepts() + tfs_parent_concepts_byte_offset = {} + for concept in tfs_parent_concepts: + tfs_parent_concepts_byte_offset[concept] = None with open(self.file_path, 'rb', 0) as fp: s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) - for section_name in tfs_section_names: - pos = s.find(bytes(section_name, "utf8")) # != -1 - tfs_section_offsets[section_name] = pos - print(tfs_section_offsets) + for concept in tfs_parent_concepts: + pos = s.find(bytes(f"[{concept}]", "utf8")) # != -1 + if pos != -1: + tfs_parent_concepts_byte_offset[concept] = pos + else: + raise 
ValueError(f"Expected block with metadata for concept [{concept}] were not found !") + print(tfs_parent_concepts_byte_offset) - # define search offsets - tpl = [] - for key, value in tfs_section_offsets.items(): - tpl.append((key, value)) - tpl = sort_tuple(tpl) - print(tpl) + sequence = [] # decide I/O order in which metadata for childs of parent concepts will be read + for key, value in tfs_parent_concepts_byte_offset.items(): + if value is not None: + sequence.append((key, value)) + # tuple of parent_concept name and byte offset + sequence = sort_ascendingly_by_second_argument(sequence) + print(sequence) - # exemplar parsing of specific TFS section content into a dict - # here for section_name == "[System]": - pos_s = None - pos_e = None - for idx in np.arange(0, len(tpl)): - if tpl[idx][0] != "[System]": - continue + idx = 0 + for parent, byte_offset in sequence: + pos_s = byte_offset + pos_e = None + if idx < len(sequence) - 1: + pos_e = sequence[idx + 1][1] else: - pos_s = tpl[idx][1] - if idx <= len(tpl) - 1: - pos_e = tpl[idx + 1][1] - break - print(f"Search for [System] in between byte offsets {pos_s} and {pos_e}") - if pos_s is None or pos_e is None: - raise ValueError(f"Search for [System] was unsuccessful !") + pos_e = np.iinfo(np.uint64).max + idx += 1 + if pos_s is None or pos_e is None: + raise ValueError(f"Definition of byte boundaries for reading childs of [{parent}] was unsuccessful !") + print(f"Search for [{parent}] in between byte offsets {pos_s} and {pos_e}") - # fish metadata of e.g. the system section - for term in tfs_section_details["[System]"]: - s.seek(pos_s, 0) - pos = s.find(bytes(term, "utf8")) - if pos < pos_e: # check if pos_e is None - s.seek(pos, 0) - value = f"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}" - if value != "": - if if_str_represents_float(value) is True: - self.tfs[f"system/{term}"] = np.float64(value) - elif value.isdigit() is True: - self.tfs[f"system/{term}"] = np.int64(value) + # fish metadata of e.g. 
the system section + for term in get_fei_childs(parent): + s.seek(pos_s, 0) + pos = s.find(bytes(f"{term}=", "utf8")) + if pos < pos_e: # check if pos_e is None + s.seek(pos, 0) + value = f"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}" + self.tfs[f"{parent}/{term}"] = None + if isinstance(value, str): + if value != "": + if if_str_represents_float(value) is True: + self.tfs[f"{parent}/{term}"] = np.float64(value) + elif value.isdigit() is True: + self.tfs[f"{parent}/{term}"] = np.int64(value) + else: + self.tfs[f"{parent}/{term}"] = value else: - self.tfs[f"system/{term}"] = None - else: - pass - print(self.tfs) + print(f"{parent}/{term} ---> {type(value)}") + else: + pass + for key, val in self.tfs.items(): + print(f"{key}, {val}") def parse_and_normalize(self): """Perform actual parsing filling cache self.tmp.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py index 9aa226c31..e38999b68 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py @@ -17,6 +17,8 @@ # """Configuration of the image_tiff_tfs subparser.""" +from typing import List + TiffTfsConcepts = ["User/Date", "User/Time", @@ -104,16 +106,16 @@ "Scan/FrameTime", "EScan/Scan", "EScan/InternalScan", - "ESCAN/Dwell", - "ESCAN/PixelWidth", - "ESCAN/PixelHeight", - "ESCAN/HorFieldsize", - "ESCAN/VerFieldsize", - "ESCAN/FrameTime", - "ESCAN/LineTime", - "ESCAN/Mainslock", - "ESCAN/LineIntegration", - "ESCAN/ScanInterlacing", + "EScan/Dwell", + "EScan/PixelWidth", + "EScan/PixelHeight", + "EScan/HorFieldsize", + "EScan/VerFieldsize", + "EScan/FrameTime", + "EScan/LineTime", + "EScan/Mainslock", + "EScan/LineIntegration", + "EScan/ScanInterlacing", "Stage/StageX", "Stage/StageY", "Stage/StageZ", @@ -193,6 +195,26 @@ "ColdStage/Humidity", "ColdStage/SampleBias"] + +def get_fei_parent_concepts() -> List: + """Get list of unique FEI parent concepts.""" + parent_concepts = set() + for entry in TiffTfsConcepts: + if isinstance(entry, str) and entry.count("/") == 1: + parent_concepts.add(entry.split("/")[0]) + return list(parent_concepts) + + +def get_fei_childs(concept: str) -> List: + """Get all children of FEI parent concept.""" + child_concepts = set() + for entry in TiffTfsConcepts: + if isinstance(entry, str) and entry.count("/") == 1: + if entry.startswith(f"{concept}/") is True: + child_concepts.add(entry.split("/")[1]) + return list(child_concepts) + + TiffTfsToNeXusCfg = {"/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/start_time": {"fun": "ikz_berlin_apreo_iso8601", "terms": ["User/Date", "User/Time"]}, "IGNORE": { "fun": "load_from", "terms": "User/User" }, "IGNORE": { "fun": "load_from", "terms": "User/UserText" }, diff --git a/pynxtools/dataconverter/readers/em/utils/image_utils.py b/pynxtools/dataconverter/readers/em/utils/image_utils.py index 342af0bfc..74598ad12 100644 --- a/pynxtools/dataconverter/readers/em/utils/image_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/image_utils.py @@ -20,7 +20,7 @@ # https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/ -def sort_tuple(tup): +def sort_ascendingly_by_second_argument(tup): # convert the list of tuples to a numpy array with data type (object, int) arr = np.array(tup, dtype=[('col1', object), ('col2', int)]) # get the indices that would sort the array based on the second 
column From 345d8e1dbfcdcd6c7b5d4276ca4e187e12e2e3f0 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 11 Dec 2023 14:34:03 +0100 Subject: [PATCH 48/84] Fixed error in floating point value interpretation logic, added writing of NXimage_r_set instance based on TIFF content --- pynxtools/dataconverter/readers/em/reader.py | 19 +-- .../readers/em/subparsers/image_tiff_tfs.py | 109 +++++++++++++++--- .../readers/em/subparsers/nxs_imgs.py | 24 ++-- .../readers/em/utils/image_utils.py | 3 +- 4 files changed, 114 insertions(+), 41 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index ae21ad233..3af242b4f 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -126,8 +126,7 @@ def read(self, # sub_parser = "image_tiff" subparser = NxEmImagesSubParser(entry_id, file_paths[0]) subparser.parse(template) - exit(1) - + # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue @@ -147,13 +146,15 @@ def read(self, # we only need to decorate the template to point to the mandatory ROI overview # print("Create NeXus default plottable data...") # em_default_plot_generator(template, 1) - nxs_plt = NxEmDefaultPlotResolver() - # if nxs_mtex is the sub-parser - resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot( - entry_id, file_paths[0]) - # print(f"DEFAULT PLOT IS {resolved_path}") - if resolved_path != "": - nxs_plt.annotate_default_plot(template, resolved_path) + + if True is False: + nxs_plt = NxEmDefaultPlotResolver() + # if nxs_mtex is the sub-parser + resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot( + entry_id, file_paths[0]) + # print(f"DEFAULT PLOT IS {resolved_path}") + if resolved_path != "": + nxs_plt.annotate_default_plot(template, resolved_path) debugging = True if debugging is True: diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py index 6eded43f9..30aa5ffaa 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py @@ -31,16 +31,18 @@ class TfsTiffSubParser(TiffSubParser): - def __init__(self, file_path: str = ""): + def __init__(self, file_path: str = "", entry_id: int = 1): super().__init__(file_path) + self.entry_id = entry_id + self.event_id = 1 self.prfx = None - self.tmp: Dict = {} + self.tmp: Dict = {"data": None, + "meta": {}} self.supported_version: Dict = {} self.version: Dict = {} self.tags: Dict = {} self.supported = False self.check_if_tiff() - self.tfs: Dict = {} def check_if_tiff_tfs(self): """Check if resource behind self.file_path is a TaggedImageFormat file.""" @@ -101,10 +103,11 @@ def get_metadata(self): pos_e = sequence[idx + 1][1] else: pos_e = np.iinfo(np.uint64).max + # TODO::better use official convention to not read beyond the end of file idx += 1 if pos_s is None or pos_e is None: raise ValueError(f"Definition of byte boundaries for reading childs of [{parent}] was unsuccessful !") - print(f"Search for [{parent}] in between byte offsets {pos_s} and {pos_e}") + # print(f"Search for [{parent}] in between byte offsets {pos_s} and {pos_e}") # fish metadata of e.g. 
the system section
         for term in get_fei_childs(parent):
@@ -113,21 +116,20 @@ def get_metadata(self):
                 if pos < pos_e:  # check if pos_e is None
                     s.seek(pos, 0)
                     value = f"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}"
-                    self.tfs[f"{parent}/{term}"] = None
+                    self.tmp["meta"][f"{parent}/{term}"] = None
                     if isinstance(value, str):
                         if value != "":
-                            if if_str_represents_float(value) is True:
-                                self.tfs[f"{parent}/{term}"] = np.float64(value)
-                            elif value.isdigit() is True:
-                                self.tfs[f"{parent}/{term}"] = np.int64(value)
+                            # execution order of the check here matters!
+                            if value.isdigit() is True:
+                                self.tmp["meta"][f"{parent}/{term}"] = np.int64(value)
+                            elif if_str_represents_float(value) is True:
+                                self.tmp["meta"][f"{parent}/{term}"] = np.float64(value)
                             else:
-                                self.tfs[f"{parent}/{term}"] = value
+                                self.tmp["meta"][f"{parent}/{term}"] = value
                         else:
-                            print(f"{parent}/{term} ---> {type(value)}")
+                            raise ValueError(f"Detected an unexpected case {parent}/{term}, type: {type(value)} !")
                 else:
                     pass
-        for key, val in self.tfs.items():
-            print(f"{key}, {val}")
 
     def parse_and_normalize(self):
         """Perform actual parsing filling cache self.tmp."""
@@ -137,3 +139,84 @@ def parse_and_normalize(self):
         else:
             print(f"{self.file_path} is not a ThermoFisher-specific "
                   f"TIFF file that this parser can process !")
+
+    def process_into_template(self, template: dict) -> dict:
+        self.process_event_data_em_metadata(template)
+        self.process_event_data_em_data(template)
+        return template
+
+    def process_event_data_em_metadata(self, template: dict) -> dict:
+        """Add respective event_data_em header."""
+        # contextualization to understand how the image relates to the EM session
+        print(f"Mapping some of the TFS/FEI metadata concepts onto NeXus concepts")
+        return template
+
+    def process_event_data_em_data(self, template: dict) -> dict:
+        """Add respective heavy image data."""
+        # default display of the image(s) representing the data collected in this event
+        print(f"Writing TFS/FEI TIFF image onto the respective NeXus concept")
+        # read image in-place
+        with Image.open(self.file_path, mode="r") as fp:
+            nparr = np.array(fp)
+            # print(f"type: {type(nparr)}, dtype: {nparr.dtype}, shape: {np.shape(nparr)}")
+            # TODO::discussion points
+            # - how do you know we have an image of real space vs. imaginary space (from the metadata?)
+            # - how to deal with the (ugly) scale bar that is typically stamped into the TIFF image content?
+            # with H5Web and NeXus most of this is obsolete unless there are metadata stamped which are not
+            # available in NeXus or in the respective metadata in the metadata section of the TIFF image
+            # remember H5Web images can be scaled based on the metadata allowing basically the same
+            # explorative viewing using H5Web than what traditionally typical image viewers are meant for
+            image_identifier = 1
+            trg = f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/" \
+                  f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \
+                  f"IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]"
+            # TODO::writer should decorate automatically!
+            template[f"{trg}/title"] = f"Image"
+            template[f"{trg}/@NX_class"] = f"NXdata"  # TODO::writer should decorate automatically!
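
A short aside on the reordered checks in the hunk above: `"100".isdigit()` is `True`, but `float("100")` also succeeds, so testing the float branch first would silently coerce integer-valued TFS metadata to `np.float64`; checking `isdigit` first keeps integers integral. A minimal, self-contained sketch of that interpretation logic (`interpret_value` is a hypothetical helper for illustration, not part of the reader):

```python
import numpy as np


def if_str_represents_float(s: str) -> bool:
    """Minimal mirror of the reader's helper: True if s parses as a float."""
    try:
        float(s)
        return True
    except ValueError:
        return False


def interpret_value(value: str):
    """Hypothetical stand-alone version of the check order used above."""
    if value.isdigit():  # pure digits, e.g. "100" -> np.int64; must come first
        return np.int64(value)
    if if_str_represents_float(value):  # e.g. "0.5", "2e-9" -> np.float64
        return np.float64(value)
    return value  # everything else stays a string, e.g. "FEG"


assert isinstance(interpret_value("100"), np.int64)
assert isinstance(interpret_value("2e-9"), np.float64)
assert interpret_value("FEG") == "FEG"
```
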
+ template[f"{trg}/@signal"] = "intensity" + dims = ["x", "y"] + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) + idx += 1 + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + template[f"{trg}/intensity"] = {"compress": np.array(fp), "strength": 1} + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d + template[f"{trg}/intensity/@long_name"] = f"Signal" + + sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"], + "y": self.tmp["meta"]["EScan/PixelHeight"]} + shp = np.shape(np.array(fp)) + nxy = {"x": shp[1], "y": shp[0]} + scan_unit = {"x": "m", "y": "m"} # assuming FEI reports SI units + # TODO::be careful we assume here a very specific coordinate system + # however the TIFF file gives no clue, TIFF just documents in which order + # it arranges a bunch of pixels that have stream in into a n-d tiling + # e.g. a 2D image + # also we have to be careful because TFS just gives us here + # typical case of an image without an information without its location + # on the physical sample surface, therefore we can only scale + # pixel_identifier by physical scaling quantities s_x, s_y + # also the dimensions of the image are on us to fish with the image + # reading library instead of TFS for consistency checks adding these + # to the metadata the reason is that TFS TIFF use the TIFF tagging mechanism + # and there is already a proper TIFF tag for the width and height of an + # image in number of pixel + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] \ + = {"compress": np.asarray(np.linspace(0, + nxy[dim] - 1, + num=nxy[dim], + endpoint=True) * sxy[dim], np.float64), "strength": 1} + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit[dim]})" + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" + return template + + def process_event_data_em_state(self, template: dict) -> dict: + """Add em-state as they were during the event_data_em event.""" + # state of the microscope not repeating static/long-valid microscope metadata + print(f"Writing TFS/FEI event_data_em state") + return template diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py index 495cec07b..de4d3ecf9 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py @@ -19,8 +19,10 @@ import numpy as np # from typing import Dict, Any, List +from PIL import Image from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs import TfsTiffSubParser +from pynxtools.dataconverter.readers.em.utils.hfive_web_utils import hfive_web_decorate_nxdata class NxEmImagesSubParser: @@ -53,22 +55,10 @@ def parse(self, template: dict) -> dict: # and its interaction with tech-partner-specific hfive_* subparsers if image_parser_type == "tiff_tfs": - tiff = TfsTiffSubParser(self.file_path) + tiff = TfsTiffSubParser(self.file_path, self.entry_id) tiff.parse_and_normalize() - self.process_into_template(tiff.tmp, template) - else: # none or something unsupported - return template - return template - - def process_into_template(self, inp: dict, template: dict) -> dict: - debugging = False - if debugging is True: - for key, val in inp.items(): - if isinstance(val, dict): - for ckey, cval in val.items(): - print(f"{ckey}, {cval}") - else: - print(f"{key}, {val}") - # TODO:: implement actual mapping on template - # 
self.process_roi_overview(inp, template) + tiff.process_into_template(template) + # else: + # TODO::add here specific content parsers for other tech partner + # or other custom parsing of images return template diff --git a/pynxtools/dataconverter/readers/em/utils/image_utils.py b/pynxtools/dataconverter/readers/em/utils/image_utils.py index 74598ad12..2b29587da 100644 --- a/pynxtools/dataconverter/readers/em/utils/image_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/image_utils.py @@ -34,7 +34,6 @@ def sort_ascendingly_by_second_argument(tup): def if_str_represents_float(s): try: - float(s) - return str(float(s)) == s + return isinstance(float(s), float) except ValueError: return False From 99d55ba0e30262bbc667aaafe9aa1ca7060183c9 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 11 Dec 2023 17:46:15 +0100 Subject: [PATCH 49/84] Made also now the mapping of TFS vocabulary to NeXus work, awaiting now feedback from IKZ to implement mapping of further quantities, test NeXus file writes successfully, next step refactor em_spctrscpy parser --- .../readers/em/subparsers/image_tiff_tfs.py | 31 +++++++----- .../em/subparsers/image_tiff_tfs_cfg.py | 24 +++++++-- .../em/subparsers/image_tiff_tfs_modifier.py | 49 +++++++++++++++++++ .../shared/map_concepts/mapping_functors.py | 6 +-- 4 files changed, 90 insertions(+), 20 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py index 30aa5ffaa..79b6a9375 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py @@ -28,6 +28,10 @@ TiffTfsConcepts, TiffTfsToNeXusCfg, get_fei_parent_concepts, get_fei_childs from pynxtools.dataconverter.readers.em.utils.image_utils import \ sort_ascendingly_by_second_argument, if_str_represents_float +from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ + import variadic_path_to_specific_path +from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_modifier import \ + get_nexus_value class TfsTiffSubParser(TiffSubParser): @@ -141,18 +145,13 @@ def parse_and_normalize(self): f"TIFF file that this parser can process !") def process_into_template(self, template: dict) -> dict: - self.process_event_data_em_metadata(template) - self.process_event_data_em_data(template) + if self.supported is True: + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) return template - def process_event_data_em_metadata(self, template: dict) -> dict: - """Add respective event_data_em header.""" - # contextualization to understand how the image relates to the EM session - print(f"Mapping some of the TFS/FEI metadata concepts onto NeXus concepts") - return template - def process_event_data_em_data(self, template: dict) -> dict: - """Add respective heavy image data.""" + """Add respective heavy data.""" # default display of the image(s) representing the data collected in this event print(f"Writing TFS/FEI TIFF image as a onto the respective NeXus concept") # read image in-place @@ -215,8 +214,14 @@ def process_event_data_em_data(self, template: dict) -> dict: template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" return template - def process_event_data_em_state(self, template: dict) -> dict: - """Add em-state as they were during the event_data_em event.""" - # state of the 
microscope not repeating static/long-valid microscope metadata
-        print(f"Writing TFS/FEI event_data_em state")
+    def process_event_data_em_metadata(self, template: dict) -> dict:
+        """Add respective metadata."""
+        # contextualization to understand how the image relates to the EM session
+        print(f"Mapping some of the TFS/FEI metadata concepts onto NeXus concepts")
+        identifier = [self.entry_id, self.event_id, 1]
+        for nx_path, modifier in TiffTfsToNeXusCfg.items():
+            if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"):
+                trg = variadic_path_to_specific_path(nx_path, identifier)
+                template[trg] = get_nexus_value(modifier, self.tmp["meta"])
+                # print(f"nx_path: {nx_path}, trg: {trg}, tfs_concept: {template[trg]}\n")
         return template
diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py
index e38999b68..0452cc6ad 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py
@@ -195,6 +195,23 @@
                    "ColdStage/Humidity",
                    "ColdStage/SampleBias"]
 
+# there is more to know and understand than just knowing that TFS/FEI uses
+# the above-mentioned concepts in their taxonomy:
+# take the example of System/Source, for which an example file (instance) has the
+# value "FEG"
+# similarly to NeXus, "System/Source" labels a concept for which (assumption!) there
+# is a controlled enumeration of symbols possible (as the example shows, "FEG" is one
+# such allowed symbol of the enumeration).
+# The key issue is that the symbol for the leaf (here "FEG") eventually means nothing
+# when one has another semantic world-view, like in the NOMAD metainfo or NeXus.
+# Only (us) humans understand that what TFS/FEI likely means with the symbol
+# "FEG" is exactly the same as what we mean in NeXus when setting emitter_type of
+# NXebeam_column to "cold_cathode_field_emitter".
+# Alternatively, one could map into the NeXus world with the controlled enumeration
+# value "other" because strictly we do not know if FEG really means a filament or a
+# cold_cathode_field_emitter
+
+TfsToNexusConceptMapping = {"System/Source/FEG": "cold_cathode_field_emitter"}
+
 
 def get_fei_parent_concepts() -> List:
     """Get list of unique FEI parent concepts."""
     parent_concepts = set()
     for entry in TiffTfsConcepts:
         if isinstance(entry, str) and entry.count("/") == 1:
             parent_concepts.add(entry.split("/")[0])
     return list(parent_concepts)
@@ -215,7 +232,8 @@ def get_fei_childs(concept: str) -> List:
     return list(child_concepts)
 
-TiffTfsToNeXusCfg = {"/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/start_time": {"fun": "ikz_berlin_apreo_iso8601", "terms": ["User/Date", "User/Time"]},
+# "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/start_time"
+TiffTfsToNeXusCfg = {"IGNORE": {"fun": "ikz_berlin_apreo_iso8601", "terms": ["User/Date", "User/Time"]},
                      "IGNORE": { "fun": "load_from", "terms": "User/User" },
                      "IGNORE": { "fun": "load_from", "terms": "User/UserText" },
                      "IGNORE": { "fun": "load_from", "terms": "User/UserTextUnicode" },
                      "IGNORE": { "fun": "load_from", "terms": "System/Type" },
                      "IGNORE": { "fun": "load_from", "terms": "System/Dnumber" },
                      "IGNORE": { "fun": "load_from", "terms": "System/Software" },
                      "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier": { "fun": "load_from", "terms": "System/BuildNr" },
"/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type": { "fun": "tfs_to_nexus", "terms": "System/Source" }, + "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor": "FEI", "IGNORE": { "fun": "load_from", "terms": "System/FinalLens" }, "IGNORE": { "fun": "load_from", "terms": "System/Chamber" }, "IGNORE": { "fun": "load_from", "terms": "System/Stage" }, diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py new file mode 100644 index 000000000..dddd59bd3 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py @@ -0,0 +1,49 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utilities for working with TFS/FEI-specific concepts.""" + +# pylint: disable=no-member + +from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_cfg import \ + TfsToNexusConceptMapping + + +def get_nexus_value(modifier, metadata: dict): + """Interpret a functional mapping using data from dct via calling modifiers.""" + if isinstance(modifier, dict): + # different commands are available + if set(["fun", "terms"]) == set(modifier.keys()): + if modifier["fun"] == "load_from": + if modifier["terms"] in metadata.keys(): + return metadata[modifier['terms']] + else: + raise ValueError(f"Unable to interpret modififier load_from for argument {modifier['terms']}") + if modifier["fun"] == "tfs_to_nexus": + # print(metadata[modifier['terms']]) + if f"{modifier['terms']}/{metadata[modifier['terms']]}" in TfsToNexusConceptMapping.keys(): + return TfsToNexusConceptMapping[f"{modifier['terms']}/{metadata[modifier['terms']]}"] + else: + raise ValueError(f"Unable to interpret modifier tfs_to_nexus for argument {modifier['terms']}/{metadata[modifier['terms']]}") + else: + print(f"WARNING::Modifier {modifier} is currently not implemented !") + # elif set(["link"]) == set(modifier.keys()), with the jsonmap reader Sherjeel conceptualized "link" + return None + elif isinstance(modifier, str): + return modifier # metadata[modifier] + else: + return None diff --git a/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py b/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py index 6ee855b84..8851e3427 100644 --- a/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py +++ b/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py @@ -78,11 +78,9 @@ def apply_modifier(modifier, dct: dict): return load_from_modifier(modifier["terms"], dct) if modifier["fun"] == "convert_iso8601": return convert_iso8601_modifier(modifier["terms"], dct) - elif set(["link"]) == set(modifier.keys()): - # CURRENTLY NOT IMPLEMENTED - # with the jsonmap reader Sherjeel conceptualized "link" - return None else: + print(f"WARNING::Modifier {modifier} is currently not implemented !") + # elif 
set(["link"]) == set(modifier.keys()), with the jsonmap reader Sherjeel conceptualized "link" return None if isinstance(modifier, str): return modifier From 5aa9977d83510e98dddad88835517e54e3d206ce Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 11 Dec 2023 18:07:15 +0100 Subject: [PATCH 50/84] linting, styling, mypying --- pynxtools/dataconverter/readers/em/reader.py | 8 +- .../readers/em/subparsers/image_tiff_tfs.py | 22 +- .../em/subparsers/image_tiff_tfs_cfg.py | 346 +++++++++--------- 3 files changed, 188 insertions(+), 188 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 3af242b4f..4134ea869 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -17,14 +17,14 @@ # """Parser for loading generic orientation microscopy data based on .""" -# pylint: disable=no-member +# pylint: disable=no-member,fixme from typing import Tuple, Any from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef # from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser -from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser +# from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper @@ -126,7 +126,7 @@ def read(self, # sub_parser = "image_tiff" subparser = NxEmImagesSubParser(entry_id, file_paths[0]) subparser.parse(template) - + # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue @@ -146,7 +146,7 @@ def read(self, # we only need to decorate the template to point to the mandatory ROI overview # print("Create NeXus default plottable data...") # em_default_plot_generator(template, 1) - + if True is False: nxs_plt = NxEmDefaultPlotResolver() # if nxs_mtex is the sub-parser diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py index 79b6a9375..bc9acd948 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py @@ -94,7 +94,7 @@ def get_metadata(self): sequence = [] # decide I/O order in which metadata for childs of parent concepts will be read for key, value in tfs_parent_concepts_byte_offset.items(): if value is not None: - sequence.append((key, value)) + sequence.append((key, value)) # tuple of parent_concept name and byte offset sequence = sort_ascendingly_by_second_argument(sequence) print(sequence) @@ -131,7 +131,7 @@ def get_metadata(self): else: self.tmp["meta"][f"{parent}/{term}"] = value else: - raise ValueError(f"Detected an unexpected case {parent}/{term}, type: {type(value)} !") + raise ValueError(f"Detected an unexpected case {parent}/{term}, type: {type(value)} !") else: pass @@ -149,7 +149,7 @@ def process_into_template(self, template: dict) -> dict: self.process_event_data_em_metadata(template) self.process_event_data_em_data(template) return template - + def process_event_data_em_data(self, template: dict) -> dict: """Add respective heavy data.""" # 
default display of the image(s) representing the data collected in this event @@ -167,8 +167,8 @@ def process_event_data_em_data(self, template: dict) -> dict: # explorative viewing using H5Web than what traditionally typical image viewers are meant for image_identifier = 1 trg = f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/" \ - f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \ - f"IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]" + f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \ + f"IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]" # TODO::writer should decorate automatically! template[f"{trg}/title"] = f"Image" template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! @@ -183,8 +183,8 @@ def process_event_data_em_data(self, template: dict) -> dict: template[f"{trg}/@axes"].append(f"axis_{dim}") template[f"{trg}/intensity"] = {"compress": np.array(fp), "strength": 1} # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d - template[f"{trg}/intensity/@long_name"] = f"Signal" - + template[f"{trg}/intensity/@long_name"] = f"Signal" + sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"], "y": self.tmp["meta"]["EScan/PixelHeight"]} shp = np.shape(np.array(fp)) @@ -206,14 +206,14 @@ def process_event_data_em_data(self, template: dict) -> dict: for dim in dims: template[f"{trg}/AXISNAME[axis_{dim}]"] \ = {"compress": np.asarray(np.linspace(0, - nxy[dim] - 1, - num=nxy[dim], - endpoint=True) * sxy[dim], np.float64), "strength": 1} + nxy[dim] - 1, + num=nxy[dim], + endpoint=True) * sxy[dim], np.float64), "strength": 1} template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \ = f"Coordinate along {dim}-axis ({scan_unit[dim]})" template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" return template - + def process_event_data_em_metadata(self, template: dict) -> dict: """Add respective metadata.""" # contextualization to understand how the image relates to the EM session diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py index 0452cc6ad..47bf712eb 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py @@ -234,179 +234,179 @@ def get_fei_childs(concept: str) -> List: # "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/start_time" TiffTfsToNeXusCfg = {"IGNORE": {"fun": "ikz_berlin_apreo_iso8601", "terms": ["User/Date", "User/Time"]}, - "IGNORE": { "fun": "load_from", "terms": "User/User" }, - "IGNORE": { "fun": "load_from", "terms": "User/UserText" }, - "IGNORE": { "fun": "load_from", "terms": "User/UserTextUnicode" }, - "IGNORE": { "fun": "load_from", "terms": "System/Type" }, - "IGNORE": { "fun": "load_from", "terms": "System/Dnumber" }, - "IGNORE": { "fun": "load_from", "terms": "System/Software" }, - "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier": { "fun": "load_from", "terms": "System/BuildNr" }, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type": { "fun": "tfs_to_nexus", "terms": "System/Source" }, + "IGNORE": {"fun": "load_from", "terms": "User/User"}, + "IGNORE": {"fun": "load_from", "terms": "User/UserText"}, + "IGNORE": {"fun": "load_from", "terms": "User/UserTextUnicode"}, + "IGNORE": {"fun": "load_from", "terms": "System/Type"}, + "IGNORE": {"fun": "load_from", "terms": "System/Dnumber"}, + 
"IGNORE": {"fun": "load_from", "terms": "System/Software"}, + "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier": {"fun": "load_from", "terms": "System/BuildNr"}, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type": {"fun": "tfs_to_nexus", "terms": "System/Source"}, "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor": "FEI", - "IGNORE": { "fun": "load_from", "terms": "System/FinalLens" }, - "IGNORE": { "fun": "load_from", "terms": "System/Chamber" }, - "IGNORE": { "fun": "load_from", "terms": "System/Stage" }, - "IGNORE": { "fun": "load_from", "terms": "System/Pump" }, - "IGNORE": { "fun": "load_from", "terms": "System/ESEM" }, - "IGNORE": { "fun": "load_from", "terms": "System/Aperture" }, - "IGNORE": { "fun": "load_from", "terms": "System/Scan" }, - "IGNORE": { "fun": "load_from", "terms": "System/Acq" }, - "IGNORE": { "fun": "load_from", "terms": "System/EucWD" }, - "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/model": { "fun": "load_from", "terms": "System/SystemType" }, - "IGNORE": { "fun": "load_from", "terms": "System/DisplayWidth" }, - "IGNORE": { "fun": "load_from", "terms": "System/DisplayHeight" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/HV" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/Spot" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/StigmatorX" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/StigmatorY" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/BeamShiftX" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/BeamShiftY" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/ScanRotation" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/ImageMode" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/FineStageBias" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/Beam" }, - "IGNORE": { "fun": "load_from", "terms": "Beam/Scan" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/Source" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/ColumnType" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/FinalLens" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/Acq" }, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/description": { "fun": "load_from", "terms": "EBeam/Aperture" }, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value": { "fun": "load_from", "terms": "EBeam/ApertureDiameter" }, + "IGNORE": {"fun": "load_from", "terms": "System/FinalLens"}, + "IGNORE": {"fun": "load_from", "terms": "System/Chamber"}, + "IGNORE": {"fun": "load_from", "terms": "System/Stage"}, + "IGNORE": {"fun": "load_from", "terms": "System/Pump"}, + "IGNORE": {"fun": "load_from", "terms": "System/ESEM"}, + "IGNORE": {"fun": "load_from", "terms": "System/Aperture"}, + "IGNORE": {"fun": "load_from", "terms": "System/Scan"}, + "IGNORE": {"fun": "load_from", "terms": "System/Acq"}, + "IGNORE": {"fun": "load_from", "terms": "System/EucWD"}, + "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/model": {"fun": "load_from", "terms": "System/SystemType"}, + "IGNORE": {"fun": "load_from", "terms": "System/DisplayWidth"}, + "IGNORE": {"fun": "load_from", "terms": "System/DisplayHeight"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/HV"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/Spot"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/StigmatorX"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/StigmatorY"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/BeamShiftX"}, + 
"IGNORE": {"fun": "load_from", "terms": "Beam/BeamShiftY"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/ScanRotation"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/ImageMode"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/FineStageBias"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/Beam"}, + "IGNORE": {"fun": "load_from", "terms": "Beam/Scan"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/Source"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/ColumnType"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/FinalLens"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/Acq"}, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/description": {"fun": "load_from", "terms": "EBeam/Aperture"}, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value": {"fun": "load_from", "terms": "EBeam/ApertureDiameter"}, "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value/@units": "m", - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage": { "fun": "load_from", "terms": "EBeam/HV" }, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units": "V", - "IGNORE": { "fun": "load_from", "terms": "EBeam/HFW" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/VFW" }, - "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance": { "fun": "load_from", "terms": "EBeam/WD" }, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage": {"fun": "load_from", "terms": "EBeam/HV"}, + "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units": "V", + "IGNORE": {"fun": "load_from", "terms": "EBeam/HFW"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/VFW"}, + "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance": {"fun": "load_from", "terms": "EBeam/WD"}, "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance/@units": "m", - "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamCurrent" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/TiltCorrectionIsOn" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/DynamicFocusIsOn" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/DynamicWDIsOn" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/ScanRotation" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/LensMode" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/LensModeA" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/ATubeVoltage" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/UseCase" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/SemOpticalMode" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/ImageMode" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/SourceTiltX" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/SourceTiltY" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StageX" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StageY" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StageZ" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StageR" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StageTa" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StageTb" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StigmatorX" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/StigmatorY" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamShiftX" }, - "IGNORE": { "fun": 
"load_from", "terms": "EBeam/BeamShiftY" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/EucWD" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/EmissionCurrent" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/TiltCorrectionAngle" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/PreTilt" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/WehneltBias" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/BeamMode" }, - "IGNORE": { "fun": "load_from", "terms": "EBeam/MagnificationCorrection" }, - "IGNORE": { "fun": "load_from", "terms": "GIS/Number" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/InternalScan" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/Dwelltime" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/PixelWidth" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/PixelHeight" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/HorFieldsize" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/VerFieldsize" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/Average" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/Integrate" }, - "IGNORE": { "fun": "load_from", "terms": "Scan/FrameTime" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/Scan" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/InternalScan" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/Dwell" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/PixelWidth" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/PixelHeight" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/HorFieldsize" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/VerFieldsize" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/FrameTime" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/LineTime" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/Mainslock" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/LineIntegration" }, - "IGNORE": { "fun": "load_from", "terms": "EScan/ScanInterlacing" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/StageX" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/StageY" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/StageZ" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/StageR" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/StageT" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/StageTb" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/SpecTilt" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/WorkingDistance" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ActiveStage" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/[Image]" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/DigitalContrast" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/DigitalBrightness" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/DigitalGamma" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/Average" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/Integrate" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ResolutionX" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ResolutionY" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/DriftCorrected" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ZoomFactor" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ZoomPanX" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ZoomPanY" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/MagCanvasRealWidth" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/MagnificationMode" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/ScreenMagCanvasRealWidth" }, - "IGNORE": { 
"fun": "load_from", "terms": "StageX/ScreenMagnificationMode" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/PostProcessing" }, - "IGNORE": { "fun": "load_from", "terms": "StageX/Transformation" }, - "IGNORE": { "fun": "load_from", "terms": "Vacuum/ChPressure" }, - "IGNORE": { "fun": "load_from", "terms": "Vacuum/Gas" }, - "IGNORE": { "fun": "load_from", "terms": "Vacuum/UserMode" }, - "IGNORE": { "fun": "load_from", "terms": "Vacuum/Humidity" }, - "IGNORE": { "fun": "load_from", "terms": "Specimen/Temperature" }, - "IGNORE": { "fun": "load_from", "terms": "Specimen/SpecimenCurrent" }, - "IGNORE": { "fun": "load_from", "terms": "Detectors/Number" }, - "/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]/local_name": { "fun": "load_from", "terms": "Detectors/Name" }, - "IGNORE": { "fun": "load_from", "terms": "Detectors/Mode" }, - "IGNORE": { "fun": "load_from", "terms": "T2/Contrast" }, - "IGNORE": { "fun": "load_from", "terms": "T2/Brightness" }, - "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type": { "fun": "load_from_lower_case", "terms": "T2/Signal" }, - "IGNORE": { "fun": "load_from", "terms": "T2/ContrastDB" }, - "IGNORE": { "fun": "load_from", "terms": "T2/BrightnessDB" }, - "IGNORE": { "fun": "load_from", "terms": "T2/Setting" }, - "IGNORE": { "fun": "load_from", "terms": "T2/MinimumDwellTime" }, - "IGNORE": { "fun": "load_from", "terms": "Accessories/Number" }, - "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/ModeOn" }, - "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/LandingEnergy" }, - "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/ImmersionRatio" }, - "IGNORE": { "fun": "load_from", "terms": "EBeamDeceleration/StageBias" }, - "IGNORE": { "fun": "load_from", "terms": "CompoundLensFilter/IsOn" }, - "IGNORE": { "fun": "load_from", "terms": "CompoundLensFilter/ThresholdEnergy" }, - "IGNORE": { "fun": "load_from", "terms": "PrivateFei/BitShift" }, - "IGNORE": { "fun": "load_from", "terms": "PrivateFei/DataBarSelected" }, - "IGNORE": { "fun": "load_from", "terms": "PrivateFei/DataBarAvailable" }, - "IGNORE": { "fun": "load_from", "terms": "PrivateFei/TimeOfCreation" }, - "IGNORE": { "fun": "load_from", "terms": "PrivateFei/DatabarHeight" }, - "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/BrightFieldIsOn" }, - "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/BrightFieldValue" }, - "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/DarkFieldIsOn" }, - "IGNORE": { "fun": "load_from", "terms": "HiResIllumination/DarkFieldValue" }, - "IGNORE": { "fun": "load_from", "terms": "EasyLift/Rotation" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/HeatingCurrent" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/HeatingVoltage" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/TargetTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/ActualTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/HeatingPower" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/SampleBias" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageMEMS/SampleResistance" }, - "IGNORE": { "fun": "load_from", "terms": "HotStage/TargetTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "HotStage/ActualTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "HotStage/SampleBias" }, - "IGNORE": { "fun": "load_from", "terms": "HotStage/ShieldBias" }, - "IGNORE": { "fun": "load_from", "terms": 
"HotStageHVHS/TargetTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/ActualTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/SampleBias" }, - "IGNORE": { "fun": "load_from", "terms": "HotStageHVHS/ShieldBias" }, - "IGNORE": { "fun": "load_from", "terms": "ColdStage/TargetTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "ColdStage/ActualTemperature" }, - "IGNORE": { "fun": "load_from", "terms": "ColdStage/Humidity" }, - "IGNORE": { "fun": "load_from", "terms": "ColdStage/SampleBias" } } + "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamCurrent"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/TiltCorrectionIsOn"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/DynamicFocusIsOn"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/DynamicWDIsOn"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/ScanRotation"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/LensMode"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/LensModeA"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/ATubeVoltage"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/UseCase"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/SemOpticalMode"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/ImageMode"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/SourceTiltX"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/SourceTiltY"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StageX"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StageY"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StageZ"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StageR"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StageTa"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StageTb"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StigmatorX"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/StigmatorY"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamShiftX"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamShiftY"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/EucWD"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/EmissionCurrent"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/TiltCorrectionAngle"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/PreTilt"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/WehneltBias"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamMode"}, + "IGNORE": {"fun": "load_from", "terms": "EBeam/MagnificationCorrection"}, + "IGNORE": {"fun": "load_from", "terms": "GIS/Number"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/InternalScan"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/Dwelltime"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/PixelWidth"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/PixelHeight"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/HorFieldsize"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/VerFieldsize"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/Average"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/Integrate"}, + "IGNORE": {"fun": "load_from", "terms": "Scan/FrameTime"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/Scan"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/InternalScan"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/Dwell"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/PixelWidth"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/PixelHeight"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/HorFieldsize"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/VerFieldsize"}, + "IGNORE": {"fun": "load_from", "terms": 
"EScan/FrameTime"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/LineTime"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/Mainslock"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/LineIntegration"}, + "IGNORE": {"fun": "load_from", "terms": "EScan/ScanInterlacing"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/StageX"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/StageY"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/StageZ"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/StageR"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/StageT"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/StageTb"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/SpecTilt"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/WorkingDistance"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ActiveStage"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/[Image]"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/DigitalContrast"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/DigitalBrightness"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/DigitalGamma"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/Average"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/Integrate"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ResolutionX"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ResolutionY"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/DriftCorrected"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ZoomFactor"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ZoomPanX"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ZoomPanY"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/MagCanvasRealWidth"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/MagnificationMode"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ScreenMagCanvasRealWidth"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/ScreenMagnificationMode"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/PostProcessing"}, + "IGNORE": {"fun": "load_from", "terms": "StageX/Transformation"}, + "IGNORE": {"fun": "load_from", "terms": "Vacuum/ChPressure"}, + "IGNORE": {"fun": "load_from", "terms": "Vacuum/Gas"}, + "IGNORE": {"fun": "load_from", "terms": "Vacuum/UserMode"}, + "IGNORE": {"fun": "load_from", "terms": "Vacuum/Humidity"}, + "IGNORE": {"fun": "load_from", "terms": "Specimen/Temperature"}, + "IGNORE": {"fun": "load_from", "terms": "Specimen/SpecimenCurrent"}, + "IGNORE": {"fun": "load_from", "terms": "Detectors/Number"}, + "/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]/local_name": {"fun": "load_from", "terms": "Detectors/Name"}, + "IGNORE": {"fun": "load_from", "terms": "Detectors/Mode"}, + "IGNORE": {"fun": "load_from", "terms": "T2/Contrast"}, + "IGNORE": {"fun": "load_from", "terms": "T2/Brightness"}, + "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type": {"fun": "load_from_lower_case", "terms": "T2/Signal"}, + "IGNORE": {"fun": "load_from", "terms": "T2/ContrastDB"}, + "IGNORE": {"fun": "load_from", "terms": "T2/BrightnessDB"}, + "IGNORE": {"fun": "load_from", "terms": "T2/Setting"}, + "IGNORE": {"fun": "load_from", "terms": "T2/MinimumDwellTime"}, + "IGNORE": {"fun": "load_from", "terms": "Accessories/Number"}, + "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/ModeOn"}, + "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/LandingEnergy"}, + "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/ImmersionRatio"}, + "IGNORE": {"fun": "load_from", "terms": 
"EBeamDeceleration/StageBias"}, + "IGNORE": {"fun": "load_from", "terms": "CompoundLensFilter/IsOn"}, + "IGNORE": {"fun": "load_from", "terms": "CompoundLensFilter/ThresholdEnergy"}, + "IGNORE": {"fun": "load_from", "terms": "PrivateFei/BitShift"}, + "IGNORE": {"fun": "load_from", "terms": "PrivateFei/DataBarSelected"}, + "IGNORE": {"fun": "load_from", "terms": "PrivateFei/DataBarAvailable"}, + "IGNORE": {"fun": "load_from", "terms": "PrivateFei/TimeOfCreation"}, + "IGNORE": {"fun": "load_from", "terms": "PrivateFei/DatabarHeight"}, + "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/BrightFieldIsOn"}, + "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/BrightFieldValue"}, + "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/DarkFieldIsOn"}, + "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/DarkFieldValue"}, + "IGNORE": {"fun": "load_from", "terms": "EasyLift/Rotation"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/HeatingCurrent"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/HeatingVoltage"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/TargetTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/ActualTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/HeatingPower"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/SampleBias"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/SampleResistance"}, + "IGNORE": {"fun": "load_from", "terms": "HotStage/TargetTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "HotStage/ActualTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "HotStage/SampleBias"}, + "IGNORE": {"fun": "load_from", "terms": "HotStage/ShieldBias"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/TargetTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/ActualTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/SampleBias"}, + "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/ShieldBias"}, + "IGNORE": {"fun": "load_from", "terms": "ColdStage/TargetTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "ColdStage/ActualTemperature"}, + "IGNORE": {"fun": "load_from", "terms": "ColdStage/Humidity"}, + "IGNORE": {"fun": "load_from", "terms": "ColdStage/SampleBias"}} From 49fb0a3752674e3965edd134116f5112b843295f Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 13 Dec 2023 17:21:02 +0100 Subject: [PATCH 51/84] Added rosettasciio, added skeleton for refactoring EDS, EELS, ADF using rosettasciio, collected feedback from the discussion with Robert Kernke (IKZ), next steps i) complete mapping of all detector modes for Robert, ii) complete EDS, EELS, ADF --- dev-requirements.txt | 17 +++- imgs.batch.sh | 11 ++- .../{nexus_concepts.py => nxs_concepts.py} | 2 +- .../readers/em/concepts/nxs_em_image_r_set.py | 41 ++++++++++ .../em/concepts/nxs_em_spectrum_set.py | 70 ++++++++++++++++ .../readers/em/concepts/nxs_object.py | 61 ++++++++++++++ pynxtools/dataconverter/readers/em/reader.py | 2 +- .../em/subparsers/image_tiff_tfs_cfg.py | 3 +- .../readers/em/subparsers/rsciio_base.py | 47 +++++++++++ .../readers/em/subparsers/rsciio_bruker.py | 80 +++++++++++++++++++ pyproject.toml | 3 +- spctrscpy.batch.sh | 11 +++ 12 files changed, 340 insertions(+), 8 deletions(-) rename pynxtools/dataconverter/readers/em/concepts/{nexus_concepts.py => nxs_concepts.py} (97%) create mode 100644 pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py create mode 100644 
pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py create mode 100644 pynxtools/dataconverter/readers/em/concepts/nxs_object.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/rsciio_base.py create mode 100644 pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py create mode 100755 spctrscpy.batch.sh diff --git a/dev-requirements.txt b/dev-requirements.txt index f6ac22e2f..d8ad60a09 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -44,17 +44,21 @@ comm==0.2.0 contourpy==1.1.1 # via matplotlib coverage[toml]==7.3.2 - # via pytest-cov + # via + # coverage + # pytest-cov cycler==0.12.1 # via matplotlib cython==3.0.6 # via tables dask[array]==2023.5.0 # via + # dask # hyperspy # kikuchipy # orix # pyxem + # rosettasciio debugpy==1.8.0 # via ipykernel decorator==5.1.1 @@ -282,6 +286,7 @@ numpy==1.24.4 # pywavelets # pyxem # radioactivedecay + # rosettasciio # scikit-image # scikit-learn # scipy @@ -332,7 +337,9 @@ pillow==10.0.1 # nionswift # scikit-image pint==0.21.1 - # via hyperspy + # via + # hyperspy + # rosettasciio pip-tools==7.3.0 # via pynxtools (pyproject.toml) platformdirs==4.0.0 @@ -388,6 +395,8 @@ pytest-cov==4.1.0 # via pynxtools (pyproject.toml) pytest-timeout==2.2.0 # via pynxtools (pyproject.toml) +python-box==6.1.0 + # via rosettasciio python-dateutil==2.8.2 # via # hyperspy @@ -395,6 +404,7 @@ python-dateutil==2.8.2 # jupyter-client # matplotlib # pandas + # rosettasciio pytz==2023.3.post1 # via # nionswift @@ -412,6 +422,7 @@ pyyaml==6.0.1 # hyperspy # kikuchipy # pynxtools (pyproject.toml) + # rosettasciio pyzmq==25.1.1 # via # ipykernel @@ -427,6 +438,8 @@ requests==2.31.0 # requests-cache requests-cache==1.1.1 # via pynxtools (pyproject.toml) +rosettasciio==0.2 + # via pynxtools (pyproject.toml) scikit-image==0.20.0 # via # hyperspy diff --git a/imgs.batch.sh b/imgs.batch.sh index a764c63d4..c18aa0a23 100755 --- a/imgs.batch.sh +++ b/imgs.batch.sh @@ -1,8 +1,15 @@ #!/bin/bash -datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/" +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/" + +# comments is detector mode +examples="kit/FeMoOx_AntiA_04_1k5x_CN.tif" +examples="ikz_robert/0c8nA_3deg_003_AplusB_test.tif" # T1 +examples="ikz_martin/ALN_baoh_021.tif" # T2 +examples="ikz_robert/T3_image.tif" +examples="ikz_robert/ETD_image.tif" # ETD +examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam -examples="ALN_baoh_021.tif" # FeMoOx_AntiA_04_1k5x_CN.tif" for example in $examples; do echo $example diff --git a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py similarity index 97% rename from pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py rename to pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py index 15ae33019..33716e2d2 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py @@ -21,7 +21,7 @@ from typing import List from pynxtools.dataconverter.readers.em.concepts.concept_mapper \ - import variadic_path_to_specific_path, apply_modifier + import variadic_path_to_specific_path PYNXTOOLS_VERSION = "n/a" diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py new file mode 100644 index 000000000..ab28a7e0b --- /dev/null +++ 
b/pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py
@@ -0,0 +1,41 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""NXem image real space set (element of a labelled property graph) to store instance data."""
+
+# pylint: disable=no-member,too-few-public-methods
+
+
+from typing import Dict
+
+from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject
+
+
+NX_EM_IMAGE_REAL_SPACE_SET_HDF_PATH = []
+# this one needs an update !
+
+
+class NxEmImageRealSpaceSet():
+    def __init__(self):
+        self.tmp: Dict = {}
+        for entry in NX_EM_IMAGE_REAL_SPACE_SET_HDF_PATH:
+            if entry.endswith("-field") is True:
+                self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dset")
+            elif entry.endswith("-attribute") is True:
+                self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attr")
+            else:
+                self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="grp")
diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py
new file mode 100644
index 000000000..50af5f83a
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py
@@ -0,0 +1,70 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
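
For orientation, the *_HDF_PATH lists in these concept classes encode the HDF5 kind of each node in a trailing suffix (`-group`, `-field`, `-attribute`) which the constructors strip off again, as in NxEmImageRealSpaceSet above. A self-contained toy version of that suffix convention, assuming only the three suffixes shown are ever used:

```python
# Toy version of the suffix-stripping loop used by the concept classes above;
# the real classes store NxObject instances, here we just keep the eqv_hdf tag.
paths = ["PROCESS-group",
         "PROCESS/detector_identifier-field",
         "stack@signal-attribute"]

tmp = {}
for entry in paths:
    for suffix, eqv_hdf in (("-field", "dset"), ("-attribute", "attr"), ("-group", "grp")):
        if entry.endswith(suffix):
            tmp[entry[:-len(suffix)]] = eqv_hdf
            break

print(tmp)  # {'PROCESS': 'grp', 'PROCESS/detector_identifier': 'dset', 'stack@signal': 'attr'}
```
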
+# +"""NXem spectrum set (element of a labelled property graph) to store instance data.""" + +# pylint: disable=no-member,too-few-public-methods + + +from typing import Dict + +from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject + + +NX_EM_SPECTRUM_SET_HDF_PATH = [ + "PROCESS-group", + "PROCESS/detector_identifier-field", + "PROCESS/source-group", + "PROCESS/source/algorithm-field", + "PROCESS/source/checksum-field", + "PROCESS/source/path-field", + "PROCESS/source/type-field", + "stack-group", + "stack/axis_energy-field", + "stack/axis_energy@long_name-attribute", + "stack/axis_x-field", + "stack/axis_x@long_name-attribute", + "stack/axis_y-field", + "stack/axis_y@long_name-attribute", + "stack/intensity-field", + "stack/intensity@long_name-attribute", + "stack/title-field", + "stack@axes-attribute", + "stack@AXISNAME_indices-attribute", + "stack@long_name-attribute", + "stack@signal-attribute", + "summary-group", + "summary/axis_energy-field", + "summary/axis_energy@long_name-attribute", + "summary/title-field", + "summary@axes-attribute", + "summary@AXISNAME_indices-attribute", + "summary@long_name-attribute", + "summary@signal-attribute"] +# this one needs an update ! + + +class NxEmSpectrumSet(): + def __init__(self): + self.tmp: Dict = {} + for entry in NX_EM_SPECTRUM_SET_HDF_PATH: + if entry.endswith("-field") is True: + self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dset") + elif entry.endswith("-attribute") is True: + self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attr") + else: + self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="grp") diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py new file mode 100644 index 000000000..c5d3c9f77 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py @@ -0,0 +1,61 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""NXobject (element of a labelled property graph) to store instance data.""" + +# pylint: disable=no-member,too-few-public-methods + +from typing import Dict + + +class NxObject: + """An object in a graph e.g. an attribute, dataset, or group in NeXus.""" + + def __init__(self, + name: str = None, + unit: str = None, + dtype=str, + value=None, + **kwargs): + if (name is not None) and (name == ""): + raise ValueError("Value for argument name needs to be a non-empty string !") + if (unit is not None) and (unit == ""): + raise ValueError("Value for argument unit needs to be a non-empty string !") + if (dtype is not None) and isinstance(dtype, type) is False: + raise ValueError("Value of argument dtype needs to be a valid type, " + "ideally a numpy datatype !") + # self.doc = None # docstring + self.name = name # name of the field + self.unit = unit # not unit category but actual unit + # use special values "unitless" for NX_UNITLESS (e.g. 
1) and + # "dimensionless" for NX_DIMENSIONLESS (e.g. 1m / 1m) + self.dtype = dtype # use np.dtype if possible + if value is None or dtype is str: + self.unit = "unitless" + if value is not None: + self.value = value + # value should be a numpy scalar, tensor, or string if possible + self.eqv_hdf = None + if "eqv_hdf" in kwargs: + if kwargs["eqv_hdf"] in ["grp", "dset", "attr"]: + self.eqv_hdf = kwargs["eqv_hdf"] + else: + raise ValueError("Value of keyword argument eqv_hdf needs to be one of grp, dset, attr !") + + def __repr__(self): + """Report values.""" + return f"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, eqv_hdf: {self.eqv_hdf}" diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 4134ea869..8108cbcb7 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -22,7 +22,7 @@ from typing import Tuple, Any from pynxtools.dataconverter.readers.base.reader import BaseReader -from pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef +from pynxtools.pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef # from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser # from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py index 47bf712eb..2ff14e344 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py @@ -279,7 +279,8 @@ def get_fei_childs(concept: str) -> List: "IGNORE": {"fun": "load_from", "terms": "EBeam/VFW"}, "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance": {"fun": "load_from", "terms": "EBeam/WD"}, "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance/@units": "m", - "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamCurrent"}, + "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current": {"fun": "load_from", "terms": "EBeam/WD"}, + "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current/@units": "A", "IGNORE": {"fun": "load_from", "terms": "EBeam/TiltCorrectionIsOn"}, "IGNORE": {"fun": "load_from", "terms": "EBeam/DynamicFocusIsOn"}, "IGNORE": {"fun": "load_from", "terms": "EBeam/DynamicWDIsOn"}, diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_base.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_base.py new file mode 100644 index 000000000..bf2d4f5a7 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_base.py @@ -0,0 +1,49 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Base class to inherit sub-parser from when interacting with rosettasciio.""" + +# this subparser is currently implemented such that it covers the first +# and the second step of a parsing and mapping workflow whereby concepts and +# instance data from the representation realm of a tech partner are mapped +# onto the realm of NeXus, e.g. to offer normalized content to NOMAD OASIS + +# the first step is the reading of information (all code relevant to enable +# information extraction from a specific file of a tech partner) +# the second step is the normalization of that information +# the third step is currently performed by the nxs_hyperspy.py parser, +# which finally processes the already normalized information into the +# template object that is thereafter consumed by the convert.py and writer.py +# functionalities to create a serialized NeXus data artifact + +from typing import Dict + + +class RsciioBaseParser: + def __init__(self, file_path: str = ""): + # self.supported_version = VERSION_MANAGEMENT + # self.version = VERSION_MANAGEMENT + # tech_partner the company which designed this format + # schema_name the specific name of the family of schemas supported by this reader + # schema_version the specific version(s) supported by this reader + # writer_name the specific name of the tech_partner's (typically proprietary) software + self.prfx = None + self.tmp: Dict = {} + if file_path is not None and file_path != "": + self.file_path = file_path + else: + raise ValueError("Argument file_path needs to be a non-empty string !") diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py new file mode 100644 index 000000000..13c32c7dd --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py @@ -0,0 +1,80 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
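# A minimal sketch (editorial, not part of the patch) of the three-step
# workflow a subclass of RsciioBaseParser is meant to implement; the class
# name, the file suffix check, and the cache keys are hypothetical.
from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser


class RsciioDummySubParser(RsciioBaseParser):
    def __init__(self, file_path: str = ""):
        super().__init__(file_path)
        self.supported = self.file_path.endswith(".dummy")  # format check

    def parse_and_normalize(self):
        # steps 1 and 2: read tech-partner content, normalize it into self.tmp
        if self.supported:
            self.tmp["normalized"] = {}

    def process_into_template(self, template: dict) -> dict:
        # step 3: fill the template that convert.py and writer.py consume
        if self.supported:
            template.update(self.tmp.get("normalized", {}))
        return template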
+# +"""(Sub-)parser for reading content from Bruker *.BCF files via rosettasciio.""" + +from typing import Dict, List +from rsciio import bruker + +from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser + + +class RsciioBrukerSubParser(RsciioBaseParser): + """Read Bruker BCF File Format bcf.""" + def __init__(self, file_path: str = ""): + super().__init__(file_path) + self.prfx = None + self.tmp: Dict = {} + self.objs: List = [] + self.supported_version: Dict = {} + self.version: Dict = {} + self.supported = False + self.check_if_supported() + + def check_if_supported(self): + try: + self.objs = bruker.file_reader(self.file_path) + # TODO::what to do if the content of the file is larger than the available + # main memory, one approach to handle this is to have the file_reader parsing + # only the collection of the concepts without the actual instance data + # based on this one could then plan how much memory has to be reserved + # in the template and stream out accordingly + self.supported = True + except Exception: + print(f"Loading {self.file_path} using {type(self).__name__} is not supported !") + + def parse_and_normalize(self): + """Perform actual parsing filling cache self.tmp.""" + if self.supported is True: + print(f"Parsing with {type(self).__name__}...") + self.tech_partner_to_nexus_normalization() + else: + print(f"{self.file_path} is not a Bruker-specific " + f"BCF file that this parser can process !") + + def tech_partner_to_nexus_normalization(self): + """Translate tech partner concepts to NeXus concepts.""" + self.normalize_eds_content() + self.normalize_eels_content() + + def normalize_eds_content(self): + pass + + def normalize_eels_content(self): + pass + + def process_into_template(self, template: dict) -> dict: + if self.supported is True: + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) + return template + + def process_event_data_em_metadata(self, template: dict) -> dict: + return template + + def process_event_data_em_data(self, template: dict) -> dict: + return template \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f62d25f94..f8e75cf10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,8 @@ dependencies = [ "pandas>=1.3.2", "ase>=3.19.0", "flatdict>=4.0.1", - "hyperspy>=1.7.5", + "hyperspy>=1.7.6", + "rosettasciio>=0.2", "ifes_apt_tc_data_modeling>=0.1", "gitpython>=3.1.24", "pytz>=2021.1", diff --git a/spctrscpy.batch.sh b/spctrscpy.batch.sh new file mode 100755 index 000000000..b1b5c4259 --- /dev/null +++ b/spctrscpy.batch.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/" + +examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" +examples="ikz/VInP_108_L2.h5" + +for example in $examples; do + echo $example + dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt +done From 2ae1cb2d410bc28f0f4fc499d72597b5d5b36a75 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 14 Dec 2023 17:42:44 +0100 Subject: [PATCH 52/84] Added prioritization level 1 and 2 quantities for all five imaging modes of the IKZ Apreo, tested generation of NeXus files for all five test images from Robert Kernke successfully, next step continue with eds --- dev-requirements.txt 
| 4 + image_tiff_tfs_to_nexus.ods | Bin 17720 -> 11631 bytes imgs.batch.sh | 4 +- pynxtools/dataconverter/readers/em/reader.py | 2 +- .../readers/em/subparsers/image_tiff_tfs.py | 34 +- .../em/subparsers/image_tiff_tfs_cfg.py | 416 ++---------------- .../em/subparsers/image_tiff_tfs_concepts.py | 301 +++++++++++++ .../em/subparsers/image_tiff_tfs_modifier.py | 40 +- pyproject.toml | 1 + 9 files changed, 377 insertions(+), 425 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py diff --git a/dev-requirements.txt b/dev-requirements.txt index d8ad60a09..9941ac2a7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -65,6 +65,8 @@ decorator==5.1.1 # via # ipyparallel # ipython +defusedxml==0.7.1 + # via odfpy diffpy-structure==3.1.0 # via # diffsims @@ -298,6 +300,8 @@ numpy==1.24.4 # zarr numpy-quaternion==2022.4.3 # via orix +odfpy==1.4.1 + # via pynxtools (pyproject.toml) orix==0.11.1 # via # diffsims diff --git a/image_tiff_tfs_to_nexus.ods b/image_tiff_tfs_to_nexus.ods index f3fc491b4a8bac27c6a828b0b0de8a3732fc185e..835a84832dfbee901a3cb0fa4ea443627121a8f2 100644 GIT binary patch delta 8872 [base85-encoded binary delta of the mapping spreadsheet omitted; it carries no human-readable content]
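# A minimal sketch (editorial, not part of the patch): the mapping spreadsheet
# above is shipped as a binary delta and cannot be shown here. Its content can
# be inspected with odfpy (added to the dependencies above); the assumption
# that each row pairs a TFS/FEI term with a NeXus template path is
# hypothetical, and repeated-cell markup is ignored for brevity.
from odf.opendocument import load
from odf.table import Table, TableRow, TableCell
from odf.teletype import extractText

doc = load("image_tiff_tfs_to_nexus.ods")
for sheet in doc.spreadsheet.getElementsByType(Table):
    for row in sheet.getElementsByType(TableRow):
        # collect the plain-text content of every cell of the row
        cells = [extractText(cell) for cell in row.getElementsByType(TableCell)]
        print(cells)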
diff --git a/imgs.batch.sh b/imgs.batch.sh index c18aa0a23..ae80412cf 100755 --- a/imgs.batch.sh +++ b/imgs.batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/" +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/ikz_robert/" # trailing comment gives the detector mode examples="kit/FeMoOx_AntiA_04_1k5x_CN.tif" @@ -10,6 +10,8 @@ examples="ikz_robert/T3_image.tif" examples="ikz_robert/ETD_image.tif" # ETD examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam +examples="0c8nA_3deg_003_AplusB_test.tif ALN_baoh_021.tif T3_image.tif ETD_image.tif NavCam_normal_vis_light_ccd.tif" + for example in $examples; do echo $example diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 8108cbcb7..971579de4 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -22,7 +22,7 @@ from typing import Tuple, Any from pynxtools.dataconverter.readers.base.reader import BaseReader -from pynxtools.pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef +from pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef # from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser # from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py index bc9acd948..09c33f997 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py @@ -24,8 +24,10 @@ from PIL.TiffTags import TAGS from pynxtools.dataconverter.readers.em.subparsers.image_tiff import TiffSubParser +from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_concepts import \ + get_fei_parent_concepts, get_fei_childs from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_cfg import \ - TiffTfsConcepts, TiffTfsToNeXusCfg, get_fei_parent_concepts, get_fei_childs + TIFF_TFS_TO_NEXUS_CFG from pynxtools.dataconverter.readers.em.utils.image_utils import \ sort_ascendingly_by_second_argument, if_str_represents_float from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ import variadic_path_to_specific_path @@ -88,7 +90,7 @@ def get_metadata(self): if pos != -1: tfs_parent_concepts_byte_offset[concept] = pos else: - raise ValueError(f"Expected block with metadata for concept [{concept}] were not found !") + print(f"Instance of concept [{concept}] was not found !") print(tfs_parent_concepts_byte_offset) sequence = [] # decide I/O order in which metadata for childs of parent concepts will be read @@ -140,6 +142,8 @@ def parse_and_normalize(self): if self.supported is True: print(f"Parsing via ThermoFisher-specific metadata...") self.get_metadata() + # for key in self.tmp["meta"].keys(): + # print(f"{key}") else: print(f"{self.file_path} is not a ThermoFisher-specific " f"TIFF file that this parser can process !") @@ -185,11 +189,14 @@ def process_event_data_em_data(self, template: dict) -> dict: # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d 
template[f"{trg}/intensity/@long_name"] = f"Signal" - sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"], - "y": self.tmp["meta"]["EScan/PixelHeight"]} - shp = np.shape(np.array(fp)) - nxy = {"x": shp[1], "y": shp[0]} + sxy = {"x": 1., "y": 1.} scan_unit = {"x": "m", "y": "m"} # assuming FEI reports SI units + # we may face the CCD overview camera for the chamber for which there might not be a calibration! + if ("EScan/PixelWidth" in self.tmp["meta"].keys()) and ("EScan/PixelHeight" in self.tmp["meta"].keys()): + sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"], + "y": self.tmp["meta"]["EScan/PixelHeight"]} + else: + scan_unit = {"x": "px", "y": "px"} # no calibration available, fall back to pixel units + nxy = {"x": np.shape(np.array(fp))[1], "y": np.shape(np.array(fp))[0]} # TODO::be careful we assume here a very specific coordinate system # however the TIFF file gives no clue, TIFF just documents in which order # it arranges a bunch of pixels that have streamed in into an n-d tiling @@ -219,9 +226,14 @@ def process_event_data_em_metadata(self, template: dict) -> dict: # contextualization to understand how the image relates to the EM session print(f"Mapping some of the TFS/FEI metadata concepts onto NeXus concepts") identifier = [self.entry_id, self.event_id, 1] - for nx_path, modifier in TiffTfsToNeXusCfg.items(): - if (nx_path != "IGNORE") and (nx_path != "UNCLEAR"): - trg = variadic_path_to_specific_path(nx_path, identifier) - template[trg] = get_nexus_value(modifier, self.tmp["meta"]) - # print(f"nx_path: {nx_path}, trg: {trg}, tfs_concept: {template[trg]}\n") + for tpl in TIFF_TFS_TO_NEXUS_CFG: + if isinstance(tpl, tuple): + trg = variadic_path_to_specific_path(tpl[0], identifier) + if len(tpl) == 2: + template[trg] = tpl[1] + if len(tpl) == 3: + # nxpath, modifier, value to load from and eventually to be modified + retval = get_nexus_value(tpl[1], tpl[2], self.tmp["meta"]) + if retval is not None: + template[trg] = retval return template diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py index 2ff14e344..68c4d5352 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_cfg.py @@ -17,397 +17,35 @@ """Configuration of the image_tiff_tfs subparser.""" -from typing import List - - -TiffTfsConcepts = ["User/Date", - "User/Time", - "User/User", - "User/UserText", - "User/UserTextUnicode", - "System/Type", - "System/Dnumber", - "System/Software", - "System/BuildNr", - "System/Source", - "System/Column", - "System/FinalLens", - "System/Chamber", - "System/Stage", - "System/Pump", - "System/ESEM", - "System/Aperture", - "System/Scan", - "System/Acq", - "System/EucWD", - "System/SystemType", - "System/DisplayWidth", - "System/DisplayHeight", - "Beam/HV", - "Beam/Spot", - "Beam/StigmatorX", - "Beam/StigmatorY", - "Beam/BeamShiftX", - "Beam/BeamShiftY", - "Beam/ScanRotation", - "Beam/ImageMode", - "Beam/FineStageBias", - "Beam/Beam", - "Beam/Scan", - "EBeam/Source", - "EBeam/ColumnType", - "EBeam/FinalLens", - "EBeam/Acq", - "EBeam/Aperture", - "EBeam/ApertureDiameter", - "EBeam/HV", - "EBeam/HFW", - "EBeam/VFW", - "EBeam/WD", - "EBeam/BeamCurrent", - "EBeam/TiltCorrectionIsOn", - "EBeam/DynamicFocusIsOn", - "EBeam/DynamicWDIsOn", - "EBeam/ScanRotation", - "EBeam/LensMode", - "EBeam/LensModeA", - "EBeam/ATubeVoltage", - "EBeam/UseCase", - "EBeam/SemOpticalMode", - "EBeam/ImageMode", - "EBeam/SourceTiltX", - "EBeam/SourceTiltY", - "EBeam/StageX", - "EBeam/StageY", - 
"EBeam/StageZ", - "EBeam/StageR", - "EBeam/StageTa", - "EBeam/StageTb", - "EBeam/StigmatorX", - "EBeam/StigmatorY", - "EBeam/BeamShiftX", - "EBeam/BeamShiftY", - "EBeam/EucWD", - "EBeam/EmissionCurrent", - "EBeam/TiltCorrectionAngle", - "EBeam/PreTilt", - "EBeam/WehneltBias", - "EBeam/BeamMode", - "EBeam/MagnificationCorrection", - "GIS/Number", - "Scan/InternalScan", - "Scan/Dwelltime", - "Scan/PixelWidth", - "Scan/PixelHeight", - "Scan/HorFieldsize", - "Scan/VerFieldsize", - "Scan/Average", - "Scan/Integrate", - "Scan/FrameTime", - "EScan/Scan", - "EScan/InternalScan", - "EScan/Dwell", - "EScan/PixelWidth", - "EScan/PixelHeight", - "EScan/HorFieldsize", - "EScan/VerFieldsize", - "EScan/FrameTime", - "EScan/LineTime", - "EScan/Mainslock", - "EScan/LineIntegration", - "EScan/ScanInterlacing", - "Stage/StageX", - "Stage/StageY", - "Stage/StageZ", - "Stage/StageR", - "Stage/StageT", - "Stage/StageTb", - "Stage/SpecTilt", - "Stage/WorkingDistance", - "Stage/ActiveStage", - "Image/DigitalContrast", - "Image/DigitalBrightness", - "Image/DigitalGamma", - "Image/Average", - "Image/Integrate", - "Image/ResolutionX", - "Image/ResolutionY", - "Image/DriftCorrected", - "Image/ZoomFactor", - "Image/ZoomPanX", - "Image/ZoomPanY", - "Image/MagCanvasRealWidth", - "Image/MagnificationMode", - "Image/ScreenMagCanvasRealWidth", - "Image/ScreenMagnificationMode", - "Image/PostProcessing", - "Image/Transformation", - "Vacuum/ChPressure", - "Vacuum/Gas", - "Vacuum/UserMode", - "Vacuum/Humidity", - "Specimen/Temperature", - "Specimen/SpecimenCurrent", - "Detectors/Number", - "Detectors/Name", - "Detectors/Mode", - "T2/Contrast", - "T2/Brightness", - "T2/Signal", - "T2/ContrastDB", - "T2/BrightnessDB", - "T2/Setting", - "T2/MinimumDwellTime", - "Accessories/Number", - "EBeamDeceleration/ModeOn", - "EBeamDeceleration/LandingEnergy", - "EBeamDeceleration/ImmersionRatio", - "EBeamDeceleration/StageBias", - "CompoundLensFilter/IsOn", - "CompoundLensFilter/ThresholdEnergy", - "PrivateFei/BitShift", - "PrivateFei/DataBarSelected", - "PrivateFei/DataBarAvailable", - "PrivateFei/TimeOfCreation", - "PrivateFei/DatabarHeight", - "HiResIllumination/BrightFieldIsOn", - "HiResIllumination/BrightFieldValue", - "HiResIllumination/DarkFieldIsOn", - "HiResIllumination/DarkFieldValue", - "EasyLift/Rotation", - "HotStageMEMS/HeatingCurrent", - "HotStageMEMS/HeatingVoltage", - "HotStageMEMS/TargetTemperature", - "HotStageMEMS/ActualTemperature", - "HotStageMEMS/HeatingPower", - "HotStageMEMS/SampleBias", - "HotStageMEMS/SampleResistance", - "HotStage/TargetTemperature", - "HotStage/ActualTemperature", - "HotStage/SampleBias", - "HotStage/ShieldBias", - "HotStageHVHS/TargetTemperature", - "HotStageHVHS/ActualTemperature", - "HotStageHVHS/SampleBias", - "HotStageHVHS/ShieldBias", - "ColdStage/TargetTemperature", - "ColdStage/ActualTemperature", - "ColdStage/Humidity", - "ColdStage/SampleBias"] - -# there is more to know and understand than just knowing TFS/FEI uses -# the above-mentioned concepts in their taxonomy: -# take the example of System/Source for which an example file (instance) has the -# value "FEG" -# similar like in NeXus "System/Source" labels a concept for which (assumption!) there -# is a controlled enumeration of symbols possible (as the example shows "FEG" is one such -# allowed symbol of the enumeration. 
-# The key issue is that the symbols for the leaf (here "FEG") means nothing eventually -# when one has another semantic world-view, like in NOMAD metainfo or NeXus -# (only us) humans understand that what TFS/FEI likely means with the symbol -# "FEG" is exactly the same as what we mean in NeXus when setting emitter_type of -# NXebeam_column to "cold_cathode_field_emitter" -# world with the controlled enumeration value "other" because we do not know -# if FEG means really a filament or a cold_cathode_field_emitter TfsToNexusConceptMapping = {"System/Source/FEG": "cold_field_cathode_emitter"} -def get_fei_parent_concepts() -> List: - """Get list of unique FEI parent concepts.""" - parent_concepts = set() - for entry in TiffTfsConcepts: - if isinstance(entry, str) and entry.count("/") == 1: - parent_concepts.add(entry.split("/")[0]) - return list(parent_concepts) - - -def get_fei_childs(concept: str) -> List: - """Get all children of FEI parent concept.""" - child_concepts = set() - for entry in TiffTfsConcepts: - if isinstance(entry, str) and entry.count("/") == 1: - if entry.startswith(f"{concept}/") is True: - child_concepts.add(entry.split("/")[1]) - return list(child_concepts) - - # "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/start_time" -TiffTfsToNeXusCfg = {"IGNORE": {"fun": "ikz_berlin_apreo_iso8601", "terms": ["User/Date", "User/Time"]}, - "IGNORE": {"fun": "load_from", "terms": "User/User"}, - "IGNORE": {"fun": "load_from", "terms": "User/UserText"}, - "IGNORE": {"fun": "load_from", "terms": "User/UserTextUnicode"}, - "IGNORE": {"fun": "load_from", "terms": "System/Type"}, - "IGNORE": {"fun": "load_from", "terms": "System/Dnumber"}, - "IGNORE": {"fun": "load_from", "terms": "System/Software"}, - "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier": {"fun": "load_from", "terms": "System/BuildNr"}, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type": {"fun": "tfs_to_nexus", "terms": "System/Source"}, - "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor": "FEI", - "IGNORE": {"fun": "load_from", "terms": "System/FinalLens"}, - "IGNORE": {"fun": "load_from", "terms": "System/Chamber"}, - "IGNORE": {"fun": "load_from", "terms": "System/Stage"}, - "IGNORE": {"fun": "load_from", "terms": "System/Pump"}, - "IGNORE": {"fun": "load_from", "terms": "System/ESEM"}, - "IGNORE": {"fun": "load_from", "terms": "System/Aperture"}, - "IGNORE": {"fun": "load_from", "terms": "System/Scan"}, - "IGNORE": {"fun": "load_from", "terms": "System/Acq"}, - "IGNORE": {"fun": "load_from", "terms": "System/EucWD"}, - "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/model": {"fun": "load_from", "terms": "System/SystemType"}, - "IGNORE": {"fun": "load_from", "terms": "System/DisplayWidth"}, - "IGNORE": {"fun": "load_from", "terms": "System/DisplayHeight"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/HV"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/Spot"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/StigmatorX"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/StigmatorY"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/BeamShiftX"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/BeamShiftY"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/ScanRotation"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/ImageMode"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/FineStageBias"}, - "IGNORE": {"fun": "load_from", "terms": "Beam/Beam"}, - "IGNORE": {"fun": 
"load_from", "terms": "Beam/Scan"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/Source"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/ColumnType"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/FinalLens"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/Acq"}, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/description": {"fun": "load_from", "terms": "EBeam/Aperture"}, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value": {"fun": "load_from", "terms": "EBeam/ApertureDiameter"}, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value/@units": "m", - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage": {"fun": "load_from", "terms": "EBeam/HV"}, - "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units": "V", - "IGNORE": {"fun": "load_from", "terms": "EBeam/HFW"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/VFW"}, - "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance": {"fun": "load_from", "terms": "EBeam/WD"}, - "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance/@units": "m", - "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current": {"fun": "load_from", "terms": "EBeam/WD"}, - "/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current/@units": "A", - "IGNORE": {"fun": "load_from", "terms": "EBeam/TiltCorrectionIsOn"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/DynamicFocusIsOn"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/DynamicWDIsOn"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/ScanRotation"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/LensMode"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/LensModeA"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/ATubeVoltage"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/UseCase"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/SemOpticalMode"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/ImageMode"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/SourceTiltX"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/SourceTiltY"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StageX"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StageY"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StageZ"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StageR"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StageTa"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StageTb"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StigmatorX"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/StigmatorY"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamShiftX"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamShiftY"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/EucWD"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/EmissionCurrent"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/TiltCorrectionAngle"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/PreTilt"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/WehneltBias"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/BeamMode"}, - "IGNORE": {"fun": "load_from", "terms": "EBeam/MagnificationCorrection"}, - "IGNORE": {"fun": "load_from", "terms": "GIS/Number"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/InternalScan"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/Dwelltime"}, - "IGNORE": 
{"fun": "load_from", "terms": "Scan/PixelWidth"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/PixelHeight"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/HorFieldsize"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/VerFieldsize"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/Average"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/Integrate"}, - "IGNORE": {"fun": "load_from", "terms": "Scan/FrameTime"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/Scan"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/InternalScan"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/Dwell"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/PixelWidth"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/PixelHeight"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/HorFieldsize"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/VerFieldsize"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/FrameTime"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/LineTime"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/Mainslock"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/LineIntegration"}, - "IGNORE": {"fun": "load_from", "terms": "EScan/ScanInterlacing"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/StageX"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/StageY"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/StageZ"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/StageR"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/StageT"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/StageTb"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/SpecTilt"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/WorkingDistance"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ActiveStage"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/[Image]"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/DigitalContrast"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/DigitalBrightness"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/DigitalGamma"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/Average"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/Integrate"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ResolutionX"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ResolutionY"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/DriftCorrected"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ZoomFactor"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ZoomPanX"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ZoomPanY"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/MagCanvasRealWidth"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/MagnificationMode"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ScreenMagCanvasRealWidth"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/ScreenMagnificationMode"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/PostProcessing"}, - "IGNORE": {"fun": "load_from", "terms": "StageX/Transformation"}, - "IGNORE": {"fun": "load_from", "terms": "Vacuum/ChPressure"}, - "IGNORE": {"fun": "load_from", "terms": "Vacuum/Gas"}, - "IGNORE": {"fun": "load_from", "terms": "Vacuum/UserMode"}, - "IGNORE": {"fun": "load_from", "terms": "Vacuum/Humidity"}, - "IGNORE": {"fun": "load_from", "terms": "Specimen/Temperature"}, - "IGNORE": {"fun": "load_from", "terms": "Specimen/SpecimenCurrent"}, - "IGNORE": {"fun": "load_from", "terms": "Detectors/Number"}, - "/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]/local_name": {"fun": "load_from", "terms": "Detectors/Name"}, - "IGNORE": {"fun": "load_from", "terms": 
"Detectors/Mode"}, - "IGNORE": {"fun": "load_from", "terms": "T2/Contrast"}, - "IGNORE": {"fun": "load_from", "terms": "T2/Brightness"}, - "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type": {"fun": "load_from_lower_case", "terms": "T2/Signal"}, - "IGNORE": {"fun": "load_from", "terms": "T2/ContrastDB"}, - "IGNORE": {"fun": "load_from", "terms": "T2/BrightnessDB"}, - "IGNORE": {"fun": "load_from", "terms": "T2/Setting"}, - "IGNORE": {"fun": "load_from", "terms": "T2/MinimumDwellTime"}, - "IGNORE": {"fun": "load_from", "terms": "Accessories/Number"}, - "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/ModeOn"}, - "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/LandingEnergy"}, - "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/ImmersionRatio"}, - "IGNORE": {"fun": "load_from", "terms": "EBeamDeceleration/StageBias"}, - "IGNORE": {"fun": "load_from", "terms": "CompoundLensFilter/IsOn"}, - "IGNORE": {"fun": "load_from", "terms": "CompoundLensFilter/ThresholdEnergy"}, - "IGNORE": {"fun": "load_from", "terms": "PrivateFei/BitShift"}, - "IGNORE": {"fun": "load_from", "terms": "PrivateFei/DataBarSelected"}, - "IGNORE": {"fun": "load_from", "terms": "PrivateFei/DataBarAvailable"}, - "IGNORE": {"fun": "load_from", "terms": "PrivateFei/TimeOfCreation"}, - "IGNORE": {"fun": "load_from", "terms": "PrivateFei/DatabarHeight"}, - "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/BrightFieldIsOn"}, - "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/BrightFieldValue"}, - "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/DarkFieldIsOn"}, - "IGNORE": {"fun": "load_from", "terms": "HiResIllumination/DarkFieldValue"}, - "IGNORE": {"fun": "load_from", "terms": "EasyLift/Rotation"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/HeatingCurrent"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/HeatingVoltage"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/TargetTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/ActualTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/HeatingPower"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/SampleBias"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageMEMS/SampleResistance"}, - "IGNORE": {"fun": "load_from", "terms": "HotStage/TargetTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "HotStage/ActualTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "HotStage/SampleBias"}, - "IGNORE": {"fun": "load_from", "terms": "HotStage/ShieldBias"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/TargetTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/ActualTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/SampleBias"}, - "IGNORE": {"fun": "load_from", "terms": "HotStageHVHS/ShieldBias"}, - "IGNORE": {"fun": "load_from", "terms": "ColdStage/TargetTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "ColdStage/ActualTemperature"}, - "IGNORE": {"fun": "load_from", "terms": "ColdStage/Humidity"}, - "IGNORE": {"fun": "load_from", "terms": "ColdStage/SampleBias"}} +TIFF_TFS_TO_NEXUS_CFG = [('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/DETECTOR[detector*]/mode', 'load_from', 'Detectors/Mode'), + ('/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]/local_name', 'load_from', 'Detectors/Name'), + 
('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/description', 'load_from', 'EBeam/Aperture'), + ('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value', 'load_from', 'EBeam/ApertureDiameter'), + ('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]/value/@units', 'm'), + ('/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current', 'load_from', 'EBeam/BeamCurrent'), + ('/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current/@units', 'A'), + ('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage', 'load_from', 'EBeam/HV'), + ('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units', 'V'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_1', 'load_from_rad_to_deg', 'EBeam/StageTa'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_1/@units', 'deg'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_2', 'load_from_rad_to_deg', 'EBeam/StageTb'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_2/@units', 'deg'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/EBEAM_COLUMN[ebeam_column]/operation_mode', 'load_from', 'EBeam/UseCase'), + ('/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance', 'load_from', 'EBeam/WD'), + ('/ENTRY[entry*]/measurement/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance/@units', 'm'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type', 'load_from_lower_case', 'ETD/Signal'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/SCANBOX_EM[scanbox_em]/dwell_time', 'load_from', 'Scan/Dwelltime'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/SCANBOX_EM[scanbox_em]/dwell_time/@units', 's'), + ('/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier', 'load_from', 'System/BuildNr'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/SCANBOX_EM[scanbox_em]/scan_schema', 'load_from', 'System/Scan'), + ('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type', 'load_from', 'System/Source'), + ('/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor', 'FEI'), + ('/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/model', 'load_from', 'System/SystemType'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type', 'load_from_lower_case', 'T1/Signal'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type', 'load_from_lower_case', 'T2/Signal'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type', 'load_from_lower_case', 'T3/Signal')] diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py 
b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py new file mode 100644 index 000000000..8e7f2d69d --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py @@ -0,0 +1,301 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Configuration of the image_tiff_tfs subparser.""" + +from typing import List + + +# this example exemplifies the situation for the TFS/FEI SEM Apreo from the IKZ of Prof. Martin Albrecht +# thanks to Robert Kernke it was clarified the microscope has several detectors and imaging modes +# these imaging modes control the specific TFS/FEI concept instances stored in the respective TIFF file +# we here use a glossary of all concepts which we were able to parse out from an example image +# taken for each detector and imaging mode +# we then assume that one can work with the joint set of these concepts + +TIFF_TFS_PARENT_CONCEPTS = ["Accessories", + "Beam", + "ColdStage", + "CompoundLensFilter", + "Detectors", + "EBeam", + "EBeamDeceleration", + "EScan", + "ETD", + "EasyLift", + "GIS", + "HiResIllumination", + "HotStage", + "HotStageHVHS", + "HotStageMEMS", + "IRBeam", + "Image", + "Nav-Cam", + "PrivateFei", + "Scan", + "Specimen", + "Stage", + "System", + "T1", + "T2", + "T3", + "User", + "Vacuum"] + +TIFF_TFS_ALL_CONCEPTS = ["Accessories/Number", + "Beam/Beam", + "Beam/BeamShiftX", + "Beam/BeamShiftY", + "Beam/FineStageBias", + "Beam/HV", + "Beam/ImageMode", + "Beam/Scan", + "Beam/ScanRotation", + "Beam/Spot", + "Beam/StigmatorX", + "Beam/StigmatorY", + "ColdStage/ActualTemperature", + "ColdStage/Humidity", + "ColdStage/SampleBias", + "ColdStage/TargetTemperature", + "CompoundLensFilter/IsOn", + "CompoundLensFilter/ThresholdEnergy", + "Detectors/Mode", + "Detectors/Name", + "Detectors/Number", + "EasyLift/Rotation", + "EBeam/Acq", + "EBeam/Aperture", + "EBeam/ApertureDiameter", + "EBeam/ATubeVoltage", + "EBeam/BeamCurrent", + "EBeam/BeamMode", + "EBeam/BeamShiftX", + "EBeam/BeamShiftY", + "EBeam/ColumnType", + "EBeam/DynamicFocusIsOn", + "EBeam/DynamicWDIsOn", + "EBeam/EmissionCurrent", + "EBeam/EucWD", + "EBeam/FinalLens", + "EBeam/HFW", + "EBeam/HV", + "EBeam/ImageMode", + "EBeam/LensMode", + "EBeam/LensModeA", + "EBeam/MagnificationCorrection", + "EBeam/PreTilt", + "EBeam/ScanRotation", + "EBeam/SemOpticalMode", + "EBeam/Source", + "EBeam/SourceTiltX", + "EBeam/SourceTiltY", + "EBeam/StageR", + "EBeam/StageTa", + "EBeam/StageTb", + "EBeam/StageX", + "EBeam/StageY", + "EBeam/StageZ", + "EBeam/StigmatorX", + "EBeam/StigmatorY", + "EBeam/TiltCorrectionAngle", + "EBeam/TiltCorrectionIsOn", + "EBeam/UseCase", + "EBeam/VFW", + "EBeam/WD", + "EBeam/WehneltBias", + "EBeamDeceleration/ImmersionRatio", + "EBeamDeceleration/LandingEnergy", + "EBeamDeceleration/ModeOn", + "EBeamDeceleration/StageBias", + "EScan/Dwell", + "EScan/FrameTime", + "EScan/HorFieldsize", + "EScan/InternalScan", + "EScan/LineIntegration", 
+ "EScan/LineTime", + "EScan/Mainslock", + "EScan/PixelHeight", + "EScan/PixelWidth", + "EScan/Scan", + "EScan/ScanInterlacing", + "EScan/VerFieldsize", + "ETD/Brightness", + "ETD/BrightnessDB", + "ETD/Contrast", + "ETD/ContrastDB", + "ETD/Grid", + "ETD/MinimumDwellTime", + "ETD/Mix", + "ETD/Setting", + "ETD/Signal", + "GIS/Number", + "HiResIllumination/BrightFieldIsOn", + "HiResIllumination/BrightFieldValue", + "HiResIllumination/DarkFieldIsOn", + "HiResIllumination/DarkFieldValue", + "HotStage/ActualTemperature", + "HotStage/SampleBias", + "HotStage/ShieldBias", + "HotStage/TargetTemperature", + "HotStageHVHS/ActualTemperature", + "HotStageHVHS/SampleBias", + "HotStageHVHS/ShieldBias", + "HotStageHVHS/TargetTemperature", + "HotStageMEMS/ActualTemperature", + "HotStageMEMS/HeatingCurrent", + "HotStageMEMS/HeatingPower", + "HotStageMEMS/HeatingVoltage", + "HotStageMEMS/SampleBias", + "HotStageMEMS/SampleResistance", + "HotStageMEMS/TargetTemperature", + "Image/Average", + "Image/DigitalBrightness", + "Image/DigitalContrast", + "Image/DigitalGamma", + "Image/DriftCorrected", + "Image/Integrate", + "Image/MagCanvasRealWidth", + "Image/MagnificationMode", + "Image/PostProcessing", + "Image/ResolutionX", + "Image/ResolutionY", + "Image/ScreenMagCanvasRealWidth", + "Image/ScreenMagnificationMode", + "Image/Transformation", + "Image/ZoomFactor", + "Image/ZoomPanX", + "Image/ZoomPanY", + "IRBeam/HFW", + "IRBeam/n", + "IRBeam/ScanRotation", + "IRBeam/SiDepth", + "IRBeam/StageR", + "IRBeam/StageTa", + "IRBeam/StageTb", + "IRBeam/StageX", + "IRBeam/StageY", + "IRBeam/StageZ", + "IRBeam/VFW", + "IRBeam/WD", + "PrivateFei/BitShift", + "PrivateFei/DataBarAvailable", + "PrivateFei/DatabarHeight", + "PrivateFei/DataBarSelected", + "PrivateFei/TimeOfCreation", + "Scan/Average", + "Scan/Dwelltime", + "Scan/FrameTime", + "Scan/HorFieldsize", + "Scan/Integrate", + "Scan/InternalScan", + "Scan/PixelHeight", + "Scan/PixelWidth", + "Scan/VerFieldsize", + "Specimen/SpecimenCurrent", + "Specimen/Temperature", + "Stage/ActiveStage", + "Stage/SpecTilt", + "Stage/StageR", + "Stage/StageT", + "Stage/StageTb", + "Stage/StageX", + "Stage/StageY", + "Stage/StageZ", + "Stage/WorkingDistance", + "System/Acq", + "System/Aperture", + "System/BuildNr", + "System/Chamber", + "System/Column", + "System/DisplayHeight", + "System/DisplayWidth", + "System/Dnumber", + "System/ESEM", + "System/EucWD", + "System/FinalLens", + "System/Pump", + "System/Scan", + "System/Software", + "System/Source", + "System/Stage", + "System/SystemType", + "System/Type", + "T1/Brightness", + "T1/BrightnessDB", + "T1/Contrast", + "T1/ContrastDB", + "T1/MinimumDwellTime", + "T1/Setting", + "T1/Signal", + "T2/Brightness", + "T2/BrightnessDB", + "T2/Contrast", + "T2/ContrastDB", + "T2/MinimumDwellTime", + "T2/Setting", + "T2/Signal", + "T3/Brightness", + "T3/BrightnessDB", + "T3/Contrast", + "T3/ContrastDB", + "T3/MinimumDwellTime", + "T3/Signal", + "User/Date", + "User/Time", + "User/User", + "User/UserText", + "User/UserTextUnicode", + "Vacuum/ChPressure", + "Vacuum/Gas", + "Vacuum/Humidity", + "Vacuum/UserMode"] + +# there is more to know and understand than just knowing TFS/FEI uses +# the above-mentioned concepts in their taxonomy: +# take the example of System/Source for which an example file (instance) has the +# value "FEG" +# similar like in NeXus "System/Source" labels a concept for which (assumption!) there +# is a controlled enumeration of symbols possible (as the example shows "FEG" is one such +# allowed symbol of the enumeration. 
+# The key issue is that the symbols for the leaf (here "FEG") means nothing eventually +# when one has another semantic world-view, like in NOMAD metainfo or NeXus +# (only us) humans understand that what TFS/FEI likely means with the symbol +# "FEG" is exactly the same as what we mean in NeXus when setting emitter_type of +# NXebeam_column to "cold_cathode_field_emitter" +# world with the controlled enumeration value "other" because we do not know +# if FEG means really a filament or a cold_cathode_field_emitter + + +def get_fei_parent_concepts() -> List: + """Get list of unique FEI parent concepts.""" + return TIFF_TFS_PARENT_CONCEPTS + # parent_concepts = set() + # for entry in TiffTfsConcepts: + # if isinstance(entry, str) and entry.count("/") == 1: + # parent_concepts.add(entry.split("/")[0]) + # return list(parent_concepts) + + +def get_fei_childs(parent_concept: str) -> List: + """Get all children of FEI parent concept.""" + child_concepts = set() + for entry in TIFF_TFS_ALL_CONCEPTS: + if isinstance(entry, str) and entry.count("/") == 1: + if entry.startswith(f"{parent_concept}/") is True: + child_concepts.add(entry.split("/")[1]) + return list(child_concepts) diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py index dddd59bd3..2a4a5f5e9 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_modifier.py @@ -19,31 +19,25 @@ # pylint: disable=no-member -from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs_cfg import \ - TfsToNexusConceptMapping +from numpy import pi -def get_nexus_value(modifier, metadata: dict): - """Interpret a functional mapping using data from dct via calling modifiers.""" - if isinstance(modifier, dict): - # different commands are available - if set(["fun", "terms"]) == set(modifier.keys()): - if modifier["fun"] == "load_from": - if modifier["terms"] in metadata.keys(): - return metadata[modifier['terms']] - else: - raise ValueError(f"Unable to interpret modififier load_from for argument {modifier['terms']}") - if modifier["fun"] == "tfs_to_nexus": - # print(metadata[modifier['terms']]) - if f"{modifier['terms']}/{metadata[modifier['terms']]}" in TfsToNexusConceptMapping.keys(): - return TfsToNexusConceptMapping[f"{modifier['terms']}/{metadata[modifier['terms']]}"] - else: - raise ValueError(f"Unable to interpret modifier tfs_to_nexus for argument {modifier['terms']}/{metadata[modifier['terms']]}") - else: - print(f"WARNING::Modifier {modifier} is currently not implemented !") - # elif set(["link"]) == set(modifier.keys()), with the jsonmap reader Sherjeel conceptualized "link" +def get_nexus_value(modifier, qnt_name, metadata: dict): + """Interpret a functional mapping and modifier on qnt_name loaded from metadata.""" + if qnt_name in metadata.keys(): + if modifier == "load_from": + return metadata[qnt_name] + elif modifier == "load_from_rad_to_deg": + if qnt_name in metadata.keys(): + return metadata[qnt_name] / pi * 180. 
+ elif modifier == "load_from_lower_case": + if isinstance(metadata[qnt_name], str): + return metadata[qnt_name].lower() + # print(f"WARNING modifier {modifier}, qnt_name {qnt_name} metadata['qnt_name'] not string !") return None - elif isinstance(modifier, str): - return modifier # metadata[modifier] else: + # print(f"WARNING modifier {modifier}, qnt_name {qnt_name} not found !") return None + # if f"{modifier['terms']}/{metadata[modifier['terms']]}" in TfsToNexusConceptMapping.keys(): + # return TfsToNexusConceptMapping[f"{modifier['terms']}/{metadata[modifier['terms']]}"] + # elif set(["link"]) == set(modifier.keys()), with the jsonmap reader Sherjeel conceptualized "link" diff --git a/pyproject.toml b/pyproject.toml index f8e75cf10..21ebf7b89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "PyYAML>=6.0", "numpy>=1.21.2", "pandas>=1.3.2", + "odfpy>=1.4.1", "ase>=3.19.0", "flatdict>=4.0.1", "hyperspy>=1.7.6", From 76d25fe368459c1ce44501ff5bd66eebfa3858eb Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Fri, 15 Dec 2023 18:16:32 +0100 Subject: [PATCH 53/84] Added code to read EDAX's FOVIMAGE their cumbersome SPD from APEX files in preparation to normalize into NeXus concepts NXimage_r_set and NXspectrum_set instance that are required for EDS --- .../em/concepts/nxs_em_spectrum_set.py | 70 -------------- .../readers/em/concepts/nxs_image_r_set.py | 75 +++++++++++++++ ..._em_image_r_set.py => nxs_spectrum_set.py} | 12 +-- .../readers/em/subparsers/hfive_apex.py | 94 +++++++++++++++++++ 4 files changed, 175 insertions(+), 76 deletions(-) delete mode 100644 pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py create mode 100644 pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py rename pynxtools/dataconverter/readers/em/concepts/{nxs_em_image_r_set.py => nxs_spectrum_set.py} (86%) diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py deleted file mode 100644 index 50af5f83a..000000000 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_em_spectrum_set.py +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -"""NXem spectrum set (element of a labelled property graph) to store instance data.""" - -# pylint: disable=no-member,too-few-public-methods - - -from typing import Dict - -from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject - - -NX_EM_SPECTRUM_SET_HDF_PATH = [ - "PROCESS-group", - "PROCESS/detector_identifier-field", - "PROCESS/source-group", - "PROCESS/source/algorithm-field", - "PROCESS/source/checksum-field", - "PROCESS/source/path-field", - "PROCESS/source/type-field", - "stack-group", - "stack/axis_energy-field", - "stack/axis_energy@long_name-attribute", - "stack/axis_x-field", - "stack/axis_x@long_name-attribute", - "stack/axis_y-field", - "stack/axis_y@long_name-attribute", - "stack/intensity-field", - "stack/intensity@long_name-attribute", - "stack/title-field", - "stack@axes-attribute", - "stack@AXISNAME_indices-attribute", - "stack@long_name-attribute", - "stack@signal-attribute", - "summary-group", - "summary/axis_energy-field", - "summary/axis_energy@long_name-attribute", - "summary/title-field", - "summary@axes-attribute", - "summary@AXISNAME_indices-attribute", - "summary@long_name-attribute", - "summary@signal-attribute"] -# this one needs an update ! - - -class NxEmSpectrumSet(): - def __init__(self): - self.tmp: Dict = {} - for entry in NX_EM_SPECTRUM_SET_HDF_PATH: - if entry.endswith("-field") is True: - self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dset") - elif entry.endswith("-attribute") is True: - self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attr") - else: - self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="grp") diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py new file mode 100644 index 000000000..ba8b8a40b --- /dev/null +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py @@ -0,0 +1,75 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""NXem spectrum set (element of a labelled property graph) to store instance data.""" + +# pylint: disable=no-member,too-few-public-methods + + +from typing import Dict + +from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject + + +NX_IMAGE_REAL_SPACE_SET_HDF_PATH = ["image_oned/axis_x-field", + "image_oned/axis_x@long_name-attribute", + "image_oned/intensity-field", + "image_threed/axis_x-field", + "image_threed/axis_x@long_name-attribute", + "image_threed/axis_y-field", + "image_threed/axis_y@long_name-attribute", + "image_threed/axis_z-field", + "image_threed/axis_z@long_name-attribute", + "image_threed/intensity-field", + "image_twod/axis_x-field", + "image_twod/axis_x@long_name-attribute", + "image_twod/axis_y-field", + "image_twod/axis_y@long_name-attribute", + "image_twod/intensity-field", + "stack_oned/axis_image_identifier-field", + "stack_oned/axis_image_identifier@long_name-attribute", + "stack_oned/axis_x-field", + "stack_oned/axis_x@long_name-attribute", + "stack_oned/intensity-field", + "stack_threed/axis_image_identifier-field", + "stack_threed/axis_image_identifier@long_name-attribute", + "stack_threed/axis_x-field", + "stack_threed/axis_x@long_name-attribute", + "stack_threed/axis_y-field", + "stack_threed/axis_y@long_name-attribute", + "stack_threed/axis_z-field", + "stack_threed/axis_z@long_name-attribute", + "stack_threed/intensity-field", + "stack_twod/axis_image_identifier-field", + "stack_twod/axis_image_identifier@long_name-attribute", + "stack_twod/axis_x-field", + "stack_twod/axis_x@long_name-attribute", + "stack_twod/axis_y-field", + "stack_twod/axis_y@long_name-attribute", + "stack_twod/intensity-field"] + + +class NxImageRealSpaceSet(): + def __init__(self): + self.tmp: Dict = {} + for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH: + if entry.endswith("-field") is True: + self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dataset") + elif entry.endswith("-attribute") is True: + self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attribute") + else: + self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="group") diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py similarity index 86% rename from pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py rename to pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py index ab28a7e0b..a16851ce1 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_em_image_r_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py @@ -25,17 +25,17 @@ from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject -NX_EM_IMAGE_REAL_SPACE_SET_HDF_PATH = [] +NX_SPECTRUM_SET_HDF_PATH = [] # this one needs an update ! 
-class NxEmImageRealSpaceSet(): +class NxSpectrumSet(): def __init__(self): self.tmp: Dict = {} - for entry in NX_EM_IMAGE_REAL_SPACE_SET_HDF_PATH: + for entry in NX_SPECTRUM_SET_HDF_PATH: if entry.endswith("-field") is True: - self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dset") + self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dataset") elif entry.endswith("-attribute") is True: - self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attr") + self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attribute") else: - self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="grp") + self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="group") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index fbc7b91da..2275ee159 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -30,6 +30,8 @@ ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ get_scan_point_coords +from pynxtools.dataconverter.readers.em.concepts.nxs_image_r_set import \ + NX_IMAGE_REAL_SPACE_SET_HDF_PATH, NxImageRealSpaceSet class HdfFiveEdaxApexReader(HdfFiveBaseParser): @@ -83,6 +85,14 @@ def parse_and_normalize(self): sub_sub_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}"]) for sub_sub_grp_nm in sub_sub_grp_nms: if sub_sub_grp_nm.startswith("Area"): + # get field-of-view (fov in edax jargon, i.e. roi) + if "/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE" in h5r.keys(): + ckey = self.init_named_cache(f"roi{cache_id}") + self.parse_and_normalize_roi( + h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE", ckey) + cache_id += 1 + + # get oim_maps, live_maps, or line_scans if available area_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}"]) for area_grp_nm in area_grp_nms: if area_grp_nm.startswith("OIM Map"): @@ -93,6 +103,41 @@ def parse_and_normalize(self): self.parse_and_normalize_group_ebsd_phases(h5r, ckey) self.parse_and_normalize_group_ebsd_data(h5r, ckey) cache_id += 1 + elif area_grp_nm.startswith("Live Map"): + self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" + print(f"Parsing {self.prfx}") + ckey = self.init_named_cache(f"eds{cache_id}") + self.parse_and_normalize_eds_spd( + h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) + cache_id += 1 + + def parse_and_normalize_roi(self, fp, src: str, ckey: str): + """Normalize and scale APEX-specific FOV/ROI image to NeXus.""" + self.tmp[ckey] = NxImageRealSpaceSet() + reqs = ["PixelHeight", "PixelWidth"] + for req in reqs: + if req not in fp[f"{src}/FOVIMAGE"].attrs.keys(): + # also check for shape + raise ValueError(f"Required attribute named {req} not found in {src}/FOVIMAGE !") + nyx = {"y": fp[f"{src}/FOVIMAGE"].attrs["PixelHeight"][0], + "x": fp[f"{src}/FOVIMAGE"].attrs["PixelWidth"][0]} + self.tmp[ckey]["image_twod/intensity"] = np.reshape(np.asarray(fp[f"{src}/FOVIMAGE"]), (nyx["y"], nyx["x"])) + + syx = {"x": 1., "y": 1.} + scan_unit = {"x": "px", "y": "px"} + if f"{src}/FOVIMAGECOLLECTIONPARAMS" in fp.keys(): + ipr = np.asarray(fp[f"{src}/FOVIPR"]) + syx = {"x": ipr["MicronsPerPixelX"][0], "y": ipr["MicronsPerPixelY"][0]} + scan_unit = {"x": "µm", "y": "µm"} + dims = ["y", "x"] + for dim in dims: + self.tmp[ckey].tmp[f"image_twod/axis_{dim}"] = np.asarray( + 
np.linspace(0, nyx[dim] - 1, num=nyx[dim], endpoint=True) * syx[dim], np.float64)
+            self.tmp[ckey].tmp[f"image_twod/axis_{dim}@long_name"] \
+                = f"Calibrated pixel position along {dim} ({scan_unit[dim]})"
+        for key, val in self.tmp[ckey].tmp.items():
+            if key.startswith("image_twod"):
+                print(f"{key}, {val}")
 
     def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
         # no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds
@@ -265,3 +310,52 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         # this is the non-harmonized content one is facing in the field of EBSD despite
         # almost two decades of commercialization of the technique now
         get_scan_point_coords(self.tmp[ckey])
+
+    def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str):
+        if f"{src}/SPD" not in fp.keys():
+            return None
+        reqs = ["MicronPerPixelX",
+                "MicronPerPixelY",
+                "NumberOfLines",
+                "NumberOfPoints",
+                "NumberofChannels"]  # TODO: mind the typo here, can break parsing easily!
+        for req in reqs:
+            if req not in fp[f"{src}/SPD"].attrs.keys():  # also check for shape
+                raise ValueError(f"Required attribute named {req} not found in {src}/SPD !")
+
+        nyxe = {"y": fp[f"{src}/SPD"].attrs["NumberOfLines"][0],
+                "x": fp[f"{src}/SPD"].attrs["NumberOfPoints"][0],
+                "e": fp[f"{src}/SPD"].attrs["NumberofChannels"][0]}
+        print(f"lines: {nyxe['y']}, points: {nyxe['x']}, channels: {nyxe['e']}")
+        # the native APEX SPD concept instance is a two-dimensional array of arrays of length e (n_energy_bins);
+        # likely EDAX has in their C(++) code a vector of vectors or something equivalent, either way we faced
+        # nested C arrays of the base data type in an IKZ example
[... the remainder of parse_and_normalize_eds_spd and the mbox separator of the following mail (commit hash, From: line) were lost in this copy ...]
Date: Mon, 18 Dec 2023 21:52:26 +0100
Subject: [PATCH 54/84] Added skeleton for collection of image files from the
 Reduction of FeOx example from AXON Studio 10.4.4.1 aka Protochips PNG file
 collection reader

---
 concept_mapper.ipynb                          |  74 +++++++
 examples/em/image_png_protochips_to_nexus.ods | Bin 0 -> 11254 bytes
 .../em/image_tiff_tfs_to_nexus.ods            | Bin
 .../em/subparsers/image_png_protochips.py     | 201 ++++++++++++++++++
 .../em/subparsers/image_png_protochips_cfg.py |  44 ++++
 .../image_png_protochips_concepts.py          |  27 +++
 .../image_png_protochips_modifier.py          |  38 ++++
 .../readers/em/subparsers/nxs_imgs.py         |  14 +-
 8 files changed, 395 insertions(+), 3 deletions(-)
 create mode 100644 concept_mapper.ipynb
 create mode 100644 examples/em/image_png_protochips_to_nexus.ods
 rename image_tiff_tfs_to_nexus.ods => examples/em/image_tiff_tfs_to_nexus.ods (100%)
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_concepts.py
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py

diff --git a/concept_mapper.ipynb b/concept_mapper.ipynb
new file mode 100644
index 000000000..7334dfa30
--- /dev/null
+++ b/concept_mapper.ipynb
@@ -0,0 +1,74 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4f240b8f-71d4-4004-ab56-d7480b44d96e",
+   "metadata": {},
+   "source": [
+    "# Generate Python list of tuples with concept mapping to be used for the configuration of tech-partner-specific subparsers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "869670b4-0780-4bf4-bc08-d802288fa5df",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "fnm = \"image_tiff_tfs_to_nexus.ods\"\n",
+    "fnm = \"image_png_protochips_to_nexus.ods\"\n",
+    "\n",
+    "ods = pd.read_excel(fnm, engine=\"odf\")\n",
+    "ods = ods.fillna(\"\")\n",
+    "# print(ods)\n",
+    "cfg = []\n",
+    "for row_idx in np.arange(1, ods.shape[0]):\n",
+    "    nxpath = ods.iloc[row_idx, 0]\n",
+    "    functor = ods.iloc[row_idx, 1]\n",
+    "    if nxpath != \"\" and ods.iloc[row_idx, 4] != \"\":  # not in [\"IGNORE\", \"UNCLEAR\"]:\n",
+    "        if functor != \"fun\":\n",
+    "            cfg.append((f\"{nxpath}\", f\"{ods.iloc[row_idx, 4]}\"))\n",
+    "        else:\n",
+    "            cfg.append((f\"{nxpath}\", f\"{ods.iloc[row_idx, 2]}\", ods.iloc[row_idx, 4]))  # not fstring because can be a list!\n",
+    "\n",
+    "indent = \" \"\n",
+    "for entry in cfg:\n",
+    "    print(f\"{indent}{entry},\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f27812fa-d023-4ed6-a5ee-d417a8705828",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
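Editorial sketch for orientation: the .ods spreadsheet that feeds this notebook is committed below only as a binary blob, so the column layout can only be inferred from the cell above — column 0 appears to hold the variadic NeXus template path, column 1 whether column 4 must be resolved through the functor named in column 2 ("fun") or taken as a literal, and column 4 the tech-partner term or literal. Under that assumption, the emitted tuples look exactly like the TIFF_TFS_TO_NEXUS_CFG entries earlier in this series, e.g.:

    ('/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor', 'FEI'),
    ('/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage', 'load_from', 'EBeam/HV'),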
zgqM3Vj-2v?<%M0AS}6|WY?KD!=jYxkD{92mIWU<{Bd*UAB(7z4VTR$JZOWDsoEn83?++?>PDPT2bX)CGMG+nRA*HVNsdYKP^MAab3&FT?bsC2Xg+jPjZ|BONp15@&cc($ zV}o%T5MEzE7tB27$cWCziac9de`44;^ph_iuV8Pf`}PYP!JNAAz!h#>YW6z>)Vv+Y zTyNdMkS|Q}w$K)4@|nmj!<6N=rmVp1cgz#24VLVbZWwbI2?V3Jme?5*XyWh2ETzhD zJP=*5Fya#|7PJrQq9cipGaL>`qw4LYFE3j&oBAB&lwl^IDwJ-Y)giN?A0$M2tkxIR z$`HL9d>Yk%Hgtr2gY~k5^Sw>VVS1%44Gwv0@0*qwCOH=8g+=*=u+F!lN#rXdIg<;d z?r zkk2fBwtMe4ko~%7GcKLaW6(z#F5Y_j!cR_+1v$58-jpxhCxUD3$92qepeU{1( zi&#~dBY4Tf1MM1j4vpU*0|1HSg{c*(J3u0Oy2mfu0b1R@vOtFVh>A zg?qo^A+kx&eOXzT5PrEAmq1`IyVMP`Siexp$>t2r;8v9T)ZJB_G({h8U7=IQ)*kjW zqm0}M?|7txydJ#lg@u{q6&(^@B)uvivPTp6{1JvA)v>>-oGFiOCf_(dPkcEalCsqVJw*sGU}1*K#ntDEq+BmG^+~LZLc@ z@75lZhiQ9-V&O)tC6%e=mtpy5d*w5|=3WuLZz1hR`!`+H*7EKbe;l6XeW~Y_cp#~M zV7qDYnKam@2LvR`2)2u~=_*y(<_Qwi)_`NaCJvUcT9zmX@K>lPcEW~*o?r?FI%w_$GcM_&D9V;Rg?SWH^wKuo`}|%ruM%k&4s(!zP58n zD2NjW%?l2Kn%t!rSam%TV3L!AQ2smTRIrU*3&c8rzeY&G1@iy-lL5L_hA9nR`p!y`XP zZd@(_)Jqqwyt9g(f?2q62GQP~IZURq9VnAtA5=NF_C7;5D?|_DLY0OHM>(EGMnTi# zY(y7jOG1?W1l_i6NGQY$`xUlMY77gP*QLD5#lze?_gjsyA9<7F6hcbQi-)D`X=l2f z!2u6G-o~y)muL=uVtvh>rIWfTonVybi$&U|>Yk_ChYh@xb>l#TbBoDo>zLYAGqt-M zgtwK0)xtV%d2#E7q1;6k!>HQ0XYcE`8VMr})% z)_9YH@DFARs!(-L)z3ixC%+7{{UKxklzI3?h z#^cVisrbugD?6XFA@^ucpGRq)muT2?sw=kc>Pr?;5=r89qH+Dg>0f0QUmGROuWIIk z>93cT(EP#7Er+tdg;S^eMze3bFiK5@2^#5s6!x+6nh5}CVS>n3pK&!Jf_`Ov4v_Mt zjXZL`doSnRH3;Ai4P1_!M;_Bn@d-s_UfL`f;E}W^-fFyB{djHDgF`N!Q_D$u?GBwg z_Int?$;>CfA=TVPRQ)F1^n0p|a?QE!IPHjR+a7opy7Pek@Z=BwIj116eQO9RBB zA{~>G3^*;U--N_a?xBhrz_id9X07BK3bGWNc)DuFUoA}rAH0rKKD(g}-<~VHGy$ol zxApL7eHPiFb{RG1Co5oYIe7KWS_z+6q_>5T*Z@nch#1hUmy&Fs4xj0XZyl~x4)<## zpjZ{^J9qDQ?lnuk=U>RFU;}wBVx%)YUkoF>)eN-z!(-!k_&<8hn)3D%hxndxJohQm zJ@hN@dwh~-iLR|$g5itu=93+rklD@rUb2xklhoNKcxR@sFYN_yo^LgG0L+{L;}9ak zEHMVosBtO90{BwhCJjX^7I0o6ku*LmEI880eGHJxdoR4$Xejp(F5H4WukehNnFiyU za-kIS0uRbc#}iTmlhqPm6fR#9@`w9Tm$jO+eWI!Fo@%5oo$9xQ_RTBYaz|-MSbUFD z-ZrTH)z=x_J-P!MlcFSyY6fa{Zya4tal(CME6<0pc>}$r5Rh;u!P(t^bQ$DvpA8=D zX`dR?kW*ag_8K)Goc?f3MG9qcF401yuiZ7xa~zE#y8La?In{dy0nur~ z*E1Q(2mS)NwI0U)I4g8|A;8&m?|JcveoKucf62%nXvL zOi%Dj(Y-#*eT)jS8Qn&|oJNSN=TYza1nETWnn>Z;G_fY~aO4YT559e5 z&Ak2fBN48%9%grSftpR|&M-CF{c&1tZXi}axUHnP)O`g3~|>XGI%qfscJMwztCo5=p2>t zdHd+eEy`NxyDu)%H*=cUvC2MHEzCDRxK1iw^KAaDVLN;9S3zP-TvQ!&JvKbx$D$j_ zMpEP-T_5As`Vw!nFM31-NQ_YubE8iy14R2Cwl%OeUd3MsexT`nfaXc9ew;n_=F4_fw@_pUoW0LjzyC`NW4D$RT@P8Qe8C1 z`t{=M=Yt|Onm3)7VFrsa??0d5qwag6uQFowX$IVPW8*9d8jII!JnY!0czjp`QP*X@ zW=l8X7$7-r%Jt%H3a8^AF$AnZw_QrdUWx0 z9X%xV7<-A%In{Gh&J&jjZIF5y98my(<(0-{9sFzk9z#PG$x!7uKX_eey) zmzf^|y^|_St$f|#4wa#|Y?T;`9Ll|@0(2HCky!*!{rUsak)V>j2N)k{h&x9+$ml0@ zv3kq>Hcq&E4yx=t1c}noP4C^|7xipaeTr5n$>gOmRs4DAu#Dh&8;8Qpq2;G7>*!^? z^cD`4QYI#3;YBAAXC;$ILpBgW%H^?$Nv~mhA;bD`U3f`TMSWv)O|v6pqq@5KX37o! 
zqiRaga@++6P}PM3Zmau# z)vBUi0UBPSjSS!&)EzxOeB{q+@>C%S}vf9 zc-hJ?M!v_ou`x~^YG99P$|sv#9a%g_Tk~iO2!sULv@B3>tCKiq5|KY=E~xJ-U-A_5 z&$`%zKMHHik=wN*KWAR6f6K@@!6;kY(R68H1~07V5mwX{t5kmSsa{``>y8o6LXD{B zX+C`B6gs0hM070y%Rt~+HSV$yW9xd&WNU9h!ZBoR1IDvmL+1WMseB9~B&BBB6qmQt z&oA>}l~X;ytaG9~64T$hV$o7b`&j>M|Ewz)x0SXj9YZ~T zduQv{R3Ulp4((MDFvEF`h>`#|MF@=WULNX?iWTSS2e0O&EI{CTAQQD`WA zoWh@sJZxXRz7-n42=lA+w73-Vjm{MegCk1~5)8cDJPdST2*}A2`SY9v!vmeWT*3@e zk~hSitgLK7V9_7d2QFlp!ki8%3W1A$?~-6}fk8yS)hR?F=8j;vC;}w?(B^YcF2&gC!=;r3e?Z(gT1hWS6iin5+zx6WwY047x7a!CG=J1Wn5(ET0 zfUim-a3C)?FYpKNm52YUQ&+sdy5!{K^p~NlF?=85H*2FS z0U&w#f!07vbA&mUt1Z}#k%8{dIQ|lFJ_gAj(Y{~5MQbR{jQ9wXd zfJggJAO3^m?}8u?kOLSl$t&~^_CMRdGh2fpV5ELzl+=-PrT%TD0tL8{i$Q?t0mRmw z?q~DW6q-U|PIh1r0w}=CE8xh>B;G%f!x6|)z!A0}x^K}( zm|Hl2xj;@X5Cn4PFnn(WfgO;u_PtF&Nbt{ACyOhKKiYX1fRf^WE+WzIlm6qszifSL z{{7ElS5-SW!UGwct6e-};0S*ujDPXC*^mCtYu9Wt=yM@gkhSL;Vl%4?@v(A_7++tH zp7(j_;>C#}RzRz-swu>?5s}SSVC_b`Q5;9y*x=cjThT5{*iQlx#xlaF2<`~T!ai)9 zvEO>z`0xVO6%!!uPP3QW$)laDWB4o!t^0m|&-*|_OraG;LJ@1$Hdt}G$N+DJ@*ayP z#OMVf$f2+f$uEf4j>aB;M9V@8e4u<$2}i+h}(CW;oFJj}@`uz!skAo7r&x32%@8)NpIog*!g z@K<%@S_MzeOpfl9+FaCmR1|PThaY5>6=^8=EoIagq`x{z+DpvWWyPyc6T|W2Z}x(u z-of}pDMLhp8>P!YKawJ`AgO>ESvS?oU0>i=*kXpGLfS%%<4>$jI=x)V`Kp>v8B8!{Rqe z>XE1UI-KcV;`gmNi2IwcX;)4Lnv)L4J8U1cY_<2(icY?0NLv2NZIP7fr^}yQWkeu& z-WJM}RUEjYn5vWN_4zaE!w3!Z>uoTsL9@UWllz&n_XmPomy$}2-|b6}jq-~dy2Nga z2!vNkvvu*ZL3T&O6YT2~tV?&roaB*mw)pWE7)vt_{+gkTnP?8&7JiIl<&s!?RYVe$X{WH-G zvOk1e@V2Ilv;9bGYx~I-!hI_bC54zaH8v8TXHqTC#xP;c-#g8Bu>1O3v2HJqeRkB9q0^{8^4@W4$}Fop7$x0ki!@vJ?tYMRpra)GqJqv& zL@TY6WuKJjQ!f~V!?Jc`8I)Uiq~gzip1)d&W^XlfzA*X1eEeXGYz>cW?ly0g*1)N1 z5I$iHpZeRV+&L$nc!Nc#b&Dd#EnE`+ZRX(a_O&%{C)rN7yQC2GXV6P>FXOQXvXcH= z+XltcY2C-wmbwHu&Fq;%)F4@QcMqS#)KlA&id2;kbCibp_l&2ZdMwf7$=0xQlWdzF z?9q?Is>y*6yj&IKf$dTh^~go+_#5goTd;D!nGKTyE`zrsnNC3Sy7A(CCiIxKuV*4` zD=%+`5RI{_?JFCyvU?eN4!rmu-lEtOG={Ys79pIp&R`djI*}!xr=bxq{$y^%Ivqi>X2Nbm_}H{}JgH zo~Q-2E`-n-d}PGujzfq2H0@WjQQ4=Kj4W(5;0J7p2ZH-2q=DD2!C1R%?NV9HbunfYG&rIr;IEX z-{qf+sC>0yPSqJ&p4+M;0AgeARYPh7o$p>!IBq+NdE$sm7XTzOKOzIA)Q3% zjA&kJ`3>`Gt;V2gy67j;;aK;(0}%dHb!sZ_`Q!F9MOxD~7COuthyByV&9*rr1}0{M zZ)SfsnQtuJH1ePGdSTCa-JfB7 z617sfOZ(tn$yN9&4{bhNLtduwM<(0fzruf{;mC;nMHI*9 zgv_YEr*}Y*3k>#W9T4r?I=zbJ)orCK`hCD?k-h&MxUHkPH5d+*wM96ZL*f4=YX3G% zS5bqSL%a-JdT6?Mzr0lKSXuLToTYCc-r#v@pY=x zQS3I%Sg)+-dJNxS9#PdS97oFyZUfgJu$-!oj72XjBER0ISZ|8yf)nED(R)9yDn>lJ zNAb#69CvT1iAG;)I@Wz_KAHT!llJ;a&X=<0Lrcw>&NmK6B&9WSY8XO&FNCgB#qu$M zVakVn7l&U`!!km9bradPeO}%@l084Z5oda}|8T$bAefQ+PXT!YPD2F+l?d?r8+XWH z|3W|R-~ChSpLgnhis&F8e|N*~hn63+{AG7d<~=^ncY7 z_b*z0!s`F3rT1U7{Dj%>T7Jyw)d+rI dict: + if self.supported is True: + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) + return template + + def process_event_data_em_metadata(self, template: dict) -> dict: + """Add respective metadata.""" + # contextualization to understand how the image relates to the EM session + print(f"Mapping some of the Protochips-specific metadata on respective NeXus concept instance") + identifier = [self.entry_id, self.event_id, 1] + for tpl in PNG_PROTOCHIPS_TO_NEXUS_CFG: + if isinstance(tpl, tuple): + trg = variadic_path_to_specific_path(tpl[0], identifier) + if len(tpl) == 2: + template[trg] = tpl[1] + if len(tpl) == 3: + # nxpath, modifier, value to load from and eventually to be modified + retval = get_nexus_value(tpl[1], tpl[2], self.tmp["meta"]) + if retval is not None: + template[trg] = retval + return template + + def process_event_data_em_data(self, template: dict) -> dict: + """Add respective heavy data.""" + # default display of 
+        print(f"Writing Protochips PNG image into a respective NeXus concept instance")
+        # read image in-place
+        with Image.open(self.file_path, mode="r") as fp:
+            nparr = np.array(fp)
+            # print(f"type: {type(nparr)}, dtype: {nparr.dtype}, shape: {np.shape(nparr)}")
+            # TODO::discussion points
+            # - how do you know we have an image of real space vs. imaginary space (from the metadata?)
+            # - how to deal with the (ugly) scale bar that is typically stamped into the TIFF image content?
+            # with H5Web and NeXus most of this is obsolete unless there are metadata stamped which are
+            # not available in NeXus or in the respective metadata section of the TIFF image
+            # remember that H5Web images can be scaled based on the metadata, allowing basically the same
+            # explorative viewing with H5Web as what typical image viewers traditionally offer
+            image_identifier = 1
+            trg = f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/" \
+                  f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \
+                  f"IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]"
+            # TODO::writer should decorate automatically!
+            template[f"{trg}/title"] = f"Image"
+            template[f"{trg}/@NX_class"] = f"NXdata"  # TODO::writer should decorate automatically!
+            template[f"{trg}/@signal"] = "intensity"
+            dims = ["x", "y"]
+            idx = 0
+            for dim in dims:
+                template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx)
+                idx += 1
+            template[f"{trg}/@axes"] = []
+            for dim in dims[::-1]:
+                template[f"{trg}/@axes"].append(f"axis_{dim}")
+            template[f"{trg}/intensity"] = {"compress": np.array(fp), "strength": 1}
+            # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d
+            template[f"{trg}/intensity/@long_name"] = f"Signal"
+
+            sxy = {"x": 1., "y": 1.}
+            scan_unit = {"x": "m", "y": "m"}  # assuming FEI reports SI units
+            # we may face the CCD overview camera for the chamber for which there might not be a calibration!
+            if ("EScan/PixelWidth" in self.tmp["meta"].keys()) and ("EScan/PixelHeight" in self.tmp["meta"].keys()):
+                sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"],
+                       "y": self.tmp["meta"]["EScan/PixelHeight"]}
+                scan_unit = {"x": "px", "y": "px"}
+            nxy = {"x": np.shape(np.array(fp))[1], "y": np.shape(np.array(fp))[0]}
+            # TODO::be careful, we assume here a very specific coordinate system;
+            # however, the TIFF file gives no clue, TIFF just documents in which order
+            # it arranges a bunch of pixels that have streamed in into an n-d tiling,
+            # e.g. a 2D image
+            # also we have to be careful because TFS just gives us here the
+            # typical case of an image without information about its location
+            # on the physical sample surface, therefore we can only scale
+            # pixel_identifier by physical scaling quantities s_x, s_y
+            # also the dimensions of the image are on us to fish out with the image
+            # reading library instead of from TFS, for consistency checks when adding
+            # these to the metadata; the reason is that TFS TIFFs use the TIFF tagging
+            # mechanism and there is already a proper TIFF tag for the width and height
+            # of an image in number of pixels
+            for dim in dims:
+                template[f"{trg}/AXISNAME[axis_{dim}]"] \
+                    = {"compress": np.asarray(np.linspace(0,
+                                                          nxy[dim] - 1,
+                                                          num=nxy[dim],
+                                                          endpoint=True) * sxy[dim], np.float64), "strength": 1}
+                template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \
+                    = f"Coordinate along {dim}-axis ({scan_unit[dim]})"
+                template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}"
+        return template
diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py
new file mode 100644
index 000000000..792c1e8c8
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py
@@ -0,0 +1,44 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# +"""Configuration of the image_png_protochips subparser.""" + + +PNG_PROTOCHIPS_TO_NEXUS_CFG = [('/ENTRY[entry*]/measurement/em_lab/STAGE_LAB[stage_lab]/alias', 'load_from', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Name'), + ('/ENTRY[entry*]/measurement/em_lab/STAGE_LAB[stage_lab]/design', 'heating_chip'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_1', 'load_from', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.A'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_2', 'load_from', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.B'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/position', 'load_from_concatenate', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.X, MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Y, MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Z'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/current', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HeatingCurrent'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/current/@units', 'A'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/power', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HeatingPower'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/power/@units', 'W'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/voltage', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HeatingVoltage'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/voltage/@units', 'V'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/SENSOR[sensor2]/value', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HolderPressure'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/SENSOR[sensor2]/value/@units', 'torr'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/SENSOR[sensor2]/measurement', 'pressure'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/SENSOR[sensor1]/value', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HolderTemperature'), + 
('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/SENSOR[sensor1]/value/@units', '°C'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/SENSOR[sensor1]/measurement', 'temperature'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage', 'load_from', 'MicroscopeControlImageMetadata.MicroscopeSettings.AcceleratingVoltage'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/EBEAM_COLUMN[ebeam_column]/DEFLECTOR[beam_blanker1]/state', 'load_from', 'MicroscopeControlImageMetadata.MicroscopeSettings.BeamBlankerState'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/camera_length', 'load_from', 'MicroscopeControlImageMetadata.MicroscopeSettings.CameraLengthValue'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification', 'load_from', 'MicroscopeControlImageMetadata.MicroscopeSettings.MagnificationValue'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/event_type', 'As tested with AXON 10.4.4.21, 2021-04-26T22:51:28.4539893-05:00 not included in Protochips PNG metadata'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/DETECTOR[detector*]/mode', 'As tested with AXON 10.4.4.21, 2021-04-26T22:51:28.4539893-05:00 not included in Protochips PNG metadata'), + ('/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]/local_name', 'As tested with AXON 10.4.4.21, 2021-04-26T22:51:28.4539893-05:00 not included in Protochips PNG metadata')] diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_concepts.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_concepts.py new file mode 100644 index 000000000..7602da9e3 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_concepts.py @@ -0,0 +1,27 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Configuration of the image_png_protochips subparser.""" + +from typing import List +from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips_cfg import \ + PNG_PROTOCHIPS_TO_NEXUS_CFG + + +def get_protochips_variadic_concept(png_metadata_tag) -> str: + """Get variadic protochips concept name to identify target NeXus concept name target.""" + return None diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py new file mode 100644 index 000000000..54872857f --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py @@ -0,0 +1,38 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utilities for working with Protochips-specific concepts.""" + +# pylint: disable=no-member + +from numpy import pi + + +def get_nexus_value(modifier, qnt_name, metadata: dict): + """Interpret a functional mapping and modifier on qnt_name loaded from metadata.""" + if qnt_name in metadata.keys(): + if modifier == "load_from": + return metadata[qnt_name] + elif modifier == "load_from_concatenate": + if qnt_name in metadata.keys(): + return metadata[qnt_name] / pi * 180. 
+ else: + # print(f"WARNING modifier {modifier}, qnt_name {qnt_name} not found !") + return None + # if f"{modifier['terms']}/{metadata[modifier['terms']]}" in TfsToNexusConceptMapping.keys(): + # return TfsToNexusConceptMapping[f"{modifier['terms']}/{metadata[modifier['terms']]}"] + # elif set(["link"]) == set(modifier.keys()), with the jsonmap reader Sherjeel conceptualized "link" diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py index de4d3ecf9..436500678 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py @@ -22,6 +22,7 @@ from PIL import Image from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs import TfsTiffSubParser +from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips import ProtochipsPngSetSubParser from pynxtools.dataconverter.readers.em.utils.hfive_web_utils import hfive_web_decorate_nxdata @@ -40,9 +41,12 @@ def __init__(self, entry_id: int = 1, input_file_name: str = ""): def identify_image_type(self): """Identify if image matches known mime type and has content for which subparser exists.""" # tech partner formats used for measurement - img = TfsTiffSubParser(f"{self.file_path}") + # img = TfsTiffSubParser(f"{self.file_path}") + # if img.supported is True: + # return "single_tiff_tfs" + img = ProtochipsPngSetSubParser(f"{self.file_path}") if img.supported is True: - return "tiff_tfs" + return "set_of_zipped_png_protochips" return None def parse(self, template: dict) -> dict: @@ -54,10 +58,14 @@ def parse(self, template: dict) -> dict: # see also comments for respective nxs_pyxem parser # and its interaction with tech-partner-specific hfive_* subparsers - if image_parser_type == "tiff_tfs": + if image_parser_type == "single_tiff_tfs": tiff = TfsTiffSubParser(self.file_path, self.entry_id) tiff.parse_and_normalize() tiff.process_into_template(template) + elif image_parser_type == "set_of_zipped_png_protochips": + pngs = ProtochipsPngSetSubParser(self.file_path, self.entry_id) + pngs.parse_and_normalize() + pngs.process_into_template(template) # else: # TODO::add here specific content parsers for other tech partner # or other custom parsing of images From eccccb5fa0a521f4aae9485418060572508d7c21 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 18 Dec 2023 22:14:36 +0100 Subject: [PATCH 55/84] Add xmltodict to flatten xml to Python dictionaries --- dev-requirements.txt | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 9941ac2a7..dd12c799b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -580,10 +580,10 @@ wheel==0.42.0 # via pip-tools xarray==2023.1.0 # via pynxtools (pyproject.toml) +xmltodict==0.13.0 + # via pynxtools (pyproject.toml) zarr==2.16.1 # via hyperspy -zipfile37==0.1.3 - # via pynxtools (pyproject.toml) zipp==3.17.0 # via importlib-metadata diff --git a/pyproject.toml b/pyproject.toml index 21ebf7b89..828d199ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "pytz>=2021.1", "kikuchipy>=0.9.0", "pyxem>=0.15.1", - "zipfile37==0.1.3", + "xmltodict", "nionswift>=0.16.8", "tzlocal<=4.3", "scipy>=1.7.1", From a065978a4a8f4be39ba8d11e953b082732798fbc Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 18 Dec 2023 22:40:27 +0100 Subject: [PATCH 56/84] Not yet finished code to read metadata but includes already working name mangling for 
AXON jargon, next steps i) clean parse_and_normalize, ii) populate template with metadata, iii) populate template with image data --- .../em/subparsers/image_png_protochips.py | 80 +++++++++++++++++-- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py index f774d6213..994702814 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py @@ -18,10 +18,13 @@ """Subparser for exemplar reading of raw PNG files collected on a TEM with Protochip heating_chip.""" import mmap +import re import numpy as np +import xmltodict from typing import Dict from PIL import Image from zipfile import ZipFile +from collections import OrderedDict from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips_concepts import \ get_protochips_variadic_concept @@ -35,6 +38,26 @@ ImgsBaseParser +def flatten_xml_to_dict(xml_content) -> dict: + # https://codereview.stackexchange.com/a/21035 + # https://stackoverflow.com/questions/38852822/how-to-flatten-xml-file-in-python + def items(): + for key, value in xml_content.items(): + # nested subtree + if isinstance(value, dict): + for subkey, subvalue in flatten_xml_to_dict(value).items(): + yield '{}.{}'.format(key, subkey), subvalue + # nested list + elif isinstance(value, list): + for num, elem in enumerate(value): + for subkey, subvalue in flatten_xml_to_dict(elem).items(): + yield '{}.[{}].{}'.format(key, num, subkey), subvalue + # everything else (only leafs should remain) + else: + yield key, value + return OrderedDict(items()) + + class ProtochipsPngSetSubParser(ImgsBaseParser): def __init__(self, file_path: str = "", entry_id: int = 1): super().__init__(file_path) @@ -45,6 +68,7 @@ def __init__(self, file_path: str = "", entry_id: int = 1): "meta": {}} self.supported_version: Dict = {} self.version: Dict = {} + self.png_info: Dict = {} self.supported = False self.check_if_zipped_png_protochips() @@ -60,7 +84,7 @@ def check_if_zipped_png_protochips(self): print(f"Test 1 failed, {self.file_path} is not a ZIP archive !") return # test 2: check if there are at all PNG files with iTXt metadata from Protochips in this zip file - png_info = {} # collect all those PNGs to work with and write a tuple of their image dimensions + # collect all those PNGs to work with and write a tuple of their image dimensions with ZipFile(self.file_path) as zip_file_hdl: for file in zip_file_hdl.namelist(): if file.lower().endswith(".png") is True: @@ -74,22 +98,22 @@ def check_if_zipped_png_protochips(self): with Image.open(fp) as png: try: nparr = np.array(png) - png_info[file] = np.shape(nparr) + self.png_info[file] = np.shape(nparr) except: raise ValueError(f"Loading image data in-place from {self.file_path}:{file} failed !") if method == "smart": # knowing where to hunt width and height in PNG metadata # https://dev.exiv2.org/projects/exiv2/wiki/The_Metadata_in_PNG_files magic = fp.read(8) - png_info[file] = (np.frombuffer(fp.read(4), dtype=">i4"), - np.frombuffer(fp.read(4), dtype=">i4")) + self.png_info[file] = (np.frombuffer(fp.read(4), dtype=">i4"), + np.frombuffer(fp.read(4), dtype=">i4")) # test 3: check there are some PNGs - if len(png_info.keys()) == 0: + if len(self.png_info.keys()) == 0: print("Test 3 failed, there are no PNGs !") return # test 4: check that all PNGs have the same dimensions, TODO::could check for 
other things here target_dims = None - for file_name, tpl in png_info.items(): + for file_name, tpl in self.png_info.items(): if target_dims is not None: if tpl == target_dims: continue @@ -106,6 +130,50 @@ def parse_and_normalize(self): if self.supported is True: print(f"Parsing via Protochips-specific metadata...") # may need to set self.supported = False on error + with ZipFile(self.file_path) as zip_file_hdl: + for file in self.png_info.keys(): + with zip_file_hdl.open(file) as fp: + try: + with Image.open(fp) as png: + png.load() + if "MicroscopeControlImage" in png.info.keys(): + meta = flatten_xml_to_dict( + xmltodict.parse(png.info["MicroscopeControlImage"])) + # first phase analyse the collection of Protochips metadata concept instance symbols and reduce to unique concepts + self.tmp["meta"][file] = {} + for concept, value in meta.items(): + # not every key is allowed to define a concept + # print(f"{concept}: {value}") + idxs = re.finditer(".\[[0-9]+\].", concept) + if (sum(1 for _ in idxs) > 0): # is_variadic + markers = [".Name", ".PositionerName"] + for marker in markers: + if concept.endswith(marker): + self.tmp["meta"][file][f"{concept[0:len(concept)-len(marker)]}"] = value + else: + self.tmp["meta"][file][concept] = value + # print(f"First phase of metadata parsing {self.file_path}:{file} successful") + # second phase, evaluate each concept instance symbol wrt to its prefix coming from the unique concept + for k, v in meta.items(): + grpnms = None + idxs = re.finditer(".\[[0-9]+\].", k) + if (sum(1 for _ in idxs) > 0): # is variadic + search_argument = k[0:k.rfind("].")+1] + for parent_grpnm, child_grpnm in self.tmp["meta"][file].items(): + if parent_grpnm.startswith(search_argument): + grpnms = (parent_grpnm, child_grpnm) + break + if grpnms is not None: + if len(grpnms) == 2: + if "PositionerSettings" in k and k.endswith(".PositionerName") is False: + print(f"vv: {grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}: {v}") + if k.endswith(".Value"): + print(f"vv: {grpnms[0]}.{grpnms[1]}: {v}") + else: + print(f"nv: {k}: {v}") + # TODO::simplify and check that metadata end up correctly in self.tmp["meta"][file] + except: + raise ValueError(f"Flattening XML metadata content {self.file_path}:{file} failed !") else: print(f"{self.file_path} is not a Protochips-specific " f"PNG file that this parser can process !") From 97c7f8a85ec1ee20dead1a9e23ab0fff3a248dcb Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 19 Dec 2023 09:58:36 +0100 Subject: [PATCH 57/84] Fixed bug, refactored get_metadata, added and tested successfully hashing of files inside zip, linting, styling, myping --- imgs.batch.sh | 6 +- .../readers/em/concepts/nxs_image_r_set.py | 6 +- .../readers/em/concepts/nxs_object.py | 2 +- .../readers/em/concepts/nxs_spectrum_set.py | 10 +- pynxtools/dataconverter/readers/em/reader.py | 3 +- .../readers/em/subparsers/hfive_apex.py | 4 +- .../em/subparsers/image_png_protochips.py | 127 +++-- .../em/subparsers/image_tiff_tfs_concepts.py | 470 +++++++++--------- .../readers/em/subparsers/rsciio_bruker.py | 6 +- .../readers/em/utils/xml_utils.py | 41 ++ .../readers/shared/shared_utils.py | 11 + 11 files changed, 366 insertions(+), 320 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/utils/xml_utils.py diff --git a/imgs.batch.sh b/imgs.batch.sh index ae80412cf..8b6498b58 100755 --- a/imgs.batch.sh +++ b/imgs.batch.sh @@ -1,6 +1,6 @@ #!/bin/bash -datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/ikz_robert/" 
+datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/axon/" # comments is detector mode examples="kit/FeMoOx_AntiA_04_1k5x_CN.tif" @@ -9,9 +9,9 @@ examples="ikz_martin/ALN_baoh_021.tif" # T2 examples="ikz_robert/T3_image.tif" examples="ikz_robert/ETD_image.tif" # ETD examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam - examples="0c8nA_3deg_003_AplusB_test.tif ALN_baoh_021.tif T3_image.tif ETD_image.tif NavCam_normal_vis_light_ccd.tif" - +examples="axon/20210426T224437.049Raw0.png" #axon +examples="ReductionOfFeOx.zip" for example in $examples; do echo $example diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py index ba8b8a40b..fbdc311e7 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py @@ -68,8 +68,8 @@ def __init__(self): self.tmp: Dict = {} for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH: if entry.endswith("-field") is True: - self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dataset") + self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset") elif entry.endswith("-attribute") is True: - self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attribute") + self.tmp[entry[0:len(entry) - len("-attribute")]] = NxObject(eqv_hdf="attribute") else: - self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="group") + self.tmp[entry[0:len(entry) - len("-group")]] = NxObject(eqv_hdf="group") diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py index c5d3c9f77..696be9a86 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py @@ -36,7 +36,7 @@ def __init__(self, if (unit is not None) and (unit == ""): raise ValueError(f"Value for argument unit needs to be a non-empty string !") if (dtype is not None) and isinstance(dtype, type) is False: - raise ValueError(f"Value of argument dtype must not be None " \ + raise ValueError(f"Value of argument dtype must not be None " f" and a valid, ideally a numpy datatype !") # self.doc = None # docstring self.name = name # name of the field diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py index a16851ce1..e1bed0f39 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py @@ -20,12 +20,12 @@ # pylint: disable=no-member,too-few-public-methods -from typing import Dict +from typing import Dict, List from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject -NX_SPECTRUM_SET_HDF_PATH = [] +NX_SPECTRUM_SET_HDF_PATH: List = [] # this one needs an update ! 
@@ -34,8 +34,8 @@ def __init__(self): self.tmp: Dict = {} for entry in NX_SPECTRUM_SET_HDF_PATH: if entry.endswith("-field") is True: - self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dataset") + self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset") elif entry.endswith("-attribute") is True: - self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attribute") + self.tmp[entry[0:len(entry) - len("-attribute")]] = NxObject(eqv_hdf="attribute") else: - self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="group") + self.tmp[entry[0:len(entry) - len("-group")]] = NxObject(eqv_hdf="group") diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 971579de4..76dd5fdee 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -147,7 +147,8 @@ def read(self, # print("Create NeXus default plottable data...") # em_default_plot_generator(template, 1) - if True is False: + run_block = False + if run_block is True: nxs_plt = NxEmDefaultPlotResolver() # if nxs_mtex is the sub-parser resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot( diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 2275ee159..67a3868df 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -322,7 +322,7 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): for req in reqs: if req not in fp[f"{src}/SPD"].attrs.keys(): # also check for shape raise ValueError(f"Required attribute named {req} not found in {src}/SPD !") - + nyxe = {"y": fp[f"{src}/SPD"].attrs["NumberOfLines"][0], "x": fp[f"{src}/SPD"].attrs["NumberOfPoints"][0], "e": fp[f"{src}/SPD"].attrs["NumberofChannels"][0]} @@ -334,7 +334,7 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): # thereby these EDAX energy count arrays are just some payload inside a set of compressed chunks # without some extra logic to resolve the third (energy) dimension reading them can be super inefficient # so let's read chunk-by-chunk to reuse chunk cache, hopefully... 
- chk_bnds = {"x": [], "y": []} + chk_bnds: Dict = {"x": [], "y": []} chk_info = {"ny": nyxe["y"], "cy": fp[f"{src}/SPD"].chunks[0], "nx": nyxe["x"], "cx": fp[f"{src}/SPD"].chunks[1]} for dim in ["y", "x"]: diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py index 994702814..49adfd67e 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py @@ -24,7 +24,6 @@ from typing import Dict from PIL import Image from zipfile import ZipFile -from collections import OrderedDict from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips_concepts import \ get_protochips_variadic_concept @@ -34,28 +33,9 @@ import variadic_path_to_specific_path from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips_modifier import \ get_nexus_value -from pynxtools.dataconverter.readers.em.subparsers.image_base import \ - ImgsBaseParser - - -def flatten_xml_to_dict(xml_content) -> dict: - # https://codereview.stackexchange.com/a/21035 - # https://stackoverflow.com/questions/38852822/how-to-flatten-xml-file-in-python - def items(): - for key, value in xml_content.items(): - # nested subtree - if isinstance(value, dict): - for subkey, subvalue in flatten_xml_to_dict(value).items(): - yield '{}.{}'.format(key, subkey), subvalue - # nested list - elif isinstance(value, list): - for num, elem in enumerate(value): - for subkey, subvalue in flatten_xml_to_dict(elem).items(): - yield '{}.[{}].{}'.format(key, num, subkey), subvalue - # everything else (only leafs should remain) - else: - yield key, value - return OrderedDict(items()) +from pynxtools.dataconverter.readers.em.subparsers.image_base import ImgsBaseParser +from pynxtools.dataconverter.readers.em.utils.xml_utils import flatten_xml_to_dict +from pynxtools.dataconverter.readers.shared.shared_utils import get_sha256_of_file_content class ProtochipsPngSetSubParser(ImgsBaseParser): @@ -99,8 +79,8 @@ def check_if_zipped_png_protochips(self): try: nparr = np.array(png) self.png_info[file] = np.shape(nparr) - except: - raise ValueError(f"Loading image data in-place from {self.file_path}:{file} failed !") + except IOError: + print(f"Loading image data in-place from {self.file_path}:{file} failed !") if method == "smart": # knowing where to hunt width and height in PNG metadata # https://dev.exiv2.org/projects/exiv2/wiki/The_Metadata_in_PNG_files magic = fp.read(8) @@ -125,6 +105,53 @@ def check_if_zipped_png_protochips(self): print("All tests passed successfully") self.supported = True + def get_xml_metadata(self, file, fp): + try: + fp.seek(0) + with Image.open(fp) as png: + png.load() + if "MicroscopeControlImage" in png.info.keys(): + meta = flatten_xml_to_dict( + xmltodict.parse(png.info["MicroscopeControlImage"])) + # first phase analyse the collection of Protochips metadata concept instance symbols and reduce to unique concepts + grpnm_lookup = {} + for concept, value in meta.items(): + # not every key is allowed to define a concept + # print(f"{concept}: {value}") + idxs = re.finditer(r".\[[0-9]+\].", concept) + if (sum(1 for _ in idxs) > 0): # is_variadic + markers = [".Name", ".PositionerName"] + for marker in markers: + if concept.endswith(marker): + grpnm_lookup[f"{concept[0:len(concept)-len(marker)]}"] = value + else: + grpnm_lookup[concept] = value + # second phase, evaluate each concept instance symbol wrt to its prefix coming from 
the unique concept + self.tmp["meta"][file] = {} + for k, v in meta.items(): + grpnms = None + idxs = re.finditer(r".\[[0-9]+\].", k) + if (sum(1 for _ in idxs) > 0): # is variadic + search_argument = k[0:k.rfind("].") + 1] + for parent_grpnm, child_grpnm in grpnm_lookup.items(): + if parent_grpnm.startswith(search_argument): + grpnms = (parent_grpnm, child_grpnm) + break + if grpnms is not None: + if len(grpnms) == 2: + if "PositionerSettings" in k and k.endswith(".PositionerName") is False: + self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}"] = v + if k.endswith(".Value"): + self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}"] = v + else: + self.tmp["meta"][file][f"{k}"] = v + # TODO::simplify and check that metadata end up correctly in self.tmp["meta"][file] + except ValueError: + print(f"Flattening XML metadata content {self.file_path}:{file} failed !") + + def get_file_hash(self, file, fp): + self.tmp["meta"][file]["sha256"] = get_sha256_of_file_content(fp) + def parse_and_normalize(self): """Perform actual parsing filling cache self.tmp.""" if self.supported is True: @@ -133,47 +160,13 @@ def parse_and_normalize(self): with ZipFile(self.file_path) as zip_file_hdl: for file in self.png_info.keys(): with zip_file_hdl.open(file) as fp: - try: - with Image.open(fp) as png: - png.load() - if "MicroscopeControlImage" in png.info.keys(): - meta = flatten_xml_to_dict( - xmltodict.parse(png.info["MicroscopeControlImage"])) - # first phase analyse the collection of Protochips metadata concept instance symbols and reduce to unique concepts - self.tmp["meta"][file] = {} - for concept, value in meta.items(): - # not every key is allowed to define a concept - # print(f"{concept}: {value}") - idxs = re.finditer(".\[[0-9]+\].", concept) - if (sum(1 for _ in idxs) > 0): # is_variadic - markers = [".Name", ".PositionerName"] - for marker in markers: - if concept.endswith(marker): - self.tmp["meta"][file][f"{concept[0:len(concept)-len(marker)]}"] = value - else: - self.tmp["meta"][file][concept] = value - # print(f"First phase of metadata parsing {self.file_path}:{file} successful") - # second phase, evaluate each concept instance symbol wrt to its prefix coming from the unique concept - for k, v in meta.items(): - grpnms = None - idxs = re.finditer(".\[[0-9]+\].", k) - if (sum(1 for _ in idxs) > 0): # is variadic - search_argument = k[0:k.rfind("].")+1] - for parent_grpnm, child_grpnm in self.tmp["meta"][file].items(): - if parent_grpnm.startswith(search_argument): - grpnms = (parent_grpnm, child_grpnm) - break - if grpnms is not None: - if len(grpnms) == 2: - if "PositionerSettings" in k and k.endswith(".PositionerName") is False: - print(f"vv: {grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}: {v}") - if k.endswith(".Value"): - print(f"vv: {grpnms[0]}.{grpnms[1]}: {v}") - else: - print(f"nv: {k}: {v}") - # TODO::simplify and check that metadata end up correctly in self.tmp["meta"][file] - except: - raise ValueError(f"Flattening XML metadata content {self.file_path}:{file} failed !") + self.get_xml_metadata(file, fp) + self.get_file_hash(file, fp) + # print(f"Debugging self.tmp.file.items {file}") + # for k, v in self.tmp["meta"][file].items(): + # print(f"{k}: {v}") + print(f"{self.file_path} metadata within PNG collection processed " + f"successfully ({len(self.tmp['meta'].keys())} PNGs evaluated).") else: print(f"{self.file_path} is not a Protochips-specific " f"PNG file that this parser can process !") @@ -181,7 +174,7 @@ def parse_and_normalize(self): def 
process_into_template(self, template: dict) -> dict: if self.supported is True: self.process_event_data_em_metadata(template) - self.process_event_data_em_data(template) + # self.process_event_data_em_data(template) return template def process_event_data_em_metadata(self, template: dict) -> dict: diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py index 8e7f2d69d..4ada716d4 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs_concepts.py @@ -27,243 +27,243 @@ # taken for each detector and imaging mode # we then assume that one can work with the joint set of these concepts -TIFF_TFS_PARENT_CONCEPTS = ["Accessories", - "Beam", - "ColdStage", - "CompoundLensFilter", - "Detectors", - "EBeam", - "EBeamDeceleration", - "EScan", - "ETD", - "EasyLift", - "GIS", - "HiResIllumination", - "HotStage", - "HotStageHVHS", - "HotStageMEMS", - "IRBeam", - "Image", - "Nav-Cam", - "PrivateFei", - "Scan", - "Specimen", - "Stage", - "System", - "T1", - "T2", - "T3", - "User", - "Vacuum"] +TIFF_TFS_PARENT_CONCEPTS = ["Accessories", + "Beam", + "ColdStage", + "CompoundLensFilter", + "Detectors", + "EBeam", + "EBeamDeceleration", + "EScan", + "ETD", + "EasyLift", + "GIS", + "HiResIllumination", + "HotStage", + "HotStageHVHS", + "HotStageMEMS", + "IRBeam", + "Image", + "Nav-Cam", + "PrivateFei", + "Scan", + "Specimen", + "Stage", + "System", + "T1", + "T2", + "T3", + "User", + "Vacuum"] TIFF_TFS_ALL_CONCEPTS = ["Accessories/Number", - "Beam/Beam", - "Beam/BeamShiftX", - "Beam/BeamShiftY", - "Beam/FineStageBias", - "Beam/HV", - "Beam/ImageMode", - "Beam/Scan", - "Beam/ScanRotation", - "Beam/Spot", - "Beam/StigmatorX", - "Beam/StigmatorY", - "ColdStage/ActualTemperature", - "ColdStage/Humidity", - "ColdStage/SampleBias", - "ColdStage/TargetTemperature", - "CompoundLensFilter/IsOn", - "CompoundLensFilter/ThresholdEnergy", - "Detectors/Mode", - "Detectors/Name", - "Detectors/Number", - "EasyLift/Rotation", - "EBeam/Acq", - "EBeam/Aperture", - "EBeam/ApertureDiameter", - "EBeam/ATubeVoltage", - "EBeam/BeamCurrent", - "EBeam/BeamMode", - "EBeam/BeamShiftX", - "EBeam/BeamShiftY", - "EBeam/ColumnType", - "EBeam/DynamicFocusIsOn", - "EBeam/DynamicWDIsOn", - "EBeam/EmissionCurrent", - "EBeam/EucWD", - "EBeam/FinalLens", - "EBeam/HFW", - "EBeam/HV", - "EBeam/ImageMode", - "EBeam/LensMode", - "EBeam/LensModeA", - "EBeam/MagnificationCorrection", - "EBeam/PreTilt", - "EBeam/ScanRotation", - "EBeam/SemOpticalMode", - "EBeam/Source", - "EBeam/SourceTiltX", - "EBeam/SourceTiltY", - "EBeam/StageR", - "EBeam/StageTa", - "EBeam/StageTb", - "EBeam/StageX", - "EBeam/StageY", - "EBeam/StageZ", - "EBeam/StigmatorX", - "EBeam/StigmatorY", - "EBeam/TiltCorrectionAngle", - "EBeam/TiltCorrectionIsOn", - "EBeam/UseCase", - "EBeam/VFW", - "EBeam/WD", - "EBeam/WehneltBias", - "EBeamDeceleration/ImmersionRatio", - "EBeamDeceleration/LandingEnergy", - "EBeamDeceleration/ModeOn", - "EBeamDeceleration/StageBias", - "EScan/Dwell", - "EScan/FrameTime", - "EScan/HorFieldsize", - "EScan/InternalScan", - "EScan/LineIntegration", - "EScan/LineTime", - "EScan/Mainslock", - "EScan/PixelHeight", - "EScan/PixelWidth", - "EScan/Scan", - "EScan/ScanInterlacing", - "EScan/VerFieldsize", - "ETD/Brightness", - "ETD/BrightnessDB", - "ETD/Contrast", - "ETD/ContrastDB", - "ETD/Grid", - "ETD/MinimumDwellTime", - "ETD/Mix", - "ETD/Setting", - "ETD/Signal", - 
"GIS/Number", - "HiResIllumination/BrightFieldIsOn", - "HiResIllumination/BrightFieldValue", - "HiResIllumination/DarkFieldIsOn", - "HiResIllumination/DarkFieldValue", - "HotStage/ActualTemperature", - "HotStage/SampleBias", - "HotStage/ShieldBias", - "HotStage/TargetTemperature", - "HotStageHVHS/ActualTemperature", - "HotStageHVHS/SampleBias", - "HotStageHVHS/ShieldBias", - "HotStageHVHS/TargetTemperature", - "HotStageMEMS/ActualTemperature", - "HotStageMEMS/HeatingCurrent", - "HotStageMEMS/HeatingPower", - "HotStageMEMS/HeatingVoltage", - "HotStageMEMS/SampleBias", - "HotStageMEMS/SampleResistance", - "HotStageMEMS/TargetTemperature", - "Image/Average", - "Image/DigitalBrightness", - "Image/DigitalContrast", - "Image/DigitalGamma", - "Image/DriftCorrected", - "Image/Integrate", - "Image/MagCanvasRealWidth", - "Image/MagnificationMode", - "Image/PostProcessing", - "Image/ResolutionX", - "Image/ResolutionY", - "Image/ScreenMagCanvasRealWidth", - "Image/ScreenMagnificationMode", - "Image/Transformation", - "Image/ZoomFactor", - "Image/ZoomPanX", - "Image/ZoomPanY", - "IRBeam/HFW", - "IRBeam/n", - "IRBeam/ScanRotation", - "IRBeam/SiDepth", - "IRBeam/StageR", - "IRBeam/StageTa", - "IRBeam/StageTb", - "IRBeam/StageX", - "IRBeam/StageY", - "IRBeam/StageZ", - "IRBeam/VFW", - "IRBeam/WD", - "PrivateFei/BitShift", - "PrivateFei/DataBarAvailable", - "PrivateFei/DatabarHeight", - "PrivateFei/DataBarSelected", - "PrivateFei/TimeOfCreation", - "Scan/Average", - "Scan/Dwelltime", - "Scan/FrameTime", - "Scan/HorFieldsize", - "Scan/Integrate", - "Scan/InternalScan", - "Scan/PixelHeight", - "Scan/PixelWidth", - "Scan/VerFieldsize", - "Specimen/SpecimenCurrent", - "Specimen/Temperature", - "Stage/ActiveStage", - "Stage/SpecTilt", - "Stage/StageR", - "Stage/StageT", - "Stage/StageTb", - "Stage/StageX", - "Stage/StageY", - "Stage/StageZ", - "Stage/WorkingDistance", - "System/Acq", - "System/Aperture", - "System/BuildNr", - "System/Chamber", - "System/Column", - "System/DisplayHeight", - "System/DisplayWidth", - "System/Dnumber", - "System/ESEM", - "System/EucWD", - "System/FinalLens", - "System/Pump", - "System/Scan", - "System/Software", - "System/Source", - "System/Stage", - "System/SystemType", - "System/Type", - "T1/Brightness", - "T1/BrightnessDB", - "T1/Contrast", - "T1/ContrastDB", - "T1/MinimumDwellTime", - "T1/Setting", - "T1/Signal", - "T2/Brightness", - "T2/BrightnessDB", - "T2/Contrast", - "T2/ContrastDB", - "T2/MinimumDwellTime", - "T2/Setting", - "T2/Signal", - "T3/Brightness", - "T3/BrightnessDB", - "T3/Contrast", - "T3/ContrastDB", - "T3/MinimumDwellTime", - "T3/Signal", - "User/Date", - "User/Time", - "User/User", - "User/UserText", - "User/UserTextUnicode", - "Vacuum/ChPressure", - "Vacuum/Gas", - "Vacuum/Humidity", - "Vacuum/UserMode"] + "Beam/Beam", + "Beam/BeamShiftX", + "Beam/BeamShiftY", + "Beam/FineStageBias", + "Beam/HV", + "Beam/ImageMode", + "Beam/Scan", + "Beam/ScanRotation", + "Beam/Spot", + "Beam/StigmatorX", + "Beam/StigmatorY", + "ColdStage/ActualTemperature", + "ColdStage/Humidity", + "ColdStage/SampleBias", + "ColdStage/TargetTemperature", + "CompoundLensFilter/IsOn", + "CompoundLensFilter/ThresholdEnergy", + "Detectors/Mode", + "Detectors/Name", + "Detectors/Number", + "EasyLift/Rotation", + "EBeam/Acq", + "EBeam/Aperture", + "EBeam/ApertureDiameter", + "EBeam/ATubeVoltage", + "EBeam/BeamCurrent", + "EBeam/BeamMode", + "EBeam/BeamShiftX", + "EBeam/BeamShiftY", + "EBeam/ColumnType", + "EBeam/DynamicFocusIsOn", + "EBeam/DynamicWDIsOn", + "EBeam/EmissionCurrent", + 
"EBeam/EucWD", + "EBeam/FinalLens", + "EBeam/HFW", + "EBeam/HV", + "EBeam/ImageMode", + "EBeam/LensMode", + "EBeam/LensModeA", + "EBeam/MagnificationCorrection", + "EBeam/PreTilt", + "EBeam/ScanRotation", + "EBeam/SemOpticalMode", + "EBeam/Source", + "EBeam/SourceTiltX", + "EBeam/SourceTiltY", + "EBeam/StageR", + "EBeam/StageTa", + "EBeam/StageTb", + "EBeam/StageX", + "EBeam/StageY", + "EBeam/StageZ", + "EBeam/StigmatorX", + "EBeam/StigmatorY", + "EBeam/TiltCorrectionAngle", + "EBeam/TiltCorrectionIsOn", + "EBeam/UseCase", + "EBeam/VFW", + "EBeam/WD", + "EBeam/WehneltBias", + "EBeamDeceleration/ImmersionRatio", + "EBeamDeceleration/LandingEnergy", + "EBeamDeceleration/ModeOn", + "EBeamDeceleration/StageBias", + "EScan/Dwell", + "EScan/FrameTime", + "EScan/HorFieldsize", + "EScan/InternalScan", + "EScan/LineIntegration", + "EScan/LineTime", + "EScan/Mainslock", + "EScan/PixelHeight", + "EScan/PixelWidth", + "EScan/Scan", + "EScan/ScanInterlacing", + "EScan/VerFieldsize", + "ETD/Brightness", + "ETD/BrightnessDB", + "ETD/Contrast", + "ETD/ContrastDB", + "ETD/Grid", + "ETD/MinimumDwellTime", + "ETD/Mix", + "ETD/Setting", + "ETD/Signal", + "GIS/Number", + "HiResIllumination/BrightFieldIsOn", + "HiResIllumination/BrightFieldValue", + "HiResIllumination/DarkFieldIsOn", + "HiResIllumination/DarkFieldValue", + "HotStage/ActualTemperature", + "HotStage/SampleBias", + "HotStage/ShieldBias", + "HotStage/TargetTemperature", + "HotStageHVHS/ActualTemperature", + "HotStageHVHS/SampleBias", + "HotStageHVHS/ShieldBias", + "HotStageHVHS/TargetTemperature", + "HotStageMEMS/ActualTemperature", + "HotStageMEMS/HeatingCurrent", + "HotStageMEMS/HeatingPower", + "HotStageMEMS/HeatingVoltage", + "HotStageMEMS/SampleBias", + "HotStageMEMS/SampleResistance", + "HotStageMEMS/TargetTemperature", + "Image/Average", + "Image/DigitalBrightness", + "Image/DigitalContrast", + "Image/DigitalGamma", + "Image/DriftCorrected", + "Image/Integrate", + "Image/MagCanvasRealWidth", + "Image/MagnificationMode", + "Image/PostProcessing", + "Image/ResolutionX", + "Image/ResolutionY", + "Image/ScreenMagCanvasRealWidth", + "Image/ScreenMagnificationMode", + "Image/Transformation", + "Image/ZoomFactor", + "Image/ZoomPanX", + "Image/ZoomPanY", + "IRBeam/HFW", + "IRBeam/n", + "IRBeam/ScanRotation", + "IRBeam/SiDepth", + "IRBeam/StageR", + "IRBeam/StageTa", + "IRBeam/StageTb", + "IRBeam/StageX", + "IRBeam/StageY", + "IRBeam/StageZ", + "IRBeam/VFW", + "IRBeam/WD", + "PrivateFei/BitShift", + "PrivateFei/DataBarAvailable", + "PrivateFei/DatabarHeight", + "PrivateFei/DataBarSelected", + "PrivateFei/TimeOfCreation", + "Scan/Average", + "Scan/Dwelltime", + "Scan/FrameTime", + "Scan/HorFieldsize", + "Scan/Integrate", + "Scan/InternalScan", + "Scan/PixelHeight", + "Scan/PixelWidth", + "Scan/VerFieldsize", + "Specimen/SpecimenCurrent", + "Specimen/Temperature", + "Stage/ActiveStage", + "Stage/SpecTilt", + "Stage/StageR", + "Stage/StageT", + "Stage/StageTb", + "Stage/StageX", + "Stage/StageY", + "Stage/StageZ", + "Stage/WorkingDistance", + "System/Acq", + "System/Aperture", + "System/BuildNr", + "System/Chamber", + "System/Column", + "System/DisplayHeight", + "System/DisplayWidth", + "System/Dnumber", + "System/ESEM", + "System/EucWD", + "System/FinalLens", + "System/Pump", + "System/Scan", + "System/Software", + "System/Source", + "System/Stage", + "System/SystemType", + "System/Type", + "T1/Brightness", + "T1/BrightnessDB", + "T1/Contrast", + "T1/ContrastDB", + "T1/MinimumDwellTime", + "T1/Setting", + "T1/Signal", + "T2/Brightness", + 
"T2/BrightnessDB", + "T2/Contrast", + "T2/ContrastDB", + "T2/MinimumDwellTime", + "T2/Setting", + "T2/Signal", + "T3/Brightness", + "T3/BrightnessDB", + "T3/Contrast", + "T3/ContrastDB", + "T3/MinimumDwellTime", + "T3/Signal", + "User/Date", + "User/Time", + "User/User", + "User/UserText", + "User/UserTextUnicode", + "Vacuum/ChPressure", + "Vacuum/Gas", + "Vacuum/Humidity", + "Vacuum/UserMode"] # there is more to know and understand than just knowing TFS/FEI uses # the above-mentioned concepts in their taxonomy: diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py index 13c32c7dd..8d4cbef24 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_bruker.py @@ -44,7 +44,7 @@ def check_if_supported(self): # based on this one could then plan how much memory has to be reserved # in the template and stream out accordingly self.supported = True - except: + except IOError: print(f"Loading {self.file_path} using {self.__name__} is not supported !") def parse_and_normalize(self): @@ -72,9 +72,9 @@ def process_into_template(self, template: dict) -> dict: self.process_event_data_em_metadata(template) self.process_event_data_em_data(template) return template - + def process_event_data_em_metadata(self, template: dict) -> dict: return template def process_event_data_em_data(self, template: dict) -> dict: - return template \ No newline at end of file + return template diff --git a/pynxtools/dataconverter/readers/em/utils/xml_utils.py b/pynxtools/dataconverter/readers/em/utils/xml_utils.py new file mode 100644 index 000000000..c7a3223f8 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/xml_utils.py @@ -0,0 +1,41 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Flatten content of an XML tree into a Python dictionary.""" + +from collections import OrderedDict + + +def flatten_xml_to_dict(xml_content) -> dict: + """Flatten content of an XML tree into a Python dictionary.""" + # https://codereview.stackexchange.com/a/21035 + # https://stackoverflow.com/questions/38852822/how-to-flatten-xml-file-in-python + def items(): + for key, value in xml_content.items(): + # nested subtree + if isinstance(value, dict): + for subkey, subvalue in flatten_xml_to_dict(value).items(): + yield '{}.{}'.format(key, subkey), subvalue + # nested list + elif isinstance(value, list): + for num, elem in enumerate(value): + for subkey, subvalue in flatten_xml_to_dict(elem).items(): + yield '{}.[{}].{}'.format(key, num, subkey), subvalue + # everything else (only leafs should remain) + else: + yield key, value + return OrderedDict(items()) diff --git a/pynxtools/dataconverter/readers/shared/shared_utils.py b/pynxtools/dataconverter/readers/shared/shared_utils.py index 629e29a0f..5311bb3a1 100644 --- a/pynxtools/dataconverter/readers/shared/shared_utils.py +++ b/pynxtools/dataconverter/readers/shared/shared_utils.py @@ -23,6 +23,7 @@ # pylint: disable=E1101, R0801 # import git +import hashlib def get_repo_last_commit() -> str: @@ -36,6 +37,16 @@ def get_repo_last_commit() -> str: return "unknown git commit id or unable to parse git reverse head" +def get_sha256_of_file_content(file_hdl) -> str: + """Compute a hashvalue of given file, here SHA256.""" + file_hdl.seek(0) + # Read and update hash string value in blocks of 4K + sha256_hash = hashlib.sha256() + for byte_block in iter(lambda: file_hdl.read(4096), b""): + sha256_hash.update(byte_block) + return str(sha256_hash.hexdigest()) + + class NxObject: # pylint: disable=R0903 """An object in a graph e.g. 
a field or group in NeXus.""" From 4c113cadc946f9eb8411f0ececa033b111e3b056 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 19 Dec 2023 10:06:08 +0100 Subject: [PATCH 58/84] Removed deprecated zipfile37 in all remaining places using zipfile instead --- .../readers/em_nion/utils/swift_zipped_project_parser.py | 2 +- pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py b/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py index 17f74ba61..4f8887abe 100644 --- a/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py +++ b/pynxtools/dataconverter/readers/em_nion/utils/swift_zipped_project_parser.py @@ -33,7 +33,7 @@ import h5py -from zipfile37 import ZipFile +from zipfile import ZipFile from pynxtools.dataconverter.readers.em_nion.utils.swift_uuid_to_file_name \ import uuid_to_file_name diff --git a/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py b/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py index e6acb6c85..de37ad6dd 100644 --- a/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py +++ b/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py @@ -29,7 +29,7 @@ import numpy as np -from zipfile37 import ZipFile +from zipfile import ZipFile # import imageio.v3 as iio from PIL import Image as pil From 9d3b8e76e8922f2f65daceda70763931ac323846 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 19 Dec 2023 11:46:15 +0100 Subject: [PATCH 59/84] Working parsing and mapping of some Protochips/AXONStudio metadata on NeXus --- .../em/subparsers/image_png_protochips.py | 72 ++++++++++++++----- .../em/subparsers/image_png_protochips_cfg.py | 4 +- .../image_png_protochips_modifier.py | 44 +++++++++--- .../dataconverter/readers/em/utils/sorting.py | 32 +++++++++ .../readers/em_om/utils/zip_ebsd_parser.py | 6 +- .../shared/map_concepts/mapping_functors.py | 2 +- 6 files changed, 127 insertions(+), 33 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/utils/sorting.py diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py index 49adfd67e..df9058e39 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py @@ -21,7 +21,8 @@ import re import numpy as np import xmltodict -from typing import Dict +import datetime +from typing import Dict, List from PIL import Image from zipfile import ZipFile @@ -36,6 +37,8 @@ from pynxtools.dataconverter.readers.em.subparsers.image_base import ImgsBaseParser from pynxtools.dataconverter.readers.em.utils.xml_utils import flatten_xml_to_dict from pynxtools.dataconverter.readers.shared.shared_utils import get_sha256_of_file_content +from pynxtools.dataconverter.readers.em.utils.sorting import \ + sort_ascendingly_by_second_argument_iso8601 class ProtochipsPngSetSubParser(ImgsBaseParser): @@ -140,7 +143,7 @@ def get_xml_metadata(self, file, fp): if grpnms is not None: if len(grpnms) == 2: if "PositionerSettings" in k and k.endswith(".PositionerName") is False: - self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}"] = v + self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}.{k[k.rfind('.') + 1:]}"] = v if k.endswith(".Value"): self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}"] = v else: @@ -162,9 
+165,11 @@ def parse_and_normalize(self): with zip_file_hdl.open(file) as fp: self.get_xml_metadata(file, fp) self.get_file_hash(file, fp) - # print(f"Debugging self.tmp.file.items {file}") - # for k, v in self.tmp["meta"][file].items(): - # print(f"{k}: {v}") + print(f"Debugging self.tmp.file.items {file}") + for k, v in self.tmp["meta"][file].items(): + # if k == "MicroscopeControlImageMetadata.MicroscopeDateTime": + print(f"{k}: {v}") + print(f"{self.file_path} metadata within PNG collection processed " f"successfully ({len(self.tmp['meta'].keys())} PNGs evaluated).") else: @@ -177,21 +182,56 @@ def process_into_template(self, template: dict) -> dict: # self.process_event_data_em_data(template) return template + def sort_event_data_em(self) -> List: + events: List = [] + for file_name, mdict in self.tmp["meta"].items(): + key = f"MicroscopeControlImageMetadata.MicroscopeDateTime" + if key in mdict.keys(): + if mdict[key].count(".") == 1: + datetime_obj = datetime.datetime.strptime(mdict[key], '%Y-%m-%dT%H:%M:%S.%f%z') + else: + datetime_obj = datetime.datetime.strptime(mdict[key], '%Y-%m-%dT%H:%M:%S%z') + events.append((f"{file_name}", datetime_obj)) + + events_sorted = sort_ascendingly_by_second_argument_iso8601(events) + del events + time_series_start = events_sorted[0][1] + print(f"Time series start: {time_series_start}") + for file_name, iso8601 in events_sorted: + continue + # print(f"{file_name}, {iso8601}, {(iso8601 - time_series_start).total_seconds()} s") + print(f"Time series end: {events_sorted[-1][1]}, {(events_sorted[-1][1] - time_series_start).total_seconds()} s") + return events_sorted + def process_event_data_em_metadata(self, template: dict) -> dict: """Add respective metadata.""" # contextualization to understand how the image relates to the EM session print(f"Mapping some of the Protochips-specific metadata on respective NeXus concept instance") - identifier = [self.entry_id, self.event_id, 1] - for tpl in PNG_PROTOCHIPS_TO_NEXUS_CFG: - if isinstance(tpl, tuple): - trg = variadic_path_to_specific_path(tpl[0], identifier) - if len(tpl) == 2: - template[trg] = tpl[1] - if len(tpl) == 3: - # nxpath, modifier, value to load from and eventually to be modified - retval = get_nexus_value(tpl[1], tpl[2], self.tmp["meta"]) - if retval is not None: - template[trg] = retval + # individual PNGs in self.file_path may include time/date information in the file name + # surplus eventually AXON-specific identifier it seems useful though to sort these + # PNGs based on time stamped information directly from the AXON metadata + # here we sort ascendingly in time the events and associate new event ids + event_sequence = self.sort_event_data_em() + event_id = self.event_id + for file_name, iso8601 in event_sequence: + identifier = [self.entry_id, event_id, 1] + for tpl in PNG_PROTOCHIPS_TO_NEXUS_CFG: + if isinstance(tpl, tuple): + trg = variadic_path_to_specific_path(tpl[0], identifier) + print(f"Target {trg} after variadic name resolution identifier {identifier}") + if len(tpl) == 2: + template[trg] = tpl[1] + if len(tpl) == 3: + # nxpath, modifier, value to load from and eventually to be modified + print(f"Loading {tpl[2]} from tmp.meta.filename modifier {tpl[1]}...") + retval = get_nexus_value(tpl[1], tpl[2], self.tmp["meta"][file_name]) + if retval is not None: + template[trg] = retval + trg = variadic_path_to_specific_path(f"/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET" + f"[event_data_em_set]/EVENT_DATA_EM[event_data_em*]" + f"/start_time", identifier) + template[trg] = f"{iso8601}" 
+ event_id += 1 return template def process_event_data_em_data(self, template: dict) -> dict: diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py index 792c1e8c8..810507cc3 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_cfg.py @@ -22,7 +22,9 @@ ('/ENTRY[entry*]/measurement/em_lab/STAGE_LAB[stage_lab]/design', 'heating_chip'), ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_1', 'load_from', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.A'), ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt_2', 'load_from', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.B'), - ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/position', 'load_from_concatenate', 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.X, MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Y, MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Z'), + ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/position', 'load_from_concatenate', ['MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.X', + 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Y', + 'MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.Z']), ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/current', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HeatingCurrent'), ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/current/@units', 'A'), ('/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/HEATER[heater]/power', 'load_from', 'MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HeatingPower'), diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py index 54872857f..08a59dcaf 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py @@ -19,20 +19,44 @@ # pylint: disable=no-member +import re from numpy import pi +def specific_to_variadic(token): + # "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[20].HeatingPower" + # to "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].HeatingPower" + if isinstance(token, str) and token != "": + concept = token.strip() + idxs = re.finditer(r".\[[0-9]+\].", concept) + if (sum(1 for _ in idxs) > 0): + variadic = concept + for idx in re.finditer(r".\[[0-9]+\].", concept): + variadic = 
variadic.replace(concept[idx.start(0):idx.end(0)], ".[*].") + return variadic + else: + return concept + return None + + def get_nexus_value(modifier, qnt_name, metadata: dict): """Interpret a functional mapping and modifier on qnt_name loaded from metadata.""" - if qnt_name in metadata.keys(): - if modifier == "load_from": - return metadata[qnt_name] - elif modifier == "load_from_concatenate": - if qnt_name in metadata.keys(): - return metadata[qnt_name] / pi * 180. + if modifier == "load_from": + if isinstance(qnt_name, str): + for qnt in metadata.keys(): + if qnt_name == specific_to_variadic(qnt): + return metadata[qnt] + elif modifier == "load_from_concatenate": + if isinstance(qnt_name, list): + retval = [] + for entry in qnt_name: + for qnt in metadata.keys(): + if entry == specific_to_variadic(qnt): + retval.append(metadata[qnt]) + break # breaking only out of the inner loop + if retval != []: + print(f"load_from_concatenate modifier, retval: {retval}") + return retval + return None else: - # print(f"WARNING modifier {modifier}, qnt_name {qnt_name} not found !") return None - # if f"{modifier['terms']}/{metadata[modifier['terms']]}" in TfsToNexusConceptMapping.keys(): - # return TfsToNexusConceptMapping[f"{modifier['terms']}/{metadata[modifier['terms']]}"] - # elif set(["link"]) == set(modifier.keys()), with the jsonmap reader Sherjeel conceptualized "link" diff --git a/pynxtools/dataconverter/readers/em/utils/sorting.py b/pynxtools/dataconverter/readers/em/utils/sorting.py new file mode 100644 index 000000000..72615aab2 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/sorting.py @@ -0,0 +1,32 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import datetime + + +def sort_ascendingly_by_second_argument_iso8601(tup): + # convert the list of tuples to a numpy array with data type (object, int) + arr = np.array(tup, dtype=[('col1', object), ('col2', datetime.datetime)]) + # get the indices that would sort the array based on the second column + indices = np.argsort(arr['col2']) + # use the resulting indices to sort the array + sorted_arr = arr[indices] + # convert the sorted numpy array back to a list of tuples + sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr] + return sorted_tup diff --git a/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py b/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py index de37ad6dd..a08bd5e4f 100644 --- a/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py +++ b/pynxtools/dataconverter/readers/em_om/utils/zip_ebsd_parser.py @@ -26,12 +26,8 @@ # from typing import Dict, Any, List import re - -import numpy as np - from zipfile import ZipFile - -# import imageio.v3 as iio +import numpy as np from PIL import Image as pil from pynxtools.dataconverter.readers.em_om.utils.em_nexus_plots import HFIVE_WEB_MAX_SIZE diff --git a/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py b/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py index 8851e3427..e79791939 100644 --- a/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py +++ b/pynxtools/dataconverter/readers/shared/map_concepts/mapping_functors.py @@ -96,7 +96,7 @@ def apply_modifier(modifier, dct: dict): # modd = {"fun": "convert_iso8601", "terms": ["data_modified", "timezone"]} # print(apply_modifier(modd, yml)) -def variadic_path_to_specific_path(path: str, instance_identifier: list): +def variadic_path_to_specific_path(path, instance_identifier: list): """Transforms a variadic path to an actual path with instances.""" if (path is not None) and (path != ""): narguments = path.count("*") From a729d1f9e2ea6a41499243c7d1b56ecc0a0a5082 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 19 Dec 2023 12:09:40 +0100 Subject: [PATCH 60/84] Functionally working version of the collection-of-PNG-files from Protochips AXON Studio FeOx Reduction example, missing functionalities and remaining issues with this parser, detector is written out twice likely because of passing the same instance id list which should be specific for detector, default plot for heating curve missing but can be constructed easily via looping over self.tmp[meta][file_name][use specific quantity and loop via self.event_sequence and elapsed time difference using datetime, next steps: i) refactor nion parser to enable analyzing metadata state in all examples from C. 
Koch, ii) finish implementation of IKZ/APEX parser, iii) refactor remaining EDS parser, iv) add ELNs --- imgs.batch.sh | 2 +- pynxtools/dataconverter/readers/em/reader.py | 2 +- .../em/subparsers/image_png_protochips.py | 127 ++++++++---------- .../image_png_protochips_modifier.py | 1 - 4 files changed, 59 insertions(+), 73 deletions(-) diff --git a/imgs.batch.sh b/imgs.batch.sh index 8b6498b58..d6a563ac6 100755 --- a/imgs.batch.sh +++ b/imgs.batch.sh @@ -11,7 +11,7 @@ examples="ikz_robert/ETD_image.tif" # ETD examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam examples="0c8nA_3deg_003_AplusB_test.tif ALN_baoh_021.tif T3_image.tif ETD_image.tif NavCam_normal_vis_light_ccd.tif" examples="axon/20210426T224437.049Raw0.png" #axon -examples="ReductionOfFeOx.zip" +examples="ReductionOfFeOx.zip" # Small.zip" for example in $examples; do echo $example diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 76dd5fdee..d05ad02e5 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -157,7 +157,7 @@ def read(self, if resolved_path != "": nxs_plt.annotate_default_plot(template, resolved_path) - debugging = True + debugging = False if debugging is True: print("Reporting state of template before passing to HDF5 writing...") for keyword in template.keys(): diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py index df9058e39..4f38ff779 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py @@ -53,6 +53,7 @@ def __init__(self, file_path: str = "", entry_id: int = 1): self.version: Dict = {} self.png_info: Dict = {} self.supported = False + self.event_sequence: List = [] self.check_if_zipped_png_protochips() def check_if_zipped_png_protochips(self): @@ -165,11 +166,10 @@ def parse_and_normalize(self): with zip_file_hdl.open(file) as fp: self.get_xml_metadata(file, fp) self.get_file_hash(file, fp) - print(f"Debugging self.tmp.file.items {file}") - for k, v in self.tmp["meta"][file].items(): - # if k == "MicroscopeControlImageMetadata.MicroscopeDateTime": - print(f"{k}: {v}") - + # print(f"Debugging self.tmp.file.items {file}") + # for k, v in self.tmp["meta"][file].items(): + # if k == "MicroscopeControlImageMetadata.MicroscopeDateTime": + # print(f"{k}: {v}") print(f"{self.file_path} metadata within PNG collection processed " f"successfully ({len(self.tmp['meta'].keys())} PNGs evaluated).") else: @@ -179,7 +179,7 @@ def parse_and_normalize(self): def process_into_template(self, template: dict) -> dict: if self.supported is True: self.process_event_data_em_metadata(template) - # self.process_event_data_em_data(template) + self.process_event_data_em_data(template) return template def sort_event_data_em(self) -> List: @@ -211,19 +211,19 @@ def process_event_data_em_metadata(self, template: dict) -> dict: # surplus eventually AXON-specific identifier it seems useful though to sort these # PNGs based on time stamped information directly from the AXON metadata # here we sort ascendingly in time the events and associate new event ids - event_sequence = self.sort_event_data_em() + self.event_sequence = self.sort_event_data_em() event_id = self.event_id - for file_name, iso8601 in event_sequence: + for file_name, iso8601 in self.event_sequence: identifier = [self.entry_id, event_id, 1] for 
tpl in PNG_PROTOCHIPS_TO_NEXUS_CFG: if isinstance(tpl, tuple): trg = variadic_path_to_specific_path(tpl[0], identifier) - print(f"Target {trg} after variadic name resolution identifier {identifier}") + # print(f"Target {trg} after variadic name resolution identifier {identifier}") if len(tpl) == 2: template[trg] = tpl[1] if len(tpl) == 3: # nxpath, modifier, value to load from and eventually to be modified - print(f"Loading {tpl[2]} from tmp.meta.filename modifier {tpl[1]}...") + # print(f"Loading {tpl[2]} from tmp.meta.filename modifier {tpl[1]}...") retval = get_nexus_value(tpl[1], tpl[2], self.tmp["meta"][file_name]) if retval is not None: template[trg] = retval @@ -237,66 +237,53 @@ def process_event_data_em_metadata(self, template: dict) -> dict: def process_event_data_em_data(self, template: dict) -> dict: """Add respective heavy data.""" # default display of the image(s) representing the data collected in this event - print(f"Writing Protochips PNG image into a respective NeXus concept instance") + print(f"Writing Protochips PNG images into respective NeXus event_data_em concept instances") # read image in-place - with Image.open(self.file_path, mode="r") as fp: - nparr = np.array(fp) - # print(f"type: {type(nparr)}, dtype: {nparr.dtype}, shape: {np.shape(nparr)}") - # TODO::discussion points - # - how do you know we have an image of real space vs. imaginary space (from the metadata?) - # - how do deal with the (ugly) scale bar that is typically stamped into the TIFF image content? - # with H5Web and NeXus most of this is obsolete unless there are metadata stamped which are not - # available in NeXus or in the respective metadata in the metadata section of the TIFF image - # remember H5Web images can be scaled based on the metadata allowing basically the same - # explorative viewing using H5Web than what traditionally typical image viewers are meant for - image_identifier = 1 - trg = f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/" \ - f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \ - f"IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]" - # TODO::writer should decorate automatically! - template[f"{trg}/title"] = f"Image" - template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! - template[f"{trg}/@signal"] = "intensity" - dims = ["x", "y"] - idx = 0 - for dim in dims: - template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) - idx += 1 - template[f"{trg}/@axes"] = [] - for dim in dims[::-1]: - template[f"{trg}/@axes"].append(f"axis_{dim}") - template[f"{trg}/intensity"] = {"compress": np.array(fp), "strength": 1} - # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d - template[f"{trg}/intensity/@long_name"] = f"Signal" + event_id = self.event_id + with ZipFile(self.file_path) as zip_file_hdl: + for file_name, iso8601 in self.event_sequence: + identifier = [self.entry_id, event_id, 1] + with zip_file_hdl.open(file_name) as fp: + with Image.open(fp) as png: + nparr = np.array(png) + image_identifier = 1 + trg = f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET" \ + f"[event_data_em_set]/EVENT_DATA_EM[event_data_em{event_id}]" \ + f"/IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]" + # TODO::writer should decorate automatically! + template[f"{trg}/title"] = f"Image" + template[f"{trg}/@NX_class"] = f"NXdata" # TODO::writer should decorate automatically! 
+ template[f"{trg}/@signal"] = "intensity" + dims = ["x", "y"] + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) + idx += 1 + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + template[f"{trg}/intensity"] = {"compress": nparr, "strength": 1} + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d + template[f"{trg}/intensity/@long_name"] = f"Signal" - sxy = {"x": 1., "y": 1.} - scan_unit = {"x": "m", "y": "m"} # assuming FEI reports SI units - # we may face the CCD overview camera for the chamber for which there might not be a calibration! - if ("EScan/PixelWidth" in self.tmp["meta"].keys()) and ("EScan/PixelHeight" in self.tmp["meta"].keys()): - sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"], - "y": self.tmp["meta"]["EScan/PixelHeight"]} - scan_unit = {"x": "px", "y": "px"} - nxy = {"x": np.shape(np.array(fp))[1], "y": np.shape(np.array(fp))[0]} - # TODO::be careful we assume here a very specific coordinate system - # however the TIFF file gives no clue, TIFF just documents in which order - # it arranges a bunch of pixels that have stream in into a n-d tiling - # e.g. a 2D image - # also we have to be careful because TFS just gives us here - # typical case of an image without an information without its location - # on the physical sample surface, therefore we can only scale - # pixel_identifier by physical scaling quantities s_x, s_y - # also the dimensions of the image are on us to fish with the image - # reading library instead of TFS for consistency checks adding these - # to the metadata the reason is that TFS TIFF use the TIFF tagging mechanism - # and there is already a proper TIFF tag for the width and height of an - # image in number of pixel - for dim in dims: - template[f"{trg}/AXISNAME[axis_{dim}]"] \ - = {"compress": np.asarray(np.linspace(0, - nxy[dim] - 1, - num=nxy[dim], - endpoint=True) * sxy[dim], np.float64), "strength": 1} - template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \ - = f"Coordinate along {dim}-axis ({scan_unit[dim]})" - template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" + sxy = {"x": 1., "y": 1.} + scan_unit = {"x": "px", "y": "px"} # TODO::get AXON image calibration + nxy = {"x": np.shape(nparr)[1], + "y": np.shape(nparr)[0]} + del nparr + # TODO::we assume here a very specific coordinate system + # see image_tiff_tfs.py parser for further details of the limitations + # of this approach + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] \ + = {"compress": np.asarray(np.linspace(0, + nxy[dim] - 1, + num=nxy[dim], + endpoint=True) * sxy[dim], + np.float64), "strength": 1} + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \ + = f"Coordinate along {dim}-axis ({scan_unit[dim]})" + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] \ + = f"{scan_unit[dim]}" + event_id += 1 return template diff --git a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py index 08a59dcaf..98d2d99f8 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py +++ b/pynxtools/dataconverter/readers/em/subparsers/image_png_protochips_modifier.py @@ -55,7 +55,6 @@ def get_nexus_value(modifier, qnt_name, metadata: dict): retval.append(metadata[qnt]) break # breaking only out of the inner loop if retval != []: - print(f"load_from_concatenate modifier, retval: {retval}") return retval return None else: 
From d1ecede542a2a124085aaa7bd12410d66eb48b8a Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Tue, 19 Dec 2023 17:28:55 +0100
Subject: [PATCH 61/84] Started refactoring of the em_nion parser so that
 zip-compressed project directories can be inspected for their content; next
 steps: i) find out how to get the swift version, ii) polish the ndata,
 hfive, and nsproj reporting functionality, iii) discuss with Sherjeel how to
 run it, iv) fix the reading and mapping functionalities again; in the
 meantime, continue with the APEX parser

---
 nion.batch.sh                                 |  12 +
 pynxtools/dataconverter/readers/em/reader.py  |  13 +-
 .../readers/em/subparsers/nxs_nion.py         | 318 ++++++++++++++++++
 .../readers/em/utils/nion_utils.py            |  40 +++
 4 files changed, 379 insertions(+), 4 deletions(-)
 create mode 100755 nion.batch.sh
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py
 create mode 100644 pynxtools/dataconverter/readers/em/utils/nion_utils.py

diff --git a/nion.batch.sh b/nion.batch.sh
new file mode 100755
index 000000000..3ac1601da
--- /dev/null
+++ b/nion.batch.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_nion/"
+
+# examples="a.zip.nionswift b.zip.nionswift"
+examples="2022-02-18_Metadata_Kuehbach.zip.nionswift"
+
+
+for example in $examples; do
+  echo $example
+  dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt
+done
diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
index d05ad02e5..529e45f2a 100644
--- a/pynxtools/dataconverter/readers/em/reader.py
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -26,6 +26,7 @@
 # from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser
 # from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser
 from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser
+from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser
 from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver
 # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper
 
@@ -103,8 +104,8 @@ def read(self,
             if file_path != "":
                 input_file_names.append(file_path)
         print("Parse NeXus appdef-specific content...")
-        nxs = NxEmAppDef()
-        nxs.parse(template, entry_id, input_file_names)
+        # nxs = NxEmAppDef()
+        # nxs.parse(template, entry_id, input_file_names)
 
         print("Parse conventions of reference frames...")
         # conventions = NxEmConventionMapper(entry_id)
@@ -124,8 +125,12 @@ def read(self,
         # TODO::check correct loop through!
 
         # sub_parser = "image_tiff"
-        subparser = NxEmImagesSubParser(entry_id, file_paths[0])
-        subparser.parse(template)
+        # subparser = NxEmImagesSubParser(entry_id, file_paths[0])
+        # subparser.parse(template)
+
+        # sub_parser = "zipped_nion_project"
+        subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0])
+        subparser.parse(template, verbose=True)
 
         # for dat_instance in case.dat_parser_type:
         #     print(f"Process pieces of information in {dat_instance} tech partner file...")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py
new file mode 100644
index 000000000..1125c344f
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py
@@ -0,0 +1,318 @@
+#
+# Copyright The NOMAD Authors.
+# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Parse Nion-specific content in a file containing a zip-compressed nionswift project.""" + +# pylint: disable=no-member + +import mmap +import yaml +import json +import flatdict as fd +import numpy as np +import h5py +import nion.swift.model.NDataHandler as nsnd +from zipfile import ZipFile +from typing import Dict, List + +from pynxtools.dataconverter.readers.em.utils.nion_utils import \ + uuid_to_file_name +# from pynxtools.dataconverter.readers.em_nion.utils.swift_generate_dimscale_axes \ +# import get_list_of_dimension_scale_axes +# from pynxtools.dataconverter.readers.em_nion.map_concepts.swift_display_items_to_nx \ +# import nexus_concept_dict, identify_nexus_concept_key +# from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \ +# import apply_modifier, variadic_path_to_specific_path +# from pynxtools.dataconverter.readers.em_nion.map_concepts.swift_to_nx_image_real_space \ +# import NxImageRealSpaceDict +from pynxtools.dataconverter.readers.shared.shared_utils import get_sha256_of_file_content + + +class NxEmZippedNionProjectSubParser: + """Parse zip-compressed archive of a nionswift project with its content.""" + + def __init__(self, entry_id: int = 1, input_file_path: str = ""): + """Class wrapping swift parser.""" + if input_file_path is not None and input_file_path != "": + self.file_path = input_file_path + else: + raise ValueError(f"{__name__} needs proper instantiation !") + if entry_id > 0: + self.entry_id = entry_id + else: + self.entry_id = 1 + # counters which keep track of how many instances of NXevent_data_em have + # been instantiated, this implementation currently maps each display_items + # onto an own NXevent_data_em instance + self.prfx = None + self.tmp: Dict = {} + self.proj_file_dict: Dict = {} + # assure that there is exactly one *.nsproj file only to parse from + self.ndata_file_dict: Dict = {} + # just get the *.ndata files irrespective whether parsed later or not + self.hfive_file_dict: Dict = {} + # just get the *.h5 files irrespective whether parsed later or not + self.configure() + self.supported = False + + def configure(self): + self.tmp["cfg"]: Dict = {} + self.tmp["cfg"]["event_data_written"] = False + self.tmp["cfg"]["event_data_em_id"] = 1 + self.tmp["cfg"]["image_id"] = 1 + self.tmp["cfg"]["spectrum_id"] = 1 + self.tmp["meta"]: Dict = {} + + def check_if_zipped_nionswift_project_file(self, verbose=False): + """Inspect the content of the compressed project file to check if supported.""" + with open(self.file_path, 'rb', 0) as file: + s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + magic = s.read(8) + if verbose is True: + print(f"Expecting pfile: {self.file_path}: __{magic}__") + """ + if magic != b'PK\x03\x04': # https://en.wikipedia.org/wiki/List_of_file_signatures + print(f"Test 1 failed, {self.file_path} is not a ZIP archive !") + return False + """ + # analyze information content in the 
archive at the granularity of individual files
+        with ZipFile(self.file_path) as zip_file_hdl:
+            for file in zip_file_hdl.namelist():
+                if file.endswith(".h5") or file.endswith(".hdf") or file.endswith(".hdf5"):
+                    with zip_file_hdl.open(file) as fp:
+                        magic = fp.read(8)
+                        if verbose is True:
+                            fp.seek(0, 2)
+                            eof_byte_offset = fp.tell()
+                            print(f"Expecting hfive: {file}: __{magic}___{get_sha256_of_file_content(fp)}__{eof_byte_offset}")
+                        key = file[file.rfind("/") + 1:].replace(".h5", "")
+                        if key not in self.hfive_file_dict:
+                            self.hfive_file_dict[key] = file
+                elif file.endswith(".ndata"):
+                    with zip_file_hdl.open(file) as fp:
+                        magic = fp.read(8)
+                        if verbose is True:
+                            fp.seek(0, 2)
+                            eof_byte_offset = fp.tell()
+                            print(f"Expecting ndata: {file}: __{magic}__{get_sha256_of_file_content(fp)}__{eof_byte_offset}")
+                        key = file[file.rfind("/") + 1:].replace(".ndata", "")
+                        if key not in self.ndata_file_dict:
+                            self.ndata_file_dict[key] = file
+                elif file.endswith(".nsproj"):
+                    with zip_file_hdl.open(file) as fp:
+                        magic = fp.read(8)
+                        if verbose is True:
+                            fp.seek(0, 2)
+                            eof_byte_offset = fp.tell()
+                            print(f"Expecting nsproj: {file}: __{magic}__{get_sha256_of_file_content(fp)}__{eof_byte_offset}")
+                        key = file[file.rfind("/") + 1:].replace(".nsproj", "")
+                        if key not in self.proj_file_dict:
+                            self.proj_file_dict[key] = file
+                else:
+                    continue
+        if not self.ndata_file_dict.keys().isdisjoint(self.hfive_file_dict.keys()):
+            print("Test 2 failed, UUID keys of *.ndata and *.h5 files in project are not disjoint!")
+            return False
+        if len(self.proj_file_dict.keys()) != 1:
+            print("Test 3 failed, the project contains either no or more than one nsproj file!")
+            return False
+        print(f"Content in zip-compressed nionswift project {self.file_path} passed all tests")
+        self.supported = True
+        if verbose is True:
+            for key, val in self.proj_file_dict.items():
+                print(f"{key}, {val}")
+            for key, val in self.ndata_file_dict.items():
+                print(f"{key}, {val}")
+            for key, val in self.hfive_file_dict.items():
+                print(f"{key}, {val}")
+        return True
+
+    def update_event_identifier(self):
+        """Advance and reset bookkeeping of event data em and data instances."""
+        if self.tmp["cfg"]["event_data_written"] is True:
+            self.tmp["cfg"]["event_data_em_id"] += 1
+            self.tmp["cfg"]["event_data_written"] = False
+        self.tmp["cfg"]["image_id"] = 1
+        self.tmp["cfg"]["spectrum_id"] = 1
+
+    def add_nx_image_real_space(self, meta, arr, template):
+        """Create instance of NXimage_r_set"""
+        # TODO::
+        return template
+
+    def map_to_nexus(self, meta, arr, concept_name, template):
+        """Create the actual instance of a specific set of NeXus concepts in template."""
+        # TODO::
+        return template
+
+    def process_ndata(self, file_hdl, full_path, template):
+        """Handle reading and processing of opened *.ndata inside the ZIP file."""
+        # assure that we start reading that file_hdl/pointer from the beginning...
+        file_hdl.seek(0)
+        local_files, dir_files, eocd = nsnd.parse_zip(file_hdl)
+        # ...now that pointer might point somewhere...
+        flat_metadata_dict = {}
+        """
+        data_arr = None
+        nx_concept_name = ""
+        """
+
+        for offset, tpl in local_files.items():
+            print(f"{offset}__{tpl}")
+            continue
+            # print(f"{tpl}")
+            if tpl[0] == b'metadata.json':
+                print(f"Extract metadata.json from {full_path} at offset {offset}")
+                # ... explicit jump back to beginning of the file
+                file_hdl.seek(0)
+                metadata_dict = nsnd.read_json(file_hdl,
+                                               local_files,
+                                               dir_files,
+                                               b'metadata.json')
+                """
+                nx_concept_key = identify_nexus_concept_key(metadata_dict)
+                nx_concept_name = nexus_concept_dict[nx_concept_key]
+                print(f"Display_item {full_path}, concept {nx_concept_key}, maps {nx_concept_name}")
+                """
+
+                flat_metadata_dict = fd.FlatDict(metadata_dict, delimiter='/')
+                break
+                # because we expect (based on Benedikt's example) to find only one json file
+                # in that *.ndata file pointed to by file_hdl
+        if flat_metadata_dict == {}:  # only continue if some metadata were retrieved
+            return template

+        for offset, tpl in local_files.items():
+            # print(f"{tpl}")
+            if tpl[0] == b'data.npy':
+                print(f"Extract data.npy from {full_path} at offset {offset}")
+                file_hdl.seek(0)
+                data_arr = nsnd.read_data(file_hdl,
+                                          local_files,
+                                          dir_files,
+                                          b'data.npy')
+                break
+                # because we expect (based on Benedikt's example) to find only one npy file
+                # in that *.ndata file pointed to by file_hdl
+
+        print(f"data_arr type {data_arr.dtype}, shape {np.shape(data_arr)}")
+        # check on the integrity of the data_arr array that it is not None or empty
+        # this should be done more elegantly by just writing the
+        # data directly into the template and not creating another copy
+
+        self.map_to_nexus(flat_metadata_dict, data_arr, nx_concept_name, template)
+        del flat_metadata_dict
+        del data_arr
+        del nx_concept_name
+        return template
+
+    def process_hfive(self, file_hdl, full_path, template: dict, verbose=False):
+        """Handle reading and processing of opened *.h5 inside the ZIP file."""
+        flat_metadata_dict = {}
+        """
+        data_arr = None
+        nx_concept_name = ""
+        """
+
+        file_hdl.seek(0)
+        with h5py.File(file_hdl, "r") as h5r:
+            metadata_dict = json.loads(h5r["data"].attrs["properties"])
+
+            """
+            nx_concept_key = identify_nexus_concept_key(metadata_dict)
+            nx_concept_name = nexus_concept_dict[nx_concept_key]
+            print(f"Display_item {full_path}, concept {nx_concept_key}, maps {nx_concept_name}")
+            """
+
+            flat_metadata_dict = fd.FlatDict(metadata_dict, delimiter='/')
+            if verbose is True:
+                for pkey, pdict in flat_metadata_dict.items():
+                    print(f"{pkey}")
+                    if isinstance(pdict, dict):
+                        for key, val in pdict.items():
+                            print(f"{key}, {val}")
+
+            if flat_metadata_dict == {}:  # only continue if some metadata were retrieved
+                return template
+
+            data_arr = h5r["data"][()]
+
+        """
+        print(f"data_arr type {data_arr.dtype}, shape {np.shape(data_arr)}")
+        # check on the integrity of the data_arr array that it is not None or empty
+        # this should be done more elegantly by just writing the
+        # data directly into the template and not creating another copy
+        self.map_to_nexus(flat_metadata_dict, data_arr, nx_concept_name, template)
+        del flat_metadata_dict
+        del data_arr
+        del nx_concept_name
+        """
+        return template
+
+    def parse_project_file(self, template: dict, verbose=False) -> dict:
+        """Parse lazily from compressed NionSwift project (nsproj + directory)."""
+        nionswift_proj_mdata = {}
+        with ZipFile(self.file_path) as zip_file_hdl:
+            for pkey, proj_file_name in self.proj_file_dict.items():
+                with zip_file_hdl.open(proj_file_name) as file_hdl:
+                    nionswift_proj_mdata = fd.FlatDict(yaml.safe_load(file_hdl), delimiter='/')
+                    # TODO::inspection phase, maybe with yaml to file?
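+        # for reference (descriptive comment, not in the original hunk): fd.FlatDict
+        # joins nested dictionary keys of the nsproj payload into single '/'-delimited
+        # paths, while list-valued entries such as "display_items" stay intact, which
+        # is why they can be iterated below; a (hypothetical) minimal example:
+        # fd.FlatDict({"a": {"b": 1}}, delimiter='/') exposes the nested value under
+        # the single key "a/b"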
+ if verbose is True: + for pkey, pdict in nionswift_proj_mdata.items(): # ["display_items"]: + print(f"{pkey}") + if isinstance(pdict, dict): + for key, val in pdict.items(): + print(f"{key}, {val}") + if nionswift_proj_mdata == {}: + return template + + for itm in nionswift_proj_mdata["display_items"]: + if set(["type", "uuid", "created", "display_data_channels"]).issubset(itm.keys()): + if len(itm["display_data_channels"]) == 1: + if "data_item_reference" in itm["display_data_channels"][0].keys(): + key = uuid_to_file_name( + itm["display_data_channels"][0]["data_item_reference"]) + # file_name without the mime type + if key in self.ndata_file_dict.keys(): + print(f"Key {key} is *.ndata maps to {self.ndata_file_dict[key]}") + with ZipFile(self.file_path) as zip_file_hdl: + print(f"Parsing {self.ndata_file_dict[key]}...") + with zip_file_hdl.open(self.ndata_file_dict[key]) as file_hdl: + self.process_ndata(file_hdl, + self.ndata_file_dict[key], + template) + elif key in self.hfive_file_dict.keys(): + print(f"Key {key} is *.h5 maps to {self.hfive_file_dict[key]}") + with ZipFile(self.file_path) as zip_file_hdl: + print(f"Parsing {self.hfive_file_dict[key]}...") + with zip_file_hdl.open(self.hfive_file_dict[key]) as file_hdl: + self.process_hfive(file_hdl, + self.hfive_file_dict[key], + template) + else: + print(f"Key {key} has no corresponding data file") + return template + + def parse(self, template: dict, verbose=False) -> dict: + """Parse NOMAD OASIS relevant data and metadata from swift project.""" + print("Parsing in-place from zip-compressed nionswift project (nsproj + directory)...") + if self.check_if_zipped_nionswift_project_file(verbose) is False: + return template + + self.parse_project_file(template, verbose) + return template diff --git a/pynxtools/dataconverter/readers/em/utils/nion_utils.py b/pynxtools/dataconverter/readers/em/utils/nion_utils.py new file mode 100644 index 000000000..452eb1f63 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/nion_utils.py @@ -0,0 +1,40 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utility function for mapping nionswift identifier to suffix used for identifying files in project.""" + +# pylint: disable=no-member + +import uuid + +# see https://github.com/nion-software/nionswift/blob/e95839c5602d009006ea88a648e5f78dc77c1ea4/ +# nion/swift/model/Profile.py line 146 and following + + +def encode(uuid_: uuid.UUID, alphabet: str) -> str: + result = str() + uuid_int = uuid_.int + while uuid_int: + uuid_int, digit = divmod(uuid_int, len(alphabet)) + result += alphabet[digit] + return result + + +def uuid_to_file_name(data_item_uuid_str: str) -> str: + data_item_uuid_uuid = uuid.UUID(f'{data_item_uuid_str}') + return f'data_{encode(data_item_uuid_uuid, "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890")}' + # 25 character results From 0ec624384e22c095091c67b518729454278af189 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 19 Dec 2023 17:30:50 +0100 Subject: [PATCH 62/84] Pylinting, styling, myping --- pynxtools/dataconverter/readers/em/reader.py | 4 ++-- pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 529e45f2a..4ba4686ef 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -22,10 +22,10 @@ from typing import Tuple, Any from pynxtools.dataconverter.readers.base.reader import BaseReader -from pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef +# from pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef # from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser # from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser -from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser +# from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py index 1125c344f..6461014d4 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py @@ -69,7 +69,7 @@ def __init__(self, entry_id: int = 1, input_file_path: str = ""): # just get the *.h5 files irrespective whether parsed later or not self.configure() self.supported = False - + def configure(self): self.tmp["cfg"]: Dict = {} self.tmp["cfg"]["event_data_written"] = False From 5ea8824e10846b289d90ce616e9a56a1b30e6238 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 20 Dec 2023 15:56:10 +0100 Subject: [PATCH 63/84] Found possible metadata quantity to decipher used nionswift version from, fixed reporting style to perform run-through analysis of all experiments performed in C. 
Koch's group --- .../readers/em/subparsers/nxs_nion.py | 80 ++++++++++--------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py index 6461014d4..076e7adde 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py @@ -80,11 +80,13 @@ def configure(self): def check_if_zipped_nionswift_project_file(self, verbose=False): """Inspect the content of the compressed project file to check if supported.""" - with open(self.file_path, 'rb', 0) as file: - s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + with open(self.file_path, 'rb', 0) as fp: + s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) magic = s.read(8) if verbose is True: - print(f"Expecting pfile: {self.file_path}: __{magic}__") + fp.seek(0, 2) + eof_byte_offset = fp.tell() + print(f"Expecting zip-compressed file: ___{self.file_path}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___") """ if magic != b'PK\x03\x04': # https://en.wikipedia.org/wiki/List_of_file_signatures print(f"Test 1 failed, {self.file_path} is not a ZIP archive !") @@ -99,7 +101,7 @@ def check_if_zipped_nionswift_project_file(self, verbose=False): if verbose is True: fp.seek(0, 2) eof_byte_offset = fp.tell() - print(f"Expecting hfive: {file}: __{magic}___{get_sha256_of_file_content(fp)}__{eof_byte_offset}") + print(f"Expecting hfive: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___") key = file[file.rfind("/") + 1:].replace(".h5", "") if key not in self.hfive_file_dict: self.hfive_file_dict[key] = file @@ -109,7 +111,7 @@ def check_if_zipped_nionswift_project_file(self, verbose=False): if verbose is True: fp.seek(0, 2) eof_byte_offset = fp.tell() - print(f"Expecting ndata: {file}: __{magic}__{get_sha256_of_file_content(fp)}__{eof_byte_offset}") + print(f"Expecting ndata: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___") key = file[file.rfind("/") + 1:].replace(".ndata", "") if key not in self.ndata_file_dict: self.ndata_file_dict[key] = file @@ -119,7 +121,7 @@ def check_if_zipped_nionswift_project_file(self, verbose=False): if verbose is True: fp.seek(0, 2) eof_byte_offset = fp.tell() - print(f"Expecting nsproj: {file}: __{magic}__{get_sha256_of_file_content(fp)}__{eof_byte_offset}") + print(f"Expecting nsproj: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___") key = file[file.rfind("/") + 1:].replace(".nsproj", "") if key not in self.proj_file_dict: self.proj_file_dict[key] = file @@ -135,11 +137,11 @@ def check_if_zipped_nionswift_project_file(self, verbose=False): self.supported = True if verbose is True: for key, val in self.proj_file_dict.items(): - print(f"{key}, {val}") + print(f"nsprj: ___{key}___{val}___") for key, val in self.ndata_file_dict.items(): - print(f"{key}, {val}") + print(f"ndata: ___{key}___{val}___") for key, val in self.hfive_file_dict.items(): - print(f"{key}, {val}") + print(f"hfive: ___{key}___{val}___") return True def update_event_identifier(self): @@ -160,30 +162,28 @@ def map_to_nexus(self, meta, arr, concept_name, template): # TODO:: return template - def process_ndata(self, file_hdl, full_path, template): + def process_ndata(self, file_hdl, full_path, template, verbose=False): """Handle reading and processing of opened *.ndata inside the ZIP file.""" # assure that we start reading that file_hdl/pointer from the 
beginning... file_hdl.seek(0) local_files, dir_files, eocd = nsnd.parse_zip(file_hdl) - # ...now that pointer might point somewhere... flat_metadata_dict = {} """ data_arr = None nx_concept_name = "" """ - + print(f"Inspecting {full_path} with len(local_files.keys()) ___{len(local_files.keys())}___") for offset, tpl in local_files.items(): - print(f"{offset}__{tpl}") - continue - # print(f"{tpl}") - if tpl[0] == b'metadata.json': - print(f"Extract metadata.json from {full_path} at offset {offset}") + print(f"{offset}___{tpl}") + # report to know there are more than metadata.json files in the ndata swift container format + if tpl[0] == b"metadata.json": + print(f"Extract metadata.json from ___{full_path}___ at offset ___{offset}___") # ... explicit jump back to beginning of the file file_hdl.seek(0) metadata_dict = nsnd.read_json(file_hdl, local_files, dir_files, - b'metadata.json') + b"metadata.json") """ nx_concept_key = identify_nexus_concept_key(metadata_dict) nx_concept_name = nexus_concept_dict[nx_concept_key] @@ -191,29 +191,36 @@ def process_ndata(self, file_hdl, full_path, template): """ flat_metadata_dict = fd.FlatDict(metadata_dict, delimiter='/') - break - # because we expect (based on Benedikt's example) to find only one json file + if verbose is True: + print(f"Flattened content of this metadata.json") + for key, value in flat_metadata_dict.items(): + print(f"ndata, metadata.json, flat: ___{key}___{value}___") + # no break here, because we would like to inspect all content + # expect (based on Benedikt's example) to find only one json file # in that *.ndata file pointed to by file_hdl if flat_metadata_dict == {}: # only continue if some metadata were retrieved return template for offset, tpl in local_files.items(): # print(f"{tpl}") - if tpl[0] == b'data.npy': - print(f"Extract data.npy from {full_path} at offset {offset}") + if tpl[0] == b"data.npy": + print(f"Extract data.npy from ___{full_path}___ at offset ___{offset}___") file_hdl.seek(0) data_arr = nsnd.read_data(file_hdl, local_files, dir_files, - b'data.npy') + b"data.npy") + if isinstance(data_arr, np.ndarray): + print(f"ndata, data.npy, type, shape, dtype: ___{type(data_arr)}___{np.shape(data_arr)}___{data_arr.dtype}___") break # because we expect (based on Benedikt's example) to find only one npy file # in that *.ndata file pointed to by file_hdl - print(f"data_arr type {data_arr.dtype}, shape {np.shape(data_arr)}") # check on the integriety of the data_arr array that it is not None or empty # this should be done more elegantly by just writing the # data directly into the template and not creating another copy + # TODO::only during inspection + return template self.map_to_nexus(flat_metadata_dict, data_arr, nx_concept_name, template) del flat_metadata_dict @@ -228,9 +235,10 @@ def process_hfive(self, file_hdl, full_path, template: dict, verbose=False): data_arr = None nx_concept_name = "" """ - file_hdl.seek(0) with h5py.File(file_hdl, "r") as h5r: + print(f"Inspecting {full_path} with len(h5r.keys()) ___{len(h5r.keys())}___") + print(f"{h5r.keys()}") metadata_dict = json.loads(h5r["data"].attrs["properties"]) """ @@ -241,17 +249,17 @@ def process_hfive(self, file_hdl, full_path, template: dict, verbose=False): flat_metadata_dict = fd.FlatDict(metadata_dict, delimiter='/') if verbose is True: - for pkey, pdict in flat_metadata_dict.items(): - print(f"{pkey}") - if isinstance(pdict, dict): - for key, val in pdict.items(): - print(f"{key}, {val}") + print(f"Flattened content of this metadata.json") + for key, value in 
flat_metadata_dict.items(): + print(f"hfive, data, flat: ___{key}___{value}___") if flat_metadata_dict == {}: # only continue if some metadata were retrieved return template data_arr = h5r["data"][()] + if isinstance(data_arr, np.ndarray): + print(f"hfive, data, type, shape, dtype: ___{type(data_arr)}___{np.shape(data_arr)}___{data_arr.dtype}___") """ print(f"data_arr type {data_arr.dtype}, shape {np.shape(data_arr)}") # check on the integriety of the data_arr array that it is not None or empty @@ -273,11 +281,9 @@ def parse_project_file(self, template: dict, verbose=False) -> dict: nionswift_proj_mdata = fd.FlatDict(yaml.safe_load(file_hdl), delimiter='/') # TODO::inspection phase, maybe with yaml to file? if verbose is True: - for pkey, pdict in nionswift_proj_mdata.items(): # ["display_items"]: - print(f"{pkey}") - if isinstance(pdict, dict): - for key, val in pdict.items(): - print(f"{key}, {val}") + print(f"Flattened content of {proj_file_name}") + for key, value in nionswift_proj_mdata.items(): # ["display_items"]: + print(f"nsprj, flat: ___{key}___{value}___") if nionswift_proj_mdata == {}: return template @@ -295,7 +301,7 @@ def parse_project_file(self, template: dict, verbose=False) -> dict: with zip_file_hdl.open(self.ndata_file_dict[key]) as file_hdl: self.process_ndata(file_hdl, self.ndata_file_dict[key], - template) + template, verbose) elif key in self.hfive_file_dict.keys(): print(f"Key {key} is *.h5 maps to {self.hfive_file_dict[key]}") with ZipFile(self.file_path) as zip_file_hdl: @@ -303,7 +309,7 @@ def parse_project_file(self, template: dict, verbose=False) -> dict: with zip_file_hdl.open(self.hfive_file_dict[key]) as file_hdl: self.process_hfive(file_hdl, self.hfive_file_dict[key], - template) + template, verbose) else: print(f"Key {key} has no corresponding data file") return template From 7f9cf4e402d0fdf3af060fd672f7c54dbe124df5 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Fri, 5 Jan 2024 09:55:12 +0100 Subject: [PATCH 64/84] Fixed and confirmed that HdfFiveBaseParser can be used to analyze structure of any HDF5 file, tested for jlaehnemanns InGaN.edaxh5 --- pynxtools/dataconverter/readers/em/subparsers/hfive_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py index d365d1d34..43ea9c73a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_base.py @@ -273,8 +273,8 @@ def get_attribute_data_structure(self, prefix, src_dct): def get_content(self): """Walk recursively through the file to get content.""" - if self.h5r is not None: # if self.file_path is not None: - # with h5py.File(self.file_path, "r") as h5r: + # if self.h5r is not None: # if self.file_path is not None: + with h5py.File(self.file_path, "r") as self.h5r: # first step visit all groups and datasets recursively # get their full path within the HDF5 file self.h5r.visititems(self) From f28abcfe7783bb45e08453f6d736adf644547f8a Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 11 Jan 2024 11:08:32 +0100 Subject: [PATCH 65/84] Continuing on the spctrscpy examples, local dev directory tidied up --- axon.batch.sh | 12 +++ imgs.batch.sh | 20 ++-- imgs.dev.ipynb | 239 --------------------------------------------- spctrscpy.batch.sh | 4 +- 4 files changed, 26 insertions(+), 249 deletions(-) create mode 100755 axon.batch.sh delete mode 100755 imgs.dev.ipynb diff --git a/axon.batch.sh 
b/axon.batch.sh new file mode 100755 index 000000000..e96567def --- /dev/null +++ b/axon.batch.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_axon/axon/" + +# comments is detector mode +examples="axon/20210426T224437.049Raw0.png" #axon +examples="ReductionOfFeOx.zip" # Small.zip" + +for example in $examples; do + echo $example + dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt +done diff --git a/imgs.batch.sh b/imgs.batch.sh index d6a563ac6..7d4f80d65 100755 --- a/imgs.batch.sh +++ b/imgs.batch.sh @@ -1,17 +1,19 @@ #!/bin/bash -datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/axon/" +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/ikz_robert/" # comments is detector mode -examples="kit/FeMoOx_AntiA_04_1k5x_CN.tif" -examples="ikz_robert/0c8nA_3deg_003_AplusB_test.tif" # T1 -examples="ikz_martin/ALN_baoh_021.tif" # T2 -examples="ikz_robert/T3_image.tif" -examples="ikz_robert/ETD_image.tif" # ETD -examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam + +# Reetu some SEM +# examples="kit/FeMoOx_AntiA_04_1k5x_CN.tif" + +# IKZ SEM TFS Apreo for all imaging modes +# examples="ikz_robert/0c8nA_3deg_003_AplusB_test.tif" # T1 +# examples="ikz_martin/ALN_baoh_021.tif" # T2 +# examples="ikz_robert/T3_image.tif" # T3 +# examples="ikz_robert/ETD_image.tif" # ETD +# examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam examples="0c8nA_3deg_003_AplusB_test.tif ALN_baoh_021.tif T3_image.tif ETD_image.tif NavCam_normal_vis_light_ccd.tif" -examples="axon/20210426T224437.049Raw0.png" #axon -examples="ReductionOfFeOx.zip" # Small.zip" for example in $examples; do echo $example diff --git a/imgs.dev.ipynb b/imgs.dev.ipynb deleted file mode 100755 index 82317d0a6..000000000 --- a/imgs.dev.ipynb +++ /dev/null @@ -1,239 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", - "metadata": {}, - "outputs": [], - "source": [ - "fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/ALN_baoh_021.tif\"\n", - "# fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/FeMoOx_AntiA_04_1k5x_CN.tif\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from PIL import Image\n", - "from PIL.TiffTags import TAGS\n", - "# print(TAGS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1963afb6-6e48-4628-a0e8-d2da0874701e", - "metadata": {}, - "outputs": [], - "source": [ - "with Image.open(fnm, mode=\"r\") as fp:\n", - " for key in fp.tag_v2:\n", - " if key in [34118, 34119]:\n", - " print(type(fp.tag[key]))\n", - " print(len(fp.tag[key])) \n", - " # print(f\"{key}, {fp.tag[key]}\")\n", - " if key not in TAGS.keys():\n", - " print(f\"--->tag {key}, is not in PIL.TiffTAGS !\")\n", - " # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2}\n", - " # for key, val in self.tags.items():\n", - " # print(f\"{key}, {val}\")\n", - " nparr = np.array(fp)\n", - " print(f\"{type(nparr)}\")\n", - " print(f\"{nparr.dtype}\")\n", - " print(f\"{np.shape(nparr)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"a9ef2a35-a260-4a54-9b83-eae1d588966f", - "metadata": {}, - "outputs": [], - "source": [ - "with Image.open(fnm, mode=\"r\") as fp:\n", - " czi_keys = [34118, 34119]\n", - " for czi_key in czi_keys:\n", - " if czi_key in fp.tag_v2:\n", - " print(f\"Found czi_key {tfs_key}...\")\n", - " utf = fp.tag[czi_key]\n", - " print(type(utf))\n", - " if len(utf) == 1:\n", - " print(utf[0])\n", - " # exit(1)\n", - " tfs_keys = [34682]\n", - " for tfs_key in tfs_keys:\n", - " if tfs_key in fp.tag_v2:\n", - " print(f\"Found tfs_key {tfs_key}...\")\n", - " utf = fp.tag[tfs_key]\n", - " print(type(utf))\n", - " if len(utf) == 1:\n", - " print(utf[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a8ada062-e308-4288-8f00-b3e620f3c890", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/\n", - "def sort_tuple(tup):\n", - " # convert the list of tuples to a numpy array with data type (object, int)\n", - " arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n", - " # get the indices that would sort the array based on the second column\n", - " indices = np.argsort(arr['col2'])\n", - " # use the resulting indices to sort the array\n", - " sorted_arr = arr[indices]\n", - " # convert the sorted numpy array back to a list of tuples\n", - " sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n", - " return sorted_tup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d27df293-626c-4d37-80df-96c182d4f401", - "metadata": {}, - "outputs": [], - "source": [ - "def if_str_represents_float(s):\n", - " try:\n", - " float(s)\n", - " return str(float(s)) == s\n", - " except ValueError:\n", - " return False" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb", - "metadata": {}, - "outputs": [], - "source": [ - "# TFS sections based on IKZ ALN_baoh_021.tif example\n", - "import mmap\n", - "\n", - "tfs_section_names = [\"[User]\",\n", - " \"[System]\",\n", - " \"[Beam]\",\n", - " \"[EBeam]\", \n", - " \"[GIS]\",\n", - " \"[Scan]\",\n", - " \"[EScan]\",\n", - " \"[Stage]\",\n", - " \"[Image]\",\n", - " \"[Vacuum]\",\n", - " \"[Specimen]\",\n", - " \"[Detectors]\",\n", - " \"[T2]\",\n", - " \"[Accessories]\",\n", - " \"[EBeamDeceleration]\",\n", - " \"[CompoundLensFilter]\",\n", - " \"[PrivateFei]\",\n", - " \"[HiResIllumination]\",\n", - " \"[EasyLift]\",\n", - " \"[HotStageMEMS]\",\n", - " \"[HotStage]\",\n", - " \"[HotStageHVHS]\",\n", - " \"[ColdStage]\"]\n", - "\n", - "tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n", - " \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n", - "tfs_section_offsets = {}\n", - "\n", - "with open(fnm, 'rb', 0) as file:\n", - " s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n", - " for section_name in tfs_section_names:\n", - " pos = s.find(bytes(section_name, \"utf8\")) # != -1\n", - " tfs_section_offsets[section_name] = pos\n", - " print(tfs_section_offsets)\n", - "\n", - " # define search offsets\n", - " tpl = []\n", - " for key, value in tfs_section_offsets.items():\n", - " tpl.append((key, value))\n", - " # print(tpl)\n", - " tpl = sort_tuple(tpl)\n", - " print(tpl)\n", - " # if section_name == \"[System]\":\n", - " pos_s = None\n", - " pos_e = None\n", - " for idx in 
np.arange(0, len(tpl)):\n", - " if tpl[idx][0] != \"[System]\":\n", - " continue\n", - " else:\n", - " pos_s = tpl[idx][1]\n", - " if idx <= len(tpl) - 1:\n", - " pos_e = tpl[idx + 1][1]\n", - " break\n", - " print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n", - " # fish metadata of e.g. the system section\n", - " section_metadata = {}\n", - " for term in tfs_section_details[\"[System]\"]:\n", - " \n", - " s.seek(pos_s, 0)\n", - " pos = s.find(bytes(term, \"utf8\"))\n", - " if pos < pos_e: # check if pos_e is None\n", - " s.seek(pos, 0)\n", - " section_metadata[f\"{term}\"] = f\"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}\"\n", - " if if_str_represents_float(section_metadata[f\"{term}\"]) is True:\n", - " section_metadata[f\"{term}\"] = np.float64(section_metadata[f\"{term}\"])\n", - " elif section_metadata[f\"{term}\"].isdigit() is True:\n", - " section_metadata[f\"{term}\"] = np.int64(section_metadata[f\"{term}\"])\n", - " else:\n", - " pass\n", - " # print(f\"{term}, {pos}, {pos + len(term) + 1}\")\n", - " # tfs_section_offswr\n", - " # file.seek(pos, 0) #\n", - " print(section_metadata)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2f3eb287-8f55-424c-a016-a07fc59f068a", - "metadata": {}, - "outputs": [], - "source": [ - "'2'.isdigit()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1341e30-fcce-4a3d-a099-d342b8bbe318", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/spctrscpy.batch.sh b/spctrscpy.batch.sh index b1b5c4259..ea5b091af 100755 --- a/spctrscpy.batch.sh +++ b/spctrscpy.batch.sh @@ -2,8 +2,10 @@ datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/" -examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" +# apex examples ikz, pdi +# examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" examples="ikz/VInP_108_L2.h5" +examples="pdi/InGaN_nanowires_spectra.edaxh5" for example in $examples; do echo $example From 69e738f647f12ae069916715a7b9ca9c14e99b74 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 11 Jan 2024 11:09:08 +0100 Subject: [PATCH 66/84] Further tidying up --- NXem_refactoring.nxdl.xml | 31 ------------------------------- NXem_refactoring.yaml | 9 --------- 2 files changed, 40 deletions(-) delete mode 100644 NXem_refactoring.nxdl.xml delete mode 100644 NXem_refactoring.yaml diff --git a/NXem_refactoring.nxdl.xml b/NXem_refactoring.nxdl.xml deleted file mode 100644 index 172e18654..000000000 --- a/NXem_refactoring.nxdl.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - Debugging. - - - - - diff --git a/NXem_refactoring.yaml b/NXem_refactoring.yaml deleted file mode 100644 index b0c857655..000000000 --- a/NXem_refactoring.yaml +++ /dev/null @@ -1,9 +0,0 @@ -category: application -doc: | - Debugging. 
-type: group -NXem_refactoring(NXroot): - (NXentry): - exists: [min, 1, max, infty] - \@version: - exists: optional From ad768bceed8c232d39e1036d59e833237f091790 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 11 Jan 2024 11:20:03 +0100 Subject: [PATCH 67/84] Reorganize location of tests, etc. --- debug/apex.dev.ipynb | 81 ++ axon.batch.sh => debug/axon.batch.sh | 0 .../concept_mapper.ipynb | 0 imgs.batch.sh => debug/imgs.batch.sh | 0 debug/imgs.dev.ipynb | 812 +++++++++++++++++ debug/metadata_tiff.ipynb | 210 +++++ nion.batch.sh => debug/nion.batch.sh | 0 debug/nion.dev.ipynb | 57 ++ pyxem.batch.sh => debug/pyxem.batch.sh | 0 pyxem.dev.ipynb => debug/pyxem.dev.ipynb | 0 .../spctrscpy.batch.sh | 4 +- debug/spctrscpy.dev.ipynb | 838 ++++++++++++++++++ delete/test.ebsd.sh | 12 + 13 files changed, 2012 insertions(+), 2 deletions(-) create mode 100644 debug/apex.dev.ipynb rename axon.batch.sh => debug/axon.batch.sh (100%) rename concept_mapper.ipynb => debug/concept_mapper.ipynb (100%) rename imgs.batch.sh => debug/imgs.batch.sh (100%) create mode 100644 debug/imgs.dev.ipynb create mode 100644 debug/metadata_tiff.ipynb rename nion.batch.sh => debug/nion.batch.sh (100%) create mode 100644 debug/nion.dev.ipynb rename pyxem.batch.sh => debug/pyxem.batch.sh (100%) rename pyxem.dev.ipynb => debug/pyxem.dev.ipynb (100%) rename spctrscpy.batch.sh => debug/spctrscpy.batch.sh (86%) create mode 100644 debug/spctrscpy.dev.ipynb create mode 100755 delete/test.ebsd.sh diff --git a/debug/apex.dev.ipynb b/debug/apex.dev.ipynb new file mode 100644 index 000000000..8d5a55da3 --- /dev/null +++ b/debug/apex.dev.ipynb @@ -0,0 +1,81 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "f39f5e57-4926-4ccd-b187-9c1aba568a95", + "metadata": {}, + "outputs": [], + "source": [ + "from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser\n", + "from jupyterlab_h5web import H5Web\n", + "! 
which python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1617083-f6ab-4bad-ad6d-aaa6beffec89", + "metadata": {}, + "outputs": [], + "source": [ + "fpath = f\"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/\" \\\n", + " f\"data/development_spctrscpy/pdi/APEX-single-spectrum/InGaN_nanowires_spectra.edaxh5\"\n", + "# H5Web(fpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7191dce-db83-4252-bfe3-ffc36efb5e71", + "metadata": {}, + "outputs": [], + "source": [ + "hdf = HdfFiveBaseParser(file_path=fpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "944c0809-4ec6-406a-be61-1dd56dcf6d00", + "metadata": {}, + "outputs": [], + "source": [ + "hdf.get_content()\n", + "hdf.report_content()\n", + "hdf.store_report(store_instances=True,\n", + " store_instances_templatized=True,\n", + " store_templates=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac77fbda-ae54-4c1d-9441-e4d771605052", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/axon.batch.sh b/debug/axon.batch.sh similarity index 100% rename from axon.batch.sh rename to debug/axon.batch.sh diff --git a/concept_mapper.ipynb b/debug/concept_mapper.ipynb similarity index 100% rename from concept_mapper.ipynb rename to debug/concept_mapper.ipynb diff --git a/imgs.batch.sh b/debug/imgs.batch.sh similarity index 100% rename from imgs.batch.sh rename to debug/imgs.batch.sh diff --git a/debug/imgs.dev.ipynb b/debug/imgs.dev.ipynb new file mode 100644 index 000000000..93b767356 --- /dev/null +++ b/debug/imgs.dev.ipynb @@ -0,0 +1,812 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from PIL import Image\n", + "from PIL.TiffTags import TAGS\n", + "# print(TAGS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", + "metadata": {}, + "outputs": [], + "source": [ + "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs\"\n", + "fnms = [(\"kit\", \"kit/FeMoOx_AntiA_04_1k5x_CN.tif\"),\n", + " (\"ikz_changming\", \"ikz_changming/STO252b Camera Ceta 1818 790 kx.png\"),\n", + " (\"axon\", \"axon/20210426T224437.049Raw0.png\"),\n", + " (\"ikz_robert_t1\", \"ikz_robert/0c8nA_3deg_003_AplusB_test.tif\"),\n", + " (\"ikz_martin_t2\", \"ikz_robert/ALN_baoh_021.tif\"),\n", + " (\"ikz_robert_t3\", \"ikz_robert/T3_image.tif\"),\n", + " (\"ikz_robert_etd\", \"ikz_robert/ETD_image.tif\"),\n", + " (\"ikz_robert_navcam\", \"ikz_robert/NavCam_normal_vis_light_ccd.tif\")\n", + " ]\n", + "fnm = f\"{src}/{fnms[2][1]}\"\n", + "print(fnm)" + ] + }, + { + "cell_type": "markdown", + "id": "4f4bf73d-66b7-414b-abb1-db99b2bf370a", + "metadata": {}, + "source": [ + "***" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9ef2a35-a260-4a54-9b83-eae1d588966f", + "metadata": {}, + "outputs": [], + "source": [ + "all_parent_concepts = 
set()\n", + "for idx in np.arange(3, len(fnms)):\n", + " fnm = f\"{src}/{fnms[idx][1]}\"\n", + " # print(f\"{fnm}\")\n", + " with Image.open(fnm, mode=\"r\") as fp:\n", + " if True is False:\n", + " czi_keys = [34118, 34119]\n", + " for czi_key in czi_keys:\n", + " if czi_key in fp.tag_v2:\n", + " print(f\"Found czi_key {tfs_key}...\")\n", + " utf = fp.tag[czi_key]\n", + " print(type(utf))\n", + " if len(utf) == 1:\n", + " print(utf[0])\n", + " # exit(1)\n", + " tfs_keys = [34682]\n", + " for tfs_key in tfs_keys:\n", + " if tfs_key in fp.tag_v2:\n", + " # print(f\"Found tfs_key {tfs_key}...\")\n", + " utf = fp.tag[tfs_key]\n", + " # print(type(utf))\n", + " if len(utf) == 1:\n", + " print(utf[0])\n", + " for line in utf[0].splitlines():\n", + " if line.startswith(\"[\") is True:\n", + " all_parent_concepts.add(line)\n", + "\n", + "for concept in all_parent_concepts:\n", + " print(concept)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b280857b-df16-4f66-b81a-618fc34c69e5", + "metadata": {}, + "outputs": [], + "source": [ + "# fish all concepts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7bfb57a-583d-47af-97d8-4f450b36a351", + "metadata": {}, + "outputs": [], + "source": [ + "def sort_ascendingly_by_second_argument(tup):\n", + " # convert the list of tuples to a numpy array with data type (object, int)\n", + " arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n", + " # get the indices that would sort the array based on the second column\n", + " indices = np.argsort(arr['col2'])\n", + " # use the resulting indices to sort the array\n", + " sorted_arr = arr[indices]\n", + " # convert the sorted numpy array back to a list of tuples\n", + " sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n", + " return sorted_tup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05dd5af4-b32c-45be-bc80-da5a1166494e", + "metadata": {}, + "outputs": [], + "source": [ + "import mmap\n", + "TiffTfsParentConcepts = [\"Accessories\", \n", + " \"EasyLift\",\n", + " \"ETD\",\n", + " \"Specimen\",\n", + " \"T3\",\n", + " \"User\",\n", + " \"Scan\",\n", + " \"T1\",\n", + " \"Beam\",\n", + " \"Image\",\n", + " \"PrivateFei\",\n", + " \"EBeamDeceleration\",\n", + " \"EBeam\",\n", + " \"T2\",\n", + " \"CompoundLensFilter\",\n", + " \"Stage\",\n", + " \"Nav-Cam\",\n", + " \"EScan\",\n", + " \"HotStageMEMS\",\n", + " \"ColdStage\",\n", + " \"HotStage\",\n", + " \"System\",\n", + " \"GIS\",\n", + " \"Vacuum\",\n", + " \"IRBeam\",\n", + " \"HotStageHVHS\",\n", + " \"HiResIllumination\",\n", + " \"Detectors\"]\n", + "\n", + "all_concepts = set()\n", + "for idx in np.arange(3, len(fnms)):\n", + " fnm = f\"{src}/{fnms[idx][1]}\"\n", + " # print(f\"{fnm}\")\n", + " with open(fnm, 'rb', 0) as fp:\n", + " s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)\n", + " s.seek(0, 2)\n", + " fsize = s.tell()\n", + " print(fsize)\n", + " s.seek(0, 0)\n", + " tfs_parent_concepts_byte_offset = {}\n", + " for concept in TiffTfsParentConcepts:\n", + " pos = s.find(bytes(f\"[{concept}]\", \"utf8\")) # != -1\n", + " if pos != -1:\n", + " tfs_parent_concepts_byte_offset[concept] = pos\n", + " # else:\n", + " # print(f\"Instance of concept [{concept}] was not found !\")\n", + " # print(tfs_parent_concepts_byte_offset)\n", + "\n", + " sequence = [] # decide I/O order in which metadata for childs of parent concepts will be read\n", + " for key, value in tfs_parent_concepts_byte_offset.items():\n", + " if value is not None:\n", + " 
sequence.append((key, value))\n", + " # tuple of parent_concept name and byte offset\n", + " sequence = sort_ascendingly_by_second_argument(sequence)\n", + " # print(sequence)\n", + "\n", + " idx = 0\n", + " for parent, byte_offset in sequence:\n", + " pos_s = byte_offset\n", + " pos_e = None\n", + " if idx < len(sequence) - 1:\n", + " pos_e = sequence[idx + 1][1]\n", + " else:\n", + " pos_e = fsize # np.iinfo(np.uint64).max\n", + " # TODO::better use official convention to not read beyond the end of file\n", + " idx += 1\n", + " if pos_s is None or pos_e is None:\n", + " raise ValueError(f\"Definition of byte boundaries for reading childs of [{parent}] was unsuccessful !\")\n", + " print(f\"Search for [{parent}] in between byte offsets {pos_s} and {pos_e}\")\n", + " s.seek(pos_s, 0)\n", + " payload = s.read(pos_e - pos_s).decode(\"utf8\")\n", + " for line in payload.split(\"\\n\"):\n", + " # print(line)\n", + " if line.count(\"=\") == 1:\n", + " all_concepts.add(f\"{parent}/{line.split('=')[0]}\")\n", + " # print(payload)\n", + "\n", + "for concept in all_concepts:\n", + " print(concept)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1963afb6-6e48-4628-a0e8-d2da0874701e", + "metadata": {}, + "outputs": [], + "source": [ + "# handle TIFF\n", + "with Image.open(fnm, mode=\"r\") as fp:\n", + " for key in fp.tag_v2:\n", + " if key in [34118, 34119]:\n", + " print(type(fp.tag[key]))\n", + " print(len(fp.tag[key])) \n", + " # print(f\"{key}, {fp.tag[key]}\")\n", + " if key not in TAGS.keys():\n", + " print(f\"--->tag {key}, is not in PIL.TiffTAGS !\")\n", + " # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2}\n", + " # for key, val in self.tags.items():\n", + " # print(f\"{key}, {val}\")\n", + " nparr = np.array(fp)\n", + " print(f\"{type(nparr)}\")\n", + " print(f\"{nparr.dtype}\")\n", + " print(f\"{np.shape(nparr)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28687c0e-6f14-484c-b511-3a4906d9672e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8ada062-e308-4288-8f00-b3e620f3c890", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/\n", + "def sort_tuple(tup):\n", + " # convert the list of tuples to a numpy array with data type (object, int)\n", + " arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n", + " # get the indices that would sort the array based on the second column\n", + " indices = np.argsort(arr['col2'])\n", + " # use the resulting indices to sort the array\n", + " sorted_arr = arr[indices]\n", + " # convert the sorted numpy array back to a list of tuples\n", + " sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n", + " return sorted_tup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d27df293-626c-4d37-80df-96c182d4f401", + "metadata": {}, + "outputs": [], + "source": [ + "def if_str_represents_float(s):\n", + " try:\n", + " return isinstance(float(s), float)\n", + " # return str(float(s)) == s\n", + " except ValueError:\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f647fa79-330b-48b2-8360-f92fc5ead187", + "metadata": {}, + "outputs": [], + "source": [ + "\"10\".isdigit()\n", + "# isinstance(float(\"8.99306e-010\"), float)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb", 
+ "metadata": {}, + "outputs": [], + "source": [ + "# TFS sections based on IKZ ALN_baoh_021.tif example\n", + "import mmap\n", + "\n", + "tfs_section_names = [\"[User]\",\n", + " \"[System]\",\n", + " \"[Beam]\",\n", + " \"[EBeam]\", \n", + " \"[GIS]\",\n", + " \"[Scan]\",\n", + " \"[EScan]\",\n", + " \"[Stage]\",\n", + " \"[Image]\",\n", + " \"[Vacuum]\",\n", + " \"[Specimen]\",\n", + " \"[Detectors]\",\n", + " \"[T2]\",\n", + " \"[Accessories]\",\n", + " \"[EBeamDeceleration]\",\n", + " \"[CompoundLensFilter]\",\n", + " \"[PrivateFei]\",\n", + " \"[HiResIllumination]\",\n", + " \"[EasyLift]\",\n", + " \"[HotStageMEMS]\",\n", + " \"[HotStage]\",\n", + " \"[HotStageHVHS]\",\n", + " \"[ColdStage]\"]\n", + "\n", + "tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n", + " \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n", + "tfs_section_offsets = {}\n", + "\n", + "with open(fnm, 'rb', 0) as file:\n", + " s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n", + " for section_name in tfs_section_names:\n", + " pos = s.find(bytes(section_name, \"utf8\")) # != -1\n", + " tfs_section_offsets[section_name] = pos\n", + " print(tfs_section_offsets)\n", + "\n", + " # define search offsets\n", + " tpl = []\n", + " for key, value in tfs_section_offsets.items():\n", + " tpl.append((key, value))\n", + " # print(tpl)\n", + " tpl = sort_tuple(tpl)\n", + " print(tpl)\n", + " # if section_name == \"[System]\":\n", + " pos_s = None\n", + " pos_e = None\n", + " for idx in np.arange(0, len(tpl)):\n", + " if tpl[idx][0] != \"[System]\":\n", + " continue\n", + " else:\n", + " pos_s = tpl[idx][1]\n", + " if idx <= len(tpl) - 1:\n", + " pos_e = tpl[idx + 1][1]\n", + " break\n", + " print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n", + " # fish metadata of e.g. 
the system section\n", + " section_metadata = {}\n", + " for term in tfs_section_details[\"[System]\"]:\n", + " \n", + " s.seek(pos_s, 0)\n", + " pos = s.find(bytes(term, \"utf8\"))\n", + " if pos < pos_e: # check if pos_e is None\n", + " s.seek(pos, 0)\n", + " section_metadata[f\"{term}\"] = f\"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}\"\n", + " if if_str_represents_float(section_metadata[f\"{term}\"]) is True:\n", + " section_metadata[f\"{term}\"] = np.float64(section_metadata[f\"{term}\"])\n", + " elif section_metadata[f\"{term}\"].isdigit() is True:\n", + " section_metadata[f\"{term}\"] = np.int64(section_metadata[f\"{term}\"])\n", + " else:\n", + " pass\n", + " # print(f\"{term}, {pos}, {pos + len(term) + 1}\")\n", + " # tfs_section_offswr\n", + " # file.seek(pos, 0) #\n", + " print(section_metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f3eb287-8f55-424c-a016-a07fc59f068a", + "metadata": {}, + "outputs": [], + "source": [ + "'2'.isdigit()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1341e30-fcce-4a3d-a099-d342b8bbe318", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48f31e6c-1554-4476-8688-5f5323d513c8", + "metadata": {}, + "outputs": [], + "source": [ + "# https://codereview.stackexchange.com/a/21035\n", + "# https://stackoverflow.com/questions/38852822/how-to-flatten-xml-file-in-python\n", + "import xmltodict\n", + "from collections import OrderedDict\n", + "\n", + "def flatten_dict(d):\n", + " def items():\n", + " for key, value in d.items():\n", + " # nested subtree\n", + " if isinstance(value, dict):\n", + " for subkey, subvalue in flatten_dict(value).items():\n", + " yield '{}.{}'.format(key, subkey), subvalue\n", + " # nested list\n", + " elif isinstance(value, list):\n", + " for num, elem in enumerate(value):\n", + " for subkey, subvalue in flatten_dict(elem).items():\n", + " yield '{}.[{}].{}'.format(key, num, subkey), subvalue\n", + " # everything else (only leafs should remain)\n", + " else:\n", + " yield key, value\n", + " return OrderedDict(items())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9358182-2cde-45c2-bfd5-07266b142601", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "def get_protochip_variadic_concept(token):\n", + " # return val is a tuple\n", + " # zeroth entry specifies the prefix of the instance concept\n", + " # first entry specifies associated parent concept\n", + " # second entry is True is token resolves an AXON parent concept\n", + " if token.count(\" = \") == 1:\n", + " tmp = token.split(\"=\")\n", + " concept = tmp[0].strip()\n", + " value = tmp[1].strip()\n", + " # print(f\"{concept}, {value}\")\n", + " idxs = re.finditer(\".\\[[0-9]+\\].\", concept)\n", + " is_variadic = False\n", + " if (sum(1 for _ in idxs) > 0): # and (\"DataValues\" in concept):\n", + " is_variadic = True\n", + " variadic = concept\n", + " concept_id = []\n", + " for idx in re.finditer(\".\\[[0-9]+\\].\", token):\n", + " variadic = variadic.replace(concept[idx.start(0):idx.end(0)], \".[*].\")\n", + " concept_id.append(f\"{concept[idx.start(0):idx.end(0)]}\".replace(\".[\", \"\").replace(\"].\", \"\"))\n", + " if (\"DataValues\" in concept) and concept.count(\".Name = \") == 1:\n", + " return (variadic, f\"{variadic}.{'_'.join(concept_id)}\", True) # parent concept\n", + " elif concept.count(\"PositionerName\") == 1:\n", + " return (variadic, 
f\"{variadic}.{'_'.join(concept_id)}\", True) # parent concept\n", + " else:\n", + " return (variadic, f\"{variadic}.{'_'.join(concept_id)}\", False) # child concept\n", + " else:\n", + " return (concept, concept, True)\n", + " else:\n", + " return (None, None, None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93b116cf-0ead-4bb9-9496-774221feb70f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3caa30a0-e050-4898-b553-9d71e149160c", + "metadata": {}, + "outputs": [], + "source": [ + "def get_protochips_variadic_concept(key, value):\n", + " # return val is a tuple\n", + " # zeroth entry specifies the prefix of the instance concept\n", + " # first entry specifies associated parent concept\n", + " # second entry is True is token resolves an AXON parent concept\n", + " if isinstance(key, str) and key != \"\":\n", + " concept = key.strip()\n", + " idxs = re.finditer(\".\\[[0-9]+\\].\", concept)\n", + " is_variadic = False\n", + " if (sum(1 for _ in idxs) > 0):\n", + " is_variadic = True\n", + " variadic = concept\n", + " concept_id = []\n", + " for idx in re.finditer(\".\\[[0-9]+\\].\", concept):\n", + " variadic = variadic.replace(concept[idx.start(0):idx.end(0)], \".[*].\")\n", + " concept_id.append(f\"{concept[idx.start(0):idx.end(0)]}\".replace(\".[\", \"\").replace(\"].\", \"\"))\n", + " print(f\"concept_id: {concept_id}\")\n", + " if variadic.endswith(\".Name\"):\n", + " variadic = variadic.replace(\".Name\", f\".{value}\")\n", + " return variadic\n", + " else:\n", + " return concept\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "501ddd47-46d5-4191-be82-bac7ed007749", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "key = \"MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[20].HeatingPower\"\n", + "def specific_to_variadic(token):\n", + " if isinstance(token, str) and key != \"\":\n", + " concept = token.strip()\n", + " idxs = re.finditer(r\".\\[[0-9]+\\].\", concept)\n", + " if (sum(1 for _ in idxs) > 0):\n", + " variadic = concept\n", + " for idx in re.finditer(\".\\[[0-9]+\\].\", concept):\n", + " variadic = variadic.replace(concept[idx.start(0):idx.end(0)], \".[*].\")\n", + " return variadic\n", + " else:\n", + " return concept\n", + " return None\n", + "print(specific_to_variadic(key))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6feb02fa-d179-4d7e-b1cc-09d8af396315", + "metadata": {}, + "outputs": [], + "source": [ + "key = \"MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[20].Name\"\n", + "value = \"HeatingPower\"\n", + "key = \"MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[20].Value\"\n", + "value = 0.077978329305\n", + "key = \"MicroscopeControlImageMetadata.@xmlns:i\"\n", + "value = \"http://www.w3.org/2001/XMLSchema-instance\"\n", + "key = \"MicroscopeControlImageMetadata.ImagerSettings.ImagePhysicalSize.X\"\n", + "value = 0.99\n", + "key = \"MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[0].X\"\n", + "value = -369.84159375\n", + "\n", + "# first phase analyse the collection of Protochips metadata concept instance symbols and reduce to unique concepts\n", + "concepts = {}\n", + "for concept, value in flattened_xml.items():\n", + " # not every key is allowed to define a concept\n", + " # 
print(f\"{concept}: {value}\")\n", + " idxs = re.finditer(\".\\[[0-9]+\\].\", concept)\n", + " if (sum(1 for _ in idxs) > 0): # is_variadic (allowed to define eventually a key!)\n", + " markers = [\".Name\", \".PositionerName\"]\n", + " for marker in markers:\n", + " if concept.endswith(marker):\n", + " concepts[f\"{concept[0:len(concept)-len(marker)]}\"] = value # .{value}\"] = None\n", + " else:\n", + " # not variadic, i.e. defines a concept\n", + " concepts[concept] = value\n", + "#for c, v in concepts.items():\n", + "# print(f\"{c}: {v}\")\n", + "print(\"1. done\")\n", + "# second phase, evaluate each concept instance symbol wrt to its prefix coming from the unique concept\n", + "for k, v in flattened_xml.items():\n", + " # print(f\"-->{k}: {v}\")\n", + " grpnms = None\n", + " idxs = re.finditer(\".\\[[0-9]+\\].\", k)\n", + " if (sum(1 for _ in idxs) > 0): # is variadic\n", + " search_argument = k[0:k.rfind(\"].\")+1]\n", + " for parent_grpnm, child_grpnm in concepts.items():\n", + " if parent_grpnm.startswith(search_argument):\n", + " grpnms = (parent_grpnm, child_grpnm)\n", + " break\n", + " if grpnms is not None:\n", + " if len(grpnms) == 2:\n", + " if \"PositionerSettings\" in k and k.endswith(\".PositionerName\") is False:\n", + " print(f\"vv: {grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}: {v}\")\n", + " if k.endswith(\".Value\"):\n", + " print(f\"vv: {grpnms[0]}.{grpnms[1]}: {v}\")\n", + " else:\n", + " print(f\"nv: {k}: {v}\")\n", + "# token = \"MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[0].PositionerName = Stage\"\n", + "# token = \"\"\n", + "# idxs = re.finditer(\".\\[[0-9]+\\].\", token)\n", + "# print(sum(1 for _ in idxs))\n", + "# a, b, c = instance_to_concept(token)\n", + "# print(f\"{a}, {b}, {c}\")\n", + "# print(get_protochips_variadic_concept(key, value))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67dc0c90-7b37-4d9c-8bbe-3db50040094d", + "metadata": {}, + "outputs": [], + "source": [ + "def get_axon_concept_prefix(glossary, what):\n", + " # return val is a tuple\n", + " # zeroth entry specifies the prefix of the instance concept\n", + " # first entry specifies associated parent concept\n", + " # second entry is True is token resolves an AXON parent concept\n", + " for term in glossary:\n", + " if token.count(\" = \") == 1:\n", + " concept = term.split(\"=\")[0].strip()\n", + " value = term.split(\"=\")[1].strip()\n", + " if concept.endswith(\"Name\") and what in value:\n", + " return concept[0:len(concept)-len(\"Name\")-1]\n", + " return None" + ] + }, + { + "cell_type": "markdown", + "id": "1300ffd9-06c3-44b2-87bf-a9f8e50db713", + "metadata": {}, + "source": [ + "Human understand that the following metadata structure from AXON suggests that:\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].Name = SystemStatus\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].Value = Ready\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].ValueType = String\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].ValueUnits = None\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].DisplayUnits = None\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].Description = 
None\n", + "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].MeasurementTypeName = None\n", + "\n", + "Is equivalent to\n", + "*MicroscopeControlImageMetadata.SystemStatus* being a group # NeXus concept name\n", + "MicroscopeControlImageMetadata.SystemStatus.Value = Ready # instance data\n", + "MicroscopeControlImageMetadata.SystemStatus.ValueType = String # NeXus value category\n", + "MicroscopeControlImageMetadata.SystemStatus.ValueUnits = None # unit\n", + "MicroscopeControlImageMetadata.SystemStatus.DisplayUnits = None # essentially NOMAD MetaInfo display unit relevant for the GUI\n", + "MicroscopeControlImageMetadata.SystemStatus.Description = None # essentially the NeXus docstring\n", + "MicroscopeControlImageMetadata.SystemStatus.MeasurementTypeName = None # no equivalent in NeXus" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cf76281-756f-4794-8406-0a4df1acb682", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import os\n", + "import mmap\n", + "for file_name in os.listdir(f\"{src}/axon\"):\n", + " with open(f\"{src}/axon/{file_name}\", 'rb', 0) as fp:\n", + " s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)\n", + " magic = s.read(8)\n", + " if magic == b'\\x89PNG\\r\\n\\x1a\\n': # https://en.wikipedia.org/wiki/List_of_file_signatures\n", + " print(f\"{file_name} is PNG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5d5cbb4-c5a2-44a1-b6a4-277167582869", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import mmap\n", + "import PIL\n", + "import datetime\n", + "events = []\n", + "for file_name in os.listdir(f\"{src}/axon\"):\n", + " # if we try to open non-image files with PIL directly we get an error so one should hunt first on the magic number\n", + " with open(f\"{src}/axon/{file_name}\", 'rb', 0) as file:\n", + " s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n", + " magic = s.read(8)\n", + " if magic != b'\\x89PNG\\r\\n\\x1a\\n':\n", + " continue\n", + " \n", + " with Image.open(f\"{src}/axon/{file_name}\", mode=\"r\") as fp:\n", + " # if f\"{file_name}\" != \"20210426T225128.453Raw382.png\": # \"20210426T224913.001Raw257.png\":\n", + " # continue\n", + " if isinstance(fp, PIL.PngImagePlugin.PngImageFile) is True:\n", + " print(f\"Parsing {src}/axon/{file_name}...\")\n", + " fp.load() # Needed only for .png EXIF data (see citation above)\n", + " # print(fp.info.keys())\n", + "\n", + " if \"MicroscopeControlImage\" in fp.info.keys():\n", + " # print(fp.info[\"MicroscopeControlImage\"])\n", + " xml_content = xmltodict.parse(fp.info[\"MicroscopeControlImage\"])\n", + " flattened_xml = flatten_dict(xml_content)\n", + " # first display all instance metadata in their unmodified AXON-specific representation\n", + " axon_concepts = []\n", + " for k,v in flattened_xml.items():\n", + " # print('{} = {}'.format(k,v))\n", + " print(f\"{k}: {v}\")\n", + " continue\n", + " if \"AuxiliaryDataValue\" in f\"{k}\":\n", + " axon_concepts.append(f\"{k}.{v}\")\n", + " else:\n", + " axon_concepts.append(f\"{k}\")\n", + " # if f\"{k}\".endswith(\".Name\"):\n", + " # axon_concepts.append(f\"{k[0:len(k)-len('Name')-1]}\")\n", + " # else:\n", + " # axon_concepts.append(f\"{k}\")\n", + " # for concept in axon_concepts:\n", + " # print(concept)\n", + " # print(axon_concepts)\n", + " reqs = [\"MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[0].Name.SystemStatus\",\n", + " 
\"MicroscopeControlImageMetadata.MicroscopeDateTime\",\n", + " \"MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[7].Name.HolderTemperature\"]\n", + " for req in reqs:\n", + " if req not in axon_concepts:\n", + " raise ValueError(f\"{src}/axon/{file_name} does not contain concept instance {req} !\")\n", + " # all required concepts have instance data so parse\n", + " # print(f\"{flattened_xml['MicroscopeControlImageMetadata.MicroscopeDateTime']}\")\n", + " if f\"{flattened_xml['MicroscopeControlImageMetadata.MicroscopeDateTime']}\".count(\".\") == 1:\n", + " datetime_obj = datetime.datetime.strptime(f\"{flattened_xml['MicroscopeControlImageMetadata.MicroscopeDateTime']}\", '%Y-%m-%dT%H:%M:%S.%f%z')\n", + " else:\n", + " datetime_obj = datetime.datetime.strptime(f\"{flattened_xml['MicroscopeControlImageMetadata.MicroscopeDateTime']}\", '%Y-%m-%dT%H:%M:%S%z')\n", + " events.append((f\"{file_name}\", datetime_obj))\n", + " # print(f\"converted to datetime: {datetime_obj}\")\n", + " print(f\"{flattened_xml['MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[0].DataValues.AuxiliaryDataValue.[7].Value']}\")\n", + " fp.seek(0)\n", + " nparr = np.array(fp)\n", + " print(f\"{np.shape(nparr)}, {nparr.dtype}\")\n", + " continue\n", + "\n", + " if True is False:\n", + " # identify all unique axon_concepts\n", + " axon_concepts = []\n", + " for k,v in flattened_xml.items():\n", + " prefix, parent, is_parent = instance_to_concept('{} = {}'.format(k,v))\n", + " if is_parent is True:\n", + " if parent not in axon_concepts:\n", + " axon_concepts.append(parent)\n", + " else:\n", + " raise ValueError(f\"Found duplicated axon conceptid {conceptid} !\")\n", + " for entry in axon_concepts:\n", + " print(entry)\n", + "\n", + " # make members of the same axon concept childs\n", + " for k,v in flattened_xml.items():\n", + " varname, conceptid, is_parent = instance_to_concept('{} = {}'.format(k,v))\n", + " if conceptid is not None: \n", + " if conceptid in axon_concepts.keys():\n", + " if is_parent is True:\n", + " print(f\"{axon_concepts[conceptid]}\")\n", + " else:\n", + " print(f\"{axon_concepts[conceptid]}.{varname[varname.rfind('.') + 1:]}\")\n", + " else:\n", + " raise ValueError(f\"Unable to find matching axon parent conceptid {conceptid} !\")\n", + " else:\n", + " print('{} = {}'.format(k,v))\n", + " # elif fnm.lower().endswith(\".png\") is True: # check for mime type instead\n", + " # print(f\"There is no iTXt chunk in {fnm} which has embedded XML within the AXON namespace MicroscopeControlImage!\")\n", + " else:\n", + " print(f\"There is nothing to harvest here!\")\n", + " # print(axon_concepts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60da8d37-6e03-4c04-b47c-b24224de6a02", + "metadata": {}, + "outputs": [], + "source": [ + "def sort_ascendingly_by_second_argument_iso8601(tup):\n", + " # convert the list of tuples to a numpy array with data type (object, int)\n", + " arr = np.array(tup, dtype=[('col1', object), ('col2', datetime.datetime)])\n", + " # get the indices that would sort the array based on the second column\n", + " indices = np.argsort(arr['col2'])\n", + " # use the resulting indices to sort the array\n", + " sorted_arr = arr[indices]\n", + " # convert the sorted numpy array back to a list of tuples\n", + " sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n", + " return sorted_tup\n", + "\n", + "new_lst = sort_ascendingly_by_second_argument_iso8601(events)\n", + "# events = 
sorted(events, key=lambda x: x if isinstance(x, datetime.datetime) else datetime.datetime(x.year, x.month, x.day))\n", + "time_series_start = new_lst[0][1]\n", + "print(f\"Time series start: {time_series_start}\")\n", + "for file_name, iso8601 in new_lst:\n", + " continue\n", + " print(f\"{file_name}, {iso8601}, {(iso8601 - time_series_start).total_seconds()} s\")\n", + "print(f\"Time series end: {new_lst[-1][1]}, {(new_lst[-1][1] - time_series_start).total_seconds()} s\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c18541f-43d2-433d-8bbc-0ef3ec3bf9f7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01d55278-ba89-4c02-9728-108da1c598ec", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/debug/metadata_tiff.ipynb b/debug/metadata_tiff.ipynb new file mode 100644 index 000000000..8bed68164 --- /dev/null +++ b/debug/metadata_tiff.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5eafd031", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f655f67", + "metadata": {}, + "outputs": [], + "source": [ + "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/ikz_robert\"\n", + "filepath=f\"{src}/0c8nA_3deg_003_AplusB_test.tif\"\n", + "lines=[]\n", + "with open(filepath, 'r', encoding=\"utf8\",errors=\"ignore\") as fp:\n", + " contents=fp.read()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "268eba7c", + "metadata": {}, + "outputs": [], + "source": [ + "text=contents[contents.find(\"Date\"):] #metadata at the end of the file starts with the date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7f988b3", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b69b8ea", + "metadata": {}, + "outputs": [], + "source": [ + "def get_metadata_tif(filepath):\n", + " with open(filepath, 'r', encoding=\"utf8\",errors=\"ignore\") as fp:\n", + " contents=fp.read() \n", + " text=contents[contents.find(\"Date\"):]\n", + " # qtu <-- quantitiy_type_unit\n", + " #Beam\n", + " qtu=[[\"\\nSystemType=\",\"str\",None],\n", + " [\"\\nPixelWidth=\",\"np.float64\",\"m\"],\n", + " [\"\\nPixelHeight=\",\"np.float64\",\"m\"], \n", + " \n", + " [\"\\nHV=\",\"np.float64\",\"V\"],\n", + " [\"\\nBeamCurrent=\",\"np.float64\",\"A\"],\n", + " [\"\\nWorkingDistance=\",\"np.float64\",\"m\"],\n", + " [\"\\nDwelltime=\",\"np.float64\",\"s\"],\n", + " [\"\\nSpot=\",\"np.float64\",\"nm\"],\n", + " [\"\\nStigmatorX=\",\"np.float64\",None],\n", + " [\"\\nStigmatorY=\",\"np.float64\",None],\n", + " [\"\\nBeamShiftX=\",\"np.float64\",None],\n", + " [\"\\nBeamShiftY=\",\"np.float64\",None],\n", + " [\"\\nSourceTiltX=\",\"np.float64\",None],\n", + " [\"\\nSourceTiltY=\",\"np.float64\",None],\n", + " 
[\"\\nEmissionCurrent=\",\"np.float64\",\"A\"],\n", + " [\"\\nSpecimenCurrent=\",\"np.float64\",\"A\"],\n", + " [\"\\nApertureDiameter=\",\"np.float64\",\"m\"],\n", + " [\"\\nATubeVoltage=\",\"np.float64\",\"V\"],\n", + " #Scan \n", + " [\"\\nScanRotation=\",\"np.float64\",\"deg\"],\n", + " [\"\\nTiltCorrectionIsOn=\",\"str\",None],#yes,no \n", + " [\"\\nUseCase=\",\"str\",None],\n", + "\n", + " #CompoundLens\n", + " [\"\\nIsOn=\",\"str\",None], #On,Off\n", + " [\"\\nThresholdEnergy=\",\"np.float64\",\"eV\"],\n", + " #Stage \n", + " [\"\\nStageX=\",\"np.float64\",\"m\"],\n", + " [\"\\nStageY=\",\"np.float64\",\"m\"],\n", + " [\"\\nStageZ=\",\"np.float64\",\"m\"],\n", + " [\"\\nStageR=\",\"np.float64\",\"deg\"],\n", + " [\"\\nStageTa=\",\"np.float64\",\"deg\"],\n", + " [\"\\nStageTb=\",\"np.float64\",\"deg\"],\n", + " [\"\\nStageBias=\",\"np.float64\",\"V\"],\n", + " [\"\\nChPressure=\",\"np.float64\",\"Pa\"],\n", + "\n", + " #Detecor\n", + " [\"\\nName=\",\"str\",None],\n", + " [\"\\nMode=\",\"str\",None],\n", + " [\"\\nContrast=\",\"np.float64\",None],\n", + " [\"\\nBrightness=\",\"np.float64\",None],\n", + " [\"\\nSignal=\",\"str\",None],\n", + " [\"\\nContrastDB=\",\"np.float64\",\"dB\"],\n", + " [\"\\nBrightnessDB=\",\"np.float64\",\"dB\"],\n", + " [\"\\nAverage=\",\"int\",None],\n", + " [\"\\nIntegrate=\",\"int\",None],\n", + "\n", + "\n", + " [\"\\nResolutionX=\",\"int\",None],\n", + " [\"\\nResolutionY=\",\"int\",None],\n", + " [\"\\nHorFieldsize=\",\"np.float64\",\"m\"],\n", + " [\"\\nVerFieldsize=\",\"np.float64\",\"m\"],\n", + " [\"\\nFrameTime=\",\"np.float64\",\"s\"],\n", + " #Digital\n", + " [\"\\nDigitalContrast=\",\"np.float64\",None],\n", + " [\"\\nDigitalBrightness=\",\"np.float64\",None],\n", + " [\"\\nDigitalGamma=\",\"np.float64\",None]]\n", + " \n", + " res=[]\n", + " typechanges=[]\n", + " counter=0\n", + " for i in qtu:\n", + " kw=i[0]\n", + " start=text.find(kw)+len(kw)\n", + " end=text[start:].find(\"\\n\")\n", + " if i[1]==\"int\": \n", + " res.append(int(text[start:start+end]))\n", + " elif i[1]==\"np.float64\":\n", + " res.append(np.double(text[start:start+end]))\n", + " elif i[1]==\"str\" or i[1]==\"string\":\n", + " if text[start:start+end] in [\"no\",\"No\",\"off\",\"Off\",\"false\",\"False\"]:\n", + " res.append(False)\n", + " typechanges.append(counter)\n", + " elif text[start:start+end] in [\"yes\",\"Yes\",\"on\",\"On\",\"true\",\"True\"]:\n", + " res.append(True)\n", + " typechanges.append(counter)\n", + " else:\n", + " res.append(text[start:start+end])\n", + " counter +=1\n", + "\n", + " for j in typechanges:\n", + " qtu[j][1]=\"bool\"\n", + " \n", + " return res,qtu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74365e99", + "metadata": {}, + "outputs": [], + "source": [ + "filepath=f\"{src}/0c8nA_3deg_003_AplusB_test.tif\"\n", + "res,qtu=get_metadata_tif(filepath)\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0000d17e", + "metadata": {}, + "outputs": [], + "source": [ + "qtu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3435cdbc-9ffa-4035-8b8d-ee4a648cc597", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "584e4103-2e86-43f7-ab58-fb5d3e7dc36b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nion.batch.sh b/debug/nion.batch.sh similarity index 100% rename from nion.batch.sh rename to debug/nion.batch.sh diff --git a/debug/nion.dev.ipynb b/debug/nion.dev.ipynb new file mode 100644 index 000000000..24750c59b --- /dev/null +++ b/debug/nion.dev.ipynb @@ -0,0 +1,57 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "f6fd745e-93c4-4754-ade9-0f4aa4b36213", + "metadata": {}, + "outputs": [], + "source": [ + "from jupyterlab_h5web import H5Web\n", + "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_nion/2022-02-18_Metadata_Kuehbach.zip/2022-02-18_Metadata_Kuehbach Data/2022/02/18/20220218-140947/\"\n", + "H5Web(f\"{src}data_7EPPSHNUFKH6F6A4JCR45J03G.h5\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8469fb59-4e2b-4f7e-9012-9059192f210a", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "a = np.asarray([1, 1, 1], np.float64)\n", + "print(isinstance(a, np.ndarray))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "207488e4-fa3c-4757-96bb-fa5f37bd14db", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyxem.batch.sh b/debug/pyxem.batch.sh similarity index 100% rename from pyxem.batch.sh rename to debug/pyxem.batch.sh diff --git a/pyxem.dev.ipynb b/debug/pyxem.dev.ipynb similarity index 100% rename from pyxem.dev.ipynb rename to debug/pyxem.dev.ipynb diff --git a/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh similarity index 86% rename from spctrscpy.batch.sh rename to debug/spctrscpy.batch.sh index ea5b091af..0d2a219cc 100755 --- a/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -1,11 +1,11 @@ #!/bin/bash -datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/" +datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/" # apex examples ikz, pdi # examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" examples="ikz/VInP_108_L2.h5" -examples="pdi/InGaN_nanowires_spectra.edaxh5" +examples="InGaN_nanowires_spectra.edaxh5" for example in $examples; do echo $example diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb new file mode 100644 index 000000000..c6b5871e2 --- /dev/null +++ b/debug/spctrscpy.dev.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from rsciio import bruker, emd, digitalmicrograph\n", + "from jupyterlab_h5web import H5Web\n", + "import h5py\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/VInP_108_L2.h5\n" + ] + } + ], + "source": [ + "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", + "fnms = [(\"apex\", \"ikz/VInP_108_L2.h5\"),\n", + " (\"apex\", \"ikz/GeSn_13.h5\"),\n", + " (\"bruker\", \"pynx/46_ES-LP_L1_brg.bcf\"),\n", + " (\"emd\", \"pynx/1613_Si_HAADF_610_kx.emd\"),\n", + " (\"digitalmicrograph\", \"pynx/EELS_map_2_ROI_1_location_4.dm3\"),\n", + " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", + "# pyUSID, HSMA\n", + "fnm = f\"{src}/{fnms[0][1]}\"\n", + "print(fnm)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", + "metadata": {}, + "outputs": [ + { + "data": { + "application/x-hdf5": "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/VInP_108_L2.h5", + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "H5Web(fnm)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "f0a7f9ac-1ade-43d7-aedd-b2572d163b34", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "\n", + "from typing import Dict\n", + "\n", + "\n", + "class NxObject:\n", + " \"\"\"An object in a graph e.g. an attribute, dataset, or group in NeXus.\"\"\"\n", + "\n", + " def __init__(self,\n", + " name: str = None,\n", + " unit: str = None,\n", + " dtype=str,\n", + " value=None,\n", + " **kwargs):\n", + " if (name is not None) and (name == \"\"):\n", + " raise ValueError(f\"Value for argument name needs to be a non-empty string !\")\n", + " if (unit is not None) and (unit == \"\"):\n", + " raise ValueError(f\"Value for argument unit needs to be a non-empty string !\")\n", + " if (dtype is not None) and isinstance(dtype, type) is False:\n", + " raise ValueError(f\"Value of argument dtype must not be None \" \\\n", + " f\" and a valid, ideally a numpy datatype !\")\n", + " # self.doc = None # docstring\n", + " self.name = name # name of the field\n", + " self.unit = unit # not unit category but actual unit\n", + " # use special values \"unitless\" for NX_UNITLESS (e.g. 1) and\n", + " # \"dimensionless\" for NX_DIMENSIONLESS (e.g. 
1m / 1m)\n", + " self.dtype = dtype # use np.dtype if possible\n", + " if value is None or dtype is str:\n", + " self.unit = \"unitless\"\n", + " if value is not None:\n", + " self.value = value\n", + " # value should be a numpy scalar, tensor, or string if possible\n", + " self.eqv_hdf = None\n", + " if \"eqv_hdf\" in kwargs:\n", + " if kwargs[\"eqv_hdf\"] in [\"group\", \"dataset\", \"attribute\"]:\n", + " self.eqv_hdf = kwargs[\"eqv_hdf\"]\n", + " else:\n", + " raise ValueError(f\"Value of keyword argument eqv_hdf needs to be one of grp, dset, attr !\")\n", + "\n", + " def __repr__(self):\n", + " \"\"\"Report values.\"\"\"\n", + " return f\"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, eqv_hdf: {self.eqv_hdf}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "97c3a10f-903a-4d7e-883b-779c6c34f4a0", + "metadata": {}, + "outputs": [], + "source": [ + "NX_IMAGE_REAL_SPACE_SET_HDF_PATH = [\"image_oned/axis_x-field\",\n", + "\"image_oned/axis_x@long_name-attribute\",\n", + "\"image_oned/intensity-field\",\n", + "\"image_threed/axis_x-field\",\n", + "\"image_threed/axis_x@long_name-attribute\",\n", + "\"image_threed/axis_y-field\",\n", + "\"image_threed/axis_y@long_name-attribute\",\n", + "\"image_threed/axis_z-field\",\n", + "\"image_threed/axis_z@long_name-attribute\",\n", + "\"image_threed/intensity-field\",\n", + "\"image_twod/axis_x-field\",\n", + "\"image_twod/axis_x@long_name-attribute\",\n", + "\"image_twod/axis_y-field\",\n", + "\"image_twod/axis_y@long_name-attribute\",\n", + "\"image_twod/intensity-field\",\n", + "\"stack_oned/axis_image_identifier-field\",\n", + "\"stack_oned/axis_image_identifier@long_name-attribute\",\n", + "\"stack_oned/axis_x-field\",\n", + "\"stack_oned/axis_x@long_name-attribute\",\n", + "\"stack_oned/intensity-field\",\n", + "\"stack_threed/axis_image_identifier-field\",\n", + "\"stack_threed/axis_image_identifier@long_name-attribute\",\n", + "\"stack_threed/axis_x-field\",\n", + "\"stack_threed/axis_x@long_name-attribute\",\n", + "\"stack_threed/axis_y-field\",\n", + "\"stack_threed/axis_y@long_name-attribute\",\n", + "\"stack_threed/axis_z-field\",\n", + "\"stack_threed/axis_z@long_name-attribute\",\n", + "\"stack_threed/intensity-field\",\n", + "\"stack_twod/axis_image_identifier-field\",\n", + "\"stack_twod/axis_image_identifier@long_name-attribute\",\n", + "\"stack_twod/axis_x-field\",\n", + "\"stack_twod/axis_x@long_name-attribute\",\n", + "\"stack_twod/axis_y-field\",\n", + "\"stack_twod/axis_y@long_name-attribute\",\n", + "\"stack_twod/intensity-field\"]\n", + "\n", + "class NxEmImageRealSpaceSet():\n", + " def __init__(self):\n", + " self.tmp: Dict = {}\n", + " for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH:\n", + " if entry.endswith(\"-field\") is True:\n", + " self.tmp[entry[0:len(entry)-len(\"-field\")]] = NxObject(eqv_hdf=\"dataset\")\n", + " elif entry.endswith(\"-attribute\") is True:\n", + " self.tmp[entry[0:len(entry)-len(\"-attribute\")]] = NxObject(eqv_hdf=\"attribute\")\n", + " else:\n", + " self.tmp[entry[0:len(entry)-len(\"-group\")]] = NxObject(eqv_hdf=\"group\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "8bbbaa03-0aac-43fb-941a-f63910496fa3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'image_oned/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_oned/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_oned/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: 
dataset, 'image_threed/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_threed/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_threed/axis_z': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_z@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_threed/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_twod/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_twod/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_twod/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_twod/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_twod/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_oned/axis_image_identifier': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_oned/axis_image_identifier@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_oned/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_oned/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_oned/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_image_identifier': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_image_identifier@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/axis_z': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_z@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_image_identifier': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_image_identifier@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_twod/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_twod/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_twod/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset}\n" + ] + } + ], + "source": [ + "tmp = NxEmImageRealSpaceSet()\n", + "# print(tmp.tmp)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "58052fb7-723f-476d-a8ca-df99efffcc05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ", (1,), {'names': ['Version', 'ImageType', 'Label', 'SMin', 'SMax', 'Par_Color', 'EdsPresetMode', 'EdsPresetTime', 'EdsMapDataType', 'TiltCorrected', 'RasterMode', 'ChannelStart', 'ChannelEnd', 'IntDummy1', 'IAdc', 'ISize', 'IBits', 'NReads', 'NFrames', 'FDwell', 'KVolt', 'Tilt', 'TakeOff', 'Magnification', 'WorkingDistance', 'MicronsPerPixelX', 'MicronsPerPixelY', 'NumberOfCommentLines', 'TextLines', 'Fpar1', 'NOverlayElements', 
'OverlayColors', 'XmpEdiTimeCnst', 'Fpar'], 'formats': [', (' 38\u001b[0m spd_naive[one, two, :] \u001b[38;5;241m=\u001b[39m \u001b[43mspd_edax\u001b[49m\u001b[43m[\u001b[49m\u001b[43mone\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtwo\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNaive done\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m: \n", + "File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/py3.10.13/lib/python3.10/site-packages/h5py/_hl/dataset.py:841\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, args, new_dtype)\u001b[0m\n\u001b[1;32m 839\u001b[0m mspace \u001b[38;5;241m=\u001b[39m h5s\u001b[38;5;241m.\u001b[39mcreate_simple(selection\u001b[38;5;241m.\u001b[39mmshape)\n\u001b[1;32m 840\u001b[0m fspace \u001b[38;5;241m=\u001b[39m selection\u001b[38;5;241m.\u001b[39mid\n\u001b[0;32m--> 841\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdxpl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dxpl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[38;5;66;03m# Patch up the output for NumPy\u001b[39;00m\n\u001b[1;32m 844\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m ():\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "with h5py.File(fnm, \"r\") as h5r:\n", + " src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\"\n", + " trg = \"SPD\"\n", + " reqs = [\"MicronPerPixelX\", \"MicronPerPixelY\", \"NumberOfLines\", \"NumberOfPoints\", \"NumberofChannels\"]\n", + " for req in reqs:\n", + " if req not in h5r[f\"{src}/{trg}\"].attrs.keys():\n", + " # also check for shape\n", + " raise ValueError(f\"Required attribute named {req} not found in {src}/{trg} !\")\n", + " nyxe = {\"y\": h5r[f\"{src}/{trg}\"].attrs[\"NumberOfLines\"][0],\n", + " \"x\": h5r[f\"{src}/{trg}\"].attrs[\"NumberOfPoints\"][0],\n", + " \"e\": h5r[f\"{src}/{trg}\"].attrs[\"NumberofChannels\"][0]}\n", + " print(nyxe)\n", + " # the native APEX SPD concept instance is a two-dimensional array of arrays of length e (n_energy_bins)\n", + " # likely EDAX has in their C(++) code a vector of vector or something equivalent either way we faced\n", + " # nested C arrays of the base data type (here (u)int 16\n", + " # even worse, chunked in HDF5 thus the e-long arrays are just some payload inside the compressed\n", + " # chunk without some extra logic to resolve the third (energy) dimension:\n", + " # how to reshape this efficiently without creating unnecessary copies\n", + " # the following code is ugly as it needs a maximum large copy of the dataset\n", + " spd_edax = 
h5r[f\"{src}/{trg}\"]\n", + " print(f\"edax: {np.shape(spd_edax)}, {type(spd_edax)}, {spd_edax.dtype}\")\n", + " spd_naive = np.zeros((nyxe[\"y\"], nyxe[\"x\"], nyxe[\"e\"]), \"\n", + "axes\n", + "[{'name': 'height', 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'size': 512}, {'name': 'width', 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'size': 512}]\n", + "metadata\n", + "{'Acquisition_instrument': {'TEM': {'beam_energy': 200, 'magnification': 56000}}, 'Sample': {'name': 'map 4'}, 'Signal': {}, 'General': {'title': 'HAADF', 'original_filename': '46_ES-LP_L1_brg.bcf'}}\n", + "original_metadata\n", + "{'Microscope': {'HV': 200, 'WD': -1, 'Mag': 56000, 'DX': 0.00351909256747931, 'DY': 0.00351909256747931, 'Flags': 16776960, 'XmlClassType': 'TRTSEMData'}, 'DSP Configuration': {'ImageWidth': 512, 'ImageHeight': 512, 'PixelAverage': 60, 'LineAverage': 1, 'SEBitCount': 16, 'ChannelCount': 4, 'ChannelName0': 'BF', 'ChannelName1': 'DF', 'ChannelName2': 'DF4', 'Channel3': 1, 'ChannelName3': 'HAADF', 'CounterIndex': 0, 'CounterChannelUsed': 0, 'TiltAngle': 0, 'CounterMode': 0, 'PixelTime': 1, 'XmlClassType': 'TRTDSPConfiguration'}, 'Stage': {'State': 7936, 'XmlClassType': 'TRTSEMStageData'}}\n", + "mapping\n", + "{'Stage.Rotation': ('Acquisition_instrument.TEM.Stage.rotation', None), 'Stage.Tilt': ('Acquisition_instrument.TEM.Stage.tilt_alpha', None), 'Stage.X': ('Acquisition_instrument.TEM.Stage.x', None), 'Stage.Y': ('Acquisition_instrument.TEM.Stage.y', None), 'Stage.Z': ('Acquisition_instrument.TEM.Stage.z', None)}\n", + "data\n", + "[[19447 20033 16242 ... 48594 49493 50088]\n", + " [21447 21375 21792 ... 47285 48770 49734]\n", + " [22395 22443 22459 ... 48080 48883 49461]\n", + " ...\n", + " [18395 18379 18853 ... 17190 17801 17768]\n", + " [17785 19704 18772 ... 15905 16178 17511]\n", + " [19471 20226 20355 ... 17142 17254 17793]]\n", + "Loading type(entry) \n", + "data\n", + "[[[0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " ...\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]]\n", + "\n", + " [[0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " ...\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]]\n", + "\n", + " [[0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " ...\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]]\n", + "\n", + " ...\n", + "\n", + " [[0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " ...\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]]\n", + "\n", + " [[0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " ...\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]]\n", + "\n", + " [[0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " ...\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 0 0 0]\n", + " [0 0 0 ... 
0 0 0]]]\n", + "axes\n", + "[{'name': 'height', 'size': 512, 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'navigate': True}, {'name': 'width', 'size': 512, 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'navigate': True}, {'name': 'Energy', 'size': 2048, 'offset': -0.4798465772, 'scale': 0.01000934711, 'units': 'keV', 'navigate': False}]\n", + "metadata\n", + "{'Acquisition_instrument': {'TEM': {'beam_energy': 200, 'magnification': 56000, 'Detector': {'EDS': {'elevation_angle': 22.0, 'detector_type': 'Custom type', 'azimuth_angle': 45.0, 'real_time': 723.7632, 'live_time': 13.678}}}}, 'General': {'original_filename': '46_ES-LP_L1_brg.bcf', 'title': 'EDX', 'date': '2020-07-22', 'time': '14:18:32'}, 'Sample': {'name': 'map 4', 'elements': ['Al', 'Ca', 'Fe', 'Hf', 'Lu', 'Mg', 'Nd', 'O', 'Si', 'Sm', 'U'], 'xray_lines': ['Al_Ka', 'Ca_Ka', 'Fe_Ka', 'Hf_La', 'Lu_La', 'Mg_Ka', 'Nd_La', 'O_Ka', 'Si_Ka', 'Sm_La', 'U_Ma']}, 'Signal': {'signal_type': 'EDS_TEM', 'quantity': 'X-rays (Counts)'}}\n", + "original_metadata\n", + "{'Hardware': {'TRTKnownHeader': {'Type': 'RTHardware', 'Size': 137}, 'RealTime': 16331, 'LifeTime': 13678, 'DeadTime': 16.0, 'ZeroPeakPosition': 95, 'ZeroPeakFrequency': 800, 'PulseDensity': 89486, 'Amplification': 20000.0, 'ShapingTime': 60000, 'XmlClassType': 'TRTSpectrumHardwareHeader'}, 'Detector': {'TRTKnownHeader': {'Type': 'RTDetector', 'Version': 5, 'Size': 9932}, 'Technology': 'SDD', 'Type': 'Custom type', 'DetectorThickness': 0.45, 'SiDeadLayerThickness': 0.01, 'DetLayers': {}, 'WindowType': 'Custom type', 'WindowLayers': None, 'Corrections': {'Escape': None, 'Tail': {'FormulaType': 'Internal', 'MainCorrection': 1}, 'Shelf': {'FormulaType': 'Internal', 'RangeStart': 0.08, 'RangeEnd': 10.0, 'MainCorrection': 1, 'Coefficient0': 1}, 'Shift': {'FormulaType': 'Internal', 'RangeStart': 0.08, 'RangeEnd': 0.555, 'MainCorrection': 1}, 'FWHMShift': None}, 'CorrectionType': 2, 'ResponseFunctionCount': 21, 'SampleCount': 5, 'SampleOffset': -3, 'PulsePairResTimeCount': 0, 'PileUpMinEnergy': 1, 'PileUpWithBG': False, 'TailFactor': 0, 'ShelfFactor': 0, 'ShiftFactor': 0, 'ShiftFactor2': 0, 'ShiftData': (0.079, 0, 0.08, 0.01, 0.555, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 'ResponseFunction': [(0, 3.3, 0.000801, 3.3, 0.00298, 3.3, 0.008902, 3.3, 0.025, 3.300046, 0.041098, 3.303475, 0.04702, 3.307302, 0.049199, 3.309237, 0.05, 3.31), (0, 3.3, 0.00444, 3.3, 0.01651, 3.3, 0.049318, 3.3, 0.1385, 3.300046, 0.227682, 3.303475, 0.26049, 3.307302, 0.27256, 3.309237, 0.277, 3.31), (0, 1.1, 0.006283, 1.1, 0.023364, 1.1, 0.069793, 1.1, 0.196, 1.102513, 0.322207, 1.291145, 0.368636, 1.50163, 0.385717, 1.608042, 0.392, 1.65), (0, 0.4, 0.008415, 0.4, 0.031291, 0.4, 0.093473, 0.4, 0.2625, 0.40457, 0.431527, 0.747537, 0.493709, 1.13024, 0.516585, 1.323712, 0.525, 1.4), (0, 0.2, 0.010836, 0.2, 0.040291, 0.2, 0.120357, 0.2, 0.338, 0.202513, 0.555643, 0.391145, 0.635709, 0.601632, 0.665164, 0.708042, 0.676, 0.75), (0, 0.03, 0.016687, 0.03, 0.062045, 0.03, 0.185343, 0.03, 0.5205, 0.032513, 0.855657, 0.221145, 0.978955, 0.43163, 1.024313, 0.538043, 1.041, 0.58), (0, 0.055, 0.020101, 0.055, 0.07474, 0.055, 0.223266, 0.055, 0.627, 0.057057, 1.030734, 0.21139, 1.17926, 0.383607, 1.233899, 0.470671, 1.254, 0.505), (0, 0.05, 0.023836, 0.05, 0.088627, 0.05, 0.26475, 0.05, 0.7435, 0.050732, 1.22225, 0.105607, 1.398373, 0.166839, 1.463164, 0.197794, 1.487, 0.21), (0, 0.03, 0.027891, 0.03, 0.103707, 0.03, 0.309795, 0.03, 0.87, 0.030594, 1.430205, 0.075181, 1.636293, 0.124932, 1.712109, 0.150082, 
1.74, 0.16), (0, 0.15, 0.030776, 0.15, 0.114435, 0.15, 0.341842, 0.15, 0.96, 0.152377, 1.578158, 0.330719, 1.805565, 0.529724, 1.889224, 0.63033, 1.92, 0.67), (0, 0.15, 0.032283, 0.15, 0.120037, 0.15, 0.358578, 0.15, 1.007, 0.152377, 1.655422, 0.330719, 1.893963, 0.529725, 1.981717, 0.63033, 2.014, 0.67), (0, 0.085, 0.036996, 0.085, 0.13756, 0.085, 0.410923, 0.085, 1.154, 0.087055, 1.897077, 0.241391, 2.17044, 0.413607, 2.271004, 0.50067, 2.308, 0.535), (0, 0.085, 0.05918, 0.085, 0.220049, 0.085, 0.657334, 0.085, 1.846, 0.08589, 3.034665, 0.152769, 3.471952, 0.227397, 3.63282, 0.265124, 3.692, 0.28), (0, 0.035, 0.079378, 0.035, 0.295146, 0.035, 0.881668, 0.035, 2.476, 0.035549, 4.070332, 0.076705, 4.656854, 0.122629, 4.872623, 0.145845, 4.952, 0.155), (0, 0.035, 0.119867, 0.035, 0.445699, 0.035, 1.331404, 0.035, 3.739, 0.035457, 6.146595, 0.069754, 7.0323, 0.108024, 7.358133, 0.127371, 7.478, 0.135), (0, 0.035, 0.148303, 0.035, 0.551433, 0.035, 1.647253, 0.035, 4.626, 0.035457, 7.604747, 0.069754, 8.700567, 0.108024, 9.103698, 0.127371, 9.252, 0.135), (0, 0.023571, 0.176322, 0.023571, 0.655616, 0.023571, 1.958472, 0.023571, 5.5, 0.023597, 9.041529, 0.025499, 10.344384, 0.02762, 10.823678, 0.028693, 11, 0.029116), (0, 0.009286, 0.208381, 0.009286, 0.774819, 0.009286, 2.314557, 0.009286, 6.5, 0.009297, 10.685443, 0.010101, 12.225181, 0.010999, 12.791619, 0.011453, 13, 0.011632), (0, 0.010714, 0.24044, 0.010714, 0.894022, 0.010714, 2.670643, 0.010714, 7.5, 0.010714, 12.329357, 0.010714, 14.105978, 0.010714, 14.759561, 0.010714, 15, 0.010714), (0, 0, 0.320586, 0, 1.192029, 0, 3.560857, 0, 10, 0, 16.439142, 0, 18.80797, 0, 19.679415, 0, 20, 0), (0, 0, 1.60293, 0, 5.960146, 0, 17.804287, 0, 50, 0, 82.195709, 0, 94.039856, 0, 98.397072, 0, 100, 0)], 'XmlClassType': 'TRTDetectorHeader'}, 'Analysis': {'TRTKnownHeader': {'Type': 'RTESMA', 'Size': 662}, 'PrimaryEnergy': 200.0, 'ReferenceFactor': -1, 'ReferenceStdDev': -1, 'BaseRefStdDev': 0.002000100008, 'ElevationAngle': 22.0, 'AzimutAngle': 45.0, 'DetectorAngle': 15.0, 'CoatCorrection': None, 'XmlClassType': 'TRTESMAHeader'}, 'Spectrum': {'Size': 82, 'Date': '22.7.2020', 'Time': '14:18:32', 'ChannelCount': 2048, 'CalibAbs': -0.4798465772, 'CalibLin': 0.01000934711, 'SigmaAbs': 0.0004952410698, 'SigmaLin': 0.0004825546962, 'XmlClassType': 'TRTSpectrumHeader'}, 'DSP Configuration': {'ImageWidth': 512, 'ImageHeight': 512, 'PixelAverage': 60, 'LineAverage': 1, 'SEBitCount': 16, 'ChannelCount': 4, 'ChannelName0': 'BF', 'ChannelName1': 'DF', 'ChannelName2': 'DF4', 'Channel3': 1, 'ChannelName3': 'HAADF', 'CounterIndex': 0, 'CounterChannelUsed': 0, 'TiltAngle': 0, 'CounterMode': 0, 'PixelTime': 1, 'XmlClassType': 'TRTDSPConfiguration'}, 'Line counter': (47, 47, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 
46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46), 'Stage': {'State': 7936, 'XmlClassType': 'TRTSEMStageData'}, 'Microscope': {'HV': 200, 'WD': -1, 'Mag': 56000, 'DX': 0.00351909256747931, 'DY': 0.00351909256747931, 'Flags': 16776960, 'XmlClassType': 'TRTSEMData'}}\n", + "mapping\n", + "{'Stage.Rotation': ('Acquisition_instrument.TEM.Stage.rotation', None), 'Stage.Tilt': ('Acquisition_instrument.TEM.Stage.tilt_alpha', None), 'Stage.X': ('Acquisition_instrument.TEM.Stage.x', None), 'Stage.Y': ('Acquisition_instrument.TEM.Stage.y', None), 'Stage.Z': ('Acquisition_instrument.TEM.Stage.z', None)}\n" + ] + } + ], + "source": [ + "objs = bruker.file_reader(f\"{src}/{fnms[2][1]}\")\n", + "# objs = emd.file_reader(f\"{src}/{fnms[3][1]}\")\n", + "# objs = digitalmicrograph.file_reader(f\"{src}/{fnms[4][1]}\")\n", + "if isinstance(objs, list) is True:\n", + " for entry in objs:\n", + " print(f\"Loading type(entry) {type(entry)}\")\n", + " if isinstance(entry, dict) is True:\n", + " for key, val in entry.items():\n", + " print(key)\n", + " print(val) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c92b0a71-e9d8-460e-99b5-b12208b56258", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e04b275f-bc59-4fbc-8c56-ae4d6e964d14", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48f31e6c-1554-4476-8688-5f5323d513c8", + "metadata": {}, + "outputs": [], + "source": [ + "# https://codereview.stackexchange.com/a/21035\n", + "# https://stackoverflow.com/questions/38852822/how-to-flatten-xml-file-in-python\n", + "from collections import OrderedDict\n", + "\n", + "def flatten_dict(d):\n", + " def items():\n", + " for key, value in d.items():\n", + " # nested subtree\n", + " if isinstance(value, dict):\n", + " for subkey, subvalue in flatten_dict(value).items():\n", + " yield '{}.{}'.format(key, subkey), subvalue\n", + " # nested list\n", + " elif isinstance(value, list):\n", + " for num, elem in enumerate(value):\n", + " for subkey, subvalue in flatten_dict(elem).items():\n", + " yield '{}.[{}].{}'.format(key, num, subkey), subvalue\n", + " # everything else (only leafs should remain)\n", + " else:\n", + " yield key, value\n", + " return OrderedDict(items())\n", + "\n", + "import xmltodict" + ] + 
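The same `flatten_dict` helper reappears here for the AXON PNG metadata; a quick round trip on a toy XML string shows the dotted-key and `[n]`-index convention it produces (the XML snippet is made up):

```python
# Sketch: what flatten_dict yields for nested XML parsed with xmltodict.
from collections import OrderedDict
import xmltodict

def flatten_dict(d):  # compact copy of the helper defined in the cell above
    def items():
        for key, value in d.items():
            if isinstance(value, dict):
                for subkey, subvalue in flatten_dict(value).items():
                    yield f"{key}.{subkey}", subvalue
            elif isinstance(value, list):
                for num, elem in enumerate(value):
                    for subkey, subvalue in flatten_dict(elem).items():
                        yield f"{key}.[{num}].{subkey}", subvalue
            else:
                yield key, value
    return OrderedDict(items())

xml = "<root><item><name>a</name></item><item><name>b</name></item></root>"
print(list(flatten_dict(xmltodict.parse(xml)).items()))
# [('root.item.[0].name', 'a'), ('root.item.[1].name', 'b')]
```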
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a5d5cbb4-c5a2-44a1-b6a4-277167582869", + "metadata": {}, + "outputs": [], + "source": [ + "with Image.open(fnm, mode=\"r\") as fp:\n", + " fp.load() # Needed only for .png EXIF data (see citation above)\n", + " if \"MicroscopeControlImage\" in fp.info.keys():\n", + " # print(fp.info[\"MicroscopeControlImage\"])\n", + " xml_content = xmltodict.parse(fp.info[\"MicroscopeControlImage\"])\n", + " flattened_xml = flatten_dict(xml_content)\n", + " for k,v in flattened_xml.items():\n", + " print('{} = {}'.format(k,v))\n", + " elif fnm.lower().endswith(\".png\") is True: # check for mime type instead\n", + " print(f\"There is no iTXt chunk in {fnm} which has embedded XML within the AXON namespace MicroscopeControlImage!\")\n", + " else:\n", + " print(f\"There is nothing to harvest here!\")" + ] + }, + { + "cell_type": "markdown", + "id": "4f4bf73d-66b7-414b-abb1-db99b2bf370a", + "metadata": {}, + "source": [ + "***" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1963afb6-6e48-4628-a0e8-d2da0874701e", + "metadata": {}, + "outputs": [], + "source": [ + "# handle TIFF\n", + "with Image.open(fnm, mode=\"r\") as fp:\n", + " for key in fp.tag_v2:\n", + " if key in [34118, 34119]:\n", + " print(type(fp.tag[key]))\n", + " print(len(fp.tag[key])) \n", + " # print(f\"{key}, {fp.tag[key]}\")\n", + " if key not in TAGS.keys():\n", + " print(f\"--->tag {key}, is not in PIL.TiffTAGS !\")\n", + " # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2}\n", + " # for key, val in self.tags.items():\n", + " # print(f\"{key}, {val}\")\n", + " nparr = np.array(fp)\n", + " print(f\"{type(nparr)}\")\n", + " print(f\"{nparr.dtype}\")\n", + " print(f\"{np.shape(nparr)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9ef2a35-a260-4a54-9b83-eae1d588966f", + "metadata": {}, + "outputs": [], + "source": [ + "with Image.open(fnm, mode=\"r\") as fp:\n", + " if True is False:\n", + " czi_keys = [34118, 34119]\n", + " for czi_key in czi_keys:\n", + " if czi_key in fp.tag_v2:\n", + " print(f\"Found czi_key {tfs_key}...\")\n", + " utf = fp.tag[czi_key]\n", + " print(type(utf))\n", + " if len(utf) == 1:\n", + " print(utf[0])\n", + " # exit(1)\n", + " tfs_keys = [34682]\n", + " for tfs_key in tfs_keys:\n", + " if tfs_key in fp.tag_v2:\n", + " print(f\"Found tfs_key {tfs_key}...\")\n", + " utf = fp.tag[tfs_key]\n", + " print(type(utf))\n", + " if len(utf) == 1:\n", + " print(utf[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28687c0e-6f14-484c-b511-3a4906d9672e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8ada062-e308-4288-8f00-b3e620f3c890", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/\n", + "def sort_tuple(tup):\n", + " # convert the list of tuples to a numpy array with data type (object, int)\n", + " arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n", + " # get the indices that would sort the array based on the second column\n", + " indices = np.argsort(arr['col2'])\n", + " # use the resulting indices to sort the array\n", + " sorted_arr = arr[indices]\n", + " # convert the sorted numpy array back to a list of tuples\n", + " sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n", + " return sorted_tup" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "d27df293-626c-4d37-80df-96c182d4f401", + "metadata": {}, + "outputs": [], + "source": [ + "def if_str_represents_float(s):\n", + " try:\n", + " return isinstance(float(s), float)\n", + " # return str(float(s)) == s\n", + " except ValueError:\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f647fa79-330b-48b2-8360-f92fc5ead187", + "metadata": {}, + "outputs": [], + "source": [ + "\"10\".isdigit()\n", + "# isinstance(float(\"8.99306e-010\"), float)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb", + "metadata": {}, + "outputs": [], + "source": [ + "# TFS sections based on IKZ ALN_baoh_021.tif example\n", + "import mmap\n", + "\n", + "tfs_section_names = [\"[User]\",\n", + " \"[System]\",\n", + " \"[Beam]\",\n", + " \"[EBeam]\", \n", + " \"[GIS]\",\n", + " \"[Scan]\",\n", + " \"[EScan]\",\n", + " \"[Stage]\",\n", + " \"[Image]\",\n", + " \"[Vacuum]\",\n", + " \"[Specimen]\",\n", + " \"[Detectors]\",\n", + " \"[T2]\",\n", + " \"[Accessories]\",\n", + " \"[EBeamDeceleration]\",\n", + " \"[CompoundLensFilter]\",\n", + " \"[PrivateFei]\",\n", + " \"[HiResIllumination]\",\n", + " \"[EasyLift]\",\n", + " \"[HotStageMEMS]\",\n", + " \"[HotStage]\",\n", + " \"[HotStageHVHS]\",\n", + " \"[ColdStage]\"]\n", + "\n", + "tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n", + " \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n", + "tfs_section_offsets = {}\n", + "\n", + "with open(fnm, 'rb', 0) as file:\n", + " s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n", + " for section_name in tfs_section_names:\n", + " pos = s.find(bytes(section_name, \"utf8\")) # != -1\n", + " tfs_section_offsets[section_name] = pos\n", + " print(tfs_section_offsets)\n", + "\n", + " # define search offsets\n", + " tpl = []\n", + " for key, value in tfs_section_offsets.items():\n", + " tpl.append((key, value))\n", + " # print(tpl)\n", + " tpl = sort_tuple(tpl)\n", + " print(tpl)\n", + " # if section_name == \"[System]\":\n", + " pos_s = None\n", + " pos_e = None\n", + " for idx in np.arange(0, len(tpl)):\n", + " if tpl[idx][0] != \"[System]\":\n", + " continue\n", + " else:\n", + " pos_s = tpl[idx][1]\n", + " if idx <= len(tpl) - 1:\n", + " pos_e = tpl[idx + 1][1]\n", + " break\n", + " print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n", + " # fish metadata of e.g. 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TFS sections based on IKZ ALN_baoh_021.tif example\n",
+    "import mmap\n",
+    "\n",
+    "tfs_section_names = [\"[User]\",\n",
+    "                     \"[System]\",\n",
+    "                     \"[Beam]\",\n",
+    "                     \"[EBeam]\",\n",
+    "                     \"[GIS]\",\n",
+    "                     \"[Scan]\",\n",
+    "                     \"[EScan]\",\n",
+    "                     \"[Stage]\",\n",
+    "                     \"[Image]\",\n",
+    "                     \"[Vacuum]\",\n",
+    "                     \"[Specimen]\",\n",
+    "                     \"[Detectors]\",\n",
+    "                     \"[T2]\",\n",
+    "                     \"[Accessories]\",\n",
+    "                     \"[EBeamDeceleration]\",\n",
+    "                     \"[CompoundLensFilter]\",\n",
+    "                     \"[PrivateFei]\",\n",
+    "                     \"[HiResIllumination]\",\n",
+    "                     \"[EasyLift]\",\n",
+    "                     \"[HotStageMEMS]\",\n",
+    "                     \"[HotStage]\",\n",
+    "                     \"[HotStageHVHS]\",\n",
+    "                     \"[ColdStage]\"]\n",
+    "\n",
+    "tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n",
+    "                                    \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n",
+    "tfs_section_offsets = {}\n",
+    "\n",
+    "with open(fnm, 'rb', 0) as file:\n",
+    "    s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n",
+    "    for section_name in tfs_section_names:\n",
+    "        pos = s.find(bytes(section_name, \"utf8\"))  # -1 if not found\n",
+    "        tfs_section_offsets[section_name] = pos\n",
+    "    print(tfs_section_offsets)\n",
+    "\n",
+    "    # define search offsets\n",
+    "    tpl = []\n",
+    "    for key, value in tfs_section_offsets.items():\n",
+    "        tpl.append((key, value))\n",
+    "    tpl = sort_tuple(tpl)\n",
+    "    print(tpl)\n",
+    "    pos_s = None\n",
+    "    pos_e = None\n",
+    "    for idx in np.arange(0, len(tpl)):\n",
+    "        if tpl[idx][0] != \"[System]\":\n",
+    "            continue\n",
+    "        else:\n",
+    "            pos_s = tpl[idx][1]\n",
+    "            if idx < len(tpl) - 1:  # guard: [System] may be the last section\n",
+    "                pos_e = tpl[idx + 1][1]\n",
+    "            break\n",
+    "    print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n",
+    "    # fish metadata of e.g. the system section\n",
+    "    section_metadata = {}\n",
+    "    for term in tfs_section_details[\"[System]\"]:\n",
+    "        s.seek(pos_s, 0)\n",
+    "        pos = s.find(bytes(term, \"utf8\"))\n",
+    "        if pos != -1 and (pos_e is None or pos < pos_e):\n",
+    "            s.seek(pos, 0)\n",
+    "            section_metadata[f\"{term}\"] = f\"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}\"\n",
+    "            if if_str_represents_float(section_metadata[f\"{term}\"]):\n",
+    "                section_metadata[f\"{term}\"] = np.float64(section_metadata[f\"{term}\"])\n",
+    "            elif section_metadata[f\"{term}\"].isdigit():\n",
+    "                section_metadata[f\"{term}\"] = np.int64(section_metadata[f\"{term}\"])\n",
+    "    print(section_metadata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f3eb287-8f55-424c-a016-a07fc59f068a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'2'.isdigit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1341e30-fcce-4a3d-a099-d342b8bbe318",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/delete/test.ebsd.sh b/delete/test.ebsd.sh
new file mode 100755
index 000000000..4730b1d5b
--- /dev/null
+++ b/delete/test.ebsd.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# use ${HOME} instead of a quoted tilde, which the shell would not expand
+src_prefix="${HOME}/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_ebsd"
+src_pyxem="${src_prefix}_pyxem"
+src_mtex="${src_prefix}_mtex"
+echo "$src_prefix"
+echo "$src_pyxem"
+echo "$src_mtex"
+ohfiles="${src_pyxem}/*.oh5"
+cmd="ls ${ohfiles}"
+$cmd

From 518a706d4b9cb5f5d6ade8528320f47b7684dadd Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Fri, 12 Jan 2024 13:30:07 +0100
Subject: [PATCH 68/84] Reading and normalizing FOV, SPC, SPD, and LSD, next steps: i) add elementmap

---
 .../readers/em/concepts/nxs_image_r_set.py  |   1 +
 .../readers/em/concepts/nxs_spectrum_set.py |  47 +++-
 .../readers/em/subparsers/hfive_apex.py     | 230 +++++++++++++++---
 3 files changed, 238 insertions(+), 40 deletions(-)

diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py
index fbdc311e7..bc8e675a8 100644
--- a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py
+++ b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py
@@ -66,6 +66,7 @@ class NxImageRealSpaceSet():
     def __init__(self):
         self.tmp: Dict = {}
+        self.tmp["source"] = None
         for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH:
             if entry.endswith("-field") is True:
                 self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset")
diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py
index e1bed0f39..077e30648 100644
--- a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py
+++ b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py
@@ -25,13 +25,56 @@
 from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject

-NX_SPECTRUM_SET_HDF_PATH: List = []
-#
this one needs an update ! +NX_SPECTRUM_SET_HDF_PATH: List = ["collection-group", + "collection/axis_energy-field", + "collection/axis_energy@long_name-attribute", + "collection/axis_scan_point_id-field", + "collection/axis_scan_point_id@long_name-attribute", + "collection/intensity-field", + "collection/intensity@long_name-attribute", + "PROCESS-group", + "PROCESS/detector_identifier-field", + "PROCESS/mode-field", + "PROCESS/PROGRAM-group", + "PROCESS/source-group", + "spectrum_zerod-group", + "spectrum_zerod/axis_energy-field", + "spectrum_zerod/axis_energy@long_name-attribute", + "spectrum_zerod/intensity-field", + "spectrum_zerod/intensity@long_name-attribute", + "spectrum_oned-group", + "spectrum_oned/axis_energy-field", + "spectrum_oned/axis_energy@long_name-attribute", + "spectrum_oned/axis_x-field", + "spectrum_oned/axis_x@long_name-attribute", + "spectrum_oned/intensity-field", + "spectrum_oned/intensity@long_name-attribute", + "spectrum_threed-group", + "spectrum_threed/axis_energy-field", + "spectrum_threed/axis_energy@long_name-attribute", + "spectrum_threed/axis_x-field", + "spectrum_threed/axis_x@long_name-attribute", + "spectrum_threed/axis_y-field", + "spectrum_threed/axis_y@long_name-attribute", + "spectrum_threed/axis_z-field", + "spectrum_threed/axis_z@long_name-attribute", + "spectrum_threed/intensity-field", + "spectrum_threed/intensity@long_name-attribute", + "spectrum_twod-group", + "spectrum_twod/axis_energy-field", + "spectrum_twod/axis_energy@long_name-attribute", + "spectrum_twod/axis_x-field", + "spectrum_twod/axis_x@long_name-attribute", + "spectrum_twod/axis_y-field", + "spectrum_twod/axis_y@long_name-attribute", + "spectrum_twod/intensity-field", + "spectrum_twod/intensity@long_name-attribute"] class NxSpectrumSet(): def __init__(self): self.tmp: Dict = {} + self.tmp["source"] = None for entry in NX_SPECTRUM_SET_HDF_PATH: if entry.endswith("-field") is True: self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 67a3868df..2a46a1df7 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -23,15 +23,18 @@ from diffpy.structure import Lattice, Structure from orix.quaternion import Orientation -from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser -from pynxtools.dataconverter.readers.em.utils.hfive_utils import \ - read_strings_from_dataset +from pynxtools.dataconverter.readers.em.subparsers.hfive_base \ + import HdfFiveBaseParser +from pynxtools.dataconverter.readers.em.utils.hfive_utils \ + import read_strings_from_dataset from pynxtools.dataconverter.readers.em.examples.ebsd_database import \ ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN -from pynxtools.dataconverter.readers.em.utils.get_scan_points import \ - get_scan_point_coords -from pynxtools.dataconverter.readers.em.concepts.nxs_image_r_set import \ - NX_IMAGE_REAL_SPACE_SET_HDF_PATH, NxImageRealSpaceSet +from pynxtools.dataconverter.readers.em.utils.get_scan_points \ + import get_scan_point_coords +from pynxtools.dataconverter.readers.em.concepts.nxs_image_r_set \ + import NxImageRealSpaceSet +from pynxtools.dataconverter.readers.em.concepts.nxs_spectrum_set \ + import NxSpectrumSet class HdfFiveEdaxApexReader(HdfFiveBaseParser): @@ -88,11 +91,11 @@ def parse_and_normalize(self): 
                # get field-of-view (fov in edax jargon, i.e. roi)
                if f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE" in h5r.keys():
                    ckey = self.init_named_cache(f"roi{cache_id}")
-                    self.parse_and_normalize_roi(
+                    self.parse_and_normalize_eds_fov(
                        h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE", ckey)
                    cache_id += 1

-                # get oim_maps, live_maps, or line_scans if available
+                # get oim_maps, live_maps, or full area if available
                area_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}"])
                for area_grp_nm in area_grp_nms:
                    if area_grp_nm.startswith("OIM Map"):
@@ -103,41 +106,32 @@ def parse_and_normalize(self):
                        self.parse_and_normalize_group_ebsd_phases(h5r, ckey)
                        self.parse_and_normalize_group_ebsd_data(h5r, ckey)
                        cache_id += 1
-                    elif area_grp_nm.startswith("Live Map"):
+
+                    for prefix in ["Live Map", "Free Draw", "Full Area"]:
+                        if not area_grp_nm.startswith(prefix):
+                            continue
+                        self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
+                        print(f"Parsing {self.prfx}")
+
+                        # SPC
                        self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
                        print(f"Parsing {self.prfx}")
                        ckey = self.init_named_cache(f"eds{cache_id}")
+                        self.parse_and_normalize_eds_spc(
+                            h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
+                        cache_id += 1
+
+                        # SPD
+                        ckey = self.init_named_cache(f"eds{cache_id}")
                        self.parse_and_normalize_eds_spd(
                            h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
                        cache_id += 1

-    def parse_and_normalize_roi(self, fp, src: str, ckey: str):
-        """Normalize and scale APEX-specific FOV/ROI image to NeXus."""
-        self.tmp[ckey] = NxImageRealSpaceSet()
-        reqs = ["PixelHeight", "PixelWidth"]
-        for req in reqs:
-            if req not in fp[f"{src}/FOVIMAGE"].attrs.keys():
-                # also check for shape
-                raise ValueError(f"Required attribute named {req} not found in {src}/FOVIMAGE !")
-        nyx = {"y": fp[f"{src}/FOVIMAGE"].attrs["PixelHeight"][0],
-               "x": fp[f"{src}/FOVIMAGE"].attrs["PixelWidth"][0]}
-        self.tmp[ckey]["image_twod/intensity"] = np.reshape(np.asarray(fp[f"{src}/FOVIMAGE"]), (nyx["y"], nyx["x"]))
-
-        syx = {"x": 1., "y": 1.}
-        scan_unit = {"x": "px", "y": "px"}
-        if f"{src}/FOVIMAGECOLLECTIONPARAMS" in fp.keys():
-            ipr = np.asarray(fp[f"{src}/FOVIPR"])
-            syx = {"x": ipr["MicronsPerPixelX"][0], "y": ipr["MicronsPerPixelY"][0]}
-            scan_unit = {"x": "µm", "y": "µm"}
-        dims = ["y", "x"]
-        for dim in dims:
-            self.tmp[ckey].tmp[f"image_twod/axis_{dim}"] = np.asarray(
-                np.linspace(0, nyx[dim] - 1, num=nyx[dim], endpoint=True) * syx[dim], np.float64)
-            self.tmp[ckey].tmp[f"image_twod/axis_{dim}@long_name"] \
-                = f"Calibrated pixel position along {dim} ({scan_unit[dim]})"
-        for key, val in self.tmp[ckey].tmp.items():
-            if key.startswith("image_twod"):
-                print(f"{key}, {val}")
+                    if area_grp_nm.startswith("LineScan"):
+                        ckey = self.init_named_cache(f"eds{cache_id}")
+                        self.parse_and_normalize_eds_lsd(
+                            h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
+                        cache_id += 1

    def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
        # no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds
        if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp:
@@ -311,9 +305,86 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
        # almost two decades of commercialization of the technique now
        get_scan_point_coords(self.tmp[ckey])

+    def parse_and_normalize_eds_fov(self, fp, src: str, ckey: str):
+        """Normalize and scale APEX-specific FOV/ROI image to NeXus."""
+        reqs = ["FOVIMAGE", "FOVIMAGECOLLECTIONPARAMS", "FOVIPR"]
+        for req in reqs:
+            if f"{src}/{req}" not in fp.keys():
+                return
+        reqs = ["PixelHeight", "PixelWidth"]
+        for req in reqs:
+            if req not in fp[f"{src}/FOVIMAGE"].attrs.keys():  # also check for shape
+                raise ValueError(f"Required attribute named {req} not found in {src}/FOVIMAGE !")
+        reqs = ["MicronsPerPixelX", "MicronsPerPixelY"]
+        for req in reqs:
+            if req not in fp[f"{src}/FOVIPR"].attrs.keys():
+                raise ValueError(f"Required attribute named {req} not found in {src}/FOVIPR !")
+
+        self.tmp[ckey] = NxImageRealSpaceSet()
+        self.tmp[ckey].tmp["source"] = f"{src}/FOVIMAGE"
+        nyx = {"y": fp[f"{src}/FOVIMAGE"].attrs["PixelHeight"][0],
+               "x": fp[f"{src}/FOVIMAGE"].attrs["PixelWidth"][0]}
+        syx = {"x": fp[f"{src}/FOVIPR"].attrs["MicronsPerPixelX"][0],
+               "y": fp[f"{src}/FOVIPR"].attrs["MicronsPerPixelY"][0]}
+        scan_unit = {"x": "µm", "y": "µm"}
+        # is micron because MicronsPerPixel{dim} is used by EDAX
+        self.tmp[ckey].tmp["image_twod/intensity"].value \
+            = np.reshape(np.asarray(fp[f"{src}/FOVIMAGE"]), (nyx["y"], nyx["x"]))
+
+        dims = ["y", "x"]
+        for dim in dims:
+            self.tmp[ckey].tmp[f"image_twod/axis_{dim}"].value \
+                = np.asarray(0. + np.linspace(0.,
+                                              nyx[dim] - 1,
+                                              num=nyx[dim],
+                                              endpoint=True) * syx[dim],
+                             syx["x"].dtype)
+            self.tmp[ckey].tmp[f"image_twod/axis_{dim}@long_name"].value \
+                = f"Position along {dim} ({scan_unit[dim]})"
+        for key, val in self.tmp[ckey].tmp.items():
+            if key.startswith("image_twod"):
+                print(f"image_twod, key: {key}, val: {val}")
+
+    def parse_and_normalize_eds_spc(self, fp, src: str, ckey: str):
+        """Normalize and scale APEX-specific SPC (sum) spectrum to NeXus."""
+        # https://hyperspy.org/rosettasciio/_downloads/
+        # 9e2f0ccf5287bb2d17f1b7550e1d626f/SPECTRUM-V70.pdf
+        if f"{src}/SPC" not in fp.keys():
+            return
+        if "NumberOfLines" in fp[f"{src}/SPC"].attrs.keys():
+            # an SPC with a NumberOfLines attribute belongs to a line scan,
+            # which parse_and_normalize_eds_lsd handles instead
+            return
+        reqs = ["eVOffset", "eVPCh", "NumberOfPoints", "SpectrumCount"]
+        for req in reqs:
+            if req not in fp[f"{src}/SPC"].attrs.keys():  # also check for shape
+                raise ValueError(f"Required attribute named {req} not found in {src}/SPC !")
+
+        self.tmp[ckey] = NxSpectrumSet()
+        self.tmp[ckey].tmp["source"] = f"{src}/SPC"
+        e_zero = fp[f"{src}/SPC"].attrs["eVOffset"][0]
+        e_delta = fp[f"{src}/SPC"].attrs["eVPCh"][0]
+        e_n = fp[f"{src}/SPC"].attrs["NumberOfPoints"][0]
+        self.tmp[ckey].tmp["spectrum_zerod/axis_energy"].value \
+            = e_zero + np.asarray(e_delta * np.linspace(0.,
+                                                        int(e_n) - 1,
+                                                        num=int(e_n),
+                                                        endpoint=True),
+                                  e_zero.dtype)
+        self.tmp[ckey].tmp["spectrum_zerod/axis_energy@long_name"].value \
+            = "Energy (eV)"
+        self.tmp[ckey].tmp["spectrum_zerod/intensity"].value \
+            = np.asarray(fp[f"{src}/SPC"].attrs["SpectrumCount"][0], np.int32)
+        self.tmp[ckey].tmp["spectrum_zerod/intensity@long_name"].value \
+            = "Count (1)"
+        for key, val in self.tmp[ckey].tmp.items():
+            if key.startswith("spectrum_zerod"):
+                print(f"spectrum_zerod, key: {key}, val: {val}")
+
     def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str):
+        """Normalize and scale APEX-specific spectrum cuboid to NeXus."""
+        # https://hyperspy.org/rosettasciio/_downloads/
+        # c2e8b23d511a3c44fc30c69114e2873e/SpcMap-spd.file.format.pdf
         if f"{src}/SPD" not in fp.keys():
-            return None
+            return
         reqs = ["MicronPerPixelX", "MicronPerPixelY", "NumberOfLines",
@@ -323,6 +394,8 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str):
             if req not in fp[f"{src}/SPD"].attrs.keys():  # also check for shape
                 raise ValueError(f"Required attribute named {req} not found in {src}/SPD !")

+        self.tmp[ckey] = NxSpectrumSet()
+        self.tmp[ckey].tmp["source"] = f"{src}/SPD"
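The SPC branch above constructs the calibrated energy axis as eV(i) = eVOffset + i * eVPCh. A minimal self-contained sketch of that construction (an editor's illustration, not part of this patch; the calibration values are assumed):

    import numpy as np

    e_zero = np.float64(0.)    # assumed eVOffset
    e_delta = np.float64(10.)  # assumed eVPCh, i.e. eV per channel
    e_n = 4096                 # assumed NumberOfPoints
    axis = e_zero + np.asarray(
        e_delta * np.linspace(0., int(e_n) - 1, num=int(e_n), endpoint=True),
        e_zero.dtype)
    assert axis[0] == 0. and axis[-1] == (e_n - 1) * e_delta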
+        nyxe = {"y": fp[f"{src}/SPD"].attrs["NumberOfLines"][0],
+                "x": fp[f"{src}/SPD"].attrs["NumberOfPoints"][0],
+                "e": fp[f"{src}/SPD"].attrs["NumberofChannels"][0]}
@@ -359,3 +432,84 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str):
     # content in the chunk cache is discarded plus we may end up reading substantially
     # more times from the file; tested this on a Samsung 990 2TB pro-SSD for a tiny 400 x 512 SPD:
     # above strategy 2 s, versus hours (!) required to read and reshape the spectrum via naive I/O
+    # TODO:: e.g. the IKZ example VInP_108_L2 has an Area 10/Live Map 1/SPD but it has
+    # no calibration data of the spectrum stack, which is significant as Live Map 1
+    # has a child SPC but here the number of channels is 4096 while for SPD the
+    # number of channels is 1000
+    # one could inspect the location of identified peaks from an SPC instance and
+    # thus infer (assuming linearly spaced energy channels) the range but as this is
+    # pure speculation and because of the fact that not even the SPD file format
+    # specification details the metadata, i.e. energy per channel, start and end,
+    # we do not use the SPD instance right now
+
+    def parse_and_normalize_eds_lsd(self, fp, src: str, ckey: str):
+        """Normalize and scale APEX-specific line scan with one spectrum each to NeXus."""
+        # https://hyperspy.org/rosettasciio/_downloads/
+        # c2e8b23d511a3c44fc30c69114e2873e/SpcMap-spd.file.format.pdf
+        # ../Region/Step = ../Region(deltaXY) * ../IPR/mmField(Width/Height) !
+        # ../Region attributes detail the calibrated position of the line in the image
+        # we need to collect pieces of information from various places to contextualize
+        # the absolute location of the line grid in the image of this LineScan group
+        # and to get the spectra right
+        # TODO: this can be an arbitrary free-form line, right?
+        reqs = ["LSD", "SPC", "REGION", "LINEMAPIMAGECOLLECTIONPARAMS"]
+        for req in reqs:
+            if f"{src}/{req}" not in fp.keys():
+                return
+        reqs = ["NumberOfSpectra",
+                "NumberofChannels"]  # TODO: mind the lowercase "of", it can break parsing easily!
+        for req in reqs:  # also check all following four for shape
+            if req not in fp[f"{src}/LSD"].attrs.keys():
+                raise ValueError(f"Required attribute named {req} not found in {src}/LSD !")
+        reqs = ["eVPCh"]
+        for req in reqs:
+            if req not in fp[f"{src}/SPC"].attrs.keys():
+                raise ValueError(f"Required attribute named {req} not found in {src}/SPC !")
+        reqs = ["Step", "X1", "X2", "Y1", "Y2"]
+        for req in reqs:
+            if req not in fp[f"{src}/REGION"].attrs.keys():
+                raise ValueError(f"Required attribute named {req} not found in {src}/REGION !")
+        reqs = ["mmFieldWidth", "mmFieldHeight"]  # mm as the name implies
+        for req in reqs:
+            if req not in fp[f"{src}/LINEMAPIMAGECOLLECTIONPARAMS"].attrs.keys():
+                raise ValueError(f"Required attribute named {req} not found "
+                                 f"in {src}/LINEMAPIMAGECOLLECTIONPARAMS !")
+
+        self.tmp[ckey] = NxSpectrumSet()
+        self.tmp[ckey].tmp["source"] = f"{src}/LSD"
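The line-scan position axis below is derived from relative REGION coordinates scaled by the physical field of view. A standalone sketch of that geometry (an editor's illustration, not part of this patch; all values are assumed):

    import numpy as np

    x1, y1, x2, y2 = 0.2, 0.3, 0.8, 0.7   # assumed relative REGION coordinates
    fw_mm, fh_mm = 0.1, 0.08              # assumed mmFieldWidth/mmFieldHeight
    i_n = 200                             # assumed NumberOfSpectra
    line = np.asarray([(x2 - x1) * fw_mm * 1000.,
                       (y2 - y1) * fh_mm * 1000.])  # line vector in µm
    line_length = np.sqrt(line[0]**2 + line[1]**2)
    print(line_length / i_n)  # µm stepped per spectrum along the line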
+        e_zero = np.float64(0.)  # strong assumption based on VInP_108_L2 example from IKZ;
+        # a plain Python float has no dtype attribute, hence the explicit np.float64
+        e_delta = fp[f"{src}/SPC"].attrs["eVPCh"][0]
+        e_n = fp[f"{src}/LSD"].attrs["NumberofChannels"][0]
+        self.tmp[ckey].tmp["spectrum_oned/axis_energy"].value \
+            = e_zero + np.asarray(e_delta * np.linspace(0.,
+                                                        int(e_n) - 1,
+                                                        num=int(e_n),
+                                                        endpoint=True),
+                                  e_zero.dtype)
+        self.tmp[ckey].tmp["spectrum_oned/axis_energy@long_name"].value \
+            = "Energy (eV)"
+
+        # vector representation of the line's physical length from mm to µm
+        line = np.asarray([
+            (fp[f"{src}/REGION"].attrs["X2"][0] - fp[f"{src}/REGION"].attrs["X1"][0])
+            * fp[f"{src}/LINEMAPIMAGECOLLECTIONPARAMS"].attrs["mmFieldWidth"] * 1000.,
+            (fp[f"{src}/REGION"].attrs["Y2"][0] - fp[f"{src}/REGION"].attrs["Y1"][0])
+            * fp[f"{src}/LINEMAPIMAGECOLLECTIONPARAMS"].attrs["mmFieldHeight"] * 1000.])
+        i_n = fp[f"{src}/LSD"].attrs["NumberOfSpectra"][0]
+        line_length = np.sqrt(line[0]**2 + line[1]**2)
+        line_incr = line_length / i_n
+        self.tmp[ckey].tmp["spectrum_oned/axis_x"].value \
+            = np.asarray(np.linspace(0.5 * line_incr,
+                                     line_length,
+                                     num=i_n,
+                                     endpoint=True),
+                         fp[f"{src}/REGION"].attrs["X2"][0].dtype)
+        self.tmp[ckey].tmp["spectrum_oned/axis_x@long_name"].value \
+            = "Position along the line (µm)"
+        self.tmp[ckey].tmp["spectrum_oned/intensity"].value \
+            = np.asarray(fp[f"{src}/LSD"][0], np.int32)
+        self.tmp[ckey].tmp["spectrum_oned/intensity@long_name"].value \
+            = "Count (1)"
+        for key, val in self.tmp[ckey].tmp.items():
+            if key.startswith("spectrum_oned"):
+                print(f"spectrum_oned, key: {key}, val: {val}")
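All concept classes touched in this series (NxImageRealSpaceSet, NxSpectrumSet, and later NxEmEdsIndexing) populate their tmp cache from a path template whose suffix encodes the HDF5 entity kind. A condensed sketch of that convention (an editor's illustration, not part of the patches; the NxObject stand-in only mimics the pynxtools class):

    from typing import Dict, List

    class NxObject:
        """Minimal stand-in keeping only the equivalent HDF5 entity kind."""
        def __init__(self, eqv_hdf: str = ""):
            self.eqv_hdf = eqv_hdf
            self.value = None

    def build_cache(hdf_paths: List[str]) -> Dict[str, NxObject]:
        """Strip the -field/-attribute/-group suffix and map to an NxObject."""
        tmp: Dict[str, NxObject] = {}
        for entry in hdf_paths:
            for suffix, kind in (("-field", "dataset"),
                                 ("-attribute", "attribute"),
                                 ("-group", "group")):
                if entry.endswith(suffix):
                    tmp[entry[:-len(suffix)]] = NxObject(eqv_hdf=kind)
                    break
        return tmp

    cache = build_cache(["spectrum_zerod-group",
                         "spectrum_zerod/intensity-field",
                         "spectrum_zerod/intensity@long_name-attribute"])
    print({key: obj.eqv_hdf for key, obj in cache.items()})
    # {'spectrum_zerod': 'group', 'spectrum_zerod/intensity': 'dataset',
    #  'spectrum_zerod/intensity@long_name': 'attribute'}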
From baa02a6d8ae589aa43b92fb50757d6f246f9b2b7 Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Fri, 12 Jan 2024 14:32:03 +0100
Subject: [PATCH 69/84] Added other relevant conceptual groups from the three
 APEX examples shared VInP_108_L2 (IKZ Brückner, Bergmann), GeSi (IKZ
 Kernke), InP (PDI, laehnemann)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../readers/em/subparsers/hfive_apex.py | 76 ++++++++++++++++++-
 1 file changed, 72 insertions(+), 4 deletions(-)

diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
index 2a46a1df7..5707c49b3 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -107,9 +107,72 @@ def parse_and_normalize(self):
                         self.parse_and_normalize_group_ebsd_data(h5r, ckey)
                         cache_id += 1

-                    for prefix in ["Live Map", "Free Draw", "Full Area"]:
-                        if not area_grp_nm.startswith(prefix):
-                            continue
+                    # TODO: conceptually the content of the three above-mentioned
+                    # groups has and uses for some groups the same formatting, but
+                    # fundamentally I assume that they are three different concepts:
+                    # free draw: polygonal region chosen via GUI interaction
+                    #     over which one integrates
+                    # full area: rectangular region typically used, i.e. the
+                    #     difference between free draw and full area is the
+                    #     integration region
+                    # live map: rectangular region plus child concepts
+                    #     with (sum) spectrum SPC, spectrum stack (SPD)
+                    #     with eventually different number of energy bins and
+                    #     Live Map */ROIs for the individual elements aka
+                    #     "element mappings"
+                    # TODO: LIVENETMAPS groups are not parsed because not requested
+                    # TODO: EBSD+EDS groups are not parsed because of internal structure
+                    # TODO: ZAF WtLineScan 2 mirrors the concept tree behind an
+                    # OIM Map and a Live Map
+                    if area_grp_nm.startswith("Full Area") \
+                            or area_grp_nm.startswith("Selected Area"):
+                        # TODO: Selected Area groups have a REGION and I assume that this
+                        # is the use case when one filters from the FOV a sub-set, not a
+                        # free-form but a rectangular sub-FOV; this is also substantiated
+                        # by the metadata stored in region (x,y) pair (likely upper-left edge)
+                        # and relative width/height of the sub-FOV
+                        # also supported in that Full Area has a region with (x,y) 0,0
+                        # and relative width/height 1./1.
+                        # SPC
+                        self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
+                        print(f"Parsing {self.prfx}")
+                        ckey = self.init_named_cache(f"eds{cache_id}")
+                        self.parse_and_normalize_eds_spc(
+                            h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
+                        cache_id += 1
+
+                    # there is a oned equivalent of the twod Free Draw called EDS Spot
+                    if area_grp_nm.startswith("EDS Spot"):
+                        # TODO: parse ../REGION x,y coordinate pair (relative coordinate)
+                        # with respect to parent FOV, SPC
+                        self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
+                        print(f"Parsing {self.prfx}")
+                        ckey = self.init_named_cache(f"eds{cache_id}")
+                        self.parse_and_normalize_eds_spc(
+                            h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
+                        cache_id += 1
+
+                    if area_grp_nm.startswith("Free Draw"):
+                        # TODO: parse ../REGION x,y table (relative coordinate)
+                        # with respect to parent FOV, SPC
+                        self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
+                        print(f"Parsing {self.prfx}")
+                        ckey = self.init_named_cache(f"eds{cache_id}")
+                        self.parse_and_normalize_eds_spc(
+                            h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
+                        cache_id += 1
+
+                    if area_grp_nm.startswith("Live Map"):
                         self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
                         print(f"Parsing {self.prfx}")
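The hunk above replaces the generic prefix loop by one branch per APEX concept group; the dispatch is purely by group-name prefix. A compact standalone equivalent of that mapping (an editor's illustration, not the parser's actual API):

    def classify_area_group(area_grp_nm: str) -> str:
        """Map an APEX area group name prefix to the kind of parsing it triggers."""
        prefix_to_concept = {"OIM Map": "ebsd",
                             "Full Area": "eds_spc",
                             "Selected Area": "eds_spc",
                             "EDS Spot": "eds_spc",
                             "Free Draw": "eds_spc",
                             "Live Map": "eds_spc_spd",
                             "LineScan": "eds_lsd",
                             "ROILineScan": "eds_lsd"}
        for prefix, concept in prefix_to_concept.items():
            if area_grp_nm.startswith(prefix):
                return concept
        return "unknown"

    print(classify_area_group("Live Map 1"))  # -> eds_spc_spd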
@@ -127,7 +190,12 @@ def parse_and_normalize(self):
                             h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
                         cache_id += 1

-                    if area_grp_nm.startswith("LineScan"):
+                    if area_grp_nm.startswith("LineScan") \
+                            or area_grp_nm.startswith("ROILineScan"):
+                        # free-form? or (which I assume) an orthogonal line grid inside the FOV
+                        # TODO:: currently I assume that the internal organization of LineScan
+                        # and ROILineScan groups is the same, but maybe the physical ROI which
+                        # they reference differs, respectively (TODO:: LineScan refers to the
+                        # FOV that is in the parent of the group)
                         ckey = self.init_named_cache(f"eds{cache_id}")
                         self.parse_and_normalize_eds_lsd(
                             h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey)
                         cache_id += 1

From bd4edec8f69625c0ad1753d8576cc698d7202e74 Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Fri, 12 Jan 2024 15:40:00 +0100
Subject: [PATCH 70/84] Added xraydb to get all X-ray emission lines

---
 dev-requirements.txt                          | 11 ++++
 .../readers/em/concepts/nxs_concepts.py       | 12 ++++-
 .../em/concepts/nxs_em_eds_indexing.py        | 54 +++++++++++++++++++
 .../readers/em/concepts/nxs_image_r_set.py    |  3 --
 .../readers/em/concepts/nxs_spectrum_set.py   |  3 --
 .../readers/em/subparsers/hfive_apex.py       |  9 ++++
 pyproject.toml                                |  7 +--
 7 files changed, 89 insertions(+), 10 deletions(-)
 create mode 100644 pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py

diff --git a/dev-requirements.txt b/dev-requirements.txt
index dd12c799b..733d5cb2f 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -108,6 +108,8 @@ gitdb==4.0.11
     # via gitpython
 gitpython==3.1.40
     # via pynxtools (pyproject.toml)
+greenlet==3.0.3
+    # via sqlalchemy
 h5py==3.10.0
     # via
     #   fabio
@@ -297,6 +299,7 @@ numpy==1.24.4
     #   tables
     #   tifffile
     #   xarray
+    #   xraydb
     #   zarr
 numpy-quaternion==2022.4.3
     # via orix
@@ -319,6 +322,7 @@ packaging==23.2
     #   scikit-image
     #   tables
     #   xarray
+    #   xraydb
 pandas==2.0.3
     # via
     #   ifes-apt-tc-data-modeling
@@ -352,6 +356,7 @@ platformdirs==4.0.0
     #   pooch
     #   pylint
     #   requests-cache
+    #   xraydb
 pluggy==1.3.0
     # via pytest
 ply==3.11
@@ -470,6 +475,7 @@ scipy==1.9.1
     #   scikit-image
     #   scikit-learn
     #   sparse
+    #   xraydb
 silx==1.1.2
     # via pyfai
 six==1.16.0
@@ -482,6 +488,8 @@ smmap==5.0.1
     # via gitdb
 sparse==0.14.0
     # via hyperspy
+sqlalchemy==2.0.25
+    # via xraydb
 stack-data==0.6.3
     # via ipython
 structlog==23.2.0
@@ -555,6 +563,7 @@ typing-extensions==4.8.0
     #   astroid
     #   cattrs
     #   mypy
+    #   sqlalchemy
 tzdata==2023.3
     # via
     #   pandas
@@ -582,6 +591,8 @@ xarray==2023.1.0
     # via pynxtools (pyproject.toml)
 xmltodict==0.13.0
     # via pynxtools (pyproject.toml)
+xraydb==4.5.4
+    # via pynxtools (pyproject.toml)
 zarr==2.16.1
     # via hyperspy
 zipp==3.17.0
diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py
index 33716e2d2..09e00228b 100644
--- a/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py
+++ b/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py
@@ -17,7 +17,7 @@
 #
 """Implement NeXus-specific groups and fields to document software and versions used."""

-# pylint: disable=no-member
+# pylint: disable=no-member,too-few-public-methods

 from typing import List

 from pynxtools.dataconverter.readers.em.concepts.concept_mapper \
@@ -59,3 +59,13 @@ def parse(self, template: dict, entry_id: int = 1, cmd_line_args: List = []) ->
         template["/cs_profiling/@NX_class"] = "NXcs_profiling"
         template["/cs_profiling/command_line_call"] = cmd_line_args
         return template
+
+
+class NxConcept():
+    """Define a NeXus concept object to handle paths.
+ + """ + def __init__(self, hdf_paths: List = []): + # TODO::remove redundant code for instantiating specific NxConcepts like + # NxSpectrum, NxImageRealSpaceSet, NxEmEdsIndexing + pass diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py b/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py new file mode 100644 index 000000000..0ab4bb3ec --- /dev/null +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py @@ -0,0 +1,54 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""NXem_eds indexing instance data.""" + +from typing import Dict, List + +from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject + + +NX_EM_EDS_INDEXING_HDF_PATH = ["indexing-group", + "indexing/element_names-field", + "indexing/IMAGE_R_SET-group", + "indexing/IMAGE_R_SET/PROCESS-group", + "indexing/IMAGE_R_SET/PROCESS/peaks-field", + "indexing/IMAGE_R_SET/PROCESS/weights-field", + "indexing/PEAK-group", + "indexing/PEAK/ION-group", + "indexing/PEAK/ION/energy-field", + "indexing/PEAK/ION/energy_range-field", + "indexing/PEAK/ION/iupac_line_names-field", + "indexing/PROGRAM-group", + "indexing/summary-group", + "indexing/summary/axis_energy-field", + "indexing/summary/axis_energy@long_name-attribute", + "indexing/summary/intensity-field", + "indexing/summary/intensity@long_name-attribute"] + + +class NxEmEdsIndexing(): + def __init__(self): + self.tmp: Dict = {} + self.tmp["source"] = None + for entry in NX_EM_EDS_INDEXING_HDF_PATH: + if entry.endswith("-field") is True: + self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset") + elif entry.endswith("-attribute") is True: + self.tmp[entry[0:len(entry) - len("-attribute")]] = NxObject(eqv_hdf="attribute") + else: + self.tmp[entry[0:len(entry) - len("-group")]] = NxObject(eqv_hdf="group") diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py index bc8e675a8..4a2ca2e58 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py @@ -17,9 +17,6 @@ # """NXem spectrum set (element of a labelled property graph) to store instance data.""" -# pylint: disable=no-member,too-few-public-methods - - from typing import Dict from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py index 077e30648..0e09e4ec9 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py @@ -17,9 +17,6 @@ # """NXem spectrum set (element of a labelled property graph) to store instance data.""" -# pylint: disable=no-member,too-few-public-methods - - from typing import Dict, List from 
pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 5707c49b3..6539417ea 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -201,6 +201,11 @@ def parse_and_normalize(self): h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) cache_id += 1 + ckey = self.init_named_cache(f"eds_map{cache_id}") + self.parse_and_normalize_eds_rois( + h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) + cache_id += 1 + def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): # no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp: @@ -581,3 +586,7 @@ def parse_and_normalize_eds_lsd(self, fp, src: str, ckey: str): for key, val in self.tmp[ckey].tmp.items(): if key.startswith("spectrum_oned"): print(f"spectrum_oned, key: {key}, val: {val}") + + def parse_and_normalize_eds_rois(self, fp, src: str, ckey: str): + """Normalize and scale APEX-specific EDS element emission line maps to NeXus.""" + diff --git a/pyproject.toml b/pyproject.toml index 828d199ee..7dd67d11d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,10 @@ dependencies = [ "pandas>=1.3.2", "odfpy>=1.4.1", "ase>=3.19.0", - "flatdict>=4.0.1", - "hyperspy>=1.7.6", - "rosettasciio>=0.2", + "flatdict", + "hyperspy", + "rosettasciio", + "xraydb", "ifes_apt_tc_data_modeling>=0.1", "gitpython>=3.1.24", "pytz>=2021.1", From 712e384c95382f49fda86f53528839844ece7b4f Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Sat, 13 Jan 2024 22:29:12 +0100 Subject: [PATCH 71/84] Tested parsing on IKZ and PDI examples --- debug/spctrscpy.batch.sh | 10 +- debug/spctrscpy.dev.ipynb | 193 ++++++++++++++++-- .../readers/em/concepts/nxs_image_r_set.py | 4 +- .../readers/em/concepts/nxs_object.py | 9 +- .../readers/em/concepts/nxs_spectrum_set.py | 4 - pynxtools/dataconverter/readers/em/reader.py | 16 +- .../readers/em/subparsers/hfive_apex.py | 109 +++++++--- .../readers/em/subparsers/nxs_nion.py | 2 +- 8 files changed, 278 insertions(+), 69 deletions(-) diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh index 0d2a219cc..fd9c22bad 100755 --- a/debug/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -1,11 +1,15 @@ #!/bin/bash -datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/" +datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/" +datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/" + # apex examples ikz, pdi # examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" -examples="ikz/VInP_108_L2.h5" -examples="InGaN_nanowires_spectra.edaxh5" +examples="AlGaO.nxs" +examples="GeSi.nxs" +examples="VInP_108_L2.h5" +#examples="InGaN_nanowires_spectra.edaxh5" for example in $examples; do echo $example diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index c6b5871e2..f5162301d 100644 --- a/debug/spctrscpy.dev.ipynb +++ b/debug/spctrscpy.dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "id": 
"6da1aea0-545b-446b-a3d1-1574af72f6c6", "metadata": {}, "outputs": [], @@ -11,12 +11,51 @@ "from rsciio import bruker, emd, digitalmicrograph\n", "from jupyterlab_h5web import H5Web\n", "import h5py\n", - "from matplotlib import pyplot as plt" + "from matplotlib import pyplot as plt\n", + "import xraydb\n", + "from ase.data import chemical_symbols" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, + "id": "8e721dee-7b6f-4dd0-b50e-ea8ff05d4682", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1807\n", + "['Mg-Ka3', 'Ge-Lb1', 'Ge-Lb6', 'Se-Ln', 'Se-Ll', 'Pm-Mg', 'Gd-Mb', 'Tb-Ma', 'Lu-Mz']\n" + ] + } + ], + "source": [ + "xray_lines = {}\n", + "for symbol in chemical_symbols[1:]:\n", + " # print(f\"{symbol}\")\n", + " for name, line in xraydb.xray_lines(symbol).items():\n", + " xray_lines[f\"{symbol}-{name}\"] = line.energy\n", + " # print(f\"{name}, {line.energy} eV\")\n", + "print(len(xray_lines))\n", + "\n", + "def get_xray_line_candidates(e_min=1200., e_max=1250.):\n", + " cand = []\n", + " for key, val in xray_lines.items():\n", + " if val < e_min:\n", + " continue\n", + " if val > e_max:\n", + " continue\n", + " cand.append(key)\n", + " return cand\n", + "\n", + "print(get_xray_line_candidates())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", "metadata": {}, "outputs": [ @@ -30,20 +69,22 @@ ], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", - "fnms = [(\"apex\", \"ikz/VInP_108_L2.h5\"),\n", - " (\"apex\", \"ikz/GeSn_13.h5\"),\n", + "fnms = [(\"ikz\", \"VInP_108_L2.h5\"),\n", + " (\"ikz\", \"GeSn_13.nxs\"),\n", " (\"bruker\", \"pynx/46_ES-LP_L1_brg.bcf\"),\n", " (\"emd\", \"pynx/1613_Si_HAADF_610_kx.emd\"),\n", " (\"digitalmicrograph\", \"pynx/EELS_map_2_ROI_1_location_4.dm3\"),\n", - " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", + " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\"),\n", + " (\"pdi\", \"InGaN_nanowires_spectra.edaxh5\")]\n", "# pyUSID, HSMA\n", - "fnm = f\"{src}/{fnms[0][1]}\"\n", + "case = 0 # len(fnms) - 1\n", + "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", "metadata": {}, "outputs": [ @@ -54,7 +95,7 @@ "" ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -63,15 +104,139 @@ "H5Web(fnm)" ] }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6b883a7a-f6aa-4151-8ee4-f3c8c79ccc72", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ", (200,), int32\n", + ", (200,), int32\n", + ", (200,), int32\n" + ] + } + ], + "source": [ + "with h5py.File(fnm, \"r\") as h5r:\n", + " src = \"/VInP/VInP_108_L2/Area 10/LineScan 1/ROIs/\"\n", + " for key in h5r[src].keys():\n", + " tmp = h5r[f\"{src}/{key}\"]\n", + " print(f\"{type(tmp)}, {np.shape(tmp)}, {tmp.dtype}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "e99588fe-67dc-48df-8d60-28187d8daa0a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ", (200, 1024), int32\n", + "0\t\tIn L\t\t849\n", + "0\t\tK K\t\t752\n", + "0\t\tP K\t\t938\n", + "1\t\tIn L\t\t857\n", + "1\t\tK K\t\t786\n", + "1\t\tP 
K\t\t1004\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "[base64-encoded matplotlib PNG output omitted; the captured patch series ends inside this image payload]"
irbfeKlZXVwekZqG7cCIiIiLZYJMsERERyQ4DChEREckOAwoRERHJDgMKERERyQ4DChEREckOAwoRERHJDgMKERERyQ4DChEREckOAwoRERHJDgMKERERyQ4DChEREckOAwoRERHJzv8PmHiZvvkb5iMAAAAASUVORK5CYII=", + "text/plain": [ + "
    " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with h5py.File(fnm, \"r\") as h5r:\n", + " # src = \"/InGaN_nanowires_spectra/InGaN nanowires/Area 1/Full Area 1\"\n", + " src = \"/VInP/VInP_108_L2/Area 10/LineScan 1/\"\n", + " if f\"{src}/LSD\" in h5r.keys():\n", + " # for key, val in enumerate(h5r[f\"{src}/LSD\"].attrs.items()):\n", + " # print(f\"{key}, {val}\")\n", + " tmp = np.asarray(h5r[f\"{src}/LSD\"][0])\n", + " print(f\"{type(tmp)}, {np.shape(tmp)}, {tmp.dtype}\")\n", + " for idx in np.arange(0, 2):\n", + " # src/ROIs/ is the integral\n", + " print(f\"{idx}\\t\\tIn L\\t\\t{np.sum(tmp[idx,323:335 + 1])}\")\n", + " print(f\"{idx}\\t\\tK K\\t\\t{np.sum(tmp[idx,326:337 + 1])}\")\n", + " print(f\"{idx}\\t\\tP K\\t\\t{np.sum(tmp[idx,197:206 + 1])}\")\n", + " # plt.plot(np.arange(323, 335 + 1), tmp[0,323:335 + 1])\n", + " plt.plot(np.arange(197, 206 + 1), tmp[0,197:206 + 1])\n", + " # for idx, val in enumerate(tmp.dtype.names):\n", + " # print(f\"{idx}, {val}, {tmp[val][0]}\")\n", + "\n", + " \"\"\"\n", + " if f\"{src}/SPC\" in h5r.keys():\n", + " spc = np.asarray(h5r[f\"{src}/SPC\"])\n", + " # print(f\"{type(spc)}, {np.shape(spc)}, {spc.dtype}\")\n", + " reqs = [\"eVOffset\", \"evPch\"] # , \"evPerChannel\", \"DeadTime\", \"CountRate\"]\n", + " for req in reqs: # \"\"SpectrumCounts\", \"\n", + " if req in spc.dtype.names:\n", + " print(f\"{req}, {spc[req][0]}\")\n", + " else:\n", + " raise ValueError(f\"Unable to find metadata entry {req}!\")\n", + " # for idx, val in enumerate(spc.dtype.names):\n", + " # print(f\"{idx}, {val}, {spc[val][0]}\")\n", + " print(\"DataStart\" in spc.dtype.names)\n", + " print(f\"{type(spc['SpectrumCounts'][0])}, {np.shape(spc['SpectrumCounts'][0])}, {spc['SpectrumCounts'][0].dtype}\") # [0])\n", + " \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "7b58972c-dcd3-45ea-9fae-36c81de1ee9e", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'dat' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m plt\u001b[38;5;241m.\u001b[39mplot(\u001b[43mdat\u001b[49m[\u001b[38;5;241m0\u001b[39m, :])\n", + "\u001b[0;31mNameError\u001b[0m: name 'dat' is not defined" + ] + } + ], + "source": [ + "plt.plot(dat[0, :])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "441aaf8f-88df-47ea-9516-44f9666d717b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc341bf3-fefa-4a69-84d5-5abe576f2b29", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 24, "id": "f0a7f9ac-1ade-43d7-aedd-b2572d163b34", - "metadata": { - "jupyter": { - "source_hidden": true - } - }, + "metadata": {}, "outputs": [], "source": [ "\n", diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py index 4a2ca2e58..da50a2101 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_image_r_set.py @@ -65,9 +65,9 @@ def __init__(self): self.tmp: Dict = {} self.tmp["source"] = None for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH: - if entry.endswith("-field") is True: + if 
entry.endswith("-field"): self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset") - elif entry.endswith("-attribute") is True: + elif entry.endswith("-attribute"): self.tmp[entry[0:len(entry) - len("-attribute")]] = NxObject(eqv_hdf="attribute") else: self.tmp[entry[0:len(entry) - len("-group")]] = NxObject(eqv_hdf="group") diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py index 696be9a86..93c6f882f 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py @@ -28,7 +28,7 @@ class NxObject: def __init__(self, name: str = None, unit: str = None, - dtype=str, + dtype=None, value=None, **kwargs): if (name is not None) and (name == ""): @@ -44,10 +44,9 @@ def __init__(self, # use special values "unitless" for NX_UNITLESS (e.g. 1) and # "dimensionless" for NX_DIMENSIONLESS (e.g. 1m / 1m) self.dtype = dtype # use np.dtype if possible - if value is None or dtype is str: + if value is None or isinstance(dtype, str): self.unit = "unitless" - if value is not None: - self.value = value + self.value = value # value should be a numpy scalar, tensor, or string if possible self.eqv_hdf = None if "eqv_hdf" in kwargs: @@ -58,4 +57,4 @@ def __init__(self, def __repr__(self): """Report values.""" - return f"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, eqv_hdf: {self.eqv_hdf}" + return f"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, value: {self.value}, eqv_hdf: {self.eqv_hdf}" diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py index 0e09e4ec9..d7be722a3 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py @@ -34,19 +34,16 @@ "PROCESS/mode-field", "PROCESS/PROGRAM-group", "PROCESS/source-group", - "spectrum_zerod-group", "spectrum_zerod/axis_energy-field", "spectrum_zerod/axis_energy@long_name-attribute", "spectrum_zerod/intensity-field", "spectrum_zerod/intensity@long_name-attribute", - "spectrum_oned-group", "spectrum_oned/axis_energy-field", "spectrum_oned/axis_energy@long_name-attribute", "spectrum_oned/axis_x-field", "spectrum_oned/axis_x@long_name-attribute", "spectrum_oned/intensity-field", "spectrum_oned/intensity@long_name-attribute", - "spectrum_threed-group", "spectrum_threed/axis_energy-field", "spectrum_threed/axis_energy@long_name-attribute", "spectrum_threed/axis_x-field", @@ -57,7 +54,6 @@ "spectrum_threed/axis_z@long_name-attribute", "spectrum_threed/intensity-field", "spectrum_threed/intensity@long_name-attribute", - "spectrum_twod-group", "spectrum_twod/axis_energy-field", "spectrum_twod/axis_energy@long_name-attribute", "spectrum_twod/axis_x-field", diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 4ba4686ef..5a4f2b0b3 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -23,9 +23,9 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader # from pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef -# from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser -# from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser -# from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import 
NxEmImagesSubParser +from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser +from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser +from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper @@ -119,9 +119,9 @@ def read(self, # add further with resolving cases # if file_path is an HDF5 will use hfive parser - # sub_parser = "nxs_pyxem" - # subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) - # subparser.parse(template) + sub_parser = "nxs_pyxem" + subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) + subparser.parse(template) # TODO::check correct loop through! # sub_parser = "image_tiff" @@ -129,8 +129,8 @@ def read(self, # subparser.parse(template) # sub_parser = "zipped_nion_project" - subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0]) - subparser.parse(template, verbose=True) + # subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0]) + # subparser.parse(template, verbose=True) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 6539417ea..b210365d2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -54,24 +54,39 @@ def init_support(self): """Init supported versions.""" self.supported_version["tech_partner"] = ["EDAX, LLC"] self.supported_version["schema_name"] = ["EDAXH5"] - self.supported_version["schema_version"] = ["2.5.1001.0001"] + self.supported_version["schema_version"] = ["2.1.0009.0001", + "2.2.0001.0001", + "2.5.1001.0001"] self.supported_version["writer_name"] = ["APEX"] - self.supported_version["writer_version"] = ["2.5.1001.0001"] + self.supported_version["writer_version"] = ["2.1.0009.0001", + "2.2.0001.0001", + "2.5.1001.0001"] def check_if_supported(self): """Check if instance matches all constraints to qualify as supported H5OINA""" self.supported = 0 # voting-based with h5py.File(self.file_path, "r") as h5r: - # parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists + # parse Company and PRODUCT_VERSION attribute values from the first group below + # but these are not scalar but single value lists # so much about interoperability - # but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats" + # but hehe for the APEX example from Sebastian and Sabine + # there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats" grp_names = list(h5r["/"]) if len(grp_names) == 1: - if read_strings_from_dataset(h5r[grp_names[0]].attrs["Company"][0]) in self.supported_version["tech_partner"]: - self.supported += 1 - if read_strings_from_dataset(h5r[grp_names[0]].attrs["PRODUCT_VERSION"][0]) in self.supported_version["schema_version"]: - self.supported += 1 - if self.supported == 2: + if "Company" in h5r[grp_names[0]].attrs: + if read_strings_from_dataset( + h5r[grp_names[0]].attrs["Company"][0]) \ + in 
self.supported_version["tech_partner"]: + self.supported += 1 + if "PRODUCT_VERSION" in h5r[grp_names[0]].attrs: + if read_strings_from_dataset( + h5r[grp_names[0]].attrs["PRODUCT_VERSION"][0]) \ + in self.supported_version["schema_version"]: + self.supported += 1 + if self.supported >= 1: + # this is not as strict because IKZ example does not contain Company EDAX, LLC + # but what if there are HDF5 files whose PRODUCT_VERSION is one of Apex but the file + # is not an APEX file, in this case be behavior is undefined but likely will fail self.version = self.supported_version.copy() self.supported = True else: @@ -89,7 +104,7 @@ def parse_and_normalize(self): for sub_sub_grp_nm in sub_sub_grp_nms: if sub_sub_grp_nm.startswith("Area"): # get field-of-view (fov in edax jargon, i.e. roi) - if "/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE" in h5r.keys(): + if f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE" in h5r.keys(): ckey = self.init_named_cache(f"roi{cache_id}") self.parse_and_normalize_eds_fov( h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE", ckey) @@ -105,6 +120,7 @@ def parse_and_normalize(self): self.parse_and_normalize_group_ebsd_header(h5r, ckey) self.parse_and_normalize_group_ebsd_phases(h5r, ckey) self.parse_and_normalize_group_ebsd_data(h5r, ckey) + self.parse_and_normalize_group_ebsd_complete(ckey) cache_id += 1 # TODO: conceptually the content of the three @@ -134,9 +150,6 @@ def parse_and_normalize(self): # and relative width/height of the sub-FOV # also supported in that Full Area has a region with (x,y) 0,0 # and relative width/height 1./1. - self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - # SPC self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" print(f"Parsing {self.prfx}") @@ -197,19 +210,20 @@ def parse_and_normalize(self): # groups is the same TODO but maybe the physical ROI which they reference # respective differs (TODO:: LineScan refers to FOV that is in the parent of the group) ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_lsd( + self.parse_and_normalize_eds_line_lsd( h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) cache_id += 1 ckey = self.init_named_cache(f"eds_map{cache_id}") - self.parse_and_normalize_eds_rois( + self.parse_and_normalize_eds_line_rois( h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) cache_id += 1 def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): # no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp: - raise ValueError(f"Unable to parse {self.prfx}/EBSD/ANG/DATA/DATA !") + # raise ValueError(f"Unable to parse {self.prfx}/EBSD/ANG/DATA/DATA !") + return # for a regular tiling of R^2 with perfect hexagons n_pts = 0 @@ -243,7 +257,8 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): grp_name = f"{self.prfx}/EBSD/ANG/HEADER/Phase" if f"{grp_name}" not in fp: - raise ValueError(f"Unable to parse {grp_name} !") + # raise ValueError(f"Unable to parse {grp_name} !") + return # Phases, contains a subgroup for each phase where the name # of each subgroup is the index of the phase starting at 1. 
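
The voting-based support check above can be exercised on its own. A minimal sketch, assuming a plain bytes decode in place of read_strings_from_dataset and only the two attribute names shown in the hunk; a single matching attribute is accepted because the IKZ example carries PRODUCT_VERSION but no Company:

    import h5py

    SUPPORTED_PARTNERS = ["EDAX, LLC"]
    SUPPORTED_VERSIONS = ["2.1.0009.0001", "2.2.0001.0001", "2.5.1001.0001"]

    def is_supported_apex(file_path: str) -> bool:
        """Accept the file if at least one root-group attribute votes in favor."""
        votes = 0
        with h5py.File(file_path, "r") as h5r:
            grp_names = list(h5r["/"])
            if len(grp_names) != 1:
                return False
            attrs = h5r[grp_names[0]].attrs
            for key, supported in (("Company", SUPPORTED_PARTNERS),
                                   ("PRODUCT_VERSION", SUPPORTED_VERSIONS)):
                if key in attrs:
                    value = attrs[key][0]  # attribute values are single-value lists
                    if isinstance(value, bytes):
                        value = value.decode("utf-8")
                    if value in supported:
                        votes += 1
        return votes >= 1

A file whose PRODUCT_VERSION happens to coincide with an APEX version but which is not an APEX file would still pass this relaxed check; that is the undefined-behavior caveat noted in the comment above.
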
@@ -311,7 +326,8 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): grp_name = f"{self.prfx}/EBSD/ANG/DATA/DATA" if f"{grp_name}" not in fp: - raise ValueError(f"Unable to parse {grp_name} !") + # raise ValueError(f"Unable to parse {grp_name} !") + return n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"] if np.shape(fp[f"{grp_name}"]) != (n_pts,) and n_pts > 0: @@ -378,6 +394,11 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # almost two decades of commercialization of the technique now get_scan_point_coords(self.tmp[ckey]) + def parse_and_normalize_group_ebsd_complete(ckey: str): + """Check if all relevant data for EBSD are available, if not clear the cache.""" + # TODO::implement check and clearing procedure + pass + def parse_and_normalize_eds_fov(self, fp, src: str, ckey: str): """Normalize and scale APEX-specific FOV/ROI image to NeXus.""" reqs = ["FOVIMAGE", "FOVIMAGECOLLECTIONPARAMS", "FOVIPR"] @@ -416,7 +437,7 @@ def parse_and_normalize_eds_fov(self, fp, src: str, ckey: str): = f"Position along {dim} ({scan_unit[dim]})" for key, val in self.tmp[ckey].tmp.items(): if key.startswith("image_twod"): - print(f"image_twod, key: {key}, val: {val}") + print(f"ckey: {ckey}, image_twod, key: {key}, val: {val}") def parse_and_normalize_eds_spc(self, fp, src: str, ckey: str): """Normalize and scale APEX-specific SPC (sum) spectrum to NeXus.""" @@ -426,31 +447,31 @@ def parse_and_normalize_eds_spc(self, fp, src: str, ckey: str): return if "NumberOfLines" in fp[f"{src}/SPC"].attrs.keys(): return - reqs = ["eVOffset", "evPch", "NumberOfPoints", "SpectrumCount"] + reqs = ["eVOffset", "evPch", "NumberOfPoints", "SpectrumCounts"] for req in reqs: - if req not in fp[f"{src}/SPC"].attrs.keys(): # also check for shape + if req not in fp[f"{src}/SPC"].dtype.names: # also check for shape raise ValueError(f"Required attribute named {req} not found in {src}/SPC !") self.tmp[ckey] = NxSpectrumSet() self.tmp[ckey].tmp["source"] = f"{src}/SPC" - e_zero = fp[f"{src}/SPC"].attrs["eVOffset"][0] - e_delta = fp[f"{src}/SPC"].attrs["eVPCh"][0] - e_n = fp[f"{src}/SPC"].attrs["NumberOfPoints"][0] + e_zero = fp[f"{src}/SPC"]["eVOffset"][0] + e_delta = fp[f"{src}/SPC"]["evPch"][0] + e_n = fp[f"{src}/SPC"]["NumberOfPoints"][0] self.tmp[ckey].tmp["spectrum_zerod/axis_energy"].value \ = e_zero + np.asarray(e_delta * np.linspace(0., int(e_n) - 1, num=int(e_n), endpoint=True), - e_zero.dtype) + e_zero.dtype) / 1000. 
# eV to keV self.tmp[ckey].tmp["spectrum_zerod/axis_energy@long_name"].value \ = "Energy (eV)" self.tmp[ckey].tmp["spectrum_zerod/intensity"].value \ - = np.asarray(fp[f"{src}/SPC"].attrs["SpectrumCount"][0], np.int32) + = np.asarray(fp[f"{src}/SPC"]["SpectrumCounts"][0], np.int32) self.tmp[ckey].tmp["spectrum_zerod/intensity@long_name"].value \ = f"Count (1)" for key, val in self.tmp[ckey].tmp.items(): if key.startswith("spectrum_zerod"): - print(f"spectrum_zerod, key: {key}, val: {val}") + print(f"ckey: {ckey}, spectrum_zerod, key: {key}, val: {val}") def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): """Normalize and scale APEX-specific spectrum cuboid to NeXus.""" @@ -493,12 +514,17 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): idx += chk_info[f"c{dim}"] for key, val in chk_bnds.items(): print(f"{key}, {val}") - spd_chk = np.zeros((nyxe["y"], nyxe["x"], nyxe["e"]), fp[f"{src}/SPD"].dtype) + spd_chk = np.zeros((nyxe["y"], nyxe["x"], nyxe["e"]), fp[f"{src}/SPD"][0, 0][0].dtype) print(f"edax: {np.shape(spd_chk)}, {type(spd_chk)}, {spd_chk.dtype}") + print("WARNING::Currently the parsing of the SPD is switched off for debugging but works!") + return for chk_bnd_y in chk_bnds["y"]: for chk_bnd_x in chk_bnds["x"]: spd_chk[chk_bnd_y[0]:chk_bnd_y[1], chk_bnd_x[0]:chk_bnd_x[1], :] \ = fp[f"{src}/SPD"][chk_bnd_y[0]:chk_bnd_y[1], chk_bnd_x[0]:chk_bnd_x[1]] + for key, val in self.tmp[ckey].tmp.items(): + if key.startswith("spectrum_oned"): + print(f"ckey: {ckey}, spectrum_threed, key: {key}, val: {val}") # compared to naive reading, thereby we read the chunks as they are arranged in memory # and thus do not discard unnecessarily data cached in the hfive chunk cache # by contrast, if we were to read naively for each pixel the energy array most of the @@ -515,7 +541,7 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): # specification details the metadata, i.e. 
energy per channel, start and end # we do not use the SPD instance right now - def parse_and_normalize_eds_lsd(self, fp, src: str, ckey: str): + def parse_and_normalize_eds_line_lsd(self, fp, src: str, ckey: str): """Normalize and scale APEX-specific line scan with one spectrum each to NeXus.""" # https://hyperspy.org/rosettasciio/_downloads/ # c2e8b23d511a3c44fc30c69114e2873e/SpcMap-spd.file.format.pdf @@ -585,8 +611,27 @@ def parse_and_normalize_eds_lsd(self, fp, src: str, ckey: str): = f"Count (1)" for key, val in self.tmp[ckey].tmp.items(): if key.startswith("spectrum_oned"): - print(f"spectrum_oned, key: {key}, val: {val}") + print(f"ckey: {ckey}, spectrum_oned, key: {key}, val: {val}") - def parse_and_normalize_eds_rois(self, fp, src: str, ckey: str): + def parse_and_normalize_eds_line_rois(self, fp, src: str, ckey: str): """Normalize and scale APEX-specific EDS element emission line maps to NeXus.""" - + # source of the information + pass + """ + "indexing/element_names-field", + "indexing/IMAGE_R_SET-group", + "indexing/IMAGE_R_SET/PROCESS-group", + "indexing/IMAGE_R_SET/PROCESS/peaks-field", + "indexing/IMAGE_R_SET/PROCESS/weights-field", + "indexing/PEAK-group", + "indexing/PEAK/ION-group", + "indexing/PEAK/ION/energy-field", + "indexing/PEAK/ION/energy_range-field", + "indexing/PEAK/ION/iupac_line_names-field", + "indexing/PROGRAM-group", + "indexing/summary-group", + "indexing/summary/axis_energy-field", + "indexing/summary/axis_energy@long_name-attribute", + "indexing/summary/intensity-field", + "indexing/summary/intensity@long_name-attribute" + """ diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py index 076e7adde..789e28a86 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_nion.py @@ -80,7 +80,7 @@ def configure(self): def check_if_zipped_nionswift_project_file(self, verbose=False): """Inspect the content of the compressed project file to check if supported.""" - with open(self.file_path, 'rb', 0) as fp: + with open(self.file_path, "rb", 0) as fp: s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) magic = s.read(8) if verbose is True: From bc2dc04994d5adf7643cf13454b202f29a98f995 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 15 Jan 2024 18:15:41 +0100 Subject: [PATCH 72/84] Started writing of normalized content to template, fov and h5web, and xray line identification --- debug/spctrscpy.batch.sh | 4 +- .../em/concepts/nxs_em_eds_indexing.py | 22 +- .../readers/em/concepts/nxs_object.py | 4 +- .../readers/em/subparsers/hfive_apex.py | 251 +++++++++++------- .../readers/em/subparsers/nxs_pyxem.py | 42 ++- .../em/utils/get_xrayline_iupac_names.py | 43 +++ 6 files changed, 258 insertions(+), 108 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/utils/get_xrayline_iupac_names.py diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh index fd9c22bad..a82b4501b 100755 --- a/debug/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -1,7 +1,7 @@ #!/bin/bash datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/" -datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/" +#datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/" # apex examples ikz, pdi @@ -9,7 +9,7 @@ 
datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/ examples="AlGaO.nxs" examples="GeSi.nxs" examples="VInP_108_L2.h5" -#examples="InGaN_nanowires_spectra.edaxh5" +examples="InGaN_nanowires_spectra.edaxh5" for example in $examples; do echo $example diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py b/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py index 0ab4bb3ec..73cb22cc1 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_em_eds_indexing.py @@ -22,23 +22,21 @@ from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject -NX_EM_EDS_INDEXING_HDF_PATH = ["indexing-group", - "indexing/element_names-field", - "indexing/IMAGE_R_SET-group", +NX_EM_EDS_INDEXING_HDF_PATH = ["indexing/element_names-field", "indexing/IMAGE_R_SET/PROCESS-group", "indexing/IMAGE_R_SET/PROCESS/peaks-field", + "indexing/IMAGE_R_SET/description-field", + "indexing/IMAGE_R_SET/iupac_line_candidates-field", "indexing/IMAGE_R_SET/PROCESS/weights-field", - "indexing/PEAK-group", - "indexing/PEAK/ION-group", + "indexing/IMAGE_R_SET/PROCESS/weights-field", + "indexing/IMAGE_R_SET/image_twod/axis_x-field", + "indexing/IMAGE_R_SET/image_twod/axis_x@long_name-attribute", + "indexing/IMAGE_R_SET/image_twod/axis_y-field", + "indexing/IMAGE_R_SET/image_twod/axis_y@long_name-attribute", + "indexing/IMAGE_R_SET/image_twod/intensity-field", "indexing/PEAK/ION/energy-field", "indexing/PEAK/ION/energy_range-field", - "indexing/PEAK/ION/iupac_line_names-field", - "indexing/PROGRAM-group", - "indexing/summary-group", - "indexing/summary/axis_energy-field", - "indexing/summary/axis_energy@long_name-attribute", - "indexing/summary/intensity-field", - "indexing/summary/intensity@long_name-attribute"] + "indexing/PEAK/ION/iupac_line_names-field"] class NxEmEdsIndexing(): diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py index 93c6f882f..f8f601228 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_object.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_object.py @@ -19,6 +19,8 @@ # pylint: disable=no-member,too-few-public-methods +import numpy as np + from typing import Dict @@ -57,4 +59,4 @@ def __init__(self, def __repr__(self): """Report values.""" - return f"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, value: {self.value}, eqv_hdf: {self.eqv_hdf}" + return f"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, np.shape(value): {np.shape(self.value)}, eqv_hdf: {self.eqv_hdf}" diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index b210365d2..d57650c1a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -35,6 +35,11 @@ import NxImageRealSpaceSet from pynxtools.dataconverter.readers.em.concepts.nxs_spectrum_set \ import NxSpectrumSet +from pynxtools.dataconverter.readers.em.concepts.nxs_em_eds_indexing \ + import NxEmEdsIndexing +from pynxtools.dataconverter.readers.em.utils.get_xrayline_iupac_names \ + import get_xrayline_candidates +from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject class HdfFiveEdaxApexReader(HdfFiveBaseParser): @@ -43,6 +48,7 @@ def __init__(self, file_path: str = ""): super().__init__(file_path) self.prfx = None self.tmp = {} + 
self.cache_id = 1 self.supported_version: Dict = {} self.version: Dict = {} self.supported = False @@ -95,7 +101,7 @@ def check_if_supported(self): def parse_and_normalize(self): """Read and normalize away EDAX/APEX-specific formatting with an equivalent in NXem.""" with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 1 + self.cache_id = 1 grp_nms = list(h5r["/"]) for grp_nm in grp_nms: sub_grp_nms = list(h5r[grp_nm]) @@ -104,11 +110,8 @@ def parse_and_normalize(self): for sub_sub_grp_nm in sub_sub_grp_nms: if sub_sub_grp_nm.startswith("Area"): # get field-of-view (fov in edax jargon, i.e. roi) - if f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE" in h5r.keys(): - ckey = self.init_named_cache(f"roi{cache_id}") - self.parse_and_normalize_eds_fov( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/FOVIMAGE", ckey) - cache_id += 1 + self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}" + self.parse_and_normalize_eds_fov(h5r) # get oim_maps, live_maps, or full area if available area_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}"]) @@ -116,12 +119,12 @@ def parse_and_normalize(self): if area_grp_nm.startswith("OIM Map"): self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" print(f"Parsing {self.prfx}") - ckey = self.init_named_cache(f"ebsd{cache_id}") + ckey = self.init_named_cache(f"ebsd{self.cache_id}") self.parse_and_normalize_group_ebsd_header(h5r, ckey) self.parse_and_normalize_group_ebsd_phases(h5r, ckey) self.parse_and_normalize_group_ebsd_data(h5r, ckey) self.parse_and_normalize_group_ebsd_complete(ckey) - cache_id += 1 + self.cache_id += 1 # TODO: conceptually the content of the three # above-mentioned groups has and uses for some @@ -150,58 +153,28 @@ def parse_and_normalize(self): # and relative width/height of the sub-FOV # also supported in that Full Area has a region with (x,y) 0,0 # and relative width/height 1./1. 
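    # The relative-region convention described above can be made concrete with a
    # small sketch; the function and field names here are illustrative
    # assumptions, not the reader's actual API. A REGION stores (x, y) plus
    # width/height as fractions of the parent field of view, so Full Area is
    # simply offset (0., 0.) with relative width/height 1./1.
    def region_to_absolute(region, fov_width_um, fov_height_um):
        """Map a relative APEX REGION (fractions of the parent FOV) to microns."""
        x_rel, y_rel, w_rel, h_rel = region
        return (x_rel * fov_width_um, y_rel * fov_height_um,
                w_rel * fov_width_um, h_rel * fov_height_um)

    # Full Area covers the complete field of view:
    print(region_to_absolute((0., 0., 1., 1.), 100., 80.))  # (0.0, 0.0, 100.0, 80.0)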
- # SPC self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_spc( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 + self.parse_and_normalize_eds_spc(h5r) # there is a oned equivalent of the twod Free Draw called EDS Spot if area_grp_nm.startswith("EDS Spot"): - self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - # TODO: parse ../REGION x,y coordinate pair (relative coordinate) # with respect to parent FOV, SPC self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_spc( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 + self.parse_and_normalize_eds_spc(h5r) if area_grp_nm.startswith("Free Draw"): - self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - # TODO: parse ../REGION x,y table (relative coordinate) # with respect to parent FOV, SPC self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_spc( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 + self.parse_and_normalize_eds_spc(h5r) if area_grp_nm.startswith("Live Map"): self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - - # SPC - self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" - print(f"Parsing {self.prfx}") - ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_spc( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 - - # SPD - ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_spd( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 + self.parse_and_normalize_eds_spc(h5r) + self.parse_and_normalize_eds_spd(h5r) + # element-specific ROI (aka element map) + self.parse_and_normalize_eds_area_rois(h5r) if area_grp_nm.startswith("LineScan") \ or area_grp_nm.startswith("ROILineScan"): @@ -209,15 +182,9 @@ def parse_and_normalize(self): # TODO::currently I assume that the internal organization of LineScan and ROILineScan # groups is the same TODO but maybe the physical ROI which they reference # respective differs (TODO:: LineScan refers to FOV that is in the parent of the group) - ckey = self.init_named_cache(f"eds{cache_id}") - self.parse_and_normalize_eds_line_lsd( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 - - ckey = self.init_named_cache(f"eds_map{cache_id}") - self.parse_and_normalize_eds_line_rois( - h5r, f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}", ckey) - cache_id += 1 + self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" + self.parse_and_normalize_eds_line_lsd(h5r) + self.parse_and_normalize_eds_line_rois(h5r) def parse_and_normalize_group_ebsd_header(self, fp, ckey: str): # no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds @@ -399,8 +366,10 @@ def parse_and_normalize_group_ebsd_complete(ckey: str): # TODO::implement check and clearing procedure pass - def parse_and_normalize_eds_fov(self, fp, src: str, ckey: str): + def 
parse_and_normalize_eds_fov(self, fp): """Normalize and scale APEX-specific FOV/ROI image to NeXus.""" + src = self.prfx + print(f"Parsing {src} ...") reqs = ["FOVIMAGE", "FOVIMAGECOLLECTIONPARAMS", "FOVIPR"] for req in reqs: if f"{src}/{req}" not in fp.keys(): @@ -411,20 +380,20 @@ def parse_and_normalize_eds_fov(self, fp, src: str, ckey: str): raise ValueError(f"Required attribute named {req} not found in {src}/FOVIMAGE !") reqs = ["MicronsPerPixelX", "MicronsPerPixelY"] for req in reqs: - if req not in fp[f"{src}/FOVIPR"].attrs.keys(): + if req not in fp[f"{src}/FOVIPR"].dtype.names: raise ValueError(f"Required attribute named {req} not found in {src}/FOVIPR !") + ckey = self.init_named_cache(f"eds_roi{self.cache_id}") self.tmp[ckey] = NxImageRealSpaceSet() self.tmp[ckey].tmp["source"] = f"{src}/FOVIMAGE" nyx = {"y": fp[f"{src}/FOVIMAGE"].attrs["PixelHeight"][0], "x": fp[f"{src}/FOVIMAGE"].attrs["PixelWidth"][0]} - syx = {"x": fp[f"{src}/FOVIPR"].attrs["MicronsPerPixelX"][0], - "y": fp[f"{src}/FOVIPR"].attrs["MicronsPerPixelY"][0]} + syx = {"x": fp[f"{src}/FOVIPR"]["MicronsPerPixelX"][0], + "y": fp[f"{src}/FOVIPR"]["MicronsPerPixelY"][0]} scan_unit = {"x": "µm", "y": "µm"} # is micron because MicronsPerPixel{dim} used by EDAX self.tmp[ckey].tmp["image_twod/intensity"].value \ = np.reshape(np.asarray(fp[f"{src}/FOVIMAGE"]), (nyx["y"], nyx["x"])) - dims = ["y", "x"] for dim in dims: self.tmp[ckey].tmp[f"image_twod/axis_{dim}"].value \ @@ -435,14 +404,21 @@ def parse_and_normalize_eds_fov(self, fp, src: str, ckey: str): syx["x"].dtype) self.tmp[ckey].tmp[f"image_twod/axis_{dim}@long_name"].value \ = f"Position along {dim} ({scan_unit[dim]})" - for key, val in self.tmp[ckey].tmp.items(): - if key.startswith("image_twod"): - print(f"ckey: {ckey}, image_twod, key: {key}, val: {val}") + self.cache_id += 1 + + for key, obj in self.tmp[ckey].tmp.items(): + if isinstance(obj, NxObject): + if obj.value is not None: + print(f"ckey: {ckey}, key: {key}, obj: {obj}") + else: + print(f"ckey: {ckey}, key: {key}, obj: {obj}") - def parse_and_normalize_eds_spc(self, fp, src: str, ckey: str): + def parse_and_normalize_eds_spc(self, fp): """Normalize and scale APEX-specific SPC (sum) spectrum to NeXus.""" # https://hyperspy.org/rosettasciio/_downloads/ # 9e2f0ccf5287bb2d17f1b7550e1d626f/SPECTRUM-V70.pdf + src = self.prfx + print(f"Parsing {src} ...") if f"{src}/SPC" not in fp.keys(): return if "NumberOfLines" in fp[f"{src}/SPC"].attrs.keys(): @@ -452,6 +428,7 @@ def parse_and_normalize_eds_spc(self, fp, src: str, ckey: str): if req not in fp[f"{src}/SPC"].dtype.names: # also check for shape raise ValueError(f"Required attribute named {req} not found in {src}/SPC !") + ckey = self.init_named_cache(f"eds_spc{self.cache_id}") self.tmp[ckey] = NxSpectrumSet() self.tmp[ckey].tmp["source"] = f"{src}/SPC" e_zero = fp[f"{src}/SPC"]["eVOffset"][0] @@ -469,14 +446,21 @@ def parse_and_normalize_eds_spc(self, fp, src: str, ckey: str): = np.asarray(fp[f"{src}/SPC"]["SpectrumCounts"][0], np.int32) self.tmp[ckey].tmp["spectrum_zerod/intensity@long_name"].value \ = f"Count (1)" - for key, val in self.tmp[ckey].tmp.items(): - if key.startswith("spectrum_zerod"): - print(f"ckey: {ckey}, spectrum_zerod, key: {key}, val: {val}") + self.cache_id += 1 - def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): + for key, obj in self.tmp[ckey].tmp.items(): + if isinstance(obj, NxObject): + if obj.value is not None: + print(f"ckey: {ckey}, key: {key}, obj: {obj}") + else: + print(f"ckey: {ckey}, key: {key}, obj: {obj}") + + 
def parse_and_normalize_eds_spd(self, fp): """Normalize and scale APEX-specific spectrum cuboid to NeXus.""" # https://hyperspy.org/rosettasciio/_downloads/ # c2e8b23d511a3c44fc30c69114e2873e/SpcMap-spd.file.format.pdf + src = self.prfx + print(f"Parsing {src} ...") if f"{src}/SPD" not in fp.keys(): return reqs = ["MicronPerPixelX", @@ -488,6 +472,7 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): if req not in fp[f"{src}/SPD"].attrs.keys(): # also check for shape raise ValueError(f"Required attribute named {req} not found in {src}/SPD !") + ckey = self.init_named_cache(f"eds_spc{self.cache_id}") self.tmp[ckey] = NxSpectrumSet() self.tmp[ckey].tmp["source"] = f"{src}/SPD" nyxe = {"y": fp[f"{src}/SPD"].attrs["NumberOfLines"][0], @@ -517,14 +502,18 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): spd_chk = np.zeros((nyxe["y"], nyxe["x"], nyxe["e"]), fp[f"{src}/SPD"][0, 0][0].dtype) print(f"edax: {np.shape(spd_chk)}, {type(spd_chk)}, {spd_chk.dtype}") print("WARNING::Currently the parsing of the SPD is switched off for debugging but works!") + self.cache_id += 1 return for chk_bnd_y in chk_bnds["y"]: for chk_bnd_x in chk_bnds["x"]: spd_chk[chk_bnd_y[0]:chk_bnd_y[1], chk_bnd_x[0]:chk_bnd_x[1], :] \ = fp[f"{src}/SPD"][chk_bnd_y[0]:chk_bnd_y[1], chk_bnd_x[0]:chk_bnd_x[1]] - for key, val in self.tmp[ckey].tmp.items(): - if key.startswith("spectrum_oned"): - print(f"ckey: {ckey}, spectrum_threed, key: {key}, val: {val}") + for key, obj in self.tmp[ckey].tmp.items(): + if isinstance(obj, NxObject): + if obj.value is not None: + print(f"ckey: {ckey}, key: {key}, obj: {obj}") + else: + print(f"ckey: {ckey}, key: {key}, obj: {obj}") # compared to naive reading, thereby we read the chunks as they are arranged in memory # and thus do not discard unnecessarily data cached in the hfive chunk cache # by contrast, if we were to read naively for each pixel the energy array most of the @@ -541,7 +530,95 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str): # specification details the metadata, i.e. 
energy per channel, start and end # we do not use the SPD instance right now - def parse_and_normalize_eds_line_lsd(self, fp, src: str, ckey: str): + def parse_and_normalize_eds_area_rois(self, fp): + """Normalize and scale APEX-specific EDS element emission line maps to NeXus.""" + src = self.prfx + print(f"Parsing {src} ...") + reqs = ["ELEMENTOVRLAYIMGCOLLECTIONPARAMS", "PHASES", "ROIs", "SPC"] + for req in reqs: + if f"{src}/{req}" not in fp.keys(): + return + reqs = ["eVOffset", "evPch", "NumberOfPoints", "SpectrumCounts"] + for req in reqs: + if req not in fp[f"{src}/SPC"].dtype.names: # also check for shape + raise ValueError(f"Required attribute named {req} not found in {src}/SPC !") + reqs = ["ResolutionX", "ResolutionY", "mmFieldWidth", "mmFieldHeight"] + for req in reqs: + if req not in fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"].dtype.names: + # also check for shape + raise ValueError(f"Required attribute named {req} not found in " + f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS !") + # find relevant EDS maps (pairs of .dat, .ipr) groups + uniq = set() + for group_name in fp[f"{src}/ROIs"].keys(): + token = group_name.split(".") + if (len(token) == 2) and (token[1] in ("dat", "ipr")): + uniq.add(token[0]) + for entry in uniq: + if (f"{src}/ROIs/{entry}.dat" not in fp[f"{src}/ROIs"].keys()) \ + or (f"{src}/ROIs/{entry}.ipr" not in fp[f"{src}/ROIs"].keys()): + uniq.remove(entry) + continue + if ("RoiStartChan" not in fp[f"{src}/ROIs/{entry}.dat"].attrs) \ + or ("RoiEndChan" not in fp[f"{src}/ROIs/{entry}.dat"].attrs): + uniq.remove(entry) + + ckey = self.init_named_cache(f"eds_map{self.cache_id}") + self.tmp[ckey] = NxEmEdsIndexing() + self.tmp[ckey].tmp["source"] = f"{src}/ROIs" + self.tmp[ckey].tmp["IMAGE_R_SET"] = [] + + e_zero = fp[f"{src}/SPC"]["eVOffset"][0] + e_delta = fp[f"{src}/SPC"]["evPch"][0] + e_n = fp[f"{src}/SPC"]["NumberOfPoints"][0] + e_channels = e_zero + np.asarray(e_delta * np.linspace(0., + e_n - 1., + num=int(e_n), + endpoint=True), + e_zero.dtype) + nxy = {"x": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionX"], + "y": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionY"], + "lx": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldWidth"], + "ly": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldHeight"]} + sxy = {"x": nxy["lx"] / nxy["x"], + "y": nxy["ly"] / nxy["y"]} + scan_unit = {"x": "µm", + "y": "µm"} + for entry in uniq: + eds_map = NxImageRealSpaceSet() + eds_map.tmp["source"] = f"{src}/ROIs/{entry}" + eds_map.tmp["description"] = f"{entry}" + # this can be a custom name e.g. InL or In L but it is not necessarily + # a clean description of an element plus a IUPAC line, hence get all + # theoretical candidates within integrated energy region [e_roi_s, e_roi_e] + e_roi_s = fp[f"{src}/ROIs/{entry}.dat"].attrs["RoiStartChan"][0] + e_roi_e = fp[f"{src}/ROIs/{entry}.dat"].attrs["RoiEndChan"][0] + eds_map.tmp["iupac_line_candidates"] \ + = ", ".join(get_xrayline_candidates(e_channels[e_roi_s], + e_channels[e_roi_e + 1])) + for dim in ["x", "y"]: + eds_map.tmp[f"image_twod/axis_{dim}"].value \ + = np.asarray(0. 
+ sxy[dim] * np.linspace(0., + nxy[dim] - 1, + num=int(nxy[dim]), + endpoint=True), + np.float32) + eds_map.tmp[f"image_twod/axis_{dim}@long_name"].value \ + = f"{dim}-axis pixel coordinate ({scan_unit[dim]})" + eds_map.tmp["image_twod/intensity"].value \ + = np.asarray(fp[f"{src}/ROIs/{entry}.dat"]) + self.tmp[ckey].tmp["IMAGE_R_SET"].append(eds_map) # copy + self.cache_id += 1 + + for key, val in self.tmp[ckey].tmp.items(): + if key.startswith("IMAGE_R_SET"): + for img in val: + for kkey, vval in img.tmp.items(): + print(f"\t\timg, key: {kkey}, val: {vval}") + else: + print(f"ckey: {ckey}, eds_mapspectrum_oned, key: {key}, val: {val}") + + def parse_and_normalize_eds_line_lsd(self, fp): """Normalize and scale APEX-specific line scan with one spectrum each to NeXus.""" # https://hyperspy.org/rosettasciio/_downloads/ # c2e8b23d511a3c44fc30c69114e2873e/SpcMap-spd.file.format.pdf @@ -551,6 +628,8 @@ def parse_and_normalize_eds_line_lsd(self, fp, src: str, ckey: str): # the absolute location of the line grid in the image of this LineScan group # and to get the spectra right # TODO: this can be an arbitrary free form line, right? + src = self.prfx + print(f"Parsing {src} ...") reqs = ["LSD", "SPC", "REGION", "LINEIMAGECOLLECTIONPARAMS"] for req in reqs: if f"{src}/{req}" not in fp.keys(): @@ -574,6 +653,7 @@ def parse_and_normalize_eds_line_lsd(self, fp, src: str, ckey: str): raise ValueError(f"Required attribute named {req} not found " f"in {src}/LINEMAPIMAGECOLLECTIONPARAMS !") + ckey = self.init_named_cache(f"eds_lsd{self.cache_id}") self.tmp[ckey] = NxSpectrumSet() self.tmp[ckey].tmp["source"] = f"{src}/LSD" e_zero = 0. # strong assumption based on VInP_108_L2 example from IKZ @@ -609,29 +689,16 @@ def parse_and_normalize_eds_line_lsd(self, fp, src: str, ckey: str): = np.asarray(fp[f"{src}/LSD"][0], np.int32) self.tmp[ckey].tmp["spectrum_oned/intensity@long_name"].value \ = f"Count (1)" + self.cache_id += 1 + for key, val in self.tmp[ckey].tmp.items(): - if key.startswith("spectrum_oned"): - print(f"ckey: {ckey}, spectrum_oned, key: {key}, val: {val}") + print(f"ckey: {ckey}, spectrum_oned, key: {key}, val: {val}") - def parse_and_normalize_eds_line_rois(self, fp, src: str, ckey: str): + def parse_and_normalize_eds_line_rois(self, fp): """Normalize and scale APEX-specific EDS element emission line maps to NeXus.""" # source of the information + src = self.prfx + print(f"Parsing {src} ...") + # ckey = self.init_named_cache(f"eds_map{self.cache_id}") + # self.cache_id += 1 pass - """ - "indexing/element_names-field", - "indexing/IMAGE_R_SET-group", - "indexing/IMAGE_R_SET/PROCESS-group", - "indexing/IMAGE_R_SET/PROCESS/peaks-field", - "indexing/IMAGE_R_SET/PROCESS/weights-field", - "indexing/PEAK-group", - "indexing/PEAK/ION-group", - "indexing/PEAK/ION/energy-field", - "indexing/PEAK/ION/energy_range-field", - "indexing/PEAK/ION/iupac_line_names-field", - "indexing/PROGRAM-group", - "indexing/summary-group", - "indexing/summary/axis_energy-field", - "indexing/summary/axis_energy@long_name-attribute", - "indexing/summary/intensity-field", - "indexing/summary/intensity@long_name-attribute" - """ diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 90e417c0f..5c6e0ed42 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -92,6 +92,7 @@ def __init__(self, entry_id: int = 1, input_file_name: str = ""): else: self.entry_id = 1 
self.file_path = input_file_name + self.event_id = 1 self.cache = {"is_filled": False} def parse(self, template: dict) -> dict: @@ -204,6 +205,9 @@ def process_into_template(self, inp: dict, template: dict) -> dict: self.process_roi_overview(inp, template) self.process_roi_ebsd_maps(inp, template) + self.process_roi_eds_spectra(inp, template) + self.process_roi_eds_maps(inp, template) + return template def get_named_axis(self, inp: dict, dim_name: str): @@ -224,7 +228,10 @@ def process_roi_overview(self, inp: dict, template: dict) -> dict: if ckey.startswith("ebsd") and inp[ckey] != {}: self.process_roi_overview_ebsd_based( inp[ckey], ckey.replace("ebsd", ""), template) - break # only one roi for now + # break # only one roi for now + if ckey.startswith("eds_roi") and inp[ckey] != {}: + self.process_roi_overview_eds_based( + inp[ckey], template) return template def process_roi_overview_ebsd_based(self, @@ -303,6 +310,33 @@ def process_roi_overview_ebsd_based(self, template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" return template + def process_roi_overview_eds_based(self, + inp: dict, + template: dict) -> dict: + trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/EVENT_DATA_EM" \ + f"[event_data_em{self.event_id}]/IMAGE_R_SET[image_r_set1]/DATA[image_twod]" + template[f"{trg}/@NX_class"] = "NXdata" # TODO::should be autodecorated + template[f"{trg}/description"] = inp.tmp["source"] + template[f"{trg}/title"] = f"Region-of-interest overview image" + template[f"{trg}/@signal"] = "intensity" + dims = [("x", 0), ("y", 1)] + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim[0]}") + template[f"{trg}/intensity"] \ + = {"compress": inp.tmp["image_twod/intensity"].value, "strength": 1} + template[f"{trg}/intensity/@long_name"] = f"Signal" + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim[0]}_indices]"] \ + = np.uint32(dim[1]) + template[f"{trg}/AXISNAME[axis_{dim[0]}]"] \ + = {"compress": inp.tmp[f"image_twod/axis_{dim[0]}"].value, "strength": 1} + template[f"{trg}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + = inp.tmp[f"image_twod/axis_{dim[0]}@long_name"].value + # template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + return template + + def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): if ckey.startswith("ebsd") and inp[ckey] != {}: @@ -623,3 +657,9 @@ def process_roi_phase_ipfs_threed(self, = f"Pixel along {dim}-axis" template[f"{lgd}/AXISNAME[axis_{dim}]/@units"] = "px" return template + + def process_roi_eds_spectra(self, inp: dict, template: dict) -> dict: + return template + + def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: + return template diff --git a/pynxtools/dataconverter/readers/em/utils/get_xrayline_iupac_names.py b/pynxtools/dataconverter/readers/em/utils/get_xrayline_iupac_names.py new file mode 100644 index 000000000..db649f72b --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/get_xrayline_iupac_names.py @@ -0,0 +1,43 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""For given energy range find possible X-ray emission lines in this region.""" + +import xraydb + +from ase.data import chemical_symbols + + +def get_all_xraylines() -> dict: + xray_lines = {} + for symbol in chemical_symbols[1:]: + for name, line in xraydb.xray_lines(symbol).items(): + xray_lines[f"{symbol}-{name}"] = line.energy + return xray_lines + + +def get_xrayline_candidates(e_min, e_max) -> list: + # one could try to resolve the line from the alias of + # the actual entry but this is not rigorous! + cand = [] + for key, val in get_all_xraylines().items(): + if val < e_min: + continue + if val > e_max: + continue + cand.append(key) + return cand From 4ae24cc0d550fc0f489e120b186dc166ff820a62 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Mon, 15 Jan 2024 18:44:31 +0100 Subject: [PATCH 73/84] Added plotting of spectrum --- debug/spctrscpy.dev.ipynb | 319 +++--------------- .../readers/em/subparsers/hfive_apex.py | 6 +- .../readers/em/subparsers/nxs_pyxem.py | 26 +- 3 files changed, 76 insertions(+), 275 deletions(-) diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index f5162301d..5d1d5327e 100644 --- a/debug/spctrscpy.dev.ipynb +++ b/debug/spctrscpy.dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", "metadata": {}, "outputs": [], @@ -18,19 +18,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "8e721dee-7b6f-4dd0-b50e-ea8ff05d4682", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1807\n", - "['Mg-Ka3', 'Ge-Lb1', 'Ge-Lb6', 'Se-Ln', 'Se-Ll', 'Pm-Mg', 'Gd-Mb', 'Tb-Ma', 'Lu-Mz']\n" - ] - } - ], + "outputs": [], "source": [ "xray_lines = {}\n", "for symbol in chemical_symbols[1:]:\n", @@ -55,109 +46,69 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/VInP_108_L2.h5\n" - ] - } - ], + "outputs": [], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", "fnms = [(\"ikz\", \"VInP_108_L2.h5\"),\n", + " (\"ikz\", \"GeSi.nxs\"),\n", " (\"ikz\", \"GeSn_13.nxs\"),\n", + " (\"ikz\", \"AlGaO.nxs\"),\n", " (\"bruker\", \"pynx/46_ES-LP_L1_brg.bcf\"),\n", " (\"emd\", \"pynx/1613_Si_HAADF_610_kx.emd\"),\n", " (\"digitalmicrograph\", \"pynx/EELS_map_2_ROI_1_location_4.dm3\"),\n", " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\"),\n", " (\"pdi\", \"InGaN_nanowires_spectra.edaxh5\")]\n", "# pyUSID, HSMA\n", - "case = 0 # len(fnms) - 1\n", + "case = 0 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", "metadata": {}, - 
"outputs": [ - { - "data": { - "application/x-hdf5": "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/VInP_108_L2.h5", - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "H5Web(fnm)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, + "id": "221abf67-0d88-4088-9cc7-e0d9b85c4699", + "metadata": {}, + "outputs": [], + "source": [ + "H5Web(\"debug.InGaN_nanowires_spectra.edaxh5.nxs\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "6b883a7a-f6aa-4151-8ee4-f3c8c79ccc72", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ", (200,), int32\n", - ", (200,), int32\n", - ", (200,), int32\n" - ] - } - ], + "outputs": [], "source": [ "with h5py.File(fnm, \"r\") as h5r:\n", - " src = \"/VInP/VInP_108_L2/Area 10/LineScan 1/ROIs/\"\n", - " for key in h5r[src].keys():\n", - " tmp = h5r[f\"{src}/{key}\"]\n", - " print(f\"{type(tmp)}, {np.shape(tmp)}, {tmp.dtype}\")" + " src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\" # /ROIs/InL.dat\"\n", + " tmp = h5r[f\"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS\"][0][\"ResolutionX\"]\n", + " # tmp = h5r[f\"{src}\"]\n", + " # for key in h5r[src].keys():\n", + " # tmp = h5r[f\"{src}/{key}\"]\n", + " print(f\"{type(tmp)}, {np.shape(tmp)}, {tmp.dtype}\")" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "id": "e99588fe-67dc-48df-8d60-28187d8daa0a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ", (200, 1024), int32\n", - "0\t\tIn L\t\t849\n", - "0\t\tK K\t\t752\n", - "0\t\tP K\t\t938\n", - "1\t\tIn L\t\t857\n", - "1\t\tK K\t\t786\n", - "1\t\tP K\t\t1004\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABIqklEQVR4nO3deXxU5d0+/uvMmn2SyTKTkAQStrAbthDBaiGKiBYk1YJYW7XSBa1Cqy2/Kj4+2lJtH1Fal9avRazgDhSsxQIiLiQBwiKbSSCBrDOBLDOZhMxMZs7vj8mMBEFImMk5M3O9X6/zEs6cHD7jIZmL+3zu+wiiKIogIiIikhGF1AUQERERnY8BhYiIiGSHAYWIiIhkhwGFiIiIZIcBhYiIiGSHAYWIiIhkhwGFiIiIZIcBhYiIiGRHJXUBfeF2u1FfX4/Y2FgIgiB1OURERHQZRFFEW1sb0tLSoFB8+xhJUAaU+vp6ZGRkSF0GERER9UFNTQ3S09O/9ZigDCixsbEAPG8wLi5O4mqIiIjoclitVmRkZPg+x79NUAYU722duLg4BhQiIqIgczntGWySJSIiItlhQCEiIiLZYUAhIiIi2WFAISIiItlhQCEiIiLZYUAhIiIi2WFAISIiItlhQCEiIiLZYUAhIiIi2WFAISIiItlhQCEiIiLZYUAhIiIi2QnKhwUSEV0Ot1tE61knmmx2nLE50NzuQFO759eRaiXuuyYLKiX/nUYkRwwoRBQ0RFGEtbPLEzS6Q0dTux3NNgea2h04Y7N3v9a9v90Bt3jx8yVEqTF/cmb/vQEiumwMKEQkqQ5HF5psPcPFmfNCR9M5ox9O17ckjouIj1IjMVqDxGgtEmM0aOlwoLiyGev31TGgEMkUAwoR+ZW9y/V10DgvdPiCxjm3XM46Xb3+M2K0KiTGaJAYrYE+WoukGA0SY875dXcQSYzWICFaA/V5t3FMlk7k/3E7dp9sRnVTBzITo/z19onITxhQiPpBXetZHK6zSF2GX3Q6XT1uo5w5J3Q02Rxos3f1+pxalQJJMdrukOEJGOeGjsQYDZKiv349Qq28ovdg1EVg2pAkfFZxBhv21+HBgqFXdD4i8j8GFKIA63S6MOevX+CMzS51Kf1GpRA8QSNG67m1ct6oRmLM16FDH6NBtEYJQRD6tcZ54wfgs4ozWL+/Fr+cMaTf/3wi+na9Diiffvop/vSnP6G0tBQNDQ3YsGED5s6dCwBwOp149NFH8eGHH6KyshI6nQ4FBQX44x//iLS0NN85mpub8cADD2Dz5s1QKBQoLCzE888/j5iYGL+9MSK5+PeXDThjsyNWq8JQQ/D/HdeoFOeEjQuHjrhIlew/8GeOMiJacxinmjpQeqoFEwfppS6JiM7R64DS3t6OcePG4Z577sG8efN6vNbR0YF9+/bhsccew7hx49DS0oIHH3wQ3/ve97B3717fcQsXLkRDQwO2bt0Kp9OJu+++G4sWLcK6deuu/B0RyczaklMAgJ9dNxiLvztE4mrIK0qjwqwxqXivtBbv76tjQCGSGUEUxd63xHu/WBB6jKBcyJ49ezB58mScOnUKmZmZOHbsGEaOHIk9e/Zg4sSJAIAtW7bgpptuQm1tbY+RlouxWq3Q6XSwWCyIi4vra/lEAXe03oqbVn0GlULArmXTkRIbIXVJdI5dJ87gjldKEBuhwp7fFVxxbwsRfbvefH4HfIUii8UCQRAQHx8PACgqKkJ8fLwvnABAQUEBFAoFSkpKLngOu90Oq9XaYyMKBut2e0ZPZo4yMpzI0JSsRAyIj0RbZxe2HTNLXQ4RnSOgAaWzsxO/+c1vsGDBAl9SMplMSElJ6XGcSqWCXq+HyWS64HlWrFgBnU7n2zIyMgJZNpFf2Oxd2LCvDgCwMI9rbciRQiHg1twBAID13deKiOQhYAHF6XTi9ttvhyiKeOmll67oXMuWLYPFYvFtNTU1fqqSKHA2HahHu8OF7KRo5A9OlLocuohbx3sCys7y0zjdFj4zrYjkLiABxRtOTp06ha1bt/a4z2Q0GtHY2Njj+K6uLjQ3N8NoNF7wfFqtFnFxcT02IjkTRdHXHHtHXqbsZ7SEs8HJMbgqIx4ut4hNB+ulLoeIuvk9oHjDSUVFBbZt24bExJ7/cszPz0draytKS0t9+z7++GO43W7k5eX5uxwiSRysteBIvRUalQKF49OlLocuobB7FOX90lqJKyEir14HFJvNhgMHDuDAgQMAgKqqKhw4cADV1dVwOp34/ve/j71792Lt2rVwuVwwmUwwmUxwOBwAgBEjRuDGG2/Efffdh927d+OLL77A/fffj/nz51/WDB6iYLC22DN6cvOYVCREaySuhi7l5rFpUCsFHG2w4lgDm/CJ5KDXAWXv3r3Izc1Fbm4uAGDp0qXIzc3F8uXLUVdXh02bNqG2thZXXXUVUlNTfduuXbt851i7di1ycnIwY8YM3HTTTZg2bRr+/ve/++9dEUnI0uHE5i89twoWTmFzbDBIiNZgRo4BALBhP5tlieSg1wu1XXfddfi2pVMuZ1kVvV7PRdkoZK3fX4tOpxs5xliMz0yQuhy6TPPGD8CWIyZs2F+HR2YOh0oZ8FUYiOhb8DuQyI88zbHVADxTi9kcGzyuG56ChCg1TrfZ8cWJJqnLIQp7DChEfrS7qhnHG22I0igxt3t9DQoOGpUC3xvn6YNjsyyR9BhQiPzIO3oy56o0xEaoJa6GeqtwgmfG1UdHTGjrdEpcDVF4Y0Ah8pMzNjv+c7gBAHDH5IESV0N9MWaADkNSYmDvcuM/hy68sjUR9Q8GFCI/ea+0Fk6XiHHpOoxJ10ldDvWBIAiY510TZR9v8xBJiQGFyA/cbhHrfM2xHD0JZrfmDoAgACVVzahp7pC6HKKwxYBC5AefHz+D6uYOxEaocPO4VKnLoSuQqovE1MFJALgmCpGUGFCI/MD73J3C8emI0vR6eSGSGe9tnvX7ai9rbSci8j8GFKIrZLJ0YtsxzwMw78jjyrGhYOYoI6I0Spxs6sC+6lapyyEKSwwoRFfo7T01cLlFTB6kxzBDrNTlkB9Ea1W4cbTn6epsliWSBgMK0RXocrnx1p7u5lg+dyekeJ9C/cHBenQ6XRJXQxR+GFCIrsCOstNosHRCH63x/YubQkN+diLSdBGwdnbh468apS6HKOwwoBBdAW9z7G0T0qFVKSWuhvxJoRB8jytYz9s8RP2OAYWoj2qaO7Cz/DQAYMFk3t4JRd7ZPJ+UncYZm13iaojCCwMKUR+9ubsaoghcMzQJg5KipS6HAmBISizGpevQ5Rax6UC91OUQhRUGFKI+cHS58c7eGgDAQk4tDmneBwiu38/bPET9iQGFqA/+e9SEMzYHUmK1mDHCIHU5FEA3j02DWingcJ0VZaY2qcshChsMKER9sLbYM7V4/qQMqJX8Ngpl+mgNvjs8BQBHUYj6E3+yEvXS8UYbiiqboBCAH7A5NizM614TZeP+OrjcXPqeqD8woBD10pu7PaMn03NSMCA+UuJqqD9Mz0lBfJQaZqsdXxw/I3U5RGGBAY
WoFzqdLrxX6hnmX5g3UOJqqL9oVAp8b1waAK6JQtRfGFCIeuHfXzbActaJAfGR+M6wZKnLoX7kvc2z5YgJNnuXxNUQhT4GFKJe8K4ce0deJpQKQeJqqD+NS9chOzkanU43PjzUIHU5RCGPAYXoMh2tt2JfdStUCgG3TUyXuhzqZ4Ig+B4gyNs8RIHHgEJ0mdbt9oyezBxlREpshMTVkBTm5g6AIADFlc2obemQuhyikMaAQnQZbPYubNhXB4Arx4azAfGRyM9OBOCZckxEgcOAQnQZNh2oR7vDheykaOQPTpS6HJKQt1n2/X11EEWuiUIUKAwoRJcgimKP5lhBYHNsOLtxtBGRaiWqzrRjf02r1OUQhSwGFKJLOFhrwZF6KzQqha9JksJXjFaFWaONANgsSxRIDChEl7C22DN6cvOYVCREaySuhuTAe5tn88EG2LtcEldDFJoYUIi+heWsE5u/rAcALJzC5ljyyB+cCGNcBCxnnfj4WKPU5RCFpF4HlE8//RS33HIL0tLSIAgCNm7c2OP19evX44YbbkBiYiIEQcCBAwe+cY7Ozk4sXrwYiYmJiImJQWFhIcxmc1/fA1HAbNhXi06nGznGWIzPTJC6HJIJpULA3NwBADzNskTkf70OKO3t7Rg3bhxeeOGFi74+bdo0PP300xc9x5IlS7B582a8++672LlzJ+rr6zFv3rzelkIUUJ7mWM+DAReyOZbOUzjeE1A+KWtEk80ucTVEoUfV2y+YNWsWZs2addHXf/jDHwIATp48ecHXLRYLXn31Vaxbtw7Tp08HAKxevRojRoxAcXExpkyZ0tuSiAJiz8kWVDTaEKVR+v61TOQ11BCLsek6fFlrweaD9fjx1CypSyIKKf3eg1JaWgqn04mCggLfvpycHGRmZqKoqKi/yyG6KO/U4jlXpSE2Qi1xNSRH87qD63ou2kbkd/0eUEwmEzQaDeLj43vsNxgMMJlMF/wau90Oq9XaYyMKpCabHf855Pn7eMfkgRJXQ3J1y7g0qBQCvqy1oMLcJnU5RCElKGbxrFixAjqdzrdlZGRIXRKFuPdKa+FwuTEuXYcx6TqpyyGZSozR4rs5KQDYLEvkb/0eUIxGIxwOB1pbW3vsN5vNMBqNF/yaZcuWwWKx+Laampp+qJTCldstYt1ub3MsR0/o23mbZTfur4PLzaXvifyl3wPKhAkToFarsX37dt++srIyVFdXIz8//4Jfo9VqERcX12MjCpQvTpzBqaYOxEaocPO4VKnLIZn7bk4KdJFqmKydKDrRJHU5RCGj17N4bDYbjh8/7vt9VVUVDhw4AL1ej8zMTDQ3N6O6uhr19Z7FrcrKygB4Rk6MRiN0Oh3uvfdeLF26FHq9HnFxcXjggQeQn5/PGTwkC2uLPaMnhePTEaXp9bcIhRmtSolbxqXijeJqvL+vFtOGJkldElFI6PUIyt69e5Gbm4vc3FwAwNKlS5Gbm4vly5cDADZt2oTc3FzMnj0bADB//nzk5ubi5Zdf9p1j5cqVuPnmm1FYWIjvfOc7MBqNWL9+vT/eD9EVMVs7sfWYZ9HAO/K4cixdHu/S91sOm2Czd0lcDVFoEMQgfF641WqFTqeDxWLh7R7yq1XbK/Ds1nJMHqTHOz+78C1HovOJoogZ/7cTlWfa8efbxuH7E/hQSaIL6c3nd1DM4iHqD10uN970NsfyuTvUC4IgYF53syyfcEzkHwwoRN0+KTuNBksn9NEa3Dj6wjPKiC7Gu9pwUWUT6lrPSlwNUfBjQCHq5l059rYJ6dCqlBJXQ8EmPSEKU7L1EEXPlGMiujIMKEQAapo78En5aQDAgsm8vUN9U9jdLPv+vloEYXsfkawwoBABeGtPNUQRuGZoEgYlRUtdDgWpWWNSEaFWoPJ0Ow7WWqQuhyioMaBQ2HN0ufH2Hk9j40JOLaYrEKNV4cZRnv6l90vZLEt0JRhQKOxtPWrGGZsdKbFazBhhkLocCnLeNVE2f1kPe5dL4mqIghcDCoU9b3Ps/EkZUCv5LUFXZuqQJBjitGjtcGLHV6elLocoaPGnMYW1E6dt2HWiCQoB+AGbY8kPlArBN+WYa6IQ9R0DCoW1N0s8C7NNz0nBgPhIiauhUDEv13ObZ0dZI5rbHRJXQxScGFAobHU6XXhvn7c5dqDE1VAoGW6MxegBcXC6RGw+WC91OURBiQGFwtaHhxrQ2uHEgPhIfGdYstTlUIjxronC2zxEfcOAQmFrbfftnTvyMqFUCBJXQ6HmlnFpUCkEHKy14Hhjm9TlEAUdBhQKS8carCg91QKVQsBtE/nkWfK/pBgtrhvuGZlbv49L3xP1FgMKhaV13aMnM0cZkRIbIXE1FKq8a6Js2F8Hl5tL3xP1BgMKhZ12exc2dD/MjSvHUiBNz0lBXIQKDZZOFFc2SV0OUVBhQKGws+lgPWz2LmQnRSN/cKLU5VAIi1Arccu4NACeBwgS0eVjQKGwIooi3ij2rBx7R14mBIHNsRRY3ts8Ww6b0G7vkrgaouDBgEJh5ctaC47UW6FRKXzTQIkCaXxmPAYlRqHD4cKWwyapyyEKGgwoFFa8z925eUwqEqI1EldD4UAQBN8oyvr9vM1DdLkYUChsWM46sal7Vc+FU9gcS/3n1u5n8+w60YT61rMSV0MUHBhQKGxs2FeLTqcbOcZYjM9MkLocCiMZ+ijkZekhisDGA1wThehyMKBQWBBF0bdy7EI2x5IEvD1P75fWQhS5JgrRpTCgUFjYc7IFFY02RKqVmNM93E7Un2aNMSJCrcCJ0+34stYidTlEsseAQmHB2xw756o0xEWoJa6GwlFshBozRxkB8AGCRJeDAYVCXpPNjv8c8kzvXJg3UOJqKJx5Z/NsOlgPR5db4mqI5I0BhULee6W1cLjcGJuuw5h0ndTlUBibOjgRKbFatHQ48UlZo9TlEMkaAwqFNLdbxLrdXzfHEklJpVRgbncPFJe+J/p2DCgU0r44cQanmjoQq1X5nolCJKV54z0B5eOvGtHS7pC4GiL5YkChkLa22DN6Mm/8AERpVBJXQwTkGOMwKi0OTpeID76sl7ocItliQKGQZbZ2YusxMwDgDjbHkox4m2Xf38dF24guhgGFQtbbe2rgcouYNCgBw42xUpdD5PO9cWlQKgQcqGnFidM2qcshkqVeB5RPP/0Ut9xyC9LS0iAIAjZu3NjjdVEUsXz5cqSmpiIyMhIFBQWoqKjocUxzczMWLlyIuLg4xMfH495774XNxm9S8p8ulxtv+ppjOXpC8pIcq8W1w5IBcE0UoovpdUBpb2/HuHHj8MILL1zw9WeeeQarVq3Cyy+/jJKSEkRHR2PmzJno7Oz0HbNw4UIcOXIEW7duxQcffIBPP/0UixYt6vu7IDrPJ2Wn0WDpREKUGjeONkpdDtE3eJe+37CvDm43l74nOl+vuwZnzZqFWbNmXfA1URTx3HPP4dFHH8WcOXMAAK+//joMBgM2btyI+fPn49ixY9iyZQv27NmDiRMnAgD+8pe/4KabbsKf//xnpKVxpgVdOe/KsbdNzECEWilxNUTfN
GNECmIjVKi3dKK4qglXD06SuiQiWfFrD0pVVRVMJhMKCgp8+3Q6HfLy8lBUVAQAKCoqQnx8vC+cAEBBQQEUCgVKSkoueF673Q6r1dpjI7qYmuYOfFJ+GgCwYDLXPiF5ilArcfNYzz/I3i9lsyzR+fwaUEwmz3LiBoOhx36DweB7zWQyISUlpcfrKpUKer3ed8z5VqxYAZ1O59syMjL8WTaFmLf2VEMUgWlDkpCVFC11OUQXVdi9Jsp/Djegw9ElcTVE8hIUs3iWLVsGi8Xi22pqaqQuiWTK0eXG23s8TYdcOZbkbsLABAxMjEKHw4WPjlz4H2hE4cqvAcVo9DQjms3mHvvNZrPvNaPRiMbGns+g6OrqQnNzs++Y82m1WsTFxfXYiC5k61EzztjsSI7VomCk4dJfQCQhQRAwL9fTLLuea6IQ9eDXgJKVlQWj0Yjt27f79lmtVpSUlCA/Px8AkJ+fj9bWVpSWlvqO+fjjj+F2u5GXl+fPcigMeZtj50/KgFoZFAOEFOZu7X42z+fHz6DBclbiaojko9c/wW02Gw4cOIADBw4A8DTGHjhwANXV1RAEAQ899BCeeuopbNq0CYcOHcJdd92FtLQ0zJ07FwAwYsQI3Hjjjbjvvvuwe/dufPHFF7j//vsxf/58zuChK3LitA27TjRBIQDz2RxLQSIzMQqTB+khisDG/Vz6nsir1wFl7969yM3NRW5uLgBg6dKlyM3NxfLlywEAjzzyCB544AEsWrQIkyZNgs1mw5YtWxAREeE7x9q1a5GTk4MZM2bgpptuwrRp0/D3v//dT2+JwtWbJZ6F2b47PAUD4iMlrobo8hVO8IyirN9XC1HkmihEACCIQfjdYLVaodPpYLFY2I9CAIBOpwtTVmxHa4cT//jxREzPYf8JBQ9rpxOTntoGe5cbm++fhjHpOqlLIgqI3nx+8yY9hYQPDzWgtcOJAfGRuHZYyqW/gEhG4iLUuGGUZ5LA+1z6nggAAwqFiLXdt3cWTM6AUiFIXA1R783rXhNl08F6OLrcEldDJD0GFAp6xxqsKD3VApVCwO0TuYgfBadrhiQhOVaL5nYHdnavhEwUzhhQKOit6x49uWGUASlxEZc4mkieVEoF5l7lmcnIJxwTMaBQkGu3d2HDfs8CVwvzBkpcDdGVmdf9hOPtxxrR2uGQuBoiaTGgUFDbdLAeNnsXspKikZ+dKHU5RFdkRGocRqTGweFyY/OXDVKXQyQpBhQKat6VY++YnAkFm2MpBHgfIMjbPBTuGFAoaH1Z24rDdVZoVAoUTkiXuhwiv/jeVWlQKgTsr25F5Wmb1OUQSYYBhYLW2mJPc+zsManQR2skrobIP1JiI/CdoUkA4OuvIgpHDCgUlCxnndh00PPckoV5fO4OhRZvs+z6fXVwu4NusW8iv2BAoaC0cX8dzjpdGG6IxYSBCVKXQ+RX1480IDZChbrWsyipapa6HCJJMKBQ0BFF0dccu3BKJgSBzbEUWiLUStw8NhUAm2UpfDGgUNDZe6oF5WYbItVKzM0dIHU5RAHhvc3z4aEGnHW4JK6GqP8xoFDQWVvsGT2Zc1Ua4iLUEldDFBgTByYgQx+JdocLHx0xSV0OUb9jQKGg0tzuwIeHPD+suXIshTJBEDAv1zOKwiccUzhiQKGg8l5pDRwuN8am6zAmXSd1OUQB5X3C8RfHz8Bk6ZS4GqL+xYBCQcPtFn0PBuTUYgoHAxOjMWlQAtwi8K8DXBOFwgsDCgWNXSeacLKpA7FaFW4ZlyZ1OUT9wtss+/6+Wogi10Sh8MGAQkHDO7V43vgBiNKoJK6GqH/cNCYVGpUC5WYbjtRbpS6HqN8woFBQMFs78d+jZgDAHWyOpTCii1Tj+pEGAGyWpfDCgEJB4Z09NXC5RUwalIDhxlipyyHqV9/vvs2z6UA9nC63xNUQ9Q8GFJI9l1vEm7u9zbEcPaHwc83QJCTFaNDU7sCn5aelLoeoXzCgkOx9UtaIeksnEqLUuHG0UepyiPqdSqnAnKs8U455m4fCBQMKyd7a7qnFt03MQIRaKXE1RNLwromy7WgjLB1OiashCjwGFJK12pYO7ChrBAAsmMy1Tyh8jUrTIccYC4fLjQ8O1UtdDlHAMaCQrL21uwaiCEwbkoSspGipyyGSVGF3s+z6fVy0jUIfF5MgWXK5RXzwZT3e6F77hCvHEnkekLniP8dQeqoFVWfagzK0u9wiapo7UGZuQ4W5DWVmGyrMbahtOYvYCBX00RokxmiRFK1BYowG+mgtEmM0SIrRIDFaC320BkkxWkRqeLs31DGgkKy43SL+c9iE57aVo6LRBgAYZohBQfc6EEThLCUuAtcMTcbO8tPYsK8WS28YLnVJFyWKIupaz6Lc3IZysw3lpjaUN7ahwmyDvevCU6Vt9i40XOYzh6I0Sl+A8YaZxBgtEr2/PifM6KM10Kh4wyDYMKCQLIiiiP8eNWPl1nJ8ZWoD4FmgatF3svGjqwdBreQPFyIAKJyQjp3lp7F+fx0eKhgGhUKQtB5RFGG22ruDiGcrM9tw3NyGdofrgl+jVSkw1BCDYSmxGGqIxXBjDDL10ehwdKHJ5kBTuwNNNjua2h04Y7Ojud3h2W+z40y7A44uNzocLnQ0n0VN89nLqjMuQtUzwHh/3T1i4w01iTEaJERpoJT4/ysxoJDERFHEjrJGPLu1HIfrPMt4x2pVuPeaLNwzLQtxEWqJKySSlxtGGhCrVaG25Sz2nGxGXnZiv/3ZZ2x2z0jIObdmys1tsHZ2XfB4tVLA4OQYTwgxeP8biwx9VJ8DgCiKaHe4PGGlO7Q0tzt8YabJ5kCz99ftnl+73CKsnV2wdnah6kz7Jf8MQQD0UZru203nhpmvbzf5bj1FaxEXqYIgMND4GwMKSUIURXxacQbPbi3HwZpWAEC0Rom7p2bhvmuyoYtiMCG6kAi1EjeNScXbe2vw/r7agASU1g6H57bMOaMi5WYbmtsdFzxeqRAwKDEKw42xGJoSi2HdoyIDE6P9PvopCAJitCrEaFUYmHjpHhy3W4S109kjzJzxjtCcF2aabHa0nnVCFOH5fbsDFY2XrkmlEHy9M4Y4LX51/XCMSdf54d2Gt4AElLa2Njz22GPYsGEDGhsbkZubi+effx6TJk0C4Plwevzxx/HKK6+gtbUVU6dOxUsvvYShQ4cGohySmV3HPcFk76kWAECkWom7rh6In35nMPTRGomrI5K/eeMH4O29NfjwkAlPfG90nxtG2zqdqGjs7g8x21DR2IYyUxsa2+wXPF4QgEx9FIYZYjHMENP931hkJ0dDq5Jn06pCISA+SoP4KA2GpMRc8vgulxstHU40tdt73m4679aT979tnV3ocotobLOjsc2OYw2AWqnAK3dN7Id3F9oCElB+8pOf4PDhw/jnP/+JtLQ0vPHGGygoKMDRo0cxYMAAPPPMM1i1ahXWrFmDrKwsPPbYY5g5cyaOHj2KiIiIQJREMrC7qhnPbi1DcWUzAM996DunDMTPrh2M5FitxNURBY9Jg/RIT4hEbctZ/PeoybfK7MV0OLpw
vNHmCSHmtu4ZNDbUtV68f2NAfGSPEDLMEIshKTEhP3tGpVQgOVZ72T+T7F0uX4/Ml7UW/H8bDmHPyWa43aLk/UHBThBFUfTnCc+ePYvY2Fj861//wuzZs337J0yYgFmzZuHJJ59EWloafvWrX+HXv/41AMBiscBgMOC1117D/PnzL/lnWK1W6HQ6WCwWxMXF+bN8CoDSUy1YubUcnx8/AwDQKBW4Iy8TP79uMAxxDKREffHs1nKs2l6Ba4clY809kwF4PixPNLb7RkK8t2lqWjpwsZ/0hjjtOSHEE0iGGmIRo2UHQG85XW6Me+K/6HC48J8Hr8GIVH4+na83n99+/xvY1dUFl8v1jZGQyMhIfP7556iqqoLJZEJBQYHvNZ1Oh7y8PBQVFV1WQKHgcLCmFSu3leOTMs/DzdRKAbdPzMDi7w5BWnykxNURBbd5uQOwansFPqs4jZ/9sxTljW041dQBl/vCSSQxWoOhhhgMN3hnzsRiWEos+738SK1UYMLABHxWcQYllU0MKFfI7wElNjYW+fn5ePLJJzFixAgYDAa8+eabKCoqwpAhQ2AymQAABkPPdS0MBoPvtfPZ7XbY7V/fE7Varf4um/zoSL0FK7dWYNsxMwBPA933x6fj/ulDkKGPkrg6otAwKCkaEwYmoPRUC7Yc+fpnZ1yEytOs2j1jZmj3qEhSDG+j9ocp2YmegFLVjB9PzZK6nKAWkDG8f/7zn7jnnnswYMAAKJVKjB8/HgsWLEBpaWmfzrdixQo88cQTfq6S/K3M1IaVW8t9PywVAnBrbjp+OWPIZXXbE1Hv/OHWMXhrT3V3v4hnVCQlVssprxLKy9ID8PTciaLIa3EFAhJQBg8ejJ07d6K9vR1WqxWpqan4wQ9+gOzsbBiNRgCA2WxGamqq72vMZjOuuuqqC55v2bJlWLp0qe/3VqsVGRkZgSid+uB4ow3PbSvHvw81QBQ9nf63jE3DgwVDMTj50l3zRNQ3w42xePyWUVKXQecYmx6PCLUCTe0OnDhtw5CUWKlLCloB7YKKjo5GdHQ0Wlpa8NFHH+GZZ55BVlYWjEYjtm/f7gskVqsVJSUl+PnPf37B82i1Wmi1HJ6Um5Nn2vH89gr860AdvLe9Z49JxYMFQzHMwG9KIgo/GpUC4zMTsOtEE4ormxlQrkBAAspHH30EURQxfPhwHD9+HA8//DBycnJw9913QxAEPPTQQ3jqqacwdOhQ3zTjtLQ0zJ07NxDlkJ/VNHdg1fYKrN9f52vIu2GkAUuuH8amMCIKe5Oz9Nh1ogklVc24c8pAqcsJWgEJKBaLBcuWLUNtbS30ej0KCwvx+9//Hmq1p1v8kUceQXt7OxYtWoTW1lZMmzYNW7Zs4RooMlfXehZ//fg43t1bg67uYDI9JwVLCoZx1UQiom55WYkAKlBS2cQ+lCvg93VQ+gPXQelfJksnXthxHG/tqYbT5fnr8p1hyVhSMBS5mQkSV0dEJC+dThfG/s9/4XC5sePX1yEriZMEvCRdB4VCR2NbJ1765ATWllTD0f149KsHJ2LJ9cMwaZBe4uqIiOQpQq3EVRnx2H2yGSWVTQwofcSAQt/QZLPjb59W4vWik+h0eoLJpEEJWHr9cOQP7r8npxIRBau8bD12n2zG7qpmzJ+cKXU5QYkBhXxa2h145bNKvLbrJDocLgBAbmY8fnX9cEwdksj7qERElykvKxF/wXGUVDVLXUrQYkAhWM468ernVfjH51Ww2bsAAGPTdVhy/TBcNyyZwYSIqJfGD4yHSiGgrvUsapo7uIp2HzCghLG2TidWf3ESr3xWibZOTzAZkRqHpdcPQ8GIFAYTIqI+itKoMCZdh/3VrSipamZA6QMGlDDUbu/CmqKT+PunlWjtcAIAhhlisKRgGGaOMvIR4UREfpCXlegJKJVN+P6EdKnLCToMKGHkrMOFN4pP4eWdJ9DU7gAAZCdH46GCYbh5TCqDCRGRH+Vl6/HyzhPYfZJ9KH3BgBIGOp0uvLm7Gi9+cgKn2zxPhR6UGIUHC4bie+MGQMlgQkTkdxMHJkAhAKeaOmCydMKo42KkvcGAEsLsXS68s7cWL3x8HCZrJwAgPSESv5wxFPNyB0ClVEhcIRFR6IqNUGNUmg6H6iwoqWrCnKsGSF1SUGFACUFOlxvvldbirx8fR13rWQBAmi4C908fiu9PSIdGxWBCRNQf8rL0OFRnQXFlMwNKLzGghJAulxsb9tdh1ccVqGn2BJOUWC3unz4EP5iUAa1KKXGFREThJS87Ef/v8yqUVDVJXUrQYUAJAS63iM0H6/H89gpUnWkHACTFaPDz64ZgYV4mItQMJkREUpg8SA9BACpPt6OxrRMpsexDuVwMKEHM7Rbx4eEGPLetAscbbQAAfbQGP7s2G3dOGYgoDS8vEZGUdFFq5BjjcKzBij1VLZg9NlXqkoIGP8GCkCiK+OiICSu3VqDM3AYA0EWqseg72fjR1YMQo+VlJSKSi7wsPY41WFFS1cSA0gv8JAsioihi+7FGrNxWjiP1VgBAbIQKP5mWjbunDUJchFriComI6Hx5WXq8tuskSiq5HkpvMKAEAVEUsbP8NFZuLcfBWgsAIFqjxD3TsvCTadnQRTGYEBHJ1eQsPQCgzNyG5nYH9NEaiSsKDgwoMiaKInadaMKzW8tReqoFABCpVuJHVw/Cou9k8y85EVEQSIzRYmhKDCoabdhd1YwbRxulLikoMKDIVEllE/5vazl2dz+qW6tS4IdTBuJn1w1GUoxW4uqIiKg38rL1qGi0oaSqiQHlMjGgyEzpqWY8u7UcXxz3zJnXKBW4Iy8Tv7huMFLiOD2NiCgY5WUl4o3iat8/OunSGFBk4kBNK1ZuLcfO8tMAALVSwA8mZWDxd4cgVRcpcXVERHQl8rr7UI42WGE564Qukr2Dl8KAIrHDdRY8t60c2441AgCUCgG3TUjH/dOHID0hSuLqiIjIH1LiIpCVFI2qM+3Ye7IZM0YYpC5J9hhQJPKVyYqVW8vx0REzAEAhAPPGp+OB6UMwMDFa4uqIiMjf8rL0qDrTjpIqBpTLwYDSz443tmHltgr8+8sGAIAgAHPGpeGXM4YiOzlG4uqIiChQ8rL1eGtPDUoq+Vyey8GA0k+qzrTj+W3l+NfBeoiiZ9/ssal4aMZQDDXESlscEREFXF5WIgDgcL0VNnsXV/2+BP7fCbDqpg6s+rgCG/bXweX2JJOZowx4qGAYRqTGSVwdERH1l7T4SGToI1HTfBalp1pw7bBkqUuSNQaUAKlt6cALO47j3b216OoOJjNyUrDk+mEYPUAncXVERCSFvKxE1DTXoqSyiQHlEhhQ/Mxk6cRfd1Tg7T01cLo8weTaYclYcv0wXJURL21xREQkqclZerxXWosSrodySQwoftLY1okXd5zAut3VcHS5AQBThyRiScEwTBykl7g6IiKSgyndfShf1rbirMOFSI1S4orkiwHlCjXZ7Hh55wn8s/gUOp2eYDJ5kB5LbxiGKdmJEldHRERykqGPRKouAg2WTuyrbsHUIUlSlyRbDCh91NLuwN8
/q8SaXSfR4XABAHIz4/Gr64dj6pBECIIgcYVERCQ3giAgL0uPjQfqUVLZxIDyLRhQesly1olXP6vEP744CZu9CwAwNl2HpdcPw7XDkhlMiIjoW+VlJ3oCCvtQvhUDymVq63Ri9Rcn8cpnlWjr9ASTkalxWHr9MMwYkcJgQkREl2Vy93N59te0otPpQoSafSgXovD3CV0uFx577DFkZWUhMjISgwcPxpNPPgnRuzoZAFEUsXz5cqSmpiIyMhIFBQWoqKjwdyl+0W7vwgs7juOaZ3bg2a3laOvswnBDLF6+czw+eGAaCkYaGE6IiOiyZSdFIylGC0eXGwdrWqUuR7b8PoLy9NNP46WXXsKaNWswatQo7N27F3fffTd0Oh1++ctfAgCeeeYZrFq1CmvWrEFWVhYee+wxzJw5E0ePHkVERIS/S+qTsw4X/ll8Ei/vrERzuwMAMDg5Gg8VDMPsMalQKBhKiIio9wRBQF62Hv/+sgElVc3I44SKC/J7QNm1axfmzJmD2bNnAwAGDRqEN998E7t37wbgGT157rnn8Oijj2LOnDkAgNdffx0GgwEbN27E/Pnz/V1Sr3Q6XVhXUo0XPzmBMzY7AGBQYhQeLBiK740bACWDCRERXaEpWd6A0gRgqNTlyJLfb/FcffXV2L59O8rLywEABw8exOeff45Zs2YBAKqqqmAymVBQUOD7Gp1Oh7y8PBQVFV3wnHa7HVartccWCB9/Zca1f9qB//3gKM7Y7MjQR+JP3x+LbUuvxa256QwnRETkF95Rk9JTLb61s6gnv4+g/Pa3v4XVakVOTg6USiVcLhd+//vfY+HChQAAk8kEADAYej5q2mAw+F4734oVK/DEE0/4u9Rv0EdrYbbakaaLwAMzhuL7E9KhVvo9wxERUZgbmhIDfbQGze0OHKqzYMLABKlLkh2/f/q+8847WLt2LdatW4d9+/ZhzZo1+POf/4w1a9b0+ZzLli2DxWLxbTU1NX6s+GtXZcTj/901ETsevg4LJmcynBARUUAIgoBJgzyhxHObh87n90/ghx9+GL/97W8xf/58jBkzBj/84Q+xZMkSrFixAgBgNBoBAGazucfXmc1m32vn02q1iIuL67EFSsFIA7QqTvkiIqLAyute9r6kkuuhXIjfA0pHRwcUip6nVSqVcLs999iysrJgNBqxfft23+tWqxUlJSXIz8/3dzlERESylJftWQ9l78lmdLnYh3I+v/eg3HLLLfj973+PzMxMjBo1Cvv378ezzz6Le+65B4BnWOuhhx7CU089haFDh/qmGaelpWHu3Ln+LoeIiEiWcoxxiItQwdrZhSP1VozjE+978HtA+ctf/oLHHnsMv/jFL9DY2Ii0tDT89Kc/xfLly33HPPLII2hvb8eiRYvQ2tqKadOmYcuWLbJZA4WIiCjQlAoBk7P02HasEburmhlQziOI5y7xGiSsVit0Oh0sFktA+1GIiIgC6ZVPK/H7D4+hYEQK/t+PJkldTsD15vOb01SIiIgk4n0uz+6qZrjcQTdeEFAMKERERBIZlRaHGK2nD+UrU2AWIQ1WDChEREQSUSkVvkXaON24JwYUIiIiCXmnG3PBtp4YUIiIiCTkXbBtd1UzgnDeSsAwoBAREUlozAAdItVKtHQ4UdFok7oc2WBAISIikpBGpcD4gfEAgJJK3ubxYkAhIiKSmPc2T3EVG2W9GFCIiIgklte9HkpJJftQvBhQiIiIJDYuIx4alQJnbHZUnmmXuhxZYEAhIiKSWIRaidzuZ/Hs5m0eAAwoREREsvD1bR42ygIMKERERLKQl+1plC3heigAGFCIiIhkYXxmAtRKAQ2WTtQ0n5W6HMkxoBAREclApEaJsenxAIBiLnvPgEJERCQX5043DncMKERERDLh7UPZfZIjKAwoREREMjFhYAKUCgE1zWdR3xrefSgMKERERDIRo1VhdFocAKAkzPtQGFCIiIhkxDfdOMz7UBhQiIiIZMTXKBvmK8oyoBAREcnIxEF6CAJQdaYdjdZOqcuRDAMKERGRjOgi1RiZ6u1DCd9RFAYUIiIimZnsu80Tvo2yDChEREQyk5fFRlkGFCIiIpnxjqBUNNrQZLNLXI00GFCIiIhkRh+twXBDLABgd5j2oTCgEBERyVBednhPN2ZAISIikiFfHwoDChEREcnFpKwEAMBXJissHU6Jq+l/DChEREQylBIbgezkaIgisPtk+I2i+D2gDBo0CIIgfGNbvHgxAKCzsxOLFy9GYmIiYmJiUFhYCLPZ7O8yiIiIgt7X043Dbz0UvweUPXv2oKGhwbdt3boVAHDbbbcBAJYsWYLNmzfj3Xffxc6dO1FfX4958+b5uwwiIqKgNyWMG2VV/j5hcnJyj9//8Y9/xODBg3HttdfCYrHg1Vdfxbp16zB9+nQAwOrVqzFixAgUFxdjypQp/i6HiIgoaHnXQzlSb0FbpxOxEWqJK+o/Ae1BcTgceOONN3DPPfdAEASUlpbC6XSioKDAd0xOTg4yMzNRVFQUyFKIiIiCTqouEpn6KLhFYO+pFqnL6VcBDSgbN25Ea2srfvzjHwMATCYTNBoN4uPjexxnMBhgMpkueh673Q6r1dpjIyIiCgd53ufyhNmy9wENKK+++ipmzZqFtLS0KzrPihUroNPpfFtGRoafKiQiIpK3vGzveijh1SgbsIBy6tQpbNu2DT/5yU98+4xGIxwOB1pbW3scazabYTQaL3quZcuWwWKx+LaamppAlU1ERCQr3hGUQ7UWdDi6JK6m/wQsoKxevRopKSmYPXu2b9+ECROgVquxfft2376ysjJUV1cjPz//oufSarWIi4vrsREREYWDDH0UBsRHosstojSM+lACElDcbjdWr16NH/3oR1Cpvp4opNPpcO+992Lp0qXYsWMHSktLcffddyM/P58zeIiIiC7CO4oSTg8O9Ps0YwDYtm0bqqurcc8993zjtZUrV0KhUKCwsBB2ux0zZ87Eiy++GIgyiIiIQsLkLD3W768Lq0ZZQRRFUeoiestqtUKn08FisfB2DxERhbyqM+347p8/gUapwJf/cwMi1EqpS+qT3nx+81k8REREMjcoMQopsVo4XG7sr26Vupx+wYBCREQkc4IghN10YwYUIiKiIBBuC7YxoBAREQUB74MD91W3wNHllriawGNAISIiCgKDk2OQGK2BvcuNL2tbpS4n4BhQiIiIgoAgCL6nG5eEwXooDChERERBwtuHUlwZ+o2yDChERERBwjuTp/RUC5yu0O5DYUAhIiIKEsMNsYiPUqPD4cLhOovU5QQUAwoREVGQUCgETBoUHs/lYUAhIiIKInlh0ijLgEJERBRE8rI8fSh7qprhcgfd4/QuGwMKERFREBmZFodYrQpt9i4ca7BKXU7AMKAQEREFEaVCwMRBCQBCe7oxAwoREVGQ8U43DuVGWQYUIiKiIONdUXb3yWa4Q7QPhQGFiIgoyIwZoEOURonWDifKG9ukLicgGFCIiIiCjFqpwISBnj6UksrQvM3DgEJERBSEvl4PJTQbZRlQiIiIgtC5jbKiGHp9KA
woREREQWhsug5alQJnbA6cON0udTl+x4BCREQUhLQqJXIz4wGE5m0eBhQiIqIg5V32PhQbZRlQiIiIglRe9teNsqHWh8KAQkREFKTGZyZAo1TAbLXjVFOH1OX4FQMKERFRkIpQKzEuQwcg9PpQGFCIiIiCmK8PJcSey8OAQkREFMS8z+UJtUZZBhQiIqIgNmFgApQKAXWtZ1HbEjp9KAwoREREQSxaq8KYAd19KCE0isKAQkREFOTOnW4cKhhQiIiIgtyUEGyUDUhAqaurw5133onExERERkZizJgx2Lt3r+91URSxfPlypKamIjIyEgUFBaioqAhEKURERCFvwqAEKATgVFMHTJZOqcvxC78HlJaWFkydOhVqtRr/+c9/cPToUfzf//0fEhISfMc888wzWLVqFV5++WWUlJQgOjoaM2fORGdnaPxPJSIi6k9xEWqMTIsDEDq3eVT+PuHTTz+NjIwMrF692rcvKyvL92tRFPHcc8/h0UcfxZw5cwAAr7/+OgwGAzZu3Ij58+f7uyQiIqKQl5eViMN1VpRUNWPOVQOkLueK+X0EZdOmTZg4cSJuu+02pKSkIDc3F6+88orv9aqqKphMJhQUFPj26XQ65OXloaio6ILntNvtsFqtPTYiIiL6Wp5vPZTQGEHxe0CprKzESy+9hKFDh+Kjjz7Cz3/+c/zyl7/EmjVrAAAmkwkAYDAYenydwWDwvXa+FStWQKfT+baMjAx/l01ERBTUJmfpIQjAidPtON1ml7qcK+b3gOJ2uzF+/Hj84Q9/QG5uLhYtWoT77rsPL7/8cp/PuWzZMlgsFt9WU1Pjx4qJiIiCX3yUBsMNsQCA3SEwm8fvASU1NRUjR47ssW/EiBGorq4GABiNRgCA2WzucYzZbPa9dj6tVou4uLgeGxEREfXkvc2zOwQaZf0eUKZOnYqysrIe+8rLyzFw4EAAnoZZo9GI7du3+163Wq0oKSlBfn6+v8shIiIKG3nZobMeit8DypIlS1BcXIw//OEPOH78ONatW4e///3vWLx4MQBAEAQ89NBDeOqpp7Bp0yYcOnQId911F9LS0jB37lx/l0NERBQ2vA8O/MrUhpZ2h8TVXBm/B5RJkyZhw4YNePPNNzF69Gg8+eSTeO6557Bw4ULfMY888ggeeOABLFq0CJMmTYLNZsOWLVsQERHh73KIiIjCRlKMFkNSYgAAu08G9yiKIIqiKHURvWW1WqHT6WCxWNiPQkREdI7fbTiEtSXVuGdqFpbfMvLSX9CPevP5zWfxEBERhRBvH8ruk8HdKMuAQkREFEK8M3mO1lth7XRKXE3fMaAQERGFEENcBAYlRsEtAnuDuA+FAYWIiCjE5GV1TzeuZEAhIiIimcjL9tzmKQ7i9VAYUIiIiEKMt1H2cJ0FNnuXxNX0DQMKERFRiBkQH4n0hEi43CL2nWqRupw+YUAhIiIKQd5VZUuC9Lk8DChEREQhaEqQN8oyoBAREYUgb6PswdpWnHW4JK6m9xhQiIiIQlCmPgrGuAg4XSL2VwdfHwoDChERUQgSBCGopxszoBAREYUo74Jtu4OwUZYBhYiIKER5Z/Lsr26FvSu4+lAYUIiIiELU4ORoJMVoYe9y42CNRepyeoUBhYiIKEQJguB7unFJZXDd5mFAISIiCmHeRtmSIGuUZUAhIiIKYd5G2dJTLXC63BJXc/kYUIiIiELY0JQYxEepcdbpwqG64OlDYUAhIiIKYQqFgMmDvH0owXObhwGFiIgoxOVldz+XJ4jWQ2FAISIiCnHemTx7T7agK0j6UBhQiIiIQtyI1DjERqhgs3fhaINV6nIuCwMKERFRiFMGYR8KAwoREVEY8C57HyzroTCgEBERhQFvo+yek81wu0WJq7k0BhQiIqIwMDotDtEaJSxnnfjK1CZ1OZfEgEJERBQGVEoFJnj7UIJgujEDChERUZj4+sGB8u9DYUAhIiIKE1O6Hxy4+2QzRFHefSgMKERERGFizIB4RKgVaG534HijTepyvhUDChERUZjQqBQYn5kAACiW+XRjvweU//mf/4EgCD22nJwc3+udnZ1YvHgxEhMTERMTg8LCQpjNZn+XQURERBeQl9X9XJ5KeTfKBmQEZdSoUWhoaPBtn3/+ue+1JUuWYPPmzXj33Xexc+dO1NfXY968eYEog4iIiM6Tl/31gm1y7kNRBeSkKhWMRuM39lssFrz66qtYt24dpk+fDgBYvXo1RowYgeLiYkyZMiUQ5RAREVG3qzLioVEpcLrNjqoz7chOjpG6pAsKyAhKRUUF0tLSkJ2djYULF6K6uhoAUFpaCqfTiYKCAt+xOTk5yMzMRFFR0UXPZ7fbYbVae2xERETUexFqJa7KiAcA7JZxH4rfA0peXh5ee+01bNmyBS+99BKqqqpwzTXXoK2tDSaTCRqNBvHx8T2+xmAwwGQyXfScK1asgE6n820ZGRn+LpuIiChs5AXBc3n8fotn1qxZvl+PHTsWeXl5GDhwIN555x1ERkb26ZzLli3D0qVLfb+3Wq0MKURERH2Ul5WIv+A4SiqbIIoiBEGQuqRvCPg04/j4eAwbNgzHjx+H0WiEw+FAa2trj2PMZvMFe1a8tFot4uLiemxERETUN+MHxkOlEFBv6URty1mpy7mggAcUm82GEydOIDU1FRMmTIBarcb27dt9r5eVlaG6uhr5+fmBLoWIiIgARGlUGJuuAwAUy3S6sd8Dyq9//Wvs3LkTJ0+exK5du3DrrbdCqVRiwYIF0Ol0uPfee7F06VLs2LEDpaWluPvuu5Gfn88ZPERERP0oL7t7PRSZ9qH4vQeltrYWCxYsQFNTE5KTkzFt2jQUFxcjOTkZALBy5UooFAoUFhbCbrdj5syZePHFF/1dBhEREX2LyVl6vPTJCdnO5BFEOa/SchFWqxU6nQ4Wi4X9KERERH3Q1unEuCf+C7cIFC2bjlRd3yay9EZvPr/5LB4iIqIwFBuhxugBnj6Ukkr5jaIwoBAREYWpr9dDkV+jLAMKERFRmPr6wYEcQSEiIiKZmDRID0EAKs+0o9HaKXU5PTCgEBERhSldlBo5Rk+z6u6T8hpFYUAhIiIKY74+FJnd5mFAISIiCmNTsuXZKMuAQkREFMYmdzfKlpttaG53SFzN1xhQiIiIwpg+WoNhhhgAwG4ZjaIwoBAREYU573TjYhn1oTCgEBERhbnJ3Y2ycnouDwMKERFRmMvrbpQ9ZrLC0uGUuBoPBhQiIqIwlxIbgeykaIgisEcm66EwoBAREZFvFEUu040ZUIiIiOjr5/LIpA+FAYWIiIh8jbKH6yyw2bskroYBhYiIiACkxUciQx8JtwjslUEfCgMKERERAZDXbR4GFCIiIgJw7oMDpW+UZUAhIiIiAMCUbM8Iype1FnQ4pO1DYUAhIiIiAEB6QiTSdBHocovYd6pV0loYUIiIiAgAIAjCOcveS3ubhwGFiIiIfPK6b/MUS9woq5L0TyciIiJZmTo4CXfkZWLakCRJ62BAISIiIp/MxCj84dYxUpfBWzxEREQkPwwoREREJDsMKERERCQ7D
ChEREQkOwwoREREJDsMKERERCQ7AQ8of/zjHyEIAh566CHfvs7OTixevBiJiYmIiYlBYWEhzGZzoEshIiKiIBHQgLJnzx787W9/w9ixY3vsX7JkCTZv3ox3330XO3fuRH19PebNmxfIUoiIiCiIBCyg2Gw2LFy4EK+88goSEhJ8+y0WC1599VU8++yzmD59OiZMmIDVq1dj165dKC4uDlQ5REREFEQCFlAWL16M2bNno6CgoMf+0tJSOJ3OHvtzcnKQmZmJoqKiC57LbrfDarX22IiIiCh0BWSp+7feegv79u3Dnj17vvGayWSCRqNBfHx8j/0GgwEmk+mC51uxYgWeeOKJQJRKREREMuT3EZSamho8+OCDWLt2LSIiIvxyzmXLlsFisfi2mpoav5yXiIiI5MnvAaW0tBSNjY0YP348VCoVVCoVdu7ciVWrVkGlUsFgMMDhcKC1tbXH15nNZhiNxgueU6vVIi4ursdGREREocvvt3hmzJiBQ4cO9dh39913IycnB7/5zW+QkZEBtVqN7du3o7CwEABQVlaG6upq5OfnX9afIYoiALAXhYiIKIh4P7e9n+Pfxu8BJTY2FqNHj+6xLzo6GomJib799957L5YuXQq9Xo+4uDg88MADyM/Px5QpUy7rz2hrawMAZGRk+Ld4IiIiCri2tjbodLpvPSYgTbKXsnLlSigUChQWFsJut2PmzJl48cUXL/vr09LSUFNTg9jYWAiCEMBKg5fVakVGRgZqamp4S0wGeD3khddDXng95CdQ10QURbS1tSEtLe2Sxwri5YyzUNCxWq3Q6XSwWCz8hpcBXg954fWQF14P+ZHDNeGzeIiIiEh2GFCIiIhIdhhQQpRWq8Xjjz8OrVYrdSkEXg+54fWQF14P+ZHDNWEPChEREckOR1CIiIhIdhhQiIiISHYYUIiIiEh2GFCIiIhIdhhQgsinn36KW265BWlpaRAEARs3buzxutlsxo9//GOkpaUhKioKN954IyoqKnocYzKZ8MMf/hBGoxHR0dEYP3483n///X58F6FjxYoVmDRpEmJjY5GSkoK5c+eirKysxzGdnZ1YvHgxEhMTERMTg8LCQpjN5h7HVFdXY/bs2YiKikJKSgoefvhhdHV19edbCQn+uB4HDx7EggULkJGRgcjISIwYMQLPP/98f7+VkOCv7w+vpqYmpKenQxCEbzxsli7Nn9fjtddew9ixYxEREYGUlBQsXrw4IDUzoASR9vZ2jBs3Di+88MI3XhNFEXPnzkVlZSX+9a9/Yf/+/Rg4cCAKCgrQ3t7uO+6uu+5CWVkZNm3ahEOHDmHevHm4/fbbsX///v58KyFh586dWLx4MYqLi7F161Y4nU7ccMMNPf5/L1myBJs3b8a7776LnTt3or6+HvPmzfO97nK5MHv2bDgcDuzatQtr1qzBa6+9huXLl0vxloKaP65HaWkpUlJS8MYbb+DIkSP43e9+h2XLluGvf/2rFG8pqPnjepzr3nvvxdixY/ur/JDjr+vx7LPP4ne/+x1++9vf4siRI9i2bRtmzpwZmKJFCkoAxA0bNvh+X1ZWJgIQDx8+7NvncrnE5ORk8ZVXXvHti46OFl9//fUe59Lr9T2Oob5pbGwUAYg7d+4URVEUW1tbRbVaLb777ru+Y44dOyYCEIuKikRRFMUPP/xQVCgUoslk8h3z0ksviXFxcaLdbu/fNxBi+nI9LuQXv/iF+N3vfjfg9Ya6K7keL774onjttdeK27dvFwGILS0t/Vl6SOrL9WhubhYjIyPFbdu29UuNHEEJEXa7HQAQERHh26dQKKDVavH555/79l199dV4++230dzcDLfbjbfeegudnZ247rrr+rvkkGOxWAAAer0egOdf406nEwUFBb5jcnJykJmZiaKiIgBAUVERxowZA4PB4Dtm5syZsFqtOHLkSD9WH3r6cj0udh7vOajv+no9jh49iv/93//F66+/DoWCH1n+0pfrsXXrVrjdbtTV1WHEiBFIT0/H7bffjpqamoDUyKsdIrx/kZYtW4aWlhY4HA48/fTTqK2tRUNDg++4d955B06nE4mJidBqtfjpT3+KDRs2YMiQIRJWH/zcbjceeughTJ06FaNHjwbg6ffRaDSIj4/vcazBYIDJZPIdc2448b7ufY36pq/X43y7du3C22+/jUWLFgW65JDW1+tht9uxYMEC/OlPf0JmZmZ/lx2y+no9Kisr4Xa78Yc//AHPPfcc3nvvPTQ3N+P666+Hw+Hwe50qv5+RJKFWq7F+/Xrce++90Ov1UCqVKCgowKxZsyCes1jwY489htbWVmzbtg1JSUnYuHEjbr/9dnz22WcYM2aMhO8guC1evBiHDx/uMVpF0vHH9Th8+DDmzJmDxx9/HDfccIMfqws/fb0ey5Ytw4gRI3DnnXcGqLLw1Nfr4Xa74XQ6sWrVKt/3xJtvvgmj0YgdO3b4vReFIyghZMKECThw4ABaW1vR0NCALVu2oKmpCdnZ2QCAEydO4K9//Sv+8Y9/YMaMGRg3bhwef/xxTJw48YKNt3R57r//fnzwwQfYsWMH0tPTffuNRiMcDsc3ZhyYzWYYjUbfMed3yXt/7z2GeudKrofX0aNHMWPGDCxatAiPPvpof5Qdsq7kenz88cd49913oVKpoFKpMGPGDABAUlISHn/88X57D6HkSq5HamoqAGDkyJG+15OTk5GUlITq6mr/F9svnS7kdzivSfZCysvLRYVCIX700UeiKIril19+KQIQjx492uO4G264QbzvvvsCVWrIcrvd4uLFi8W0tDSxvLz8G697m87ee+89376vvvrqgk2yZrPZd8zf/vY3MS4uTuzs7Az8mwgh/rgeoiiKhw8fFlNSUsSHH364X+oOVf64HsePHxcPHTrk2/7xj3+IAMRdu3b1+J6hS/PH9fBOxji3SbapqanH54w/MaAEkba2NnH//v3i/v37RQDis88+K+7fv188deqUKIqi+M4774g7duwQT5w4IW7cuFEcOHCgOG/ePN/XOxwOcciQIeI111wjlpSUiMePHxf//Oc/i4IgiP/+97+leltB6+c//7mo0+nETz75RGxoaPBtHR0dvmN+9rOfiZmZmeLHH38s7t27V8zPzxfz8/N9r3d1dYmjR48Wb7jhBvHAgQPili1bxOTkZHHZsmVSvKWg5o/rcejQITE5OVm88847e5yjsbFRircU1PxxPc63Y8cOzuLpI39djzlz5oijRo0Sv/jiC/HQoUPizTffLI4cOVJ0OBx+r5kBJYh4vznP3370ox+JoiiKzz//vJieni6q1WoxMzNTfPTRR78xVbW8vFycN2+emJKSIkZFRYljx479xrRjujwXuhYAxNWrV/uOOXv2rPiLX/xCTEhIEKOiosRbb71VbGho6HGekydPirNmzRIjIyPFpKQk8Ve/+pXodDr7+d0EP39cj8cff/yC5xg4cGD/v6Eg56/vj3MxoPSdv66HxWIR77nnHjE+Pl7U6/XirbfeKlZXVwekZqG7cCIiIiLZYJMsERERyQ4DChEREckOAwoRERHJ
DgMKERERyQ4DChEREckOAwoRERHJDgMKERERyQ4DChEREckOAwoRERHJDgMKERERyQ4DChEREckOAwoRERHJzv8PmHiZvvkb5iMAAAAASUVORK5CYII=", - "text/plain": [ - "
    " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "with h5py.File(fnm, \"r\") as h5r:\n", " # src = \"/InGaN_nanowires_spectra/InGaN nanowires/Area 1/Full Area 1\"\n", @@ -196,22 +147,10 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "7b58972c-dcd3-45ea-9fae-36c81de1ee9e", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'dat' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m plt\u001b[38;5;241m.\u001b[39mplot(\u001b[43mdat\u001b[49m[\u001b[38;5;241m0\u001b[39m, :])\n", - "\u001b[0;31mNameError\u001b[0m: name 'dat' is not defined" - ] - } - ], + "outputs": [], "source": [ "plt.plot(dat[0, :])" ] @@ -234,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "f0a7f9ac-1ade-43d7-aedd-b2572d163b34", "metadata": {}, "outputs": [], @@ -284,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "97c3a10f-903a-4d7e-883b-779c6c34f4a0", "metadata": {}, "outputs": [], @@ -340,18 +279,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "8bbbaa03-0aac-43fb-941a-f63910496fa3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'image_oned/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_oned/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_oned/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_threed/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_threed/axis_z': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_threed/axis_z@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_threed/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_twod/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_twod/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_twod/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'image_twod/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'image_twod/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_oned/axis_image_identifier': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_oned/axis_image_identifier@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_oned/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_oned/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_oned/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_image_identifier': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_image_identifier@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_x@long_name': 
Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/axis_z': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_threed/axis_z@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_threed/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_image_identifier': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_image_identifier@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_twod/axis_x': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_x@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_twod/axis_y': Name: None, unit: unitless, dtype: , eqv_hdf: dataset, 'stack_twod/axis_y@long_name': Name: None, unit: unitless, dtype: , eqv_hdf: attribute, 'stack_twod/intensity': Name: None, unit: unitless, dtype: , eqv_hdf: dataset}\n" - ] - } - ], + "outputs": [], "source": [ "tmp = NxEmImageRealSpaceSet()\n", "# print(tmp.tmp)" @@ -359,19 +290,10 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "58052fb7-723f-476d-a8ca-df99efffcc05", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ", (1,), {'names': ['Version', 'ImageType', 'Label', 'SMin', 'SMax', 'Par_Color', 'EdsPresetMode', 'EdsPresetTime', 'EdsMapDataType', 'TiltCorrected', 'RasterMode', 'ChannelStart', 'ChannelEnd', 'IntDummy1', 'IAdc', 'ISize', 'IBits', 'NReads', 'NFrames', 'FDwell', 'KVolt', 'Tilt', 'TakeOff', 'Magnification', 'WorkingDistance', 'MicronsPerPixelX', 'MicronsPerPixelY', 'NumberOfCommentLines', 'TextLines', 'Fpar1', 'NOverlayElements', 'OverlayColors', 'XmpEdiTimeCnst', 'Fpar'], 'formats': [', (' 38\u001b[0m spd_naive[one, two, :] \u001b[38;5;241m=\u001b[39m \u001b[43mspd_edax\u001b[49m\u001b[43m[\u001b[49m\u001b[43mone\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtwo\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNaive done\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m: \n", - "File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/py3.10.13/lib/python3.10/site-packages/h5py/_hl/dataset.py:841\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, args, new_dtype)\u001b[0m\n\u001b[1;32m 839\u001b[0m mspace \u001b[38;5;241m=\u001b[39m h5s\u001b[38;5;241m.\u001b[39mcreate_simple(selection\u001b[38;5;241m.\u001b[39mmshape)\n\u001b[1;32m 840\u001b[0m fspace \u001b[38;5;241m=\u001b[39m selection\u001b[38;5;241m.\u001b[39mid\n\u001b[0;32m--> 841\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mid\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfspace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mmtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdxpl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dxpl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[38;5;66;03m# Patch up the output for NumPy\u001b[39;00m\n\u001b[1;32m 844\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m ():\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "with h5py.File(fnm, \"r\") as h5r:\n", " src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\"\n", @@ -491,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, "id": "1d637cdc-0729-45aa-91f7-a12346307004", "metadata": {}, "outputs": [], @@ -501,21 +381,10 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": null, "id": "dc034d09-b089-4f85-a4a0-b0689d76108c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "None\n", - "(102, 512)\n", - "int16\n", - "int16\n" - ] - } - ], + "outputs": [], "source": [ "with h5py.File(fnm, \"r\") as h5r:\n", " src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\"\n", @@ -530,19 +399,10 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": null, "id": "3c7b1022-beea-4996-ab06-b120531c3a57", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "x, [(0, 512)]\n", - "y, [(0, 102), (102, 204), (204, 306), (306, 400)]\n" - ] - } - ], + "outputs": [], "source": [ "chk_bnds = {\"x\": [], \"y\": []}\n", "ifo = {\"ny\": 400,\n", @@ -576,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "570da751-a38c-4902-b929-ef32cf19b1ba", "metadata": {}, "outputs": [], @@ -591,93 +451,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "d2b5287d-7441-4141-b161-351de4bf7488", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading type(entry) \n", - "axes\n", - "[{'name': 'height', 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'size': 512}, {'name': 'width', 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'size': 512}]\n", - "metadata\n", - "{'Acquisition_instrument': {'TEM': {'beam_energy': 200, 'magnification': 56000}}, 'Sample': {'name': 'map 4'}, 'Signal': {}, 'General': {'title': 'HAADF', 'original_filename': '46_ES-LP_L1_brg.bcf'}}\n", - "original_metadata\n", - "{'Microscope': {'HV': 200, 'WD': -1, 'Mag': 56000, 'DX': 0.00351909256747931, 'DY': 0.00351909256747931, 'Flags': 16776960, 'XmlClassType': 'TRTSEMData'}, 'DSP Configuration': {'ImageWidth': 512, 'ImageHeight': 512, 'PixelAverage': 60, 'LineAverage': 1, 'SEBitCount': 16, 'ChannelCount': 4, 'ChannelName0': 'BF', 'ChannelName1': 'DF', 'ChannelName2': 'DF4', 'Channel3': 1, 'ChannelName3': 'HAADF', 'CounterIndex': 0, 'CounterChannelUsed': 0, 'TiltAngle': 0, 'CounterMode': 0, 'PixelTime': 1, 'XmlClassType': 'TRTDSPConfiguration'}, 'Stage': {'State': 7936, 'XmlClassType': 'TRTSEMStageData'}}\n", - "mapping\n", - "{'Stage.Rotation': ('Acquisition_instrument.TEM.Stage.rotation', None), 'Stage.Tilt': ('Acquisition_instrument.TEM.Stage.tilt_alpha', None), 'Stage.X': ('Acquisition_instrument.TEM.Stage.x', None), 'Stage.Y': ('Acquisition_instrument.TEM.Stage.y', None), 'Stage.Z': ('Acquisition_instrument.TEM.Stage.z', None)}\n", - "data\n", - "[[19447 20033 16242 ... 
48594 49493 50088]\n", - " [21447 21375 21792 ... 47285 48770 49734]\n", - " [22395 22443 22459 ... 48080 48883 49461]\n", - " ...\n", - " [18395 18379 18853 ... 17190 17801 17768]\n", - " [17785 19704 18772 ... 15905 16178 17511]\n", - " [19471 20226 20355 ... 17142 17254 17793]]\n", - "Loading type(entry) \n", - "data\n", - "[[[0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " ...\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]]\n", - "\n", - " [[0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " ...\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]]\n", - "\n", - " [[0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " ...\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]]\n", - "\n", - " ...\n", - "\n", - " [[0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " ...\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]]\n", - "\n", - " [[0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " ...\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]]\n", - "\n", - " [[0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " ...\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]\n", - " [0 0 0 ... 0 0 0]]]\n", - "axes\n", - "[{'name': 'height', 'size': 512, 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'navigate': True}, {'name': 'width', 'size': 512, 'offset': 0, 'scale': 0.00351909256747931, 'units': 'µm', 'navigate': True}, {'name': 'Energy', 'size': 2048, 'offset': -0.4798465772, 'scale': 0.01000934711, 'units': 'keV', 'navigate': False}]\n", - "metadata\n", - "{'Acquisition_instrument': {'TEM': {'beam_energy': 200, 'magnification': 56000, 'Detector': {'EDS': {'elevation_angle': 22.0, 'detector_type': 'Custom type', 'azimuth_angle': 45.0, 'real_time': 723.7632, 'live_time': 13.678}}}}, 'General': {'original_filename': '46_ES-LP_L1_brg.bcf', 'title': 'EDX', 'date': '2020-07-22', 'time': '14:18:32'}, 'Sample': {'name': 'map 4', 'elements': ['Al', 'Ca', 'Fe', 'Hf', 'Lu', 'Mg', 'Nd', 'O', 'Si', 'Sm', 'U'], 'xray_lines': ['Al_Ka', 'Ca_Ka', 'Fe_Ka', 'Hf_La', 'Lu_La', 'Mg_Ka', 'Nd_La', 'O_Ka', 'Si_Ka', 'Sm_La', 'U_Ma']}, 'Signal': {'signal_type': 'EDS_TEM', 'quantity': 'X-rays (Counts)'}}\n", - "original_metadata\n", - "{'Hardware': {'TRTKnownHeader': {'Type': 'RTHardware', 'Size': 137}, 'RealTime': 16331, 'LifeTime': 13678, 'DeadTime': 16.0, 'ZeroPeakPosition': 95, 'ZeroPeakFrequency': 800, 'PulseDensity': 89486, 'Amplification': 20000.0, 'ShapingTime': 60000, 'XmlClassType': 'TRTSpectrumHardwareHeader'}, 'Detector': {'TRTKnownHeader': {'Type': 'RTDetector', 'Version': 5, 'Size': 9932}, 'Technology': 'SDD', 'Type': 'Custom type', 'DetectorThickness': 0.45, 'SiDeadLayerThickness': 0.01, 'DetLayers': {}, 'WindowType': 'Custom type', 'WindowLayers': None, 'Corrections': {'Escape': None, 'Tail': {'FormulaType': 'Internal', 'MainCorrection': 1}, 'Shelf': {'FormulaType': 'Internal', 'RangeStart': 0.08, 'RangeEnd': 10.0, 'MainCorrection': 1, 'Coefficient0': 1}, 'Shift': {'FormulaType': 'Internal', 'RangeStart': 0.08, 'RangeEnd': 0.555, 'MainCorrection': 1}, 'FWHMShift': None}, 'CorrectionType': 2, 'ResponseFunctionCount': 21, 'SampleCount': 5, 'SampleOffset': -3, 'PulsePairResTimeCount': 0, 'PileUpMinEnergy': 1, 'PileUpWithBG': False, 'TailFactor': 0, 'ShelfFactor': 0, 'ShiftFactor': 0, 'ShiftFactor2': 0, 
'ShiftData': (0.079, 0, 0.08, 0.01, 0.555, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 'ResponseFunction': [(0, 3.3, 0.000801, 3.3, 0.00298, 3.3, 0.008902, 3.3, 0.025, 3.300046, 0.041098, 3.303475, 0.04702, 3.307302, 0.049199, 3.309237, 0.05, 3.31), (0, 3.3, 0.00444, 3.3, 0.01651, 3.3, 0.049318, 3.3, 0.1385, 3.300046, 0.227682, 3.303475, 0.26049, 3.307302, 0.27256, 3.309237, 0.277, 3.31), (0, 1.1, 0.006283, 1.1, 0.023364, 1.1, 0.069793, 1.1, 0.196, 1.102513, 0.322207, 1.291145, 0.368636, 1.50163, 0.385717, 1.608042, 0.392, 1.65), (0, 0.4, 0.008415, 0.4, 0.031291, 0.4, 0.093473, 0.4, 0.2625, 0.40457, 0.431527, 0.747537, 0.493709, 1.13024, 0.516585, 1.323712, 0.525, 1.4), (0, 0.2, 0.010836, 0.2, 0.040291, 0.2, 0.120357, 0.2, 0.338, 0.202513, 0.555643, 0.391145, 0.635709, 0.601632, 0.665164, 0.708042, 0.676, 0.75), (0, 0.03, 0.016687, 0.03, 0.062045, 0.03, 0.185343, 0.03, 0.5205, 0.032513, 0.855657, 0.221145, 0.978955, 0.43163, 1.024313, 0.538043, 1.041, 0.58), (0, 0.055, 0.020101, 0.055, 0.07474, 0.055, 0.223266, 0.055, 0.627, 0.057057, 1.030734, 0.21139, 1.17926, 0.383607, 1.233899, 0.470671, 1.254, 0.505), (0, 0.05, 0.023836, 0.05, 0.088627, 0.05, 0.26475, 0.05, 0.7435, 0.050732, 1.22225, 0.105607, 1.398373, 0.166839, 1.463164, 0.197794, 1.487, 0.21), (0, 0.03, 0.027891, 0.03, 0.103707, 0.03, 0.309795, 0.03, 0.87, 0.030594, 1.430205, 0.075181, 1.636293, 0.124932, 1.712109, 0.150082, 1.74, 0.16), (0, 0.15, 0.030776, 0.15, 0.114435, 0.15, 0.341842, 0.15, 0.96, 0.152377, 1.578158, 0.330719, 1.805565, 0.529724, 1.889224, 0.63033, 1.92, 0.67), (0, 0.15, 0.032283, 0.15, 0.120037, 0.15, 0.358578, 0.15, 1.007, 0.152377, 1.655422, 0.330719, 1.893963, 0.529725, 1.981717, 0.63033, 2.014, 0.67), (0, 0.085, 0.036996, 0.085, 0.13756, 0.085, 0.410923, 0.085, 1.154, 0.087055, 1.897077, 0.241391, 2.17044, 0.413607, 2.271004, 0.50067, 2.308, 0.535), (0, 0.085, 0.05918, 0.085, 0.220049, 0.085, 0.657334, 0.085, 1.846, 0.08589, 3.034665, 0.152769, 3.471952, 0.227397, 3.63282, 0.265124, 3.692, 0.28), (0, 0.035, 0.079378, 0.035, 0.295146, 0.035, 0.881668, 0.035, 2.476, 0.035549, 4.070332, 0.076705, 4.656854, 0.122629, 4.872623, 0.145845, 4.952, 0.155), (0, 0.035, 0.119867, 0.035, 0.445699, 0.035, 1.331404, 0.035, 3.739, 0.035457, 6.146595, 0.069754, 7.0323, 0.108024, 7.358133, 0.127371, 7.478, 0.135), (0, 0.035, 0.148303, 0.035, 0.551433, 0.035, 1.647253, 0.035, 4.626, 0.035457, 7.604747, 0.069754, 8.700567, 0.108024, 9.103698, 0.127371, 9.252, 0.135), (0, 0.023571, 0.176322, 0.023571, 0.655616, 0.023571, 1.958472, 0.023571, 5.5, 0.023597, 9.041529, 0.025499, 10.344384, 0.02762, 10.823678, 0.028693, 11, 0.029116), (0, 0.009286, 0.208381, 0.009286, 0.774819, 0.009286, 2.314557, 0.009286, 6.5, 0.009297, 10.685443, 0.010101, 12.225181, 0.010999, 12.791619, 0.011453, 13, 0.011632), (0, 0.010714, 0.24044, 0.010714, 0.894022, 0.010714, 2.670643, 0.010714, 7.5, 0.010714, 12.329357, 0.010714, 14.105978, 0.010714, 14.759561, 0.010714, 15, 0.010714), (0, 0, 0.320586, 0, 1.192029, 0, 3.560857, 0, 10, 0, 16.439142, 0, 18.80797, 0, 19.679415, 0, 20, 0), (0, 0, 1.60293, 0, 5.960146, 0, 17.804287, 0, 50, 0, 82.195709, 0, 94.039856, 0, 98.397072, 0, 100, 0)], 'XmlClassType': 'TRTDetectorHeader'}, 'Analysis': {'TRTKnownHeader': {'Type': 'RTESMA', 'Size': 662}, 'PrimaryEnergy': 200.0, 'ReferenceFactor': -1, 'ReferenceStdDev': -1, 'BaseRefStdDev': 0.002000100008, 'ElevationAngle': 22.0, 'AzimutAngle': 45.0, 'DetectorAngle': 15.0, 'CoatCorrection': None, 'XmlClassType': 'TRTESMAHeader'}, 'Spectrum': {'Size': 82, 'Date': '22.7.2020', 
'Time': '14:18:32', 'ChannelCount': 2048, 'CalibAbs': -0.4798465772, 'CalibLin': 0.01000934711, 'SigmaAbs': 0.0004952410698, 'SigmaLin': 0.0004825546962, 'XmlClassType': 'TRTSpectrumHeader'}, 'DSP Configuration': {'ImageWidth': 512, 'ImageHeight': 512, 'PixelAverage': 60, 'LineAverage': 1, 'SEBitCount': 16, 'ChannelCount': 4, 'ChannelName0': 'BF', 'ChannelName1': 'DF', 'ChannelName2': 'DF4', 'Channel3': 1, 'ChannelName3': 'HAADF', 'CounterIndex': 0, 'CounterChannelUsed': 0, 'TiltAngle': 0, 'CounterMode': 0, 'PixelTime': 1, 'XmlClassType': 'TRTDSPConfiguration'}, 'Line counter': (47, 47, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46), 'Stage': {'State': 7936, 'XmlClassType': 'TRTSEMStageData'}, 'Microscope': {'HV': 200, 'WD': -1, 'Mag': 56000, 'DX': 0.00351909256747931, 'DY': 0.00351909256747931, 'Flags': 16776960, 'XmlClassType': 'TRTSEMData'}}\n", - "mapping\n", - "{'Stage.Rotation': ('Acquisition_instrument.TEM.Stage.rotation', None), 'Stage.Tilt': ('Acquisition_instrument.TEM.Stage.tilt_alpha', None), 'Stage.X': ('Acquisition_instrument.TEM.Stage.x', None), 'Stage.Y': ('Acquisition_instrument.TEM.Stage.y', None), 'Stage.Z': ('Acquisition_instrument.TEM.Stage.z', None)}\n" - ] - } - ], + "outputs": [], "source": [ "objs = bruker.file_reader(f\"{src}/{fnms[2][1]}\")\n", "# objs = emd.file_reader(f\"{src}/{fnms[3][1]}\")\n", diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index d57650c1a..9412ae0b6 100644 --- 
a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -439,9 +439,9 @@ def parse_and_normalize_eds_spc(self, fp): int(e_n) - 1, num=int(e_n), endpoint=True), - e_zero.dtype) / 1000. # eV to keV + e_zero.dtype) / 1000. # keV self.tmp[ckey].tmp["spectrum_zerod/axis_energy@long_name"].value \ - = "Energy (eV)" + = "Energy (keV)" self.tmp[ckey].tmp["spectrum_zerod/intensity"].value \ = np.asarray(fp[f"{src}/SPC"]["SpectrumCounts"][0], np.int32) self.tmp[ckey].tmp["spectrum_zerod/intensity@long_name"].value \ @@ -575,7 +575,7 @@ def parse_and_normalize_eds_area_rois(self, fp): e_n - 1., num=int(e_n), endpoint=True), - e_zero.dtype) + e_zero.dtype) # eV, as xraydb demands nxy = {"x": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionX"], "y": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionY"], "lx": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldWidth"], diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 5c6e0ed42..ee424fab2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -334,9 +334,9 @@ def process_roi_overview_eds_based(self, template[f"{trg}/AXISNAME[axis_{dim[0]}]/@long_name"] \ = inp.tmp[f"image_twod/axis_{dim[0]}@long_name"].value # template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + self.event_id += 1 return template - def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): if ckey.startswith("ebsd") and inp[ckey] != {}: @@ -659,6 +659,30 @@ def process_roi_phase_ipfs_threed(self, return template def process_roi_eds_spectra(self, inp: dict, template: dict) -> dict: + for ckey in inp.keys(): + if ckey.startswith("eds_spc") and inp[ckey] != {}: + trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ + f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \ + f"SPECTRUM_SET[spectrum_set1]/DATA[spectrum_zerod]" + # TODO::check if its a spectrum_zerod !!! 
+ template[f"{trg}/@NX_class"] = "NXdata" # TODO::should be autodecorated + template[f"{trg}/description"] = inp[ckey].tmp["source"] + template[f"{trg}/title"] = f"Region-of-interest overview image" + template[f"{trg}/@signal"] = "intensity" + template[f"{trg}/@axes"] = ["axis_energy"] + template[f"{trg}/intensity"] \ + = {"compress": inp[ckey].tmp["spectrum_zerod/intensity"].value, + "strength": 1} + template[f"{trg}/intensity/@long_name"] = f"Signal" + template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0) + template[f"{trg}/AXISNAME[axis_energy]"] \ + = {"compress": inp[ckey].tmp[f"spectrum_zerod/axis_energy"].value, + "strength": 1} + template[f"{trg}/AXISNAME[axis_energy]/@long_name"] \ + = inp[ckey].tmp[f"spectrum_zerod/axis_energy@long_name"].value + # template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" + # TODO::increment spectrum_set1 + self.event_id += 1 return template def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: From 458e1296344a78674eda82d7d71543d23954bd68 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 16 Jan 2024 00:37:51 +0100 Subject: [PATCH 74/84] Added simple id management for objects written to HDF5, bugfixes for pdi/InGe, ikz/AlGaO, and ikz/GeSi example with EBSD maps also working --- debug/spctrscpy.batch.sh | 6 +- debug/spctrscpy.dev.ipynb | 68 +++++++++---------- .../readers/em/examples/ebsd_database.py | 3 +- .../readers/em/subparsers/hfive_apex.py | 7 +- .../readers/em/subparsers/nxs_pyxem.py | 65 ++++++++++++++---- 5 files changed, 97 insertions(+), 52 deletions(-) diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh index a82b4501b..089ae2028 100755 --- a/debug/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -1,15 +1,15 @@ #!/bin/bash datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/" -#datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/" +datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/" # apex examples ikz, pdi # examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" +examples="InGaN_nanowires_spectra.edaxh5" examples="AlGaO.nxs" examples="GeSi.nxs" -examples="VInP_108_L2.h5" -examples="InGaN_nanowires_spectra.edaxh5" +#examples="VInP_108_L2.h5" for example in $examples; do echo $example diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index 5d1d5327e..8eb7d3d4e 100644 --- a/debug/spctrscpy.dev.ipynb +++ b/debug/spctrscpy.dev.ipynb @@ -16,34 +16,6 @@ "from ase.data import chemical_symbols" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e721dee-7b6f-4dd0-b50e-ea8ff05d4682", - "metadata": {}, - "outputs": [], - "source": [ - "xray_lines = {}\n", - "for symbol in chemical_symbols[1:]:\n", - " # print(f\"{symbol}\")\n", - " for name, line in xraydb.xray_lines(symbol).items():\n", - " xray_lines[f\"{symbol}-{name}\"] = line.energy\n", - " # print(f\"{name}, {line.energy} eV\")\n", - "print(len(xray_lines))\n", - "\n", - "def get_xray_line_candidates(e_min=1200., e_max=1250.):\n", - " cand = []\n", - " for key, val in xray_lines.items():\n", - " if val < e_min:\n", - " continue\n", - " if val > e_max:\n", - " continue\n", - " cand.append(key)\n", - " return cand\n", - "\n", - "print(get_xray_line_candidates())" - ] - }, { "cell_type": "code", 
"execution_count": null, @@ -52,17 +24,17 @@ "outputs": [], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", - "fnms = [(\"ikz\", \"VInP_108_L2.h5\"),\n", + "fnms = [(\"pdi\", \"InGaN_nanowires_spectra.edaxh5\"),\n", + " (\"ikz\", \"AlGaO.nxs\"),\n", " (\"ikz\", \"GeSi.nxs\"),\n", " (\"ikz\", \"GeSn_13.nxs\"),\n", - " (\"ikz\", \"AlGaO.nxs\"),\n", + " (\"ikz\", \"VInP_108_L2.h5\"),\n", " (\"bruker\", \"pynx/46_ES-LP_L1_brg.bcf\"),\n", " (\"emd\", \"pynx/1613_Si_HAADF_610_kx.emd\"),\n", " (\"digitalmicrograph\", \"pynx/EELS_map_2_ROI_1_location_4.dm3\"),\n", - " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\"),\n", - " (\"pdi\", \"InGaN_nanowires_spectra.edaxh5\")]\n", + " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", "# pyUSID, HSMA\n", - "case = 0 # len(fnms) - 1 # len(fnms) - 1\n", + "case = 2 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] @@ -84,7 +56,7 @@ "metadata": {}, "outputs": [], "source": [ - "H5Web(\"debug.InGaN_nanowires_spectra.edaxh5.nxs\")" + "H5Web(f\"debug.{fnms[case][1]}.nxs\")" ] }, { @@ -171,6 +143,34 @@ "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e721dee-7b6f-4dd0-b50e-ea8ff05d4682", + "metadata": {}, + "outputs": [], + "source": [ + "xray_lines = {}\n", + "for symbol in chemical_symbols[1:]:\n", + " # print(f\"{symbol}\")\n", + " for name, line in xraydb.xray_lines(symbol).items():\n", + " xray_lines[f\"{symbol}-{name}\"] = line.energy\n", + " # print(f\"{name}, {line.energy} eV\")\n", + "print(len(xray_lines))\n", + "\n", + "def get_xray_line_candidates(e_min=1200., e_max=1250.):\n", + " cand = []\n", + " for key, val in xray_lines.items():\n", + " if val < e_min:\n", + " continue\n", + " if val > e_max:\n", + " continue\n", + " cand.append(key)\n", + " return cand\n", + "\n", + "print(get_xray_line_candidates())" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py index e140b570c..35f14f05a 100644 --- a/pynxtools/dataconverter/readers/em/examples/ebsd_database.py +++ b/pynxtools/dataconverter/readers/em/examples/ebsd_database.py @@ -316,4 +316,5 @@ ASSUME_PHASE_NAME_TO_SPACE_GROUP = {"Silver": 225, "Copper": 225, "Ni (Nickel)": 225, - "Face Centered Cubic": 225} + "Face Centered Cubic": 225, + "Ge (Germanium)": 225} # Ge (Germanium), really? 
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 9412ae0b6..a7e5041f4 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -123,8 +123,9 @@ def parse_and_normalize(self): self.parse_and_normalize_group_ebsd_header(h5r, ckey) self.parse_and_normalize_group_ebsd_phases(h5r, ckey) self.parse_and_normalize_group_ebsd_data(h5r, ckey) - self.parse_and_normalize_group_ebsd_complete(ckey) + self.parse_and_normalize_group_ebsd_check(ckey) self.cache_id += 1 + continue # TODO: conceptually the content of the three # above-mentioned groups has and uses for some @@ -270,6 +271,8 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str): space_group = None if phase_name in ASSUME_PHASE_NAME_TO_SPACE_GROUP.keys(): space_group = ASSUME_PHASE_NAME_TO_SPACE_GROUP[phase_name] + else: + raise ValueError(f"{phase_name} is not in ASSUME_PHASE_NAME_TO_SPACE_GROUP !") self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group if len(self.tmp[ckey]["space_group"]) > 0: @@ -361,7 +364,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str): # almost two decades of commercialization of the technique now get_scan_point_coords(self.tmp[ckey]) - def parse_and_normalize_group_ebsd_complete(ckey: str): + def parse_and_normalize_group_ebsd_check(self, ckey: str): """Check if all relevant data for EBSD are available, if not clear the cache.""" # TODO::implement check and clearing procedure pass diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index ee424fab2..87690b4a9 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -59,6 +59,7 @@ from pynxtools.dataconverter.readers.em.subparsers.hfive_ebsd import HdfFiveCommunityReader from pynxtools.dataconverter.readers.em.subparsers.hfive_emsoft import HdfFiveEmSoftReader from pynxtools.dataconverter.readers.em.subparsers.hfive_dreamthreed import HdfFiveDreamThreedReader +from pynxtools.dataconverter.readers.em.concepts.nxs_image_r_set import NxImageRealSpaceSet PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] @@ -92,7 +93,11 @@ def __init__(self, entry_id: int = 1, input_file_name: str = ""): else: self.entry_id = 1 self.file_path = input_file_name - self.event_id = 1 + self.id_mgn = {"event": 1, + "event_img": 1, + "event_spc": 1, + "roi": 1, + "eds_img": 1} self.cache = {"is_filled": False} def parse(self, template: dict) -> dict: @@ -313,8 +318,9 @@ def process_roi_overview_ebsd_based(self, def process_roi_overview_eds_based(self, inp: dict, template: dict) -> dict: - trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/EVENT_DATA_EM" \ - f"[event_data_em{self.event_id}]/IMAGE_R_SET[image_r_set1]/DATA[image_twod]" + trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ + f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ + f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]/DATA[image_twod]" template[f"{trg}/@NX_class"] = "NXdata" # TODO::should be autodecorated template[f"{trg}/description"] = inp.tmp["source"] template[f"{trg}/title"] = f"Region-of-interest overview image" @@ -333,8 +339,8 @@ def process_roi_overview_eds_based(self, = {"compress": inp.tmp[f"image_twod/axis_{dim[0]}"].value, "strength": 1} 
template[f"{trg}/AXISNAME[axis_{dim[0]}]/@long_name"] \ = inp.tmp[f"image_twod/axis_{dim[0]}@long_name"].value - # template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" - self.event_id += 1 + self.id_mgn["event_img"] += 1 + self.id_mgn["event"] += 1 return template def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict: @@ -662,8 +668,9 @@ def process_roi_eds_spectra(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): if ckey.startswith("eds_spc") and inp[ckey] != {}: trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ - f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \ - f"SPECTRUM_SET[spectrum_set1]/DATA[spectrum_zerod]" + f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ + f"SPECTRUM_SET[spectrum_set{self.id_mgn['event_spc']}]/" \ + f"DATA[spectrum_zerod]" # TODO::check if its a spectrum_zerod !!! template[f"{trg}/@NX_class"] = "NXdata" # TODO::should be autodecorated template[f"{trg}/description"] = inp[ckey].tmp["source"] @@ -673,17 +680,51 @@ def process_roi_eds_spectra(self, inp: dict, template: dict) -> dict: template[f"{trg}/intensity"] \ = {"compress": inp[ckey].tmp["spectrum_zerod/intensity"].value, "strength": 1} - template[f"{trg}/intensity/@long_name"] = f"Signal" + template[f"{trg}/intensity/@long_name"] \ + = inp[ckey].tmp["spectrum_zerod/intensity@long_name"].value # f"Signal" template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0) template[f"{trg}/AXISNAME[axis_energy]"] \ = {"compress": inp[ckey].tmp[f"spectrum_zerod/axis_energy"].value, "strength": 1} template[f"{trg}/AXISNAME[axis_energy]/@long_name"] \ - = inp[ckey].tmp[f"spectrum_zerod/axis_energy@long_name"].value - # template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit}" - # TODO::increment spectrum_set1 - self.event_id += 1 + = inp[ckey].tmp[f"spectrum_zerod/axis_energy@long_name"].value + self.id_mgn["event_spc"] += 1 + self.id_mgn["event"] += 1 return template def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: + for ckey in inp.keys(): + if ckey.startswith("eds_map") and inp[ckey] != {}: + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/" \ + f"eds/indexing/" + template[f"{trg}/source"] = inp.tmp["source"] + for img in inp.tmp["IMAGE_R_SET"]: + if not isinstance(img, NxImageRealSpaceSet): + continue + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/" \ + f"indexing/IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]/" + template[f"{trg}/description"] \ + = img.tmp["description"] + template[f"{trg}/iupac_line_candidates"] \ + = img.tmp["iupac_line_candidates"] + template[f"{trg}/@NX_class"] = "NXdata" # TODO::should be autodecorated + template[f"{trg}/title"] = f"EDS map" + template[f"{trg}/@signal"] = "intensity" + template[f"{trg}/@axes"] = ["axis_y", "axis_x"] + template[f"{trg}/intensity"] \ + = {"compress": img.tmp["image_twod/intensity"].value, + "strength": 1} + template[f"{trg}/intensity/@long_name"] = f"Signal" + dims = [("x", 0), ("y", 1)] + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim[0]}_indices]"] \ + = np.uint32(dim[1]) + template[f"{trg}/AXISNAME[axis_{dim[0]}]"] \ + = {"compress": img.tmp[f"image_twod/axis_{dim[0]}"].value, + "strength": 1} + template[f"{trg}/AXISNAME[axis_{dim[0]}]/@long_name"] \ + = img.tmp[f"image_twod/axis_{dim[0]}"].value + self.id_mgn["eds_img"] += 1 + self.id_mgn["roi"] += 1 + return template From bfcd66672b022e106bfb30754e962398eaade017 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 16 Jan 2024 12:12:49 +0100 
Subject: [PATCH 75/84] Fixed VInP reading and 2D EDS element maps showing up now, started implementation of Velox EMD parser v2 --- debug/apex.dev.ipynb | 6 +- debug/spctrscpy.batch.sh | 2 +- debug/spctrscpy.dev.ipynb | 706 +----------------- pynxtools/dataconverter/readers/em/reader.py | 5 + .../readers/em/subparsers/hfive_apex.py | 38 +- .../readers/em/subparsers/nxs_pyxem.py | 21 +- .../readers/em/subparsers/rsciio_velox.py | 81 ++ 7 files changed, 139 insertions(+), 720 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py diff --git a/debug/apex.dev.ipynb b/debug/apex.dev.ipynb index 8d5a55da3..bba0eabf5 100644 --- a/debug/apex.dev.ipynb +++ b/debug/apex.dev.ipynb @@ -19,9 +19,11 @@ "metadata": {}, "outputs": [], "source": [ + "# fpath = f\"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/\" \\\n", + "# f\"data/development_spctrscpy/pdi/APEX-single-spectrum/InGaN_nanowires_spectra.edaxh5\"\n", "fpath = f\"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/\" \\\n", - " f\"data/development_spctrscpy/pdi/APEX-single-spectrum/InGaN_nanowires_spectra.edaxh5\"\n", - "# H5Web(fpath)" + " f\"data/development_spctrscpy/ikz/GeSn_13.nxs\"\n", + "H5Web(fpath)" ] }, { diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh index 089ae2028..15f825d90 100755 --- a/debug/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -9,7 +9,7 @@ datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/ examples="InGaN_nanowires_spectra.edaxh5" examples="AlGaO.nxs" examples="GeSi.nxs" -#examples="VInP_108_L2.h5" +examples="VInP_108_L2.h5" for example in $examples; do echo $example diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index 8eb7d3d4e..a1532ce28 100644 --- a/debug/spctrscpy.dev.ipynb +++ b/debug/spctrscpy.dev.ipynb @@ -29,12 +29,14 @@ " (\"ikz\", \"GeSi.nxs\"),\n", " (\"ikz\", \"GeSn_13.nxs\"),\n", " (\"ikz\", \"VInP_108_L2.h5\"),\n", + " (\"fhi\", \"CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\"),\n", + " (\"adrien\", \"1613_Si_HAADF_610_kx.emd\"),\n", " (\"bruker\", \"pynx/46_ES-LP_L1_brg.bcf\"),\n", " (\"emd\", \"pynx/1613_Si_HAADF_610_kx.emd\"),\n", " (\"digitalmicrograph\", \"pynx/EELS_map_2_ROI_1_location_4.dm3\"),\n", " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", "# pyUSID, HSMA\n", - "case = 2 # len(fnms) - 1 # len(fnms) - 1\n", + "case = 5 # 5 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] @@ -56,704 +58,30 @@ "metadata": {}, "outputs": [], "source": [ + "# resulting NeXus artifact\n", "H5Web(f\"debug.{fnms[case][1]}.nxs\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "6b883a7a-f6aa-4151-8ee4-f3c8c79ccc72", + "id": "75b32c8f-8efa-4b40-bfc8-6f95300902ea", "metadata": {}, "outputs": [], "source": [ - "with h5py.File(fnm, \"r\") as h5r:\n", - " src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\" # /ROIs/InL.dat\"\n", - " tmp = h5r[f\"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS\"][0][\"ResolutionX\"]\n", - " # tmp = h5r[f\"{src}\"]\n", - " # for key in h5r[src].keys():\n", - " # tmp = h5r[f\"{src}/{key}\"]\n", - " print(f\"{type(tmp)}, {np.shape(tmp)}, {tmp.dtype}\")" + "objs = emd.file_reader(fnm)\n", + "print(len(objs))\n", + "for obj in objs:\n", + " if not isinstance(obj, dict):\n", + " raise ValueError(\"No dict!\")\n", + " print(obj.keys())\n", + " for key, val in obj.items():\n", + " print(f\"{key}, {np.shape(val)}\")\n", + " 
print(obj[\"metadata\"])\n", + " # print(obj[\"original_metadata\"])\n", + "# print(f\"{type(objs[0])}\")\n", + "# print(objs[0].keys())" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e99588fe-67dc-48df-8d60-28187d8daa0a", - "metadata": {}, - "outputs": [], - "source": [ - "with h5py.File(fnm, \"r\") as h5r:\n", - " # src = \"/InGaN_nanowires_spectra/InGaN nanowires/Area 1/Full Area 1\"\n", - " src = \"/VInP/VInP_108_L2/Area 10/LineScan 1/\"\n", - " if f\"{src}/LSD\" in h5r.keys():\n", - " # for key, val in enumerate(h5r[f\"{src}/LSD\"].attrs.items()):\n", - " # print(f\"{key}, {val}\")\n", - " tmp = np.asarray(h5r[f\"{src}/LSD\"][0])\n", - " print(f\"{type(tmp)}, {np.shape(tmp)}, {tmp.dtype}\")\n", - " for idx in np.arange(0, 2):\n", - " # src/ROIs/ is the integral\n", - " print(f\"{idx}\\t\\tIn L\\t\\t{np.sum(tmp[idx,323:335 + 1])}\")\n", - " print(f\"{idx}\\t\\tK K\\t\\t{np.sum(tmp[idx,326:337 + 1])}\")\n", - " print(f\"{idx}\\t\\tP K\\t\\t{np.sum(tmp[idx,197:206 + 1])}\")\n", - " # plt.plot(np.arange(323, 335 + 1), tmp[0,323:335 + 1])\n", - " plt.plot(np.arange(197, 206 + 1), tmp[0,197:206 + 1])\n", - " # for idx, val in enumerate(tmp.dtype.names):\n", - " # print(f\"{idx}, {val}, {tmp[val][0]}\")\n", - "\n", - " \"\"\"\n", - " if f\"{src}/SPC\" in h5r.keys():\n", - " spc = np.asarray(h5r[f\"{src}/SPC\"])\n", - " # print(f\"{type(spc)}, {np.shape(spc)}, {spc.dtype}\")\n", - " reqs = [\"eVOffset\", \"evPch\"] # , \"evPerChannel\", \"DeadTime\", \"CountRate\"]\n", - " for req in reqs: # \"\"SpectrumCounts\", \"\n", - " if req in spc.dtype.names:\n", - " print(f\"{req}, {spc[req][0]}\")\n", - " else:\n", - " raise ValueError(f\"Unable to find metadata entry {req}!\")\n", - " # for idx, val in enumerate(spc.dtype.names):\n", - " # print(f\"{idx}, {val}, {spc[val][0]}\")\n", - " print(\"DataStart\" in spc.dtype.names)\n", - " print(f\"{type(spc['SpectrumCounts'][0])}, {np.shape(spc['SpectrumCounts'][0])}, {spc['SpectrumCounts'][0].dtype}\") # [0])\n", - " \"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b58972c-dcd3-45ea-9fae-36c81de1ee9e", - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(dat[0, :])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "441aaf8f-88df-47ea-9516-44f9666d717b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc341bf3-fefa-4a69-84d5-5abe576f2b29", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e721dee-7b6f-4dd0-b50e-ea8ff05d4682", - "metadata": {}, - "outputs": [], - "source": [ - "xray_lines = {}\n", - "for symbol in chemical_symbols[1:]:\n", - " # print(f\"{symbol}\")\n", - " for name, line in xraydb.xray_lines(symbol).items():\n", - " xray_lines[f\"{symbol}-{name}\"] = line.energy\n", - " # print(f\"{name}, {line.energy} eV\")\n", - "print(len(xray_lines))\n", - "\n", - "def get_xray_line_candidates(e_min=1200., e_max=1250.):\n", - " cand = []\n", - " for key, val in xray_lines.items():\n", - " if val < e_min:\n", - " continue\n", - " if val > e_max:\n", - " continue\n", - " cand.append(key)\n", - " return cand\n", - "\n", - "print(get_xray_line_candidates())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0a7f9ac-1ade-43d7-aedd-b2572d163b34", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "from typing import Dict\n", - "\n", - "\n", - "class NxObject:\n", - " \"\"\"An object in a graph e.g. 
an attribute, dataset, or group in NeXus.\"\"\"\n", - "\n", - " def __init__(self,\n", - " name: str = None,\n", - " unit: str = None,\n", - " dtype=str,\n", - " value=None,\n", - " **kwargs):\n", - " if (name is not None) and (name == \"\"):\n", - " raise ValueError(f\"Value for argument name needs to be a non-empty string !\")\n", - " if (unit is not None) and (unit == \"\"):\n", - " raise ValueError(f\"Value for argument unit needs to be a non-empty string !\")\n", - " if (dtype is not None) and isinstance(dtype, type) is False:\n", - " raise ValueError(f\"Value of argument dtype must not be None \" \\\n", - " f\" and a valid, ideally a numpy datatype !\")\n", - " # self.doc = None # docstring\n", - " self.name = name # name of the field\n", - " self.unit = unit # not unit category but actual unit\n", - " # use special values \"unitless\" for NX_UNITLESS (e.g. 1) and\n", - " # \"dimensionless\" for NX_DIMENSIONLESS (e.g. 1m / 1m)\n", - " self.dtype = dtype # use np.dtype if possible\n", - " if value is None or dtype is str:\n", - " self.unit = \"unitless\"\n", - " if value is not None:\n", - " self.value = value\n", - " # value should be a numpy scalar, tensor, or string if possible\n", - " self.eqv_hdf = None\n", - " if \"eqv_hdf\" in kwargs:\n", - " if kwargs[\"eqv_hdf\"] in [\"group\", \"dataset\", \"attribute\"]:\n", - " self.eqv_hdf = kwargs[\"eqv_hdf\"]\n", - " else:\n", - " raise ValueError(f\"Value of keyword argument eqv_hdf needs to be one of grp, dset, attr !\")\n", - "\n", - " def __repr__(self):\n", - " \"\"\"Report values.\"\"\"\n", - " return f\"Name: {self.name}, unit: {self.unit}, dtype: {self.dtype}, eqv_hdf: {self.eqv_hdf}\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "97c3a10f-903a-4d7e-883b-779c6c34f4a0", - "metadata": {}, - "outputs": [], - "source": [ - "NX_IMAGE_REAL_SPACE_SET_HDF_PATH = [\"image_oned/axis_x-field\",\n", - "\"image_oned/axis_x@long_name-attribute\",\n", - "\"image_oned/intensity-field\",\n", - "\"image_threed/axis_x-field\",\n", - "\"image_threed/axis_x@long_name-attribute\",\n", - "\"image_threed/axis_y-field\",\n", - "\"image_threed/axis_y@long_name-attribute\",\n", - "\"image_threed/axis_z-field\",\n", - "\"image_threed/axis_z@long_name-attribute\",\n", - "\"image_threed/intensity-field\",\n", - "\"image_twod/axis_x-field\",\n", - "\"image_twod/axis_x@long_name-attribute\",\n", - "\"image_twod/axis_y-field\",\n", - "\"image_twod/axis_y@long_name-attribute\",\n", - "\"image_twod/intensity-field\",\n", - "\"stack_oned/axis_image_identifier-field\",\n", - "\"stack_oned/axis_image_identifier@long_name-attribute\",\n", - "\"stack_oned/axis_x-field\",\n", - "\"stack_oned/axis_x@long_name-attribute\",\n", - "\"stack_oned/intensity-field\",\n", - "\"stack_threed/axis_image_identifier-field\",\n", - "\"stack_threed/axis_image_identifier@long_name-attribute\",\n", - "\"stack_threed/axis_x-field\",\n", - "\"stack_threed/axis_x@long_name-attribute\",\n", - "\"stack_threed/axis_y-field\",\n", - "\"stack_threed/axis_y@long_name-attribute\",\n", - "\"stack_threed/axis_z-field\",\n", - "\"stack_threed/axis_z@long_name-attribute\",\n", - "\"stack_threed/intensity-field\",\n", - "\"stack_twod/axis_image_identifier-field\",\n", - "\"stack_twod/axis_image_identifier@long_name-attribute\",\n", - "\"stack_twod/axis_x-field\",\n", - "\"stack_twod/axis_x@long_name-attribute\",\n", - "\"stack_twod/axis_y-field\",\n", - "\"stack_twod/axis_y@long_name-attribute\",\n", - "\"stack_twod/intensity-field\"]\n", - "\n", - "class 
NxEmImageRealSpaceSet():\n",
    "    def __init__(self):\n",
    "        self.tmp: Dict = {}\n",
    "        for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH:\n",
    "            if entry.endswith(\"-field\") is True:\n",
    "                self.tmp[entry[0:len(entry)-len(\"-field\")]] = NxObject(eqv_hdf=\"dataset\")\n",
    "            elif entry.endswith(\"-attribute\") is True:\n",
    "                self.tmp[entry[0:len(entry)-len(\"-attribute\")]] = NxObject(eqv_hdf=\"attribute\")\n",
    "            else:\n",
    "                self.tmp[entry[0:len(entry)-len(\"-group\")]] = NxObject(eqv_hdf=\"group\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bbbaa03-0aac-43fb-941a-f63910496fa3",
   "metadata": {},
   "outputs": [],
   "source": [
    "tmp = NxEmImageRealSpaceSet()\n",
    "# print(tmp.tmp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58052fb7-723f-476d-a8ca-df99efffcc05",
   "metadata": {},
   "outputs": [],
   "source": [
    "with h5py.File(fnm, \"r\") as h5r:\n",
    "    src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\"\n",
    "    if f\"{src}/FOVIMAGECOLLECTIONPARAMS\" in h5r.keys():\n",
    "        ipr = np.asarray(h5r[f\"{src}/FOVIPR\"])\n",
    "        print(f\"{type(ipr)}, {np.shape(ipr)}, {ipr.dtype}\")\n",
    "        print(ipr[\"MicronsPerPixelY\"][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f5892cb-476e-453d-99e0-befb766fa9ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "with h5py.File(fnm, \"r\") as h5r:\n",
    "    src = \"/VInP/VInP_108_L2/Area 10/Live Map 1\"\n",
    "    trg = \"SPD\"\n",
    "    reqs = [\"MicronPerPixelX\", \"MicronPerPixelY\", \"NumberOfLines\", \"NumberOfPoints\", \"NumberofChannels\"]\n",
    "    for req in reqs:\n",
    "        if req not in h5r[f\"{src}/{trg}\"].attrs.keys():\n",
    "            # also check for shape\n",
    "            raise ValueError(f\"Required attribute named {req} not found in {src}/{trg} !\")\n",
    "    nyxe = {\"y\": h5r[f\"{src}/{trg}\"].attrs[\"NumberOfLines\"][0],\n",
    "            \"x\": h5r[f\"{src}/{trg}\"].attrs[\"NumberOfPoints\"][0],\n",
    "            \"e\": h5r[f\"{src}/{trg}\"].attrs[\"NumberofChannels\"][0]}\n",
    "    print(nyxe)\n",
    "    # the native APEX SPD concept instance is a two-dimensional array of arrays of length e (n_energy_bins),\n",
    "    # i.e. EDAX likely uses in their C(++) code a vector of vectors or something equivalent; either way we face\n",
    "    # nested C arrays of the base data type (here (u)int16),\n",
    "    # even worse, chunked in HDF5, thus the e-long arrays are just some payload inside the compressed\n",
    "    # chunk without some extra logic to resolve the third (energy) dimension:\n",
    "    # how to reshape this efficiently without creating unnecessary copies?\n",
    "    # the following code is ugly as it needs a maximum large copy of the dataset\n",
    "    spd_edax = h5r[f\"{src}/{trg}\"]\n",
    "    print(f\"edax: {np.shape(spd_edax)}, {type(spd_edax)}, {spd_edax.dtype}\")\n",
    "    spd_naive = np.zeros((nyxe[\"y\"], nyxe[\"x\"], nyxe[\"e\"]), np.uint16)"
   ]
  },
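  {
   "cell_type": "code",
   "execution_count": null,
   "id": "spd-chunkwise-reshape-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A chunk-wise alternative to the maximum-size copy above: a minimal sketch,\n",
    "# assuming SPD is an HDF5-chunked dataset whose per-pixel elements are e-long\n",
    "# (u)int16 arrays (which numpy exposes as a trailing energy axis) and that\n",
    "# h5py >= 3.0 is available for Dataset.iter_chunks; the path and the nyxe\n",
    "# dict are reused from the SPD cell above:\n",
    "with h5py.File(fnm, \"r\") as h5r:\n",
    "    dst = h5r[\"/VInP/VInP_108_L2/Area 10/Live Map 1/SPD\"]\n",
    "    spd = np.zeros((nyxe[\"y\"], nyxe[\"x\"], nyxe[\"e\"]), np.uint16)\n",
    "    for slc in dst.iter_chunks():\n",
    "        # slc addresses one compressed chunk, i.e. a (ny, nx) block of\n",
    "        # e-long arrays, so only one chunk is decompressed at a time\n",
    "        spd[slc] = dst[slc]\n",
    "    print(f\"{np.shape(spd)}, {spd.dtype}\")"
   ]
  },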
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pil-tifftags-fragment",
   "metadata": {},
   "outputs": [],
   "source": [
    "with Image.open(fnm, mode=\"r\") as fp:\n",
    "    # for key in fp.tag_v2:\n",
    "    #     if key not in TAGS:\n",
    "    #         raise ValueError(f\"tag {key}, is not in PIL.TiffTAGS !\")\n",
    "    # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2}\n",
    "    # for key, val in self.tags.items():\n",
    "    #     print(f\"{key}, {val}\")\n",
    "    nparr = np.array(fp)\n",
    "    print(f\"{type(nparr)}\")\n",
    "    print(f\"{nparr.dtype}\")\n",
    "    print(f\"{np.shape(nparr)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9ef2a35-a260-4a54-9b83-eae1d588966f",
   "metadata": {},
   "outputs": [],
   "source": [
    "with Image.open(fnm, mode=\"r\") as fp:\n",
    "    if True is False:\n",
    "        czi_keys = [34118, 34119]\n",
    "        for czi_key in czi_keys:\n",
    "            if czi_key in fp.tag_v2:\n",
    "                print(f\"Found czi_key {czi_key}...\")\n",
    "                utf = fp.tag[czi_key]\n",
    "                print(type(utf))\n",
    "                if len(utf) == 1:\n",
    "                    print(utf[0])\n",
    "        # exit(1)\n",
    "    tfs_keys = [34682]\n",
    "    for tfs_key in tfs_keys:\n",
    "        if tfs_key in fp.tag_v2:\n",
    "            print(f\"Found tfs_key {tfs_key}...\")\n",
    "            utf = fp.tag[tfs_key]\n",
    "            print(type(utf))\n",
    "            if len(utf) == 1:\n",
    "                print(utf[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28687c0e-6f14-484c-b511-3a4906d9672e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8ada062-e308-4288-8f00-b3e620f3c890",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/\n",
    "def sort_tuple(tup):\n",
    "    # convert the list of tuples to a numpy array with data type (object, int)\n",
    "    arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n",
    "    # get the indices that would sort the array based on the second column\n",
    "    indices = np.argsort(arr['col2'])\n",
    "    # use the resulting indices to sort the array\n",
    "    sorted_arr = arr[indices]\n",
    "    # convert the sorted numpy array back to a list of tuples\n",
    "    sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n",
    "    return sorted_tup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d27df293-626c-4d37-80df-96c182d4f401",
   "metadata": {},
   "outputs": [],
   "source": [
    "def if_str_represents_float(s):\n",
    "    try:\n",
    "        return isinstance(float(s), float)\n",
    "        # return str(float(s)) == s\n",
    "    except ValueError:\n",
    "        return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f647fa79-330b-48b2-8360-f92fc5ead187",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"10\".isdigit()\n",
    "# isinstance(float(\"8.99306e-010\"), float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TFS sections based on IKZ ALN_baoh_021.tif example\n",
    "import mmap\n",
    "\n",
    "tfs_section_names = [\"[User]\",\n",
    "                     \"[System]\",\n",
    "                     \"[Beam]\",\n",
    "                     \"[EBeam]\",\n",
    "                     \"[GIS]\",\n",
    "                     \"[Scan]\",\n",
    "                     \"[EScan]\",\n",
    "                     \"[Stage]\",\n",
    "                     \"[Image]\",\n",
    "                     \"[Vacuum]\",\n",
    "                     \"[Specimen]\",\n",
    "                     \"[Detectors]\",\n",
    "                     \"[T2]\",\n",
    "                     \"[Accessories]\",\n",
    "                     \"[EBeamDeceleration]\",\n",
    "                     \"[CompoundLensFilter]\",\n",
    "                     \"[PrivateFei]\",\n",
    "                     \"[HiResIllumination]\",\n",
    "                     \"[EasyLift]\",\n",
    "                     \"[HotStageMEMS]\",\n",
    "                     \"[HotStage]\",\n",
    "                     \"[HotStageHVHS]\",\n",
    "                     \"[ColdStage]\"]\n",
    "\n",
    "tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n",
    "                                    \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n",
    "tfs_section_offsets = {}\n",
    "\n",
    "with open(fnm, 'rb', 0) as file:\n",
    "    s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n",
    "    for section_name in tfs_section_names:\n",
    "        pos = s.find(bytes(section_name, \"utf8\"))  # != -1\n",
    "        tfs_section_offsets[section_name] = pos\n",
    "    print(tfs_section_offsets)\n",
    "\n",
    "    # define search offsets\n",
    "    tpl = []\n",
    "    for key, value in tfs_section_offsets.items():\n",
    "        tpl.append((key, 
value))\n", - " # print(tpl)\n", - " tpl = sort_tuple(tpl)\n", - " print(tpl)\n", - " # if section_name == \"[System]\":\n", - " pos_s = None\n", - " pos_e = None\n", - " for idx in np.arange(0, len(tpl)):\n", - " if tpl[idx][0] != \"[System]\":\n", - " continue\n", - " else:\n", - " pos_s = tpl[idx][1]\n", - " if idx <= len(tpl) - 1:\n", - " pos_e = tpl[idx + 1][1]\n", - " break\n", - " print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n", - " # fish metadata of e.g. the system section\n", - " section_metadata = {}\n", - " for term in tfs_section_details[\"[System]\"]:\n", - " \n", - " s.seek(pos_s, 0)\n", - " pos = s.find(bytes(term, \"utf8\"))\n", - " if pos < pos_e: # check if pos_e is None\n", - " s.seek(pos, 0)\n", - " section_metadata[f\"{term}\"] = f\"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}\"\n", - " if if_str_represents_float(section_metadata[f\"{term}\"]) is True:\n", - " section_metadata[f\"{term}\"] = np.float64(section_metadata[f\"{term}\"])\n", - " elif section_metadata[f\"{term}\"].isdigit() is True:\n", - " section_metadata[f\"{term}\"] = np.int64(section_metadata[f\"{term}\"])\n", - " else:\n", - " pass\n", - " # print(f\"{term}, {pos}, {pos + len(term) + 1}\")\n", - " # tfs_section_offswr\n", - " # file.seek(pos, 0) #\n", - " print(section_metadata)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2f3eb287-8f55-424c-a016-a07fc59f068a", - "metadata": {}, - "outputs": [], - "source": [ - "'2'.isdigit()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1341e30-fcce-4a3d-a099-d342b8bbe318", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 5a4f2b0b3..2bcbe2813 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -27,6 +27,7 @@ from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser +from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper @@ -132,6 +133,10 @@ def read(self, # subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0]) # subparser.parse(template, verbose=True) + # sub_parser = "velox_emd" + # subparser = RsciioVeloxSubParser(entry_id, file_paths[0]) + # subparser.parse(template, verbose=True) + # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") # continue diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index a7e5041f4..1dd844184 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -116,6 +116,7 @@ def parse_and_normalize(self): # get oim_maps, live_maps, or full area if available area_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}"]) for area_grp_nm in area_grp_nms: + if area_grp_nm.startswith("OIM Map"): self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" print(f"Parsing 
{self.prfx}")
@@ -125,26 +126,23 @@ def parse_and_normalize(self):
                         self.parse_and_normalize_group_ebsd_data(h5r, ckey)
                         self.parse_and_normalize_group_ebsd_check(ckey)
                         self.cache_id += 1
-                        continue
-
-                # TODO: conceptually the content of the three
-                # above-mentioned groups has and uses for some
-                # groups the same formatting but fundamentally I assume
-                # that they are three different concepts:
-                # free draw polygonal region choosen via GUI interaction
-                # over which one integrates
-                # full area rectangular region typically used
-                # i.e. difference between free draw and full area
-                # is integration region
-                # live map rectangular region plus child concepts
+
+                # EDAX APEX distinguishes different concepts/groups:
+                # FOV*, full area rectangular region plus siblings
+                # Live Map *, rectangular region plus children
+                # Free Draw *, polygonal region via GUI interaction
+                # OIM Map *, EBSD, orientation data + metadata children
                 # with (sum) spectrum SPC, spectrum stack (SPD)
                 # with possibly different numbers of energy bins and
-                # Live Map */ROIs for the individual elements aka
-                # "element mappings"
+                # Live Map */ROIs for the individual elements, i.e. EDS maps
+                # TODO here means not planned for immediate implementation
                 # TODO: LIVENETMAPS groups are not parsed because not requested
                 # TODO: EBSD+EDS groups are not parsed because of their internal structure
-                # TODO: ZAF WtLineScan 2
-                # mirrors concept tree behind an OIM Map and Live Map
+                # TODO: ZAF WtLineScan 2 and other custom concepts, like e.g.
+                # /GeSn/GeSn_404b/Added Spectra/GeSn | GeSn_404b |
+                # Area 1 | ZAF AtLineScan 1 | 2023-01-16-15-37-41,
+                # mirror a concept tree similar to those of the OIM Map and
+                # Live Map concept trees implemented here
                 if area_grp_nm.startswith("Full Area") \
                         or area_grp_nm.startswith("Selected Area"):
                     # TODO: Selected Area groups have a REGION and I assume that this
@@ -182,7 +180,9 @@ def parse_and_normalize(self):
             # "free form? or (which I assume) orthogonal line grid inside the FOV
             # TODO::currently I assume that the internal organization of LineScan and ROILineScan
             # groups is the same TODO but maybe the physical ROI which they reference
-            # respective differs (TODO:: LineScan refers to FOV that is in the parent of the group)
+            # respectively differs!?
+ # (TODO:: LineScan refers to the FOV that is + # in the parent of the LineScan group) self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}" self.parse_and_normalize_eds_line_lsd(h5r) self.parse_and_normalize_eds_line_rois(h5r) @@ -596,6 +596,8 @@ def parse_and_normalize_eds_area_rois(self, fp): # theoretical candidates within integrated energy region [e_roi_s, e_roi_e] e_roi_s = fp[f"{src}/ROIs/{entry}.dat"].attrs["RoiStartChan"][0] e_roi_e = fp[f"{src}/ROIs/{entry}.dat"].attrs["RoiEndChan"][0] + eds_map.tmp["energy_range"] = NxObject(unit="eV", + value=np.asarray([e_channels[e_roi_s], e_channels[e_roi_e + 1]])) eds_map.tmp["iupac_line_candidates"] \ = ", ".join(get_xrayline_candidates(e_channels[e_roi_s], e_channels[e_roi_e + 1])) @@ -619,7 +621,7 @@ def parse_and_normalize_eds_area_rois(self, fp): for kkey, vval in img.tmp.items(): print(f"\t\timg, key: {kkey}, val: {vval}") else: - print(f"ckey: {ckey}, eds_mapspectrum_oned, key: {key}, val: {val}") + print(f"ckey: {ckey}, eds_map, key: {key}, val: {val}") def parse_and_normalize_eds_line_lsd(self, fp): """Normalize and scale APEX-specific line scan with one spectrum each to NeXus.""" diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 87690b4a9..08d3dc31a 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -696,21 +696,22 @@ def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: for ckey in inp.keys(): if ckey.startswith("eds_map") and inp[ckey] != {}: trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/" \ - f"eds/indexing/" - template[f"{trg}/source"] = inp.tmp["source"] - for img in inp.tmp["IMAGE_R_SET"]: + f"eds/indexing" + template[f"{trg}/source"] = inp[ckey].tmp["source"] + for img in inp[ckey].tmp["IMAGE_R_SET"]: if not isinstance(img, NxImageRealSpaceSet): continue trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/" \ - f"indexing/IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]/" - template[f"{trg}/description"] \ - = img.tmp["description"] - template[f"{trg}/iupac_line_candidates"] \ - = img.tmp["iupac_line_candidates"] + f"indexing/IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]" + template[f"{trg}/source"] = img.tmp["source"] + template[f"{trg}/description"] = img.tmp["description"] + template[f"{trg}/energy_range"] = img.tmp["energy_range"].value + template[f"{trg}/energy_range/@units"] = img.tmp["energy_range"].unit + template[f"{trg}/iupac_line_candidates"] = img.tmp["iupac_line_candidates"] template[f"{trg}/@NX_class"] = "NXdata" # TODO::should be autodecorated - template[f"{trg}/title"] = f"EDS map" template[f"{trg}/@signal"] = "intensity" template[f"{trg}/@axes"] = ["axis_y", "axis_x"] + template[f"{trg}/title"] = f"EDS map {img.tmp['description']}" template[f"{trg}/intensity"] \ = {"compress": img.tmp["image_twod/intensity"].value, "strength": 1} @@ -723,7 +724,7 @@ def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: = {"compress": img.tmp[f"image_twod/axis_{dim[0]}"].value, "strength": 1} template[f"{trg}/AXISNAME[axis_{dim[0]}]/@long_name"] \ - = img.tmp[f"image_twod/axis_{dim[0]}"].value + = img.tmp[f"image_twod/axis_{dim[0]}@long_name"].value self.id_mgn["eds_img"] += 1 self.id_mgn["roi"] += 1 diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py new file mode 100644 index 
000000000..420cea1e0 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py @@ -0,0 +1,81 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""(Sub-)parser for reading content from ThermoFisher Velox *.emd (HDF5) via rosettasciio.""" + +from typing import Dict, List +from rsciio import emd + +from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser + + +class RsciioVeloxSubParser(RsciioBaseParser): + """Read Velox EMD File Format emd.""" + def __init__(self, entry_id: int = 1, file_path: str = ""): + super().__init__(file_path) + if entry_id > 0: + self.entry_id = entry_id + else: + self.entry_id = 1 + self.id_mgn = {} + self.prfx = None + self.tmp: Dict = {} + self.supported_version: Dict = {} + self.version: Dict = {} + self.supported = False + self.check_if_supported() + + def check_if_supported(self): + try: + self.objs = emd.file_reader(self.file_path) + # TODO::what to do if the content of the file is larger than the available + # main memory, one approach to handle this is to have the file_reader parsing + # only the collection of the concepts without the actual instance data + # based on this one could then plan how much memory has to be reserved + # in the template and stream out accordingly + self.supported = True + except IOError: + print(f"Loading {self.file_path} using {self.__name__} is not supported !") + + def parse_and_normalize_and_process_into_template(self, template: dict) -> dict: + """Perform actual parsing filling cache self.tmp.""" + if self.supported is True: + print(f"Parsing with {self.__name__}...") + self.tech_partner_to_nexus_normalization(template) + else: + print(f"{self.file_path} is not a Velox-specific " + f"EMD file that this parser can process !") + return template + + def tech_partner_to_nexus_normalization(self, template: dict) -> dict: + """Translate tech partner concepts to NeXus concepts.""" + self.normalize_bfdf_content(template) # conventional bright/dark field + self.normalize_adf_content(template) # (high-angle) annular dark field + self.normalize_edxs_content(template) # EDS in the TEM + self.normalize_eels_content(template) # electron energy loss spectroscopy + + def normalize_bfdf_content(self, template: dict) -> dict: + return template + + def normalize_adf_content(self, template: dict) -> dict: + return template + + def normalize_edxs_content(self, template: dict) -> dict: + return template + + def normalize_eels_content(self, template: dict) -> dict: + return template From 1b2bd4992ef920fc6164b1e519d323e1b9e60049 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 16 Jan 2024 12:26:54 +0100 Subject: [PATCH 76/84] Styling, linting, mypy --- pynxtools/dataconverter/readers/em/reader.py | 10 +++++----- .../readers/em/subparsers/hfive_apex.py | 14 ++++++++------ .../readers/em/subparsers/nxs_pyxem.py | 2 +- .../readers/em/subparsers/rsciio_velox.py | 4 ++-- 4 files changed, 
16 insertions(+), 14 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 2bcbe2813..c6d97b16a 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -23,11 +23,11 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader # from pynxtools.dataconverter.readers.em.concepts.nxs_concepts import NxEmAppDef -from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser +# from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser -from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser -from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser -from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser +# from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser +# from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser +# from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper @@ -120,7 +120,7 @@ def read(self, # add further with resolving cases # if file_path is an HDF5 will use hfive parser - sub_parser = "nxs_pyxem" + # sub_parser = "nxs_pyxem" subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) subparser.parse(template) # TODO::check correct loop through! diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py index 1dd844184..35624787f 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py @@ -548,7 +548,7 @@ def parse_and_normalize_eds_area_rois(self, fp): reqs = ["ResolutionX", "ResolutionY", "mmFieldWidth", "mmFieldHeight"] for req in reqs: if req not in fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"].dtype.names: - # also check for shape + # also check for shape raise ValueError(f"Required attribute named {req} not found in " f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS !") # find relevant EDS maps (pairs of .dat, .ipr) groups @@ -596,8 +596,10 @@ def parse_and_normalize_eds_area_rois(self, fp): # theoretical candidates within integrated energy region [e_roi_s, e_roi_e] e_roi_s = fp[f"{src}/ROIs/{entry}.dat"].attrs["RoiStartChan"][0] e_roi_e = fp[f"{src}/ROIs/{entry}.dat"].attrs["RoiEndChan"][0] - eds_map.tmp["energy_range"] = NxObject(unit="eV", - value=np.asarray([e_channels[e_roi_s], e_channels[e_roi_e + 1]])) + eds_map.tmp["energy_range"] \ + = NxObject(unit="eV", + value=np.asarray([e_channels[e_roi_s], + e_channels[e_roi_e + 1]])) eds_map.tmp["iupac_line_candidates"] \ = ", ".join(get_xrayline_candidates(e_channels[e_roi_s], e_channels[e_roi_e + 1])) @@ -648,7 +650,7 @@ def parse_and_normalize_eds_line_lsd(self, fp): for req in reqs: if req not in fp[f"{src}/SPC"].attrs.keys(): raise ValueError(f"Required attribute named {req} not found in {src}/SPC !") - reqs = ["Step","X1", "X2", "Y1", "Y2"] + reqs = ["Step", "X1", "X2", "Y1", "Y2"] for req in reqs: if req not in fp[f"{src}/REGION"].attrs.keys(): raise ValueError(f"Required attribute named {req} 
not found in {src}/REGION !") @@ -675,9 +677,9 @@ def parse_and_normalize_eds_line_lsd(self, fp): # vector representation of the line's physical length from mm to µm line = np.asarray([ - (fp[f"{src}/REGION"].attrs["X2"][0] - fp[f"{src}/REGION"].attrs["X1"][0]) \ + (fp[f"{src}/REGION"].attrs["X2"][0] - fp[f"{src}/REGION"].attrs["X1"][0]) * fp[f"{src}/LINEMAPIMAGECOLLECTIONPARAMS"].attrs["mmFieldWidth"] * 1000., - (fp[f"{src}/REGION"].attrs["Y2"][0] - fp[f"{src}/REGION"].attrs["Y1"][0]) \ + (fp[f"{src}/REGION"].attrs["Y2"][0] - fp[f"{src}/REGION"].attrs["Y1"][0]) * fp[f"{src}/LINEMAPIMAGECOLLECTIONPARAMS"].attrs["mmFieldHeight"] * 1000.]) i_n = fp[f"{src}/LSD"].attrs["NumberOfSpectra"][0] line_length = np.sqrt(line[0]**2 + line[1]**2) diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py index 08d3dc31a..c1cc0fd97 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py +++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_pyxem.py @@ -316,7 +316,7 @@ def process_roi_overview_ebsd_based(self, return template def process_roi_overview_eds_based(self, - inp: dict, + inp, template: dict) -> dict: trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py index 420cea1e0..4c310002b 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py @@ -31,7 +31,7 @@ def __init__(self, entry_id: int = 1, file_path: str = ""): self.entry_id = entry_id else: self.entry_id = 1 - self.id_mgn = {} + self.id_mgn: Dict = {} self.prfx = None self.tmp: Dict = {} self.supported_version: Dict = {} @@ -54,7 +54,6 @@ def check_if_supported(self): def parse_and_normalize_and_process_into_template(self, template: dict) -> dict: """Perform actual parsing filling cache self.tmp.""" if self.supported is True: - print(f"Parsing with {self.__name__}...") self.tech_partner_to_nexus_normalization(template) else: print(f"{self.file_path} is not a Velox-specific " @@ -67,6 +66,7 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict: self.normalize_adf_content(template) # (high-angle) annular dark field self.normalize_edxs_content(template) # EDS in the TEM self.normalize_eels_content(template) # electron energy loss spectroscopy + return template def normalize_bfdf_content(self, template: dict) -> dict: return template From fb0a9960c506522fbeb76ab745135f1976329705 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 16 Jan 2024 15:03:34 +0100 Subject: [PATCH 77/84] Minor edit batch files --- debug/spctrscpy.batch.sh | 3 ++- debug/spctrscpy.dev.ipynb | 45 ++++++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh index 15f825d90..1d2c5f660 100755 --- a/debug/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -9,7 +9,8 @@ datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/ examples="InGaN_nanowires_spectra.edaxh5" examples="AlGaO.nxs" examples="GeSi.nxs" -examples="VInP_108_L2.h5" +examples="GeSn_13.nxs" +# examples="VInP_108_L2.h5" for example in $examples; do echo $example diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index a1532ce28..ce9e812e6 100644 --- a/debug/spctrscpy.dev.ipynb 
+++ b/debug/spctrscpy.dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", "metadata": {}, "outputs": [], @@ -18,10 +18,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/InGaN_nanowires_spectra.edaxh5\n" + ] + } + ], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", "fnms = [(\"pdi\", \"InGaN_nanowires_spectra.edaxh5\"),\n", @@ -31,35 +39,46 @@ " (\"ikz\", \"VInP_108_L2.h5\"),\n", " (\"fhi\", \"CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\"),\n", " (\"adrien\", \"1613_Si_HAADF_610_kx.emd\"),\n", - " (\"bruker\", \"pynx/46_ES-LP_L1_brg.bcf\"),\n", - " (\"emd\", \"pynx/1613_Si_HAADF_610_kx.emd\"),\n", - " (\"digitalmicrograph\", \"pynx/EELS_map_2_ROI_1_location_4.dm3\"),\n", - " (\"oxfordinstruments\", \"pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", + " (\"adrien\", \"46_ES-LP_L1_brg.bcf\"),\n", + " (\"benedikt\", \"EELS_map_2_ROI_1_location_4.dm3\"),\n", + " (\"phillippe\", \"H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", "# pyUSID, HSMA\n", - "case = 5 # 5 # len(fnms) - 1 # len(fnms) - 1\n", + "case = 0 # 5 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", "metadata": {}, "outputs": [], "source": [ - "H5Web(fnm)" + "# H5Web(fnm)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "221abf67-0d88-4088-9cc7-e0d9b85c4699", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/x-hdf5": "/home/kaiobach/Research/hu_hu_hu/sprint17/pynx/pynxtools/debug/spctrscpy/debug.InGaN_nanowires_spectra.edaxh5.nxs", + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# resulting NeXus artifact\n", - "H5Web(f\"debug.{fnms[case][1]}.nxs\")" + "H5Web(f\"spctrscpy/debug.{fnms[case][1]}.nxs\")" ] }, { From 4a9de81f3e274d21171d0369ec3e8ab092189f9c Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 17 Jan 2024 11:34:30 +0100 Subject: [PATCH 78/84] Edit to make version a child of definitions --- pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py index 09e00228b..1315b2ae4 100644 --- a/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py +++ b/pynxtools/dataconverter/readers/em/concepts/nxs_concepts.py @@ -34,9 +34,9 @@ NxEmRoot = {"/ENTRY[entry*]/PROGRAM[program1]/program": "pynxtools/dataconverter/readers/em", "/ENTRY[entry*]/PROGRAM[program1]/program/@version": PYNXTOOLS_VERSION, "/ENTRY[entry*]/PROGRAM[program1]/program/@url": PYNXTOOLS_URL, - "/ENTRY[entry*]/@version": NXEM_VERSION, "/ENTRY[entry*]/@url": NXEM_URL, - "/ENTRY[entry*]/definition": NXEM_NAME} + "/ENTRY[entry*]/definition": NXEM_NAME, + "/ENTRY[entry*]/definition/@version": NXEM_VERSION} # alternatively the above-mentioned 
program1 entries to place under "/"

From 3892c7011fdeb14eea81670f16f7c2a913f2b3c3 Mon Sep 17 00:00:00 2001
From: mkuehbach 
Date: Wed, 17 Jan 2024 16:10:03 +0100
Subject: [PATCH 79/84] Added a first version of the imaging-mode case
 distinction logic; tested the imgs, adf, and ceta imaging modes. NeXus files
 were generated successfully, but a weird h5web display error comes up within
 the ipynb; tested whether spaces in the file names cause this, but no: in
 HDFView the file displays without any issues

---
 debug/spctrscpy.batch.sh                      |   9 +-
 pynxtools/dataconverter/readers/em/reader.py  |  12 +-
 .../readers/em/subparsers/rsciio_velox.py     | 212 +++++++++++++++++-
 .../readers/em/utils/rsciio_hyperspy_utils.py |  68 ++++++
 4 files changed, 282 insertions(+), 19 deletions(-)
 create mode 100644 pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py

diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh
index 1d2c5f660..949de3dfb 100755
--- a/debug/spctrscpy.batch.sh
+++ b/debug/spctrscpy.batch.sh
@@ -2,7 +2,7 @@
 datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/"
 datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/"
-
+datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/"
 # apex examples ikz, pdi
 # examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina"
@@ -11,8 +11,11 @@ examples="AlGaO.nxs"
 examples="GeSi.nxs"
 examples="GeSn_13.nxs"
 # examples="VInP_108_L2.h5"
+examples="CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd"
+examples="CG71113 1138 Ceta 660 mm Camera.emd"
+examples="CG71113 1125 Ceta 1.1 Mx Camera.emd"
 
-for example in $examples; do
+for example in "$examples"; do
     echo $example
-    dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt
+    dataconverter --reader em --nxdl NXroot --input-file "$datasource$example" --output "debug.$example.nxs" 1>"stdout.$example.nxs.txt" 2>"stderr.$example.nxs.txt"
 done

diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
index c6d97b16a..72c33bd5e 100644
--- a/pynxtools/dataconverter/readers/em/reader.py
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -27,7 +27,7 @@
 from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser
 # from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser
 # from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser
-# from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser
+from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser
 from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver
 # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper
@@ -121,21 +121,23 @@ def read(self,
         # add further with resolving cases
         # if file_path is an HDF5 will use hfive parser
         # sub_parser = "nxs_pyxem"
-        subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0])
-        subparser.parse(template)
+        # subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0])
+        # subparser.parse(template)
         # TODO::check correct loop through!
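         # A minimal sketch of the loop-through which the TODOs here ask for,
         # assuming every subparser exposes the same parse(template) interface;
         # the tuple of classes is illustrative, not the final dispatch logic:
         # for subparser_type in (NxEmNxsPyxemSubParser, RsciioVeloxSubParser):
         #     candidate = subparser_type(entry_id, file_paths[0])
         #     template = candidate.parse(template)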
# sub_parser = "image_tiff" # subparser = NxEmImagesSubParser(entry_id, file_paths[0]) # subparser.parse(template) + # TODO::check correct loop through! # sub_parser = "zipped_nion_project" # subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0]) # subparser.parse(template, verbose=True) + # TODO::check correct loop through! # sub_parser = "velox_emd" - # subparser = RsciioVeloxSubParser(entry_id, file_paths[0]) - # subparser.parse(template, verbose=True) + subparser = RsciioVeloxSubParser(entry_id, file_paths[0]) + subparser.parse(template, verbose=True) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py index 4c310002b..2eabd2ea2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py @@ -17,10 +17,17 @@ # """(Sub-)parser for reading content from ThermoFisher Velox *.emd (HDF5) via rosettasciio.""" +import flatdict as fd +import numpy as np + from typing import Dict, List from rsciio import emd from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser +from pynxtools.dataconverter.readers.em.utils.rsciio_hyperspy_utils \ + import get_named_axis, get_axes_dims, get_axes_units +from pynxtools.dataconverter.readers.shared.shared_utils \ + import get_sha256_of_file_content class RsciioVeloxSubParser(RsciioBaseParser): @@ -31,8 +38,11 @@ def __init__(self, entry_id: int = 1, file_path: str = ""): self.entry_id = entry_id else: self.entry_id = 1 - self.id_mgn: Dict = {} - self.prfx = None + self.id_mgn: Dict = {"event": 1, + "event_img": 1, + "event_spc": 1, + "roi": 1} + self.file_path_sha256 = None self.tmp: Dict = {} self.supported_version: Dict = {} self.version: Dict = {} @@ -47,11 +57,15 @@ def check_if_supported(self): # only the collection of the concepts without the actual instance data # based on this one could then plan how much memory has to be reserved # in the template and stream out accordingly + with open(self.file_path, "rb", 0) as fp: + self.file_path_sha256 = get_sha256_of_file_content(fp) + + print(f"Parsing {self.file_path} with SHA256 {self.file_path_sha256} ...") self.supported = True except IOError: print(f"Loading {self.file_path} using {self.__name__} is not supported !") - def parse_and_normalize_and_process_into_template(self, template: dict) -> dict: + def parse(self, template: dict, verbose=False) -> dict: """Perform actual parsing filling cache self.tmp.""" if self.supported is True: self.tech_partner_to_nexus_normalization(template) @@ -62,20 +76,196 @@ def parse_and_normalize_and_process_into_template(self, template: dict) -> dict: def tech_partner_to_nexus_normalization(self, template: dict) -> dict: """Translate tech partner concepts to NeXus concepts.""" - self.normalize_bfdf_content(template) # conventional bright/dark field - self.normalize_adf_content(template) # (high-angle) annular dark field - self.normalize_edxs_content(template) # EDS in the TEM - self.normalize_eels_content(template) # electron energy loss spectroscopy + reqs = ["data", "axes", "metadata", "original_metadata", "mapping"] + for idx, obj in enumerate(self.objs): + if not isinstance(obj, dict): + continue + parse = True + for req in reqs: + if req not in obj: + parse = False + if parse == False: + continue + + content_type = 
self.content_resolver(obj) + print(f"Parsing {idx}-th object in {self.file_path} content type is {content_type}") + if content_type == "imgs": + self.normalize_imgs_content(obj, template) # generic imaging modes + # TODO:: could later make an own one for bright/dark field, but + # currently no distinction in hyperspy + elif content_type == "adf": + self.normalize_adf_content(obj, template) # (high-angle) annular dark field + elif content_type == "diff": # diffraction image in reciprocal space + self.normalize_diff_content(obj, template) # diffraction images + elif content_type == "eds": + self.normalize_eds_content(obj,template) # ED(X)S in the TEM + elif content_type == "eels": + self.normalize_eels_content(obj, template) # electron energy loss spectroscopy + else: # == "n/a" + print(f"WARNING::Unable to resolve content of {idx}-th object in {self.file_path}!") return template - def normalize_bfdf_content(self, template: dict) -> dict: + def content_resolver(self, obj: dict) -> str: + """Try to identify which content the obj describes best.""" + # assume rosettasciio-specific formatting of the emd parser + # i.e. a dictionary with the following keys: + # "data", "axes", "metadata", "original_metadata", "mapping" + meta = fd.FlatDict(obj["metadata"], "/") + orgmeta = fd.FlatDict(obj["original_metadata"], "/") + dims = get_axes_dims(obj["axes"]) + units = get_axes_units(obj["axes"]) + if "General/title" not in meta.keys(): + return "n/a" + if (meta["General/title"] in ("BF")) or (meta["General/title"].startswith("DF")): + # TODO::the problem with using here the explicit name DF4 is that this may only + # work for a particular microscope: + # Core/MetadataDefinitionVersion: 7.9, Core/MetadataSchemaVersion: v1/2013/07 + # Instrument/ControlSoftwareVersion: 1.15.4, Instrument/Manufacturer: FEI Company + # Instrument/InstrumentId: 6338, Instrument/InstrumentModel: Talos F200X + # instead there should be a logic added which resolves which concept + # the data in this obj are best described by when asking a community-wide + # glossary but not the FEI-specific glossary + # all that logic is unneeded and thereby the data more interoperable + # if FEI would harmonize their obvious company metadata standard with the + # electron microscopy community! + return "imgs" + if meta["General/title"] in ("HAADF"): + return "adf" + # all units indicating we are in real or complex i.e. reciprocal space + vote_r_c = [0, 0] # real space, complex space + for unit in units: + if unit.startswith("1 /"): + vote_r_c[1] += 1 + else: + vote_r_c[0] += 1 + if vote_r_c[0] == len(units) and vote_r_c[1] == 0: + return "imgs" + if vote_r_c[0] == 0 and vote_r_c[1] == len(units): + return "diff" + del vote_r_c + return "n/a" + + def normalize_imgs_content(self, obj: dict, template: dict) -> dict: + """Map generic scanned images (e.g. 
BF/DF) to NeXus."""
+        meta = fd.FlatDict(obj["metadata"], "/")
+        dims = get_axes_dims(obj["axes"])
+        trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
+              f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
+              f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]"
+        template[f"{trg}/PROCESS[process]/source/type"] = "file"
+        template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
+        template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
+        template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256"
+        template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"]
+        template[f"{trg}/image_twod/@NX_class"] = "NXdata"  # TODO::writer should do!
+        template[f"{trg}/image_twod/@signal"] = "intensity"
+        template[f"{trg}/image_twod/@axes"] = []
+        for dim in dims:
+            template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}")
+            template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}]"] \
+                = np.uint32(dim[1])
+            support, unit = get_named_axis(obj["axes"], dim[0])
+            if support is not None and unit is not None:
+                template[f"{trg}/image_twod/axis_{dim[0]}"] \
+                    = {"compress": support, "strength": 1}
+                template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] \
+                    = f"{dim[0]}-axis position ({unit})"
+        template[f"{trg}/image_twod/title"] = meta["General/title"]
+        template[f"{trg}/image_twod/intensity"] \
+            = {"compress": np.asarray(obj["data"]), "strength": 1}
+        # template[f"{trg}/image_twod/intensity/@units"]
+        # TODO::add metadata
+        self.id_mgn["event_img"] += 1
+        self.id_mgn["event"] += 1
+        return template
+
+    def normalize_adf_content(self, obj: dict, template: dict) -> dict:
+        """Map relevant (high-angle) annular dark field images to NeXus."""
+        meta = fd.FlatDict(obj["metadata"], "/")
+        dims = get_axes_dims(obj["axes"])
+        trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
+              f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
+              f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]"
+        template[f"{trg}/PROCESS[process]/source/type"] = "file"
+        template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
+        template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
+        template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256"
+        template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"]
+        template[f"{trg}/image_twod/@NX_class"] = "NXdata"  # TODO::writer should do!
+        template[f"{trg}/image_twod/@signal"] = "intensity"
+        template[f"{trg}/image_twod/@axes"] = []
+        for dim in dims:
+            template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}")
+            template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}]"] \
+                = np.uint32(dim[1])
+            support, unit = get_named_axis(obj["axes"], dim[0])
+            if support is not None and unit is not None:
+                template[f"{trg}/image_twod/axis_{dim[0]}"] \
+                    = {"compress": support, "strength": 1}
+                template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] \
+                    = f"{dim[0]}-axis position ({unit})"
+        template[f"{trg}/image_twod/title"] = meta["General/title"]
+        template[f"{trg}/image_twod/intensity"] \
+            = {"compress": np.asarray(obj["data"]), "strength": 1}
+        # template[f"{trg}/image_twod/intensity/@units"]
+        # TODO::collection angles given in original_metadata map to half_angle_interval
+        # TODO::add metadata
+        self.id_mgn["event_img"] += 1
+        self.id_mgn["event"] += 1
+        return template
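+
+    # normalize_imgs_content and normalize_adf_content above share their body
+    # verbatim; a minimal consolidation sketch, assuming a hypothetical private
+    # helper _normalize_image_r_set that would carry the shared body:
+    #
+    # def normalize_imgs_content(self, obj: dict, template: dict) -> dict:
+    #     """Map generic scanned images (e.g. BF/DF) to NeXus."""
+    #     return self._normalize_image_r_set(obj, template)
+    #
+    # def normalize_adf_content(self, obj: dict, template: dict) -> dict:
+    #     """Map relevant (high-angle) annular dark field images to NeXus."""
+    #     return self._normalize_image_r_set(obj, template)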
+
+    def normalize_diff_content(self, obj: dict, template: dict) -> dict:
+        """Map relevant diffraction images to NeXus."""
+        # TODO::the above-mentioned constraint is not general enough
+        # this can work only for cases where we know that we not only have a
+        # Ceta camera but also use it for taking diffraction pattern
+        # TODO::this is an example that more logic is needed to identify whether
+        # the information inside obj really has a similarity with the concept of
+        # somebody having taken a diffraction image
+        # one can compare the situation with the following:
+        # assume you wish to take pictures of apples and have an NXapple_picture
+        # but all you get is an image from a digital camera where the dataset is
+        # named maybe DCIM, without a logic one cannot make the mapping robustly!
+        # can one map y, x onto j, i indices?
+        meta = fd.FlatDict(obj["metadata"], "/")
+        idx_map = {"y": "j", "x": "i"}
+        dims = get_axes_dims(obj["axes"])
+        print(dims)
+        for dim in dims:
+            if dim[0] not in idx_map.keys():
+                raise ValueError(f"Unable to map index {dim[0]} on something!")
+
+        trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
+              f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
+              f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]"
+        template[f"{trg}/PROCESS[process]/source/type"] = "file"
+        template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
+        template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
+        template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256"
+        template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"]
+        template[f"{trg}/image_twod/@NX_class"] = "NXdata"  # TODO::writer should do!
+        template[f"{trg}/image_twod/@signal"] = "magnitude"
+        template[f"{trg}/image_twod/@axes"] = []
+        for dim in dims:
+            template[f"{trg}/image_twod/@axes"].append(f"axis_{idx_map[dim[0]]}")
+            template[f"{trg}/image_twod/@AXISNAME_indices[axis_{idx_map[dim[0]]}]"] \
+                = np.uint32(dim[1])
+            support, unit = get_named_axis(obj["axes"], dim[0])
+            if support is not None and unit is not None and unit.startswith("1 /"):
+                template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}"] \
+                    = {"compress": support, "strength": 1}
+                template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}/@long_name"] \
+                    = f"{idx_map[dim[0]]}-axis position ({unit})"
+        template[f"{trg}/image_twod/title"] = meta["General/title"]
+        template[f"{trg}/image_twod/magnitude"] \
+            = {"compress": np.asarray(obj["data"]), "strength": 1}
+        # template[f"{trg}/image_twod/magnitude/@units"]
+        # TODO::add metadata
+        self.id_mgn["event_img"] += 1
+        self.id_mgn["event"] += 1
+        return template
+
+    def normalize_eds_content(self, obj: dict, template: dict) -> dict:
+        return template
+
+    def normalize_eels_content(self, obj: dict, template: dict) -> dict:
+        return template
diff --git a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
new file mode 100644
index 000000000..b4aeaeaa4
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
@@ -0,0 +1,68 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Utility functions to interpret data from hyperspy-project-specific representation."""
+
+import numpy as np
+
+
+def get_named_axis(axes_metadata, dim_name):
+    """Return tuple of (numpy array of axis positions, unit) for dim_name, or None."""
+    retval = None
+    if len(axes_metadata) >= 1:
+        for axis in axes_metadata:
+            if isinstance(axis, dict):
+                if ("name" in axis):
+                    if axis["name"] == dim_name:
+                        reqs = ["index_in_array", "offset", "scale", "size", "units", "navigate"]  # "name"
+                        for req in reqs:
+                            if req not in axis:
+                                raise ValueError(f"{req} not in {axis}!")
+                        retval = (
+                            np.asarray(
+                                axis["offset"] + (np.linspace(0.,
+                                                              axis["size"] - 1.,
+                                                              num=int(axis["size"]),
+                                                              endpoint=True)
+                                                  * axis["scale"]),
+                                np.float64),
+                            axis["units"])
+    return retval
+
+
+def get_axes_dims(axes_metadata):
+    """Return list of (axis name, index_in_array) tuples or an empty list."""
+    retval = []
+    if len(axes_metadata) >= 1:
+        for axis in axes_metadata:
+            if isinstance(axis, dict):
+                if ("name" in axis) and ("index_in_array" in axis):
+                    retval.append((axis["name"], axis["index_in_array"]))
+    # TODO::it seems that hyperspy sorts this by index_in_array
+    return retval
+
+
+def get_axes_units(axes_metadata):
+    """Return list of units or empty list."""
+    retval = []
+    if len(axes_metadata) >= 1:
+        for axis in axes_metadata:
+            if isinstance(axis, dict):
+                if "units" in axis:
+                    retval.append(axis["units"])
+    # TODO::it seems that hyperspy sorts this by index_in_array
+    return retval

From 020225d8e965eabd10ef97a2448e2a219ed473c4 Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Wed, 17 Jan 2024 18:07:15 +0100
Subject: [PATCH 80/84] Added eds_map support, tested with FHI Rohner emd file

---
 debug/spctrscpy.batch.sh                      |   1 +
 debug/spctrscpy.dev.ipynb                     |  72 ++++++------
 .../readers/em/subparsers/rsciio_velox.py     | 107 +++++++++++++++---
 .../readers/em/utils/rsciio_hyperspy_utils.py |  16 ++-
 4 files changed, 133 insertions(+), 63 deletions(-)

diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh
index 949de3dfb..9f90e6380 100755
--- a/debug/spctrscpy.batch.sh
+++ b/debug/spctrscpy.batch.sh
@@ -14,6 +14,7 @@ examples="GeSn_13.nxs"
 examples="CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd"
 examples="CG71113 1138 Ceta 660 mm Camera.emd"
 examples="CG71113 1125 Ceta 1.1 Mx Camera.emd"
+examples="CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd"
 
 for example in "$examples"; do
     echo $example
diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb
index ce9e812e6..51c04c750 100644
--- a/debug/spctrscpy.dev.ipynb
+++ b/debug/spctrscpy.dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type":
"code", - "execution_count": 2, + "execution_count": null, "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/InGaN_nanowires_spectra.edaxh5\n" - ] - } - ], + "outputs": [], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", "fnms = [(\"pdi\", \"InGaN_nanowires_spectra.edaxh5\"),\n", @@ -38,19 +32,22 @@ " (\"ikz\", \"GeSn_13.nxs\"),\n", " (\"ikz\", \"VInP_108_L2.h5\"),\n", " (\"fhi\", \"CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\"),\n", + " (\"fhi\", \"CG71113 1138 Ceta 660 mm Camera.emd\"),\n", + " (\"fhi\", \"CG71113 1125 Ceta 1.1 Mx Camera.emd\"),\n", + " (\"fhi\", \"CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd\"),\n", " (\"adrien\", \"1613_Si_HAADF_610_kx.emd\"),\n", " (\"adrien\", \"46_ES-LP_L1_brg.bcf\"),\n", " (\"benedikt\", \"EELS_map_2_ROI_1_location_4.dm3\"),\n", " (\"phillippe\", \"H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", "# pyUSID, HSMA\n", - "case = 0 # 5 # len(fnms) - 1 # len(fnms) - 1\n", + "case = 8 # 5 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", "metadata": {}, "outputs": [], @@ -60,44 +57,47 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "221abf67-0d88-4088-9cc7-e0d9b85c4699", - "metadata": {}, - "outputs": [ - { - "data": { - "application/x-hdf5": "/home/kaiobach/Research/hu_hu_hu/sprint17/pynx/pynxtools/debug/spctrscpy/debug.InGaN_nanowires_spectra.edaxh5.nxs", - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "# resulting NeXus artifact\n", - "H5Web(f\"spctrscpy/debug.{fnms[case][1]}.nxs\")" + "# tmp = \"debug.CG71113 1138 Ceta 660 mm Camera.emd.nxs\"\n", + "# print(tmp)\n", + "# H5Web(tmp)\n", + "H5Web(f\"debug.{fnms[case][1]}.nxs\")" ] }, { "cell_type": "code", "execution_count": null, "id": "75b32c8f-8efa-4b40-bfc8-6f95300902ea", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "objs = emd.file_reader(fnm)\n", "print(len(objs))\n", - "for obj in objs:\n", + "for idx, obj in enumerate(objs):\n", " if not isinstance(obj, dict):\n", " raise ValueError(\"No dict!\")\n", " print(obj.keys())\n", - " for key, val in obj.items():\n", - " print(f\"{key}, {np.shape(val)}\")\n", - " print(obj[\"metadata\"])\n", - " # print(obj[\"original_metadata\"])\n", + " meta = fd.FlatDict(obj[\"metadata\"], \"/\")\n", + " if meta[\"General/title\"] == \"Te\":\n", + " for key, val in obj.items():\n", + " print(f\"{idx}, {key}, {np.shape(val)}\")\n", + " print(f\"\\n\\n{obj['axes']}\")\n", + " print(\"\\n\\n\")\n", + " for key, val in meta.items():\n", + " print(f\"{key}: {val}\")\n", + " orgmeta = fd.FlatDict(obj[\"original_metadata\"], \"/\")\n", + " print(\"\\n\\n\")\n", + " for key, val in orgmeta.items():\n", + " print(f\"{key}: {val}\")\n", "# print(f\"{type(objs[0])}\")\n", "# print(objs[0].keys())" ] diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py index 2eabd2ea2..0ae4006f5 100644 --- 
a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py @@ -22,6 +22,7 @@ from typing import Dict, List from rsciio import emd +from ase.data import chemical_symbols from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser from pynxtools.dataconverter.readers.em.utils.rsciio_hyperspy_utils \ @@ -29,6 +30,9 @@ from pynxtools.dataconverter.readers.shared.shared_utils \ import get_sha256_of_file_content +REAL_SPACE = 0 +COMPLEX_SPACE = 1 + class RsciioVeloxSubParser(RsciioBaseParser): """Read Velox EMD File Format emd.""" @@ -41,7 +45,8 @@ def __init__(self, entry_id: int = 1, file_path: str = ""): self.id_mgn: Dict = {"event": 1, "event_img": 1, "event_spc": 1, - "roi": 1} + "roi": 1, + "eds_img": 1} self.file_path_sha256 = None self.tmp: Dict = {} self.supported_version: Dict = {} @@ -84,11 +89,12 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict: for req in reqs: if req not in obj: parse = False - if parse == False: + if parse is False: continue content_type = self.content_resolver(obj) print(f"Parsing {idx}-th object in {self.file_path} content type is {content_type}") + print(f"dims: {obj['axes']}") if content_type == "imgs": self.normalize_imgs_content(obj, template) # generic imaging modes # TODO:: could later make an own one for bright/dark field, but @@ -97,8 +103,8 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict: self.normalize_adf_content(obj, template) # (high-angle) annular dark field elif content_type == "diff": # diffraction image in reciprocal space self.normalize_diff_content(obj, template) # diffraction images - elif content_type == "eds": - self.normalize_eds_content(obj,template) # ED(X)S in the TEM + elif content_type == "eds_map": + self.normalize_eds_map_content(obj, template) # ED(X)S in the TEM elif content_type == "eels": self.normalize_eels_content(obj, template) # electron energy loss spectroscopy else: # == "n/a" @@ -111,12 +117,17 @@ def content_resolver(self, obj: dict) -> str: # i.e. a dictionary with the following keys: # "data", "axes", "metadata", "original_metadata", "mapping" meta = fd.FlatDict(obj["metadata"], "/") - orgmeta = fd.FlatDict(obj["original_metadata"], "/") + # orgmeta = fd.FlatDict(obj["original_metadata"], "/") dims = get_axes_dims(obj["axes"]) units = get_axes_units(obj["axes"]) + if "General/title" not in meta.keys(): return "n/a" + if (meta["General/title"] in ("BF")) or (meta["General/title"].startswith("DF")): + uniq = set() + for dim in dims: + uniq.add(dim[0]) # TODO::the problem with using here the explicit name DF4 is that this may only # work for a particular microscope: # Core/MetadataDefinitionVersion: 7.9, Core/MetadataSchemaVersion: v1/2013/07 @@ -128,27 +139,45 @@ def content_resolver(self, obj: dict) -> str: # all that logic is unneeded and thereby the data more interoperable # if FEI would harmonize their obvious company metadata standard with the # electron microscopy community! - return "imgs" + if sorted(uniq) == ["x", "y"]: + return "imgs" + if meta["General/title"] in ("HAADF"): return "adf" + # all units indicating we are in real or complex i.e. reciprocal space + if meta["General/title"] in ("EDS"): + return "eds_spc" + + for symbol in chemical_symbols[1::]: # an eds_map + # TODO::does rosettasciio via hyperspy identify the symbol or is the + # title by default already in Velox set (by default) to the chemical symbol? 
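+            # NOTE::an assumption drawn from the FHI examples, not a verified
+            # Velox guarantee: elemental EDS maps appear to be titled exactly
+            # with the element symbol (e.g. General/title == "Te" for a
+            # tellurium map), while other signals carry titles like "HAADF",
+            # "BF", or "EDS"; ase's chemical_symbols list starts with the
+            # placeholder "X" at index 0, hence the [1::] slice so that only
+            # real element symbols are compared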
+ if meta["General/title"] != symbol: + continue + return "eds_map" + vote_r_c = [0, 0] # real space, complex space for unit in units: - if unit.startswith("1 /"): - vote_r_c[1] += 1 - else: - vote_r_c[0] += 1 - if vote_r_c[0] == len(units) and vote_r_c[1] == 0: + if unit.lower().replace(" ", "") \ + in ["m", "cm", "mm", "µm", "nm", "pm"]: + vote_r_c[REAL_SPACE] += 1 + if unit.lower().replace(" ", "") \ + in ["1/m", "1/cm", "1/mm", "1/µm", "1/nm", "1/pm"]: + vote_r_c[COMPLEX_SPACE] += 1 + + if (vote_r_c[0] == len(units)) and (vote_r_c[1] == 0): return "imgs" - if vote_r_c[0] == 0 and vote_r_c[1] == len(units): + if (vote_r_c[0] == 0) and (vote_r_c[1] == len(units)): return "diff" - del vote_r_c + return "n/a" def normalize_imgs_content(self, obj: dict, template: dict) -> dict: """Map generic scanned images (e.g. BF/DF) to NeXus.""" meta = fd.FlatDict(obj["metadata"], "/") dims = get_axes_dims(obj["axes"]) + if len(dims) != 2: + raise ValueError(f"{obj['axes']}") trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]" @@ -183,6 +212,8 @@ def normalize_adf_content(self, obj: dict, template: dict) -> dict: """Map relevant (high-angle) annular dark field images to NeXus.""" meta = fd.FlatDict(obj["metadata"], "/") dims = get_axes_dims(obj["axes"]) + if len(dims) != 2: + raise ValueError(f"{obj['axes']}") trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]" @@ -228,15 +259,17 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict: # named maybe DCIM, without a logic one cannot make the mapping robustly! 
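        # the unit-based vote in content_resolver is the sketch of a way out:
        # every axis whose unit is a real-space length ("m" ... "pm") votes
        # for "imgs", every axis with a reciprocal unit ("1/m" ... "1/pm")
        # votes for "diff", and only a unanimous vote yields a content type;
        # every mixed or unknown case falls back to "n/a"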
# can one map y, x, on j, i indices idx_map = {"y": "j", "x": "i"} + meta = fd.FlatDict(obj["metadata"], "/") dims = get_axes_dims(obj["axes"]) - print(dims) + if len(dims) != 2: + raise ValueError(f"{obj['axes']}") for dim in dims: if dim[0] not in idx_map.keys(): raise ValueError(f"Unable to map index {dim[0]} on something!") trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ - f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ - f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]" + f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ + f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]" template[f"{trg}/PROCESS[process]/source/type"] = "file" template[f"{trg}/PROCESS[process]/source/path"] = self.file_path template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 @@ -250,7 +283,7 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict: template[f"{trg}/image_twod/@AXISNAME_indices[axis_{idx_map[dim[0]]}]"] \ = np.uint32(dim[1]) support, unit = get_named_axis(obj["axes"], dim[0]) - if support is not None and unit is not None and unit.startswith("1 /"): + if support is not None and unit is not None: template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}"] \ = {"compress": support, "strength": 1} template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}/@long_name"] \ @@ -264,7 +297,45 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict: self.id_mgn["event"] += 1 return template - def normalize_eds_content(self, obj: dict, template: dict) -> dict: + def normalize_eds_map_content(self, obj: dict, template: dict) -> dict: + """Map relevant EDS map to NeXus.""" + meta = fd.FlatDict(obj["metadata"], "/") + dims = get_axes_dims(obj["axes"]) + if len(dims) != 2: + raise ValueError(f"{obj['axes']}") + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/indexing" + template[f"{trg}/source"] = meta["General/title"] + trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/indexing/" \ + f"IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]" + template[f"{trg}/PROCESS[process]/source/type"] = "file" + template[f"{trg}/PROCESS[process]/source/path"] = self.file_path + template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 + template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256" + template[f"{trg}/PROCESS[process]/detector_identifier"] \ + = f"Check carefully how rsciio/hyperspy knows this {meta['General/title']}!" + # template[f"{trg}/description"] = "" + # template[f"{trg}/energy_range"] = (0., 0.) 
+        # template[f"{trg}/energy_range/@units"] = "keV"
+        # template[f"{trg}/iupac_line_candidates"] = ""
+        template[f"{trg}/image_twod/@NX_class"] = "NXdata"  # TODO::should be autodecorated
+        template[f"{trg}/image_twod/@signal"] = "intensity"
+        template[f"{trg}/image_twod/@axes"] = []
+        for dim in dims:
+            template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}")
+            template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}_indices]"] \
+                = np.uint32(dim[1])
+            support, unit = get_named_axis(obj["axes"], dim[0])
+            if support is not None and unit is not None:
+                template[f"{trg}/image_twod/AXISNAME[axis_{dim[0]}]"] \
+                    = {"compress": support, "strength": 1}
+                template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] \
+                    = f"{dim[0]}-axis position ({unit})"
+        template[f"{trg}/title"] = f"EDS map {meta['General/title']}"
+        template[f"{trg}/image_twod/intensity"] \
+            = {"compress": np.asarray(obj["data"]), "strength": 1}
+        # template[f"{trg}/image_twod/intensity/@long_name"] = f"Signal"
+        self.id_mgn["eds_img"] += 1
+        self.id_mgn["roi"] += 1  # TODO::does not necessarily have to be incremented!
         return template
 
     def normalize_eels_content(self, obj: dict, template: dict) -> dict:
diff --git a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
index b4aeaeaa4..a0d9b356e 100644
--- a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
+++ b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
@@ -32,15 +32,13 @@ def get_named_axis(axes_metadata, dim_name):
                         for req in reqs:
                             if req not in axis:
                                 raise ValueError(f"{req} not in {axis}!")
-                        retval = (
-                            np.asarray(
-                                axis["offset"] + (np.linspace(0.,
-                                                              axis["size"] - 1.,
-                                                              num=int(axis["size"]),
-                                                              endpoint=True)
-                                                  * axis["scale"]),
-                                np.float64),
-                            axis["units"])
+                        retval = (np.asarray(axis["offset"] +
+                                  (np.linspace(0.,
+                                               axis["size"] - 1.,
+                                               num=int(axis["size"]),
+                                               endpoint=True)
+                                   * axis["scale"]),
+                                  np.float64), axis["units"])
     return retval
 

From 1948a475d67a7aa8f982fbf9e92e6e0caf907f76 Mon Sep 17 00:00:00 2001
From: mkuehbach
Date: Thu, 18 Jan 2024 15:01:12 +0100
Subject: [PATCH 81/84] Added support for (y, x, energy) and energy-only EDS
 spectra; all content from FHI C. Rohner's example is parsed successfully.
 Next steps: i) add Velox metadata schema version, ii) add microscope
 metadata, iii) merge PRs

---
 .../readers/em/subparsers/rsciio_velox.py     | 62 +++++++++++++++++++
 .../readers/em/utils/rsciio_hyperspy_utils.py | 15 ++++-
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
index 0ae4006f5..005d38c98 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
@@ -105,6 +105,8 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict:
             self.normalize_diff_content(obj, template)  # diffraction images
         elif content_type == "eds_map":
             self.normalize_eds_map_content(obj, template)  # ED(X)S in the TEM
+        elif content_type == "eds_spc":
+            self.normalize_eds_spc_content(obj, template)  # EDS spectrum/(a)
         elif content_type == "eels":
             self.normalize_eels_content(obj, template)  # electron energy loss spectroscopy
         else:  # == "n/a"
@@ -148,6 +150,7 @@ def content_resolver(self, obj: dict) -> str:
         # all units indicating we are in real or complex i.e.
reciprocal space
         if meta["General/title"] in ("EDS"):
             return "eds_spc"
+            # applies to multiple cases, sum spectrum, spectrum stack etc.

@@ -297,6 +300,65 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict:
         self.id_mgn["event"] += 1
         return template

+    def normalize_eds_spc_content(self, obj: dict, template: dict) -> dict:
+        """Map relevant EDS spectrum/(a) to NeXus."""
+        meta = fd.FlatDict(obj["metadata"], "/")
+        dims = get_axes_dims(obj["axes"])
+        n_dims = None
+        if dims == [('Energy', 0)]:
+            n_dims = 1
+        elif dims == [('y', 0), ('x', 1), ('X-ray energy', 2)]:
+            n_dims = 3
+        else:
+            print(f"WARNING eds_spc for {dims} is not implemented!")
+            return template  # nothing mapped, hand the template back unchanged
+        trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
+              f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
+              f"SPECTRUM_SET[spectrum_set{self.id_mgn['event_spc']}]"
+        template[f"{trg}/source"] = meta["General/title"]
+        template[f"{trg}/PROCESS[process]/source/type"] = "file"
+        template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
+        template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
+        template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256"
+        template[f"{trg}/PROCESS[process]/detector_identifier"] \
+            = f"Check carefully how rsciio/hyperspy knows this {meta['General/title']}!"
+        trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
+              f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
+              f"SPECTRUM_SET[spectrum_set{self.id_mgn['event_spc']}]/" \
+              f"DATA[spectrum_zerod]"
+        template[f"{trg}/@NX_class"] = "NXdata"  # TODO::should be autodecorated
+        template[f"{trg}/@signal"] = "intensity"
+        if n_dims == 1:
+            template[f"{trg}/@axes"] = ["axis_energy"]
+            template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0)
+            support, unit = get_named_axis(obj["axes"], "Energy")
+            template[f"{trg}/AXISNAME[axis_energy]"] \
+                = {"compress": support, "strength": 1}
+            template[f"{trg}/AXISNAME[axis_energy]/@long_name"] \
+                = f"Energy ({unit})"
+        if n_dims == 3:
+            template[f"{trg}/@axes"] = ["axis_y", "axis_x", "axis_energy"]
+            template[f"{trg}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(2)
+            template[f"{trg}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(1)
+            template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0)
+            support, unit = get_named_axis(obj["axes"], "y")
+            template[f"{trg}/AXISNAME[axis_y]"] = {"compress": support, "strength": 1}
+            template[f"{trg}/AXISNAME[axis_y]/@long_name"] = f"y-axis position ({unit})"
+            support, unit = get_named_axis(obj["axes"], "x")
+            template[f"{trg}/AXISNAME[axis_x]"] = {"compress": support, "strength": 1}
+            template[f"{trg}/AXISNAME[axis_x]/@long_name"] = f"x-axis position ({unit})"
+            support, unit = get_named_axis(obj["axes"], "X-ray energy")
+            template[f"{trg}/AXISNAME[axis_energy]"] = {"compress": support, "strength": 1}
+            template[f"{trg}/AXISNAME[axis_energy]/@long_name"] = f"Energy ({unit})"
+        # template[f"{trg}/description"] = ""
+        template[f"{trg}/title"] = f"EDS spectrum {meta['General/title']}"
+        template[f"{trg}/intensity"] \
+            = {"compress": np.asarray(obj["data"]), "strength": 1}
+        # template[f"{trg}/intensity/@long_name"] = ""
+        self.id_mgn["event_spc"] += 1
+        self.id_mgn["event"] += 1
+        return template
+
     def normalize_eds_map_content(self, obj: dict, template: dict) -> dict:
         """Map relevant EDS map to NeXus."""
         meta = fd.FlatDict(obj["metadata"], "/")
diff
--git a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py index a0d9b356e..7bd8f4e0b 100644 --- a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py +++ b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py @@ -28,7 +28,9 @@ def get_named_axis(axes_metadata, dim_name): if isinstance(axis, dict): if ("name" in axis): if axis["name"] == dim_name: - reqs = ["index_in_array", "offset", "scale", "size", "units", "navigate"] # "name" + reqs = ["offset", "scale", "size", "units"] + # "index_in_array" and "navigate" are currently not required + # and ignored but might become important for req in reqs: if req not in axis: raise ValueError(f"{req} not in {axis}!") @@ -48,8 +50,15 @@ def get_axes_dims(axes_metadata): if len(axes_metadata) >= 1: for axis in axes_metadata: if isinstance(axis, dict): - if ("name" in axis) and ("index_in_array" in axis): - retval.append((axis["name"], axis["index_in_array"])) + if ("name" in axis): + if "index_in_array" in axis: + retval.append((axis["name"], axis["index_in_array"])) + else: + if len(axes_metadata) == 1: + retval.append((axis["name"], 0)) + else: + raise ValueError(f"get_axes_dims {axes_metadata} " \ + f"is a case not implemented!") # TODO::it seems that hyperspy sorts this by index_in_array return retval From c185901d3c6f0d92d78bd6c07a72356792271521 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Fri, 19 Jan 2024 00:53:41 +0100 Subject: [PATCH 82/84] Added i) schema version, started ii) mapping table for microscope metadata --- debug/spctrscpy.dev.ipynb | 1076 ++++++++++++++++- pynxtools/dataconverter/readers/em/reader.py | 4 +- .../readers/em/subparsers/rsciio_velox.py | 56 +- .../em/subparsers/rsciio_velox_concepts.py | 86 ++ 4 files changed, 1177 insertions(+), 45 deletions(-) create mode 100644 pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index 51c04c750..5242075eb 100644 --- a/debug/spctrscpy.dev.ipynb +++ b/debug/spctrscpy.dev.ipynb @@ -2,11 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og']\n" + ] + } + ], "source": [ + "import re\n", "import numpy as np\n", "from rsciio import bruker, emd, digitalmicrograph\n", "from jupyterlab_h5web import H5Web\n", @@ -20,10 +29,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\n" + ] + } + ], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", "fnms = [(\"pdi\", \"InGaN_nanowires_spectra.edaxh5\"),\n", @@ -40,29 +57,57 @@ " (\"benedikt\", \"EELS_map_2_ROI_1_location_4.dm3\"),\n", " (\"phillippe\", \"H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", "# pyUSID, HSMA\n", - "case = 8 # 5 # len(fnms) - 1 # len(fnms) - 1\n", + "case = 5 # 5 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/x-hdf5": "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd", + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# H5Web(fnm)" + "H5Web(fnm)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "221abf67-0d88-4088-9cc7-e0d9b85c4699", "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "application/x-hdf5": "/home/kaiobach/Research/hu_hu_hu/sprint17/pynx/pynxtools/debug/debug.CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd.nxs", + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# resulting NeXus artifact\n", "# tmp = \"debug.CG71113 1138 Ceta 660 mm Camera.emd.nxs\"\n", @@ -71,36 +116,1005 @@ "H5Web(f\"debug.{fnms[case][1]}.nxs\")" ] }, + { + "cell_type": "markdown", + "id": "b2a11a87-b063-4e77-b36c-de0acbf7c7e2", + "metadata": {}, + "source": [ + "## Collect unique keys from set of files" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "75b32c8f-8efa-4b40-bfc8-6f95300902ea", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1121 Ceta 310.0 kx Camera.emd, len(objs): 1\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1125 Ceta 1.1 Mx Camera 0001.emd, len(objs): 1\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1125 Ceta 1.1 Mx Camera.emd, len(objs): 1\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1126 Ceta 1.1 Mx Camera.emd, len(objs): 1\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1134 Ceta 660 mm Camera.emd, len(objs): 1\n", + 
"Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1138 Ceta 660 mm Camera.emd, len(objs): 1\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1405 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1407 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1409 HAADF-DF4-DF2-BF 6.7 Mx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The file contains only one spectrum stream\n", + "The file contains only one spectrum stream\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1411 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1411 HAADF-DF4-DF2-BF 6.7 Mx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd, len(objs): 12\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 
7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1422 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd, len(objs): 12\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1423 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd, len(objs): 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The file contains only one spectrum stream\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1444 EDS-HAADF-DF4-DF2-BF 595.5 kx SI.emd, len(objs): 10\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + 
"Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The file contains only one spectrum stream\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1514 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd, len(objs): 11\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1537 HAADF-DF4-DF2-BF 432.2 kx STEM.emd, len(objs): 4\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "Core/MetadataDefinitionVersion, 7.9\n", + "Core/MetadataSchemaVersion, v1/2013/07\n", + "len(uniq): 1380\n" + ] + } + ], "source": [ - "objs = emd.file_reader(fnm)\n", - "print(len(objs))\n", - "for idx, obj in enumerate(objs):\n", - " if not isinstance(obj, dict):\n", - " raise ValueError(\"No dict!\")\n", - " print(obj.keys())\n", - " meta = fd.FlatDict(obj[\"metadata\"], \"/\")\n", - " if meta[\"General/title\"] == \"Te\":\n", - " for key, val in obj.items():\n", - " print(f\"{idx}, {key}, {np.shape(val)}\")\n", - " print(f\"\\n\\n{obj['axes']}\")\n", - " print(\"\\n\\n\")\n", - " for key, val in meta.items():\n", - " print(f\"{key}: {val}\")\n", - " orgmeta = fd.FlatDict(obj[\"original_metadata\"], \"/\")\n", - " print(\"\\n\\n\")\n", - " for key, val in orgmeta.items():\n", - " print(f\"{key}: {val}\")\n", + "fnms = [\"CG71113 1121 Ceta 310.0 kx Camera.emd\",\n", + " \"CG71113 1125 Ceta 1.1 Mx Camera 0001.emd\",\n", + " \"CG71113 1125 Ceta 1.1 Mx Camera.emd\",\n", + " \"CG71113 1126 Ceta 1.1 Mx Camera.emd\",\n", + " \"CG71113 1134 Ceta 660 mm Camera.emd\",\n", + " \"CG71113 1138 Ceta 660 mm Camera.emd\",\n", + " 
\"CG71113 1405 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd\",\n", + " \"CG71113 1407 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd\",\n", + " \"CG71113 1409 HAADF-DF4-DF2-BF 6.7 Mx STEM.emd\",\n", + " \"CG71113 1411 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd\",\n", + " \"CG71113 1411 HAADF-DF4-DF2-BF 6.7 Mx STEM.emd\",\n", + " \"CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd\",\n", + " \"CG71113 1422 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd\",\n", + " \"CG71113 1423 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd\",\n", + " \"CG71113 1444 EDS-HAADF-DF4-DF2-BF 595.5 kx SI.emd\",\n", + " \"CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\",\n", + " \"CG71113 1514 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd\",\n", + " \"CG71113 1537 HAADF-DF4-DF2-BF 432.2 kx STEM.emd\"]\n", + "\n", + "uniq = set()\n", + "\n", + "for fnm in fnms:\n", + " fpath = f\"{src}/fhi/{fnm}\" \n", + " objs = emd.file_reader(fpath)\n", + " print(f\"fpath: {fpath}, len(objs): {len(objs)}\")\n", + " for idx, obj in enumerate(objs):\n", + " if not isinstance(obj, dict):\n", + " raise ValueError(\"No dict!\")\n", + " # print(obj.keys())\n", + " meta = fd.FlatDict(obj[\"metadata\"], \"/\")\n", + " if True is True: # meta[\"General/title\"] == \"Te\":\n", + " # for key, val in obj.items():\n", + " # print(f\"{idx}, {key}, {np.shape(val)}\")\n", + " # print(f\"\\n\\n{obj['axes']}\")\n", + " # print(\"\\n\\n\")\n", + " # for key, val in meta.items():\n", + " # print(f\"{key}: {val}\")\n", + " orgmeta = fd.FlatDict(obj[\"original_metadata\"], \"/\")\n", + " # print(\"\\n\\n\")\n", + " for key, val in orgmeta.items():\n", + " # print(f\"{key}: {val}\")\n", + " if key == \"Core/MetadataDefinitionVersion\" or key == \"Core/MetadataSchemaVersion\":\n", + " print(f\"{key}, {val}\")\n", + " uniq.add(f\"{key}\")\n", "# print(f\"{type(objs[0])}\")\n", - "# print(objs[0].keys())" + "# print(objs[0].keys())\n", + "print(f\"len(uniq): {len(uniq)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c5696211-de5b-4b54-a9fa-38dd48048a95", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rosettasciio 0.2\n", + "hyperspy 1.7.6\n" + ] + } + ], + "source": [ + "! pip list | grep rosettasciio\n", + "! 
pip list | grep hyperspy" ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "acb98c77-d6ec-46d5-abe6-8b45bfa3fb4a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "len(uniq): 1380\n", + "len(hashes_removed): 893\n", + "len(detectors_removed): 662\n", + "len(apertures_removed): 600\n", + "Acquisition/AcquisitionDatetime/DateTime\n", + "Acquisition/AcquisitionStartDatetime/DateTime\n", + "Acquisition/BeamType\n", + "Acquisition/SourceType\n", + "BinaryResult/AcquisitionUnit\n", + "BinaryResult/BitsPerPixel\n", + "BinaryResult/CompositionType\n", + "BinaryResult/Detector\n", + "BinaryResult/DetectorIndex\n", + "BinaryResult/Encoding\n", + "BinaryResult/ImageSize/height\n", + "BinaryResult/ImageSize/width\n", + "BinaryResult/Offset/x\n", + "BinaryResult/Offset/y\n", + "BinaryResult/PixelSize/height\n", + "BinaryResult/PixelSize/width\n", + "BinaryResult/PixelUnitX\n", + "BinaryResult/PixelUnitY\n", + "Core/MetadataDefinitionVersion\n", + "Core/MetadataSchemaVersion\n", + "Core/guid\n", + "CustomProperties/Aperture[C1].Name/type\n", + "CustomProperties/Aperture[C1].Name/value\n", + "CustomProperties/Aperture[C2].Name/type\n", + "CustomProperties/Aperture[C2].Name/value\n", + "CustomProperties/Aperture[OBJ].Name/type\n", + "CustomProperties/Aperture[OBJ].Name/value\n", + "CustomProperties/Aperture[SA].Name/type\n", + "CustomProperties/Aperture[SA].Name/value\n", + "CustomProperties/Detectors[BM-Ceta].CombinedSubFrames/type\n", + "CustomProperties/Detectors[BM-Ceta].CombinedSubFrames/value\n", + "CustomProperties/Detectors[BM-Ceta].DigitalGain/type\n", + "CustomProperties/Detectors[BM-Ceta].DigitalGain/value\n", + "CustomProperties/Detectors[BM-Ceta].FrameID/type\n", + "CustomProperties/Detectors[BM-Ceta].FrameID/value\n", + "CustomProperties/Detectors[BM-Ceta].MaxPossiblePixelValue/type\n", + "CustomProperties/Detectors[BM-Ceta].MaxPossiblePixelValue/value\n", + "CustomProperties/Detectors[BM-Ceta].SaturationPoint/type\n", + "CustomProperties/Detectors[BM-Ceta].SaturationPoint/value\n", + "CustomProperties/Detectors[BM-Ceta].TimeStamp/type\n", + "CustomProperties/Detectors[BM-Ceta].TimeStamp/value\n", + "CustomProperties/Detectors[BM-Ceta].TransferOK/type\n", + "CustomProperties/Detectors[BM-Ceta].TransferOK/value\n", + "CustomProperties/Detectors[SuperXG11].IncidentAngle/type\n", + "CustomProperties/Detectors[SuperXG11].IncidentAngle/value\n", + "CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/type\n", + "CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/value\n", + "CustomProperties/Detectors[SuperXG11].Temperature/type\n", + "CustomProperties/Detectors[SuperXG11].Temperature/value\n", + "CustomProperties/Detectors[SuperXG12].IncidentAngle/type\n", + "CustomProperties/Detectors[SuperXG12].IncidentAngle/value\n", + "CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/type\n", + "CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/value\n", + "CustomProperties/Detectors[SuperXG12].Temperature/type\n", + "CustomProperties/Detectors[SuperXG12].Temperature/value\n", + "CustomProperties/Detectors[SuperXG13].IncidentAngle/type\n", + "CustomProperties/Detectors[SuperXG13].IncidentAngle/value\n", + "CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/type\n", + "CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/value\n", + "CustomProperties/Detectors[SuperXG13].Temperature/type\n", + "CustomProperties/Detectors[SuperXG13].Temperature/value\n", + 
"CustomProperties/Detectors[SuperXG14].IncidentAngle/type\n", + "CustomProperties/Detectors[SuperXG14].IncidentAngle/value\n", + "CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/type\n", + "CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/value\n", + "CustomProperties/Detectors[SuperXG14].Temperature/type\n", + "CustomProperties/Detectors[SuperXG14].Temperature/value\n", + "CustomProperties/IntegratedFrameCount/type\n", + "CustomProperties/IntegratedFrameCount/value\n", + "CustomProperties/MaxPossiblePixelValue/type\n", + "CustomProperties/MaxPossiblePixelValue/value\n", + "CustomProperties/RecodeFactor/type\n", + "CustomProperties/RecodeFactor/value\n", + "CustomProperties/Scan.ScanTransformation.A11/type\n", + "CustomProperties/Scan.ScanTransformation.A11/value\n", + "CustomProperties/Scan.ScanTransformation.A12/type\n", + "CustomProperties/Scan.ScanTransformation.A12/value\n", + "CustomProperties/Scan.ScanTransformation.A13/type\n", + "CustomProperties/Scan.ScanTransformation.A13/value\n", + "CustomProperties/Scan.ScanTransformation.A21/type\n", + "CustomProperties/Scan.ScanTransformation.A21/value\n", + "CustomProperties/Scan.ScanTransformation.A22/type\n", + "CustomProperties/Scan.ScanTransformation.A22/value\n", + "CustomProperties/Scan.ScanTransformation.A23/type\n", + "CustomProperties/Scan.ScanTransformation.A23/value\n", + "CustomProperties/StemMagnification/type\n", + "CustomProperties/StemMagnification/value\n", + "DetectorMetadata\n", + "DetectorMetadata/CollectionAngleRange/begin\n", + "DetectorMetadata/CollectionAngleRange/end\n", + "DetectorMetadata/DetectorName\n", + "DetectorMetadata/DetectorType\n", + "DetectorMetadata/Enabled\n", + "DetectorMetadata/Gain\n", + "DetectorMetadata/Inserted\n", + "DetectorMetadata/Offset\n", + "Detectors/Detector-*/AnalyticalDetectorShutterState\n", + "Detectors/Detector-*/AzimuthAngle\n", + "Detectors/Detector-*/BeginEnergy\n", + "Detectors/Detector-*/Binning/height\n", + "Detectors/Detector-*/Binning/width\n", + "Detectors/Detector-*/CollectionAngle\n", + "Detectors/Detector-*/CollectionAngleRange/begin\n", + "Detectors/Detector-*/CollectionAngleRange/end\n", + "Detectors/Detector-*/DarkGainCorrectionType\n", + "Detectors/Detector-*/DetectorName\n", + "Detectors/Detector-*/DetectorType\n", + "Detectors/Detector-*/Dispersion\n", + "Detectors/Detector-*/ElectronicsNoise\n", + "Detectors/Detector-*/ElevationAngle\n", + "Detectors/Detector-*/Enabled\n", + "Detectors/Detector-*/ExposureMode\n", + "Detectors/Detector-*/ExposureTime\n", + "Detectors/Detector-*/Gain\n", + "Detectors/Detector-*/InputCountRate\n", + "Detectors/Detector-*/Inserted\n", + "Detectors/Detector-*/LiveTime\n", + "Detectors/Detector-*/Offset\n", + "Detectors/Detector-*/OffsetEnergy\n", + "Detectors/Detector-*/OutputCountRate\n", + "Detectors/Detector-*/PulseProcessTime\n", + "Detectors/Detector-*/ReadOutArea/bottom\n", + "Detectors/Detector-*/ReadOutArea/left\n", + "Detectors/Detector-*/ReadOutArea/right\n", + "Detectors/Detector-*/ReadOutArea/top\n", + "Detectors/Detector-*/RealTime\n", + "Detectors/Detector-*/Shutters/Shutter-0/Position\n", + "Detectors/Detector-*/Shutters/Shutter-0/Type\n", + "EnergyFilter/EntranceApertureType\n", + "GasInjectionSystems\n", + "ImportedDataParameter/First_frame\n", + "ImportedDataParameter/Last_frame\n", + "ImportedDataParameter/Number_of_channels\n", + "ImportedDataParameter/Number_of_frames\n", + "ImportedDataParameter/Rebin_energy\n", + "Instrument/ComputerName\n", + "Instrument/ControlSoftwareVersion\n", + 
"Instrument/InstrumentClass\n", + "Instrument/InstrumentId\n", + "Instrument/InstrumentModel\n", + "Instrument/Manufacturer\n", + "Operations/CameraInputOperation/*/cameraName\n", + "Operations/CameraInputOperation/*/dataPath\n", + "Operations/CameraInputOperation/*/outputs\n", + "Operations/DisplayLevelsOperation/*/outputs\n", + "Operations/EDSInputOperation/*/dataPath\n", + "Operations/EDSInputOperation/*/detector\n", + "Operations/EDSInputOperation/*/outputs\n", + "Operations/ImageQuantificationOperation/*/absorptionCorrection/calibrationError\n", + "Operations/ImageQuantificationOperation/*/absorptionCorrection/density\n", + "Operations/ImageQuantificationOperation/*/absorptionCorrection/sampleThickness\n", + "Operations/ImageQuantificationOperation/*/absorptionCorrection/useAbsorptionCorrection\n", + "Operations/ImageQuantificationOperation/*/absorptionCorrection/useDensity\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModel\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelModeled/useBackgroundWindows\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/backgroundOrder\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/peakOrder\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/useBackgroundWindows\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundWindows\n", + "Operations/ImageQuantificationOperation/*/backgroundCorrection/useManualBackgroundWindows\n", + "Operations/ImageQuantificationOperation/*/colormixSelection\n", + "Operations/ImageQuantificationOperation/*/dataPath\n", + "Operations/ImageQuantificationOperation/*/elementProperties\n", + "Operations/ImageQuantificationOperation/*/elementSelection\n", + "Operations/ImageQuantificationOperation/*/elementsIdentified\n", + "Operations/ImageQuantificationOperation/*/ionizationCrossSectionModel\n", + "Operations/ImageQuantificationOperation/*/metadata/Acquisition/AcquisitionDatetime/DateTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Acquisition/AcquisitionStartDatetime/DateTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Acquisition/BeamType\n", + "Operations/ImageQuantificationOperation/*/metadata/Acquisition/SourceType\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/AcquisitionUnit\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/CompositionType\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Detector\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Encoding\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Offset/x\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Offset/y\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelSize/height\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelSize/width\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelUnitX\n", + "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelUnitY\n", + "Operations/ImageQuantificationOperation/*/metadata/Core/MetadataDefinitionVersion\n", + "Operations/ImageQuantificationOperation/*/metadata/Core/MetadataSchemaVersion\n", + "Operations/ImageQuantificationOperation/*/metadata/Core/guid\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/type\n", + 
"Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/value\n", + 
"Operations/ImageQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/value\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/StemMagnification/type\n", + "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/StemMagnification/value\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/AnalyticalDetectorShutterState\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/AzimuthAngle\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/BeginEnergy\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/height\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/width\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngle\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/begin\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/end\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/DarkGainCorrectionType\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorName\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorType\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Dispersion\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ElectronicsNoise\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ElevationAngle\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Enabled\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Gain\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/InputCountRate\n", + 
"Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Inserted\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/LiveTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Offset\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/OffsetEnergy\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/OutputCountRate\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/PulseProcessTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/bottom\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/left\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/right\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/top\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/RealTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Position\n", + "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Type\n", + "Operations/ImageQuantificationOperation/*/metadata/EnergyFilter/EntranceApertureType\n", + "Operations/ImageQuantificationOperation/*/metadata/GasInjectionSystems\n", + "Operations/ImageQuantificationOperation/*/metadata/Instrument/ComputerName\n", + "Operations/ImageQuantificationOperation/*/metadata/Instrument/ControlSoftwareVersion\n", + "Operations/ImageQuantificationOperation/*/metadata/Instrument/InstrumentClass\n", + "Operations/ImageQuantificationOperation/*/metadata/Instrument/InstrumentId\n", + "Operations/ImageQuantificationOperation/*/metadata/Instrument/InstrumentModel\n", + "Operations/ImageQuantificationOperation/*/metadata/Instrument/Manufacturer\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/AccelerationVoltage\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Diameter\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Enabled\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/MechanismType\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Name\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Number\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/x\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/y\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Type\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/BeamConvergence\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/C1LensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/C2LensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/CameraLength\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Defocus\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/DiffractionLensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/EFTEMOn\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/ExtractorVoltage\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Focus\n", + 
"Operations/ImageQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/x\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/y\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/GunLensSetting\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/HighMagnificationMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/IlluminationMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/IntermediateLensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/LastMeasuredScreenCurrent\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/MiniCondenserLensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/ObjectiveLensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/ObjectiveLensMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/OperatingMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/ProbeMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Projector1LensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/Projector2LensIntensity\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/ProjectorMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/ScreenCurrent\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/SpotIndex\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/StemFocus\n", + "Operations/ImageQuantificationOperation/*/metadata/Optics/TemOperatingSubMode\n", + "Operations/ImageQuantificationOperation/*/metadata/Sample\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/DwellTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/FrameTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/LineIntegrationCount\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/LineInterlacing\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/LineTime\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/MainsLockOn\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/bottom\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/left\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/right\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/top\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanRotation\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanSize/height\n", + "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanSize/width\n", + "Operations/ImageQuantificationOperation/*/metadata/Stage/AlphaTilt\n", + "Operations/ImageQuantificationOperation/*/metadata/Stage/BetaTilt\n", + "Operations/ImageQuantificationOperation/*/metadata/Stage/HolderType\n", + "Operations/ImageQuantificationOperation/*/metadata/Stage/Position/x\n", + "Operations/ImageQuantificationOperation/*/metadata/Stage/Position/y\n", + "Operations/ImageQuantificationOperation/*/metadata/Stage/Position/z\n", + "Operations/ImageQuantificationOperation/*/metadata/Vacuum/VacuumMode\n", + "Operations/ImageQuantificationOperation/*/outputs\n", + "Operations/ImageQuantificationOperation/*/quantificationKernelFilter\n", + "Operations/ImageQuantificationOperation/*/quantificationKernelFilterSigma\n", + "Operations/ImageQuantificationOperation/*/quantificationKernelFilterSize\n", + "Operations/ImageQuantificationOperation/*/quantificationMode\n", + 
"Operations/ImageQuantificationOperation/*/quantificationRadialFilterEdge\n", + "Operations/ImageQuantificationOperation/*/quantificationWienerFilterFrequency\n", + "Operations/ImageQuantificationOperation/*/spectralFilter\n", + "Operations/ImageQuantificationOperation/*/spectralFilterSigma\n", + "Operations/ImageQuantificationOperation/*/spectralFilterSize\n", + "Operations/ImageQuantificationOperation/*/useOptimizedCalibration\n", + "Operations/ImageQuantificationOperation/*/useQuantificationKernelFilter\n", + "Operations/ImageQuantificationOperation/*/useSpectralFilter\n", + "Operations/IntegrationRectangleOperation/*/dataPath\n", + "Operations/IntegrationRectangleOperation/*/outputs\n", + "Operations/IntensityProfileOperation/*/dataPath\n", + "Operations/IntensityProfileOperation/*/outputs\n", + "Operations/MixOperation/*/blend\n", + "Operations/MixOperation/*/outputs\n", + "Operations/Operations/operations\n", + "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/calibrationError\n", + "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/density\n", + "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/sampleThickness\n", + "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/useAbsorptionCorrection\n", + "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/useDensity\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModel\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelModeled/useBackgroundWindows\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/backgroundOrder\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/peakOrder\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/useBackgroundWindows\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundWindows\n", + "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/useManualBackgroundWindows\n", + "Operations/SpectrumQuantificationOperation/*/dataPath\n", + "Operations/SpectrumQuantificationOperation/*/elementProperties\n", + "Operations/SpectrumQuantificationOperation/*/elementSelection\n", + "Operations/SpectrumQuantificationOperation/*/elementsIdentified\n", + "Operations/SpectrumQuantificationOperation/*/ionizationCrossSectionModel\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/AcquisitionDatetime/DateTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/AcquisitionStartDatetime/DateTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/BeamType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/SourceType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/AcquisitionUnit\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/CompositionType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Detector\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Encoding\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Offset/x\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Offset/y\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelSize/height\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelSize/width\n", + 
"Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelUnitX\n", + "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelUnitY\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Core/MetadataDefinitionVersion\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Core/MetadataSchemaVersion\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Core/guid\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/type\n", + 
"Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/StemMagnification/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/StemMagnification/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.IntegrationShape.Area/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.IntegrationShape.Area/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.Plot.Label/type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.Plot.Label/value\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/AnalyticalDetectorShutterState\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/AzimuthAngle\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/BeginEnergy\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/height\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/width\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngle\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/begin\n", + 
"Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/end\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/DarkGainCorrectionType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorName\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Dispersion\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ElectronicsNoise\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ElevationAngle\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Enabled\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Gain\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/InputCountRate\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Inserted\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/LiveTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Offset\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/OffsetEnergy\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/OutputCountRate\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/PulseProcessTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/bottom\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/left\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/right\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/top\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/RealTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Position\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/EnergyFilter/EntranceApertureType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/GasInjectionSystems\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/ComputerName\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/ControlSoftwareVersion\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/InstrumentClass\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/InstrumentId\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/InstrumentModel\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/Manufacturer\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/AccelerationVoltage\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Diameter\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Enabled\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/MechanismType\n", + 
"Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Name\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Number\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/x\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/y\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Type\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/BeamConvergence\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/C1LensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/C2LensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/CameraLength\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Defocus\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/DiffractionLensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/EFTEMOn\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ExtractorVoltage\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Focus\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/x\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/y\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/GunLensSetting\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/HighMagnificationMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/IlluminationMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/IntermediateLensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/LastMeasuredScreenCurrent\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/MiniCondenserLensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ObjectiveLensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ObjectiveLensMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/OperatingMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ProbeMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Projector1LensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Projector2LensIntensity\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ProjectorMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ScreenCurrent\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/SpotIndex\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/StemFocus\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Optics/TemOperatingSubMode\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Sample\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/DwellTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/FrameTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/LineIntegrationCount\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/LineInterlacing\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/LineTime\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/MainsLockOn\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/bottom\n", + 
"Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/left\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/right\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/top\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanRotation\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanSize/height\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanSize/width\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Stage/AlphaTilt\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Stage/BetaTilt\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Stage/HolderType\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Stage/Position/x\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Stage/Position/y\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Stage/Position/z\n", + "Operations/SpectrumQuantificationOperation/*/metadata/Vacuum/VacuumMode\n", + "Operations/SpectrumQuantificationOperation/*/outputs\n", + "Operations/SpectrumQuantificationOperation/*/useOptimizedCalibration\n", + "Operations/StemInputOperation/*/dataPath\n", + "Operations/StemInputOperation/*/detector\n", + "Operations/StemInputOperation/*/detectorInfo/name\n", + "Operations/StemInputOperation/*/detectorInfo/segments\n", + "Operations/StemInputOperation/*/outputs\n", + "Operations/StemInputOperation/*/scanArea\n", + "Optics/AccelerationVoltage\n", + "Optics/Apertures/Aperture-*/Diameter\n", + "Optics/Apertures/Aperture-*/Enabled\n", + "Optics/Apertures/Aperture-*/MechanismType\n", + "Optics/Apertures/Aperture-*/Name\n", + "Optics/Apertures/Aperture-*/Number\n", + "Optics/Apertures/Aperture-*/PositionOffset/x\n", + "Optics/Apertures/Aperture-*/PositionOffset/y\n", + "Optics/Apertures/Aperture-*/Type\n", + "Optics/BeamConvergence\n", + "Optics/C1LensIntensity\n", + "Optics/C2LensIntensity\n", + "Optics/CameraLength\n", + "Optics/Defocus\n", + "Optics/DiffractionLensIntensity\n", + "Optics/EFTEMOn\n", + "Optics/ExtractorVoltage\n", + "Optics/Focus\n", + "Optics/FullScanFieldOfView/x\n", + "Optics/FullScanFieldOfView/y\n", + "Optics/GunLensSetting\n", + "Optics/HighMagnificationMode\n", + "Optics/IlluminationMode\n", + "Optics/IntermediateLensIntensity\n", + "Optics/LastMeasuredScreenCurrent\n", + "Optics/MiniCondenserLensIntensity\n", + "Optics/NominalMagnification\n", + "Optics/ObjectiveLensIntensity\n", + "Optics/ObjectiveLensMode\n", + "Optics/OperatingMode\n", + "Optics/ProbeMode\n", + "Optics/Projector1LensIntensity\n", + "Optics/Projector2LensIntensity\n", + "Optics/ProjectorMode\n", + "Optics/ScreenCurrent\n", + "Optics/SpotIndex\n", + "Optics/StemFocus\n", + "Optics/TemOperatingSubMode\n", + "Sample\n", + "Scan/DwellTime\n", + "Scan/FrameTime\n", + "Scan/LineIntegrationCount\n", + "Scan/LineInterlacing\n", + "Scan/LineTime\n", + "Scan/MainsLockOn\n", + "Scan/ScanArea/bottom\n", + "Scan/ScanArea/left\n", + "Scan/ScanArea/right\n", + "Scan/ScanArea/top\n", + "Scan/ScanRotation\n", + "Scan/ScanSize/height\n", + "Scan/ScanSize/width\n", + "Stage/AlphaTilt\n", + "Stage/BetaTilt\n", + "Stage/HolderType\n", + "Stage/Position/x\n", + "Stage/Position/y\n", + "Stage/Position/z\n", + "Vacuum/VacuumMode\n" + ] + } + ], + "source": [ + "print(f\"len(uniq): {len(uniq)}\")\n", + "hashes_removed = set()\n", + "for entry in uniq:\n", + " where = re.findall(r\"([0-9a-f]{32})\", entry)\n", + " if where != []:\n", + " if len(where) == 1:\n", + " # print(f\"{entry} >>>> 
{entry.replace(where[0], '*')}\")\n",
+    "            hashes_removed.add(entry.replace(where[0], '*'))\n",
+    "        else:\n",
+    "            raise ValueError(\"len(where) > 1!\")\n",
+    "    else:\n",
+    "        hashes_removed.add(entry)\n",
+    "print(f\"len(hashes_removed): {len(hashes_removed)}\")\n",
+    "\n",
+    "detectors_removed = set()\n",
+    "for entry in hashes_removed:\n",
+    "    where = re.findall(r\"(Detector-[0-9]+)\", entry)\n",
+    "    if where != []:\n",
+    "        if len(where) == 1:\n",
+    "            detectors_removed.add(entry.replace(where[0], 'Detector-*'))\n",
+    "        else:\n",
+    "            raise ValueError(\"len(where) > 1!\")\n",
+    "    else:\n",
+    "        detectors_removed.add(entry)\n",
+    "print(f\"len(detectors_removed): {len(detectors_removed)}\")\n",
+    "\n",
+    "apertures_removed = set()\n",
+    "for entry in detectors_removed:\n",
+    "    where = re.findall(r\"(Aperture-[0-9]+)\", entry)\n",
+    "    if where != []:\n",
+    "        if len(where) == 1:\n",
+    "            apertures_removed.add(entry.replace(where[0], 'Aperture-*'))\n",
+    "        else:\n",
+    "            raise ValueError(\"len(where) > 1!\")\n",
+    "    else:\n",
+    "        apertures_removed.add(entry)\n",
+    "print(f\"len(apertures_removed): {len(apertures_removed)}\")\n",
+    "\n",
+    "for entry in sorted(list(apertures_removed)):\n",
+    "    print(entry)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 18,
    "id": "a2f08153-3651-4dce-aa45-62bb7c645da2",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "['Detector-11']\n"
      ]
     }
    ],
    "source": [
     "token = \"Detectors/Detector-11/CollectionAngleRange/begin\"\n",
     "where = re.findall(r\"(Detector-[0-9]+)\", token)\n",
     "print(where)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "e83d303f-2521-4078-b9a7-d3b09e52cfb9",
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {
diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
index 72c33bd5e..6f1f802a5 100644
--- a/pynxtools/dataconverter/readers/em/reader.py
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -136,8 +136,8 @@ def read(self,
 
         # TODO::check correct loop through!
         # sub_parser = "velox_emd"
-        subparser = RsciioVeloxSubParser(entry_id, file_paths[0])
-        subparser.parse(template, verbose=True)
+        subparser = RsciioVeloxSubParser(entry_id, file_paths[0], verbose=True)
+        subparser.parse(template)
 
         # for dat_instance in case.dat_parser_type:
         #     print(f"Process pieces of information in {dat_instance} tech partner file...")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
index 005d38c98..78c36b29c 100644
--- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
+++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
@@ -33,10 +33,19 @@
 REAL_SPACE = 0
 COMPLEX_SPACE = 1
 
+def all_req_keywords_in_dict(dct: dict, keywords: list) -> bool:
+    """Check whether dict dct has every entry of keywords among its keys."""
+    # falsifiable: return False as soon as one required keyword is missing
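+    # A hedged usage sketch with hypothetical dicts (not taken from a real Velox file):
+    #   all_req_keywords_in_dict({"data": 0, "axes": 1}, ["data", "axes"])  -> True
+    #   all_req_keywords_in_dict({"data": 0}, ["data", "axes"])             -> False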
+    for key in keywords:
+        if key in dct:
+            continue
+        return False
+    return True
+
 class RsciioVeloxSubParser(RsciioBaseParser):
     """Read Velox EMD File Format emd."""
-    def __init__(self, entry_id: int = 1, file_path: str = ""):
+    def __init__(self, entry_id: int = 1, file_path: str = "", verbose=False):
         super().__init__(file_path)
         if entry_id > 0:
             self.entry_id = entry_id
@@ -49,9 +58,13 @@ def __init__(self, entry_id: int = 1, file_path: str = ""):
                               "eds_img": 1}
         self.file_path_sha256 = None
         self.tmp: Dict = {}
-        self.supported_version: Dict = {}
-        self.version: Dict = {}
-        self.supported = False
+        self.supported_version: Dict = {"Core/MetadataDefinitionVersion": ["7.9"],
+                                        "Core/MetadataSchemaVersion": ["v1/2013/07"]}
+        self.version: Dict = {"Core/MetadataDefinitionVersion": None,
+                              "Core/MetadataSchemaVersion": None}
+        self.obj_idx_supported = []
+        self.supported = False  # flipped to True in check_if_supported if parseable content is found
+        self.verbose = verbose
         self.check_if_supported()
 
     def check_if_supported(self):
@@ -62,15 +75,37 @@ def check_if_supported(self):
             # only the collection of the concepts without the actual instance data
             # based on this one could then plan how much memory has to be reserved
             # in the template and stream out accordingly
+
             with open(self.file_path, "rb", 0) as fp:
                 self.file_path_sha256 = get_sha256_of_file_content(fp)
 
             print(f"Parsing {self.file_path} with SHA256 {self.file_path_sha256} ...")
-            self.supported = True
+
+            reqs = ["data", "axes", "metadata", "original_metadata", "mapping"]
+            for idx, obj in enumerate(self.objs):
+                if not isinstance(obj, dict):
+                    continue
+                if not all_req_keywords_in_dict(obj, reqs):
+                    continue
+                orgmeta = fd.FlatDict(obj["original_metadata"], "/")  # could be optimized
+                if "Core/MetadataDefinitionVersion" in orgmeta:
+                    if orgmeta["Core/MetadataDefinitionVersion"] not in \
+                            self.supported_version["Core/MetadataDefinitionVersion"]:
+                        continue
+                    if orgmeta["Core/MetadataSchemaVersion"] not in \
+                            self.supported_version["Core/MetadataSchemaVersion"]:
+                        continue
+                self.obj_idx_supported.append(idx)
+                if self.verbose:
+                    print(f"{idx}-th obj is supported")
+            if len(self.obj_idx_supported) > 0:  # there is at least some supported content
+                self.supported = True
+            else:
+                print(f"WARNING: {self.file_path} has no supported content!")
         except IOError:
             print(f"Loading {self.file_path} using {self.__name__} is not supported !")
 
-    def parse(self, template: dict, verbose=False) -> dict:
+    def parse(self, template: dict) -> dict:
         """Perform actual parsing filling cache self.tmp."""
         if self.supported is True:
             self.tech_partner_to_nexus_normalization(template)
@@ -85,16 +120,13 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict:
         for idx, obj in enumerate(self.objs):
             if not isinstance(obj, dict):
                 continue
-            parse = True
-            for req in reqs:
-                if req not in obj:
-                    parse = False
-            if parse is False:
+            if not all_req_keywords_in_dict(obj, reqs):
                 continue
             content_type = self.content_resolver(obj)
             print(f"Parsing {idx}-th object in {self.file_path} content type is {content_type}")
-            print(f"dims: {obj['axes']}")
+            if self.verbose:
+                print(f"dims: {obj['axes']}")
             if content_type == "imgs":
                 self.normalize_imgs_content(obj, template)  # generic imaging modes
                 # TODO:: could later make an own one for bright/dark field, but
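
A hedged, self-contained sketch of the two-pass flow this diff implements (first vet each
object that rosettasciio returns for the required keys and a supported metadata schema,
then normalize only the vetted objects), assuming rosettasciio's emd plugin and the
flatdict package; the file name is hypothetical:

    import flatdict as fd
    from rsciio import emd

    objs = emd.file_reader("example.emd")  # one dict per signal in the EMD file
    reqs = ["data", "axes", "metadata", "original_metadata", "mapping"]
    obj_idx_supported = []
    for idx, obj in enumerate(objs):
        # skip anything that is not a dict with the expected keys
        if not isinstance(obj, dict) or not all(req in obj for req in reqs):
            continue
        orgmeta = fd.FlatDict(obj["original_metadata"], "/")
        if orgmeta.get("Core/MetadataDefinitionVersion") in ["7.9"]:
            obj_idx_supported.append(idx)  # only these would be normalized
    print(f"supported objects: {obj_idx_supported}")
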
diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py
new file mode 100644
index 000000000..6dda4dd8e
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py
@@ -0,0 +1,86 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Map Velox to NeXus concepts."""
+
+# Velox *.emd
+# "Core/MetadataDefinitionVersion": ["7.9"]
+# "Core/MetadataSchemaVersion": ["v1/2013/07"]
+# all *.emd files from https://ac.archive.fhi.mpg.de/D62142 parsed with
+# rosettasciio 0.2, hyperspy 1.7.6
+# unique original_metadata keys:
+# keys with hash instance duplicates removed via r"([0-9a-f]{32})"
+# keys with detector instance duplicates removed via r"(Detector-[0-9]+)"
+# keys with aperture instance duplicates removed via r"(Aperture-[0-9]+)"
+# remaining instance duplicates for BM-Ceta and r"(SuperXG[0-9]{2})" removed manually
+# Concept names like Projector1Lens and Projector2Lens denote two different instances
+# of the same concept Projector*Lens; in NeXus this would become lens_em1(NXlens_em)
+# with name: projector and a correspondingly named field
+
+NX_VELOX_TO_NX_EM = [
+
+("/ENTRY[entry*]/", "to_iso8601", "Acquisition/AcquisitionStartDatetime/DateTime"),
+("/ENTRY[entry*]/", "load_from", "Acquisition/BeamType"),
+("/ENTRY[entry*]/", "load_from", "Acquisition/SourceType"),
+("/ENTRY[entry*]/", "load_from", "Core/MetadataDefinitionVersion"),
+("/ENTRY[entry*]/", "load_from", "Core/MetadataSchemaVersion"),
+("/ENTRY[entry*]/", "load_from", ["Detectors/Detector-*/CollectionAngleRange/begin", "Detectors/Detector-*/CollectionAngleRange/end"]),
+("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/DetectorName"),
+("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/DetectorType"),
+("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/Enabled"),
+("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/Inserted"),
+("/ENTRY[entry*]/", "load_from", "Instrument/ControlSoftwareVersion"),
+("/ENTRY[entry*]/", "load_from", "Instrument/InstrumentId"),
+("/ENTRY[entry*]/", "load_from", "Instrument/InstrumentModel"),
+("/ENTRY[entry*]/", "load_from", "Instrument/Manufacturer"),
+("/ENTRY[entry*]/", "load_from", "Optics/AccelerationVoltage"),
+("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Diameter"),
+("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Enabled"),
+("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Name"),
+("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Type"),
+("/ENTRY[entry*]/", "load_from", "Optics/BeamConvergence"),
+("/ENTRY[entry*]/", "load_from", "Optics/C1LensIntensity"),
+("/ENTRY[entry*]/", "load_from", "Optics/C2LensIntensity"),
+("/ENTRY[entry*]/", "load_from", "Optics/CameraLength"),
+("/ENTRY[entry*]/", "load_from", "Optics/Defocus"),
+("/ENTRY[entry*]/", "load_from", "Optics/DiffractionLensIntensity"),
+("/ENTRY[entry*]/", "load_from", "Optics/EFTEMOn"),
+("/ENTRY[entry*]/", "load_from", "Optics/ExtractorVoltage"),
+("/ENTRY[entry*]/", "load_from", "Optics/Focus"),
+("/ENTRY[entry*]/", "load_from", "Optics/FullScanFieldOfView/x"),
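+# A hedged sketch (illustrative only, not one of the mapping entries): a consumer of
+# these (target, instruction, source) triples could resolve a single entry roughly as
+#   for target, instruction, source in NX_VELOX_TO_NX_EM:
+#       if instruction == "load_from" and isinstance(source, str) and source in orgmeta:
+#           template[target] = orgmeta[source]
+# with template and orgmeta used as in rsciio_velox.py.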
+("/ENTRY[entry*]/", "load_from", "Optics/FullScanFieldOfView/y"), +("/ENTRY[entry*]/", "load_from", "Optics/GunLensSetting"), +("/ENTRY[entry*]/", "load_from", "Optics/HighMagnificationMode"), +("/ENTRY[entry*]/", "load_from", "Optics/IlluminationMode"), +("/ENTRY[entry*]/", "load_from", "Optics/IntermediateLensIntensity"), +("/ENTRY[entry*]/", "load_from", "Optics/LastMeasuredScreenCurrent"), +("/ENTRY[entry*]/", "load_from", "Optics/MiniCondenserLensIntensity"), +("/ENTRY[entry*]/", "load_from", "Optics/NominalMagnification"), +("/ENTRY[entry*]/", "load_from", "Optics/ObjectiveLensIntensity"), +("/ENTRY[entry*]/", "load_from", "Optics/ObjectiveLensMode"), +("/ENTRY[entry*]/", "load_from", "Optics/OperatingMode"), +("/ENTRY[entry*]/", "load_from", "Optics/ProbeMode"), +("/ENTRY[entry*]/", "load_from", "Optics/Projector1LensIntensity"), +("/ENTRY[entry*]/", "load_from", "Optics/Projector2LensIntensity"), +("/ENTRY[entry*]/", "load_from", "Optics/ProjectorMode"), +("/ENTRY[entry*]/", "load_from", "Optics/SpotIndex"), +("/ENTRY[entry*]/", "load_from", "Optics/StemFocus"), +("/ENTRY[entry*]/", "load_from", "Optics/TemOperatingSubMode"), +("/ENTRY[entry*]/", "load_from", "Sample"), +("/ENTRY[entry*]/", "load_from", "Scan/DwellTime"), +("/ENTRY[entry*]/", "load_from", "Stage/AlphaTilt"), +("/ENTRY[entry*]/", "load_from", "Stage/BetaTilt"), +("/ENTRY[entry*]/", "load_from", ["Stage/Position/x", "Stage/Position/y", "Stage/Position/z"])] From 1bd6c59a6f57be11917170044b5b26ef77368501 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Fri, 19 Jan 2024 17:25:15 +0100 Subject: [PATCH 83/84] A working version for parsing some of the Velox/FEI metadata and map them on NeXus --- debug/spctrscpy.dev.ipynb | 934 +----------------- pynxtools/dataconverter/readers/em/reader.py | 2 +- .../readers/em/subparsers/rsciio_velox.py | 120 ++- .../em/subparsers/rsciio_velox_concepts.py | 112 ++- 4 files changed, 195 insertions(+), 973 deletions(-) diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb index 5242075eb..7da7f48ae 100644 --- a/debug/spctrscpy.dev.ipynb +++ b/debug/spctrscpy.dev.ipynb @@ -2,18 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og']\n" - ] - } - ], + "outputs": [], "source": [ "import re\n", "import numpy as np\n", @@ -29,18 +21,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\n" - ] - } - ], + 
"outputs": [], "source": [ "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n", "fnms = [(\"pdi\", \"InGaN_nanowires_spectra.edaxh5\"),\n", @@ -57,57 +41,29 @@ " (\"benedikt\", \"EELS_map_2_ROI_1_location_4.dm3\"),\n", " (\"phillippe\", \"H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n", "# pyUSID, HSMA\n", - "case = 5 # 5 # len(fnms) - 1 # len(fnms) - 1\n", + "case = 8 # 5 # len(fnms) - 1 # len(fnms) - 1\n", "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n", "print(fnm)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47", "metadata": {}, - "outputs": [ - { - "data": { - "application/x-hdf5": "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd", - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "H5Web(fnm)" + "# H5Web(fnm)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "221abf67-0d88-4088-9cc7-e0d9b85c4699", "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, - "outputs": [ - { - "data": { - "application/x-hdf5": "/home/kaiobach/Research/hu_hu_hu/sprint17/pynx/pynxtools/debug/debug.CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd.nxs", - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# resulting NeXus artifact\n", "# tmp = \"debug.CG71113 1138 Ceta 660 mm Camera.emd.nxs\"\n", @@ -126,236 +82,12 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "75b32c8f-8efa-4b40-bfc8-6f95300902ea", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1121 Ceta 310.0 kx Camera.emd, len(objs): 1\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1125 Ceta 1.1 Mx Camera 0001.emd, len(objs): 1\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1125 Ceta 1.1 Mx Camera.emd, len(objs): 1\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1126 Ceta 1.1 Mx Camera.emd, len(objs): 1\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1134 Ceta 660 mm Camera.emd, len(objs): 1\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1138 Ceta 660 mm Camera.emd, len(objs): 1\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: 
/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1405 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1407 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1409 HAADF-DF4-DF2-BF 6.7 Mx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The file contains only one spectrum stream\n", - "The file contains only one spectrum stream\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1411 HAADF-DF4-DF2-BF 4.8 Mx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1411 HAADF-DF4-DF2-BF 6.7 Mx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd, len(objs): 12\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - 
"Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1422 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd, len(objs): 12\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1423 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd, len(objs): 0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The file contains only one spectrum stream\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1444 EDS-HAADF-DF4-DF2-BF 595.5 kx SI.emd, len(objs): 10\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - 
"Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The file contains only one spectrum stream\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1514 EDS-HAADF-DF4-DF2-BF 1.2 Mx SI.emd, len(objs): 11\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "fpath: /home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/CG71113 1537 HAADF-DF4-DF2-BF 432.2 kx STEM.emd, len(objs): 4\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "Core/MetadataDefinitionVersion, 7.9\n", - "Core/MetadataSchemaVersion, v1/2013/07\n", - "len(uniq): 1380\n" - ] - } - ], + "outputs": [], "source": [ "fnms = [\"CG71113 1121 Ceta 310.0 kx Camera.emd\",\n", " \"CG71113 1125 Ceta 1.1 Mx Camera 0001.emd\",\n", @@ -397,7 +129,7 @@ " orgmeta = fd.FlatDict(obj[\"original_metadata\"], \"/\")\n", " # print(\"\\n\\n\")\n", " for key, val in orgmeta.items():\n", - " # print(f\"{key}: {val}\")\n", + " print(f\"{key}: {val}\")\n", " if key == \"Core/MetadataDefinitionVersion\" or key == \"Core/MetadataSchemaVersion\":\n", " print(f\"{key}, {val}\")\n", " uniq.add(f\"{key}\")\n", @@ -408,19 +140,10 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "c5696211-de5b-4b54-a9fa-38dd48048a95", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "rosettasciio 0.2\n", - "hyperspy 1.7.6\n" - ] - } - ], + "outputs": [], "source": [ "! pip list | grep rosettasciio\n", "! 
pip list | grep hyperspy" @@ -428,623 +151,12 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "acb98c77-d6ec-46d5-abe6-8b45bfa3fb4a", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "len(uniq): 1380\n", - "len(hashes_removed): 893\n", - "len(detectors_removed): 662\n", - "len(apertures_removed): 600\n", - "Acquisition/AcquisitionDatetime/DateTime\n", - "Acquisition/AcquisitionStartDatetime/DateTime\n", - "Acquisition/BeamType\n", - "Acquisition/SourceType\n", - "BinaryResult/AcquisitionUnit\n", - "BinaryResult/BitsPerPixel\n", - "BinaryResult/CompositionType\n", - "BinaryResult/Detector\n", - "BinaryResult/DetectorIndex\n", - "BinaryResult/Encoding\n", - "BinaryResult/ImageSize/height\n", - "BinaryResult/ImageSize/width\n", - "BinaryResult/Offset/x\n", - "BinaryResult/Offset/y\n", - "BinaryResult/PixelSize/height\n", - "BinaryResult/PixelSize/width\n", - "BinaryResult/PixelUnitX\n", - "BinaryResult/PixelUnitY\n", - "Core/MetadataDefinitionVersion\n", - "Core/MetadataSchemaVersion\n", - "Core/guid\n", - "CustomProperties/Aperture[C1].Name/type\n", - "CustomProperties/Aperture[C1].Name/value\n", - "CustomProperties/Aperture[C2].Name/type\n", - "CustomProperties/Aperture[C2].Name/value\n", - "CustomProperties/Aperture[OBJ].Name/type\n", - "CustomProperties/Aperture[OBJ].Name/value\n", - "CustomProperties/Aperture[SA].Name/type\n", - "CustomProperties/Aperture[SA].Name/value\n", - "CustomProperties/Detectors[BM-Ceta].CombinedSubFrames/type\n", - "CustomProperties/Detectors[BM-Ceta].CombinedSubFrames/value\n", - "CustomProperties/Detectors[BM-Ceta].DigitalGain/type\n", - "CustomProperties/Detectors[BM-Ceta].DigitalGain/value\n", - "CustomProperties/Detectors[BM-Ceta].FrameID/type\n", - "CustomProperties/Detectors[BM-Ceta].FrameID/value\n", - "CustomProperties/Detectors[BM-Ceta].MaxPossiblePixelValue/type\n", - "CustomProperties/Detectors[BM-Ceta].MaxPossiblePixelValue/value\n", - "CustomProperties/Detectors[BM-Ceta].SaturationPoint/type\n", - "CustomProperties/Detectors[BM-Ceta].SaturationPoint/value\n", - "CustomProperties/Detectors[BM-Ceta].TimeStamp/type\n", - "CustomProperties/Detectors[BM-Ceta].TimeStamp/value\n", - "CustomProperties/Detectors[BM-Ceta].TransferOK/type\n", - "CustomProperties/Detectors[BM-Ceta].TransferOK/value\n", - "CustomProperties/Detectors[SuperXG11].IncidentAngle/type\n", - "CustomProperties/Detectors[SuperXG11].IncidentAngle/value\n", - "CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/type\n", - "CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/value\n", - "CustomProperties/Detectors[SuperXG11].Temperature/type\n", - "CustomProperties/Detectors[SuperXG11].Temperature/value\n", - "CustomProperties/Detectors[SuperXG12].IncidentAngle/type\n", - "CustomProperties/Detectors[SuperXG12].IncidentAngle/value\n", - "CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/type\n", - "CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/value\n", - "CustomProperties/Detectors[SuperXG12].Temperature/type\n", - "CustomProperties/Detectors[SuperXG12].Temperature/value\n", - "CustomProperties/Detectors[SuperXG13].IncidentAngle/type\n", - "CustomProperties/Detectors[SuperXG13].IncidentAngle/value\n", - "CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/type\n", - "CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/value\n", - "CustomProperties/Detectors[SuperXG13].Temperature/type\n", - "CustomProperties/Detectors[SuperXG13].Temperature/value\n", 
- "CustomProperties/Detectors[SuperXG14].IncidentAngle/type\n", - "CustomProperties/Detectors[SuperXG14].IncidentAngle/value\n", - "CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/type\n", - "CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/value\n", - "CustomProperties/Detectors[SuperXG14].Temperature/type\n", - "CustomProperties/Detectors[SuperXG14].Temperature/value\n", - "CustomProperties/IntegratedFrameCount/type\n", - "CustomProperties/IntegratedFrameCount/value\n", - "CustomProperties/MaxPossiblePixelValue/type\n", - "CustomProperties/MaxPossiblePixelValue/value\n", - "CustomProperties/RecodeFactor/type\n", - "CustomProperties/RecodeFactor/value\n", - "CustomProperties/Scan.ScanTransformation.A11/type\n", - "CustomProperties/Scan.ScanTransformation.A11/value\n", - "CustomProperties/Scan.ScanTransformation.A12/type\n", - "CustomProperties/Scan.ScanTransformation.A12/value\n", - "CustomProperties/Scan.ScanTransformation.A13/type\n", - "CustomProperties/Scan.ScanTransformation.A13/value\n", - "CustomProperties/Scan.ScanTransformation.A21/type\n", - "CustomProperties/Scan.ScanTransformation.A21/value\n", - "CustomProperties/Scan.ScanTransformation.A22/type\n", - "CustomProperties/Scan.ScanTransformation.A22/value\n", - "CustomProperties/Scan.ScanTransformation.A23/type\n", - "CustomProperties/Scan.ScanTransformation.A23/value\n", - "CustomProperties/StemMagnification/type\n", - "CustomProperties/StemMagnification/value\n", - "DetectorMetadata\n", - "DetectorMetadata/CollectionAngleRange/begin\n", - "DetectorMetadata/CollectionAngleRange/end\n", - "DetectorMetadata/DetectorName\n", - "DetectorMetadata/DetectorType\n", - "DetectorMetadata/Enabled\n", - "DetectorMetadata/Gain\n", - "DetectorMetadata/Inserted\n", - "DetectorMetadata/Offset\n", - "Detectors/Detector-*/AnalyticalDetectorShutterState\n", - "Detectors/Detector-*/AzimuthAngle\n", - "Detectors/Detector-*/BeginEnergy\n", - "Detectors/Detector-*/Binning/height\n", - "Detectors/Detector-*/Binning/width\n", - "Detectors/Detector-*/CollectionAngle\n", - "Detectors/Detector-*/CollectionAngleRange/begin\n", - "Detectors/Detector-*/CollectionAngleRange/end\n", - "Detectors/Detector-*/DarkGainCorrectionType\n", - "Detectors/Detector-*/DetectorName\n", - "Detectors/Detector-*/DetectorType\n", - "Detectors/Detector-*/Dispersion\n", - "Detectors/Detector-*/ElectronicsNoise\n", - "Detectors/Detector-*/ElevationAngle\n", - "Detectors/Detector-*/Enabled\n", - "Detectors/Detector-*/ExposureMode\n", - "Detectors/Detector-*/ExposureTime\n", - "Detectors/Detector-*/Gain\n", - "Detectors/Detector-*/InputCountRate\n", - "Detectors/Detector-*/Inserted\n", - "Detectors/Detector-*/LiveTime\n", - "Detectors/Detector-*/Offset\n", - "Detectors/Detector-*/OffsetEnergy\n", - "Detectors/Detector-*/OutputCountRate\n", - "Detectors/Detector-*/PulseProcessTime\n", - "Detectors/Detector-*/ReadOutArea/bottom\n", - "Detectors/Detector-*/ReadOutArea/left\n", - "Detectors/Detector-*/ReadOutArea/right\n", - "Detectors/Detector-*/ReadOutArea/top\n", - "Detectors/Detector-*/RealTime\n", - "Detectors/Detector-*/Shutters/Shutter-0/Position\n", - "Detectors/Detector-*/Shutters/Shutter-0/Type\n", - "EnergyFilter/EntranceApertureType\n", - "GasInjectionSystems\n", - "ImportedDataParameter/First_frame\n", - "ImportedDataParameter/Last_frame\n", - "ImportedDataParameter/Number_of_channels\n", - "ImportedDataParameter/Number_of_frames\n", - "ImportedDataParameter/Rebin_energy\n", - "Instrument/ComputerName\n", - "Instrument/ControlSoftwareVersion\n", - 
"Instrument/InstrumentClass\n", - "Instrument/InstrumentId\n", - "Instrument/InstrumentModel\n", - "Instrument/Manufacturer\n", - "Operations/CameraInputOperation/*/cameraName\n", - "Operations/CameraInputOperation/*/dataPath\n", - "Operations/CameraInputOperation/*/outputs\n", - "Operations/DisplayLevelsOperation/*/outputs\n", - "Operations/EDSInputOperation/*/dataPath\n", - "Operations/EDSInputOperation/*/detector\n", - "Operations/EDSInputOperation/*/outputs\n", - "Operations/ImageQuantificationOperation/*/absorptionCorrection/calibrationError\n", - "Operations/ImageQuantificationOperation/*/absorptionCorrection/density\n", - "Operations/ImageQuantificationOperation/*/absorptionCorrection/sampleThickness\n", - "Operations/ImageQuantificationOperation/*/absorptionCorrection/useAbsorptionCorrection\n", - "Operations/ImageQuantificationOperation/*/absorptionCorrection/useDensity\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModel\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelModeled/useBackgroundWindows\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/backgroundOrder\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/peakOrder\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/useBackgroundWindows\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/backgroundWindows\n", - "Operations/ImageQuantificationOperation/*/backgroundCorrection/useManualBackgroundWindows\n", - "Operations/ImageQuantificationOperation/*/colormixSelection\n", - "Operations/ImageQuantificationOperation/*/dataPath\n", - "Operations/ImageQuantificationOperation/*/elementProperties\n", - "Operations/ImageQuantificationOperation/*/elementSelection\n", - "Operations/ImageQuantificationOperation/*/elementsIdentified\n", - "Operations/ImageQuantificationOperation/*/ionizationCrossSectionModel\n", - "Operations/ImageQuantificationOperation/*/metadata/Acquisition/AcquisitionDatetime/DateTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Acquisition/AcquisitionStartDatetime/DateTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Acquisition/BeamType\n", - "Operations/ImageQuantificationOperation/*/metadata/Acquisition/SourceType\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/AcquisitionUnit\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/CompositionType\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Detector\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Encoding\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Offset/x\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/Offset/y\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelSize/height\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelSize/width\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelUnitX\n", - "Operations/ImageQuantificationOperation/*/metadata/BinaryResult/PixelUnitY\n", - "Operations/ImageQuantificationOperation/*/metadata/Core/MetadataDefinitionVersion\n", - "Operations/ImageQuantificationOperation/*/metadata/Core/MetadataSchemaVersion\n", - "Operations/ImageQuantificationOperation/*/metadata/Core/guid\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/type\n", - 
"Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/value\n", - 
"Operations/ImageQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/value\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/StemMagnification/type\n", - "Operations/ImageQuantificationOperation/*/metadata/CustomProperties/StemMagnification/value\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/AnalyticalDetectorShutterState\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/AzimuthAngle\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/BeginEnergy\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/height\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/width\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngle\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/begin\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/end\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/DarkGainCorrectionType\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorName\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorType\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Dispersion\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ElectronicsNoise\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ElevationAngle\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Enabled\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Gain\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/InputCountRate\n", - 
"Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Inserted\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/LiveTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Offset\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/OffsetEnergy\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/OutputCountRate\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/PulseProcessTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/bottom\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/left\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/right\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/top\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/RealTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Position\n", - "Operations/ImageQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Type\n", - "Operations/ImageQuantificationOperation/*/metadata/EnergyFilter/EntranceApertureType\n", - "Operations/ImageQuantificationOperation/*/metadata/GasInjectionSystems\n", - "Operations/ImageQuantificationOperation/*/metadata/Instrument/ComputerName\n", - "Operations/ImageQuantificationOperation/*/metadata/Instrument/ControlSoftwareVersion\n", - "Operations/ImageQuantificationOperation/*/metadata/Instrument/InstrumentClass\n", - "Operations/ImageQuantificationOperation/*/metadata/Instrument/InstrumentId\n", - "Operations/ImageQuantificationOperation/*/metadata/Instrument/InstrumentModel\n", - "Operations/ImageQuantificationOperation/*/metadata/Instrument/Manufacturer\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/AccelerationVoltage\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Diameter\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Enabled\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/MechanismType\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Name\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Number\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/x\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/y\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Type\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/BeamConvergence\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/C1LensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/C2LensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/CameraLength\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Defocus\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/DiffractionLensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/EFTEMOn\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/ExtractorVoltage\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Focus\n", - 
"Operations/ImageQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/x\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/y\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/GunLensSetting\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/HighMagnificationMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/IlluminationMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/IntermediateLensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/LastMeasuredScreenCurrent\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/MiniCondenserLensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/ObjectiveLensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/ObjectiveLensMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/OperatingMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/ProbeMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Projector1LensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/Projector2LensIntensity\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/ProjectorMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/ScreenCurrent\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/SpotIndex\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/StemFocus\n", - "Operations/ImageQuantificationOperation/*/metadata/Optics/TemOperatingSubMode\n", - "Operations/ImageQuantificationOperation/*/metadata/Sample\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/DwellTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/FrameTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/LineIntegrationCount\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/LineInterlacing\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/LineTime\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/MainsLockOn\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/bottom\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/left\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/right\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanArea/top\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanRotation\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanSize/height\n", - "Operations/ImageQuantificationOperation/*/metadata/Scan/ScanSize/width\n", - "Operations/ImageQuantificationOperation/*/metadata/Stage/AlphaTilt\n", - "Operations/ImageQuantificationOperation/*/metadata/Stage/BetaTilt\n", - "Operations/ImageQuantificationOperation/*/metadata/Stage/HolderType\n", - "Operations/ImageQuantificationOperation/*/metadata/Stage/Position/x\n", - "Operations/ImageQuantificationOperation/*/metadata/Stage/Position/y\n", - "Operations/ImageQuantificationOperation/*/metadata/Stage/Position/z\n", - "Operations/ImageQuantificationOperation/*/metadata/Vacuum/VacuumMode\n", - "Operations/ImageQuantificationOperation/*/outputs\n", - "Operations/ImageQuantificationOperation/*/quantificationKernelFilter\n", - "Operations/ImageQuantificationOperation/*/quantificationKernelFilterSigma\n", - "Operations/ImageQuantificationOperation/*/quantificationKernelFilterSize\n", - "Operations/ImageQuantificationOperation/*/quantificationMode\n", - 
"Operations/ImageQuantificationOperation/*/quantificationRadialFilterEdge\n", - "Operations/ImageQuantificationOperation/*/quantificationWienerFilterFrequency\n", - "Operations/ImageQuantificationOperation/*/spectralFilter\n", - "Operations/ImageQuantificationOperation/*/spectralFilterSigma\n", - "Operations/ImageQuantificationOperation/*/spectralFilterSize\n", - "Operations/ImageQuantificationOperation/*/useOptimizedCalibration\n", - "Operations/ImageQuantificationOperation/*/useQuantificationKernelFilter\n", - "Operations/ImageQuantificationOperation/*/useSpectralFilter\n", - "Operations/IntegrationRectangleOperation/*/dataPath\n", - "Operations/IntegrationRectangleOperation/*/outputs\n", - "Operations/IntensityProfileOperation/*/dataPath\n", - "Operations/IntensityProfileOperation/*/outputs\n", - "Operations/MixOperation/*/blend\n", - "Operations/MixOperation/*/outputs\n", - "Operations/Operations/operations\n", - "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/calibrationError\n", - "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/density\n", - "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/sampleThickness\n", - "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/useAbsorptionCorrection\n", - "Operations/SpectrumQuantificationOperation/*/absorptionCorrection/useDensity\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModel\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelModeled/useBackgroundWindows\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/backgroundOrder\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/peakOrder\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundModelMultiPoly/useBackgroundWindows\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/backgroundWindows\n", - "Operations/SpectrumQuantificationOperation/*/backgroundCorrection/useManualBackgroundWindows\n", - "Operations/SpectrumQuantificationOperation/*/dataPath\n", - "Operations/SpectrumQuantificationOperation/*/elementProperties\n", - "Operations/SpectrumQuantificationOperation/*/elementSelection\n", - "Operations/SpectrumQuantificationOperation/*/elementsIdentified\n", - "Operations/SpectrumQuantificationOperation/*/ionizationCrossSectionModel\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/AcquisitionDatetime/DateTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/AcquisitionStartDatetime/DateTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/BeamType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Acquisition/SourceType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/AcquisitionUnit\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/CompositionType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Detector\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Encoding\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Offset/x\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/Offset/y\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelSize/height\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelSize/width\n", - 
"Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelUnitX\n", - "Operations/SpectrumQuantificationOperation/*/metadata/BinaryResult/PixelUnitY\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Core/MetadataDefinitionVersion\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Core/MetadataSchemaVersion\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Core/guid\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C1].Name/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[C2].Name/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[OBJ].Name/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Aperture[SA].Name/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].IncidentAngle/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].SpectrumBeginEnergy/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG11].Temperature/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].IncidentAngle/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].SpectrumBeginEnergy/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG12].Temperature/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].IncidentAngle/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].SpectrumBeginEnergy/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG13].Temperature/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/type\n", - 
"Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].IncidentAngle/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].SpectrumBeginEnergy/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Detectors[SuperXG14].Temperature/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/IntegratedFrameCount/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A11/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A12/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A13/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A21/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A22/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Scan.ScanTransformation.A23/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/StemMagnification/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/StemMagnification/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.IntegrationShape.Area/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.IntegrationShape.Area/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.Plot.Label/type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/CustomProperties/Velox.Plot.Label/value\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/AnalyticalDetectorShutterState\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/AzimuthAngle\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/BeginEnergy\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/height\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Binning/width\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngle\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/begin\n", - 
"Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/CollectionAngleRange/end\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/DarkGainCorrectionType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorName\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/DetectorType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Dispersion\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ElectronicsNoise\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ElevationAngle\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Enabled\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ExposureTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Gain\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/InputCountRate\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Inserted\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/LiveTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Offset\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/OffsetEnergy\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/OutputCountRate\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/PulseProcessTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/bottom\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/left\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/right\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/ReadOutArea/top\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/RealTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Position\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Detectors/Detector-*/Shutters/Shutter-0/Type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/EnergyFilter/EntranceApertureType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/GasInjectionSystems\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/ComputerName\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/ControlSoftwareVersion\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/InstrumentClass\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/InstrumentId\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/InstrumentModel\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Instrument/Manufacturer\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/AccelerationVoltage\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Diameter\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Enabled\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/MechanismType\n", - 
"Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Name\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Number\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/x\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/PositionOffset/y\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Apertures/Aperture-*/Type\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/BeamConvergence\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/C1LensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/C2LensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/CameraLength\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Defocus\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/DiffractionLensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/EFTEMOn\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ExtractorVoltage\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Focus\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/x\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/FullScanFieldOfView/y\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/GunLensSetting\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/HighMagnificationMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/IlluminationMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/IntermediateLensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/LastMeasuredScreenCurrent\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/MiniCondenserLensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ObjectiveLensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ObjectiveLensMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/OperatingMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ProbeMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Projector1LensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/Projector2LensIntensity\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ProjectorMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/ScreenCurrent\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/SpotIndex\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/StemFocus\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Optics/TemOperatingSubMode\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Sample\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/DwellTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/FrameTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/LineIntegrationCount\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/LineInterlacing\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/LineTime\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/MainsLockOn\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/bottom\n", - 
"Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/left\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/right\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanArea/top\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanRotation\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanSize/height\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Scan/ScanSize/width\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Stage/AlphaTilt\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Stage/BetaTilt\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Stage/HolderType\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Stage/Position/x\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Stage/Position/y\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Stage/Position/z\n", - "Operations/SpectrumQuantificationOperation/*/metadata/Vacuum/VacuumMode\n", - "Operations/SpectrumQuantificationOperation/*/outputs\n", - "Operations/SpectrumQuantificationOperation/*/useOptimizedCalibration\n", - "Operations/StemInputOperation/*/dataPath\n", - "Operations/StemInputOperation/*/detector\n", - "Operations/StemInputOperation/*/detectorInfo/name\n", - "Operations/StemInputOperation/*/detectorInfo/segments\n", - "Operations/StemInputOperation/*/outputs\n", - "Operations/StemInputOperation/*/scanArea\n", - "Optics/AccelerationVoltage\n", - "Optics/Apertures/Aperture-*/Diameter\n", - "Optics/Apertures/Aperture-*/Enabled\n", - "Optics/Apertures/Aperture-*/MechanismType\n", - "Optics/Apertures/Aperture-*/Name\n", - "Optics/Apertures/Aperture-*/Number\n", - "Optics/Apertures/Aperture-*/PositionOffset/x\n", - "Optics/Apertures/Aperture-*/PositionOffset/y\n", - "Optics/Apertures/Aperture-*/Type\n", - "Optics/BeamConvergence\n", - "Optics/C1LensIntensity\n", - "Optics/C2LensIntensity\n", - "Optics/CameraLength\n", - "Optics/Defocus\n", - "Optics/DiffractionLensIntensity\n", - "Optics/EFTEMOn\n", - "Optics/ExtractorVoltage\n", - "Optics/Focus\n", - "Optics/FullScanFieldOfView/x\n", - "Optics/FullScanFieldOfView/y\n", - "Optics/GunLensSetting\n", - "Optics/HighMagnificationMode\n", - "Optics/IlluminationMode\n", - "Optics/IntermediateLensIntensity\n", - "Optics/LastMeasuredScreenCurrent\n", - "Optics/MiniCondenserLensIntensity\n", - "Optics/NominalMagnification\n", - "Optics/ObjectiveLensIntensity\n", - "Optics/ObjectiveLensMode\n", - "Optics/OperatingMode\n", - "Optics/ProbeMode\n", - "Optics/Projector1LensIntensity\n", - "Optics/Projector2LensIntensity\n", - "Optics/ProjectorMode\n", - "Optics/ScreenCurrent\n", - "Optics/SpotIndex\n", - "Optics/StemFocus\n", - "Optics/TemOperatingSubMode\n", - "Sample\n", - "Scan/DwellTime\n", - "Scan/FrameTime\n", - "Scan/LineIntegrationCount\n", - "Scan/LineInterlacing\n", - "Scan/LineTime\n", - "Scan/MainsLockOn\n", - "Scan/ScanArea/bottom\n", - "Scan/ScanArea/left\n", - "Scan/ScanArea/right\n", - "Scan/ScanArea/top\n", - "Scan/ScanRotation\n", - "Scan/ScanSize/height\n", - "Scan/ScanSize/width\n", - "Stage/AlphaTilt\n", - "Stage/BetaTilt\n", - "Stage/HolderType\n", - "Stage/Position/x\n", - "Stage/Position/y\n", - "Stage/Position/z\n", - "Vacuum/VacuumMode\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"len(uniq): {len(uniq)}\")\n", "hashes_removed = set()\n", @@ -1090,18 +202,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "a2f08153-3651-4dce-aa45-62bb7c645da2", 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Detector-11']\n" - ] - } - ], + "outputs": [], "source": [ "token = \"Detectors/Detector-11/CollectionAngleRange/begin\"\n", "where = re.findall(r\"(Detector-[0-9]+)\", token)\n", diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 6f1f802a5..a287d7fad 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -136,7 +136,7 @@ def read(self, # TODO::check correct loop through! # sub_parser = "velox_emd" - subparser = RsciioVeloxSubParser(entry_id, file_paths[0], verbose=True) + subparser = RsciioVeloxSubParser(entry_id, file_paths[0], verbose=False) subparser.parse(template) # for dat_instance in case.dat_parser_type: diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py index 78c36b29c..9672d78b9 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py @@ -21,18 +21,25 @@ import numpy as np from typing import Dict, List +from datetime import datetime from rsciio import emd from ase.data import chemical_symbols -from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser +from pynxtools.dataconverter.readers.em.subparsers.rsciio_base \ + import RsciioBaseParser from pynxtools.dataconverter.readers.em.utils.rsciio_hyperspy_utils \ import get_named_axis, get_axes_dims, get_axes_units from pynxtools.dataconverter.readers.shared.shared_utils \ import get_sha256_of_file_content +from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox_concepts \ + import NX_VELOX_TO_NX_EVENT_DATA_EM +from pynxtools.dataconverter.readers.em.concepts.concept_mapper \ + import variadic_path_to_specific_path REAL_SPACE = 0 COMPLEX_SPACE = 1 + def all_req_keywords_in_dict(dct: dict, keywords: list) -> bool: """Check if dict dct has all keywords in keywords as keys from.""" # falsifiable? 
@@ -207,9 +214,96 @@ def content_resolver(self, obj: dict) -> str:
 
         return "n/a"
 
+    def add_various_event_metadata(self, orgmeta: fd.FlatDict,
+                                   identifier: list, template: dict) -> dict:
+        """Map various Velox/FEI-specific metadata onto NeXus event_data_em instances."""
+        if (len(identifier) != 3) or (not all(isinstance(x, int) for x in identifier)):
+            raise ValueError(f"Argument identifier {identifier} needs three int values!")
+        for tpl in NX_VELOX_TO_NX_EVENT_DATA_EM:
+            if not isinstance(tpl, tuple):
+                continue
+            if len(tpl) != 3:
+                continue
+            if tpl[1] == "ignore":
+                continue
+
+            trg = variadic_path_to_specific_path(tpl[0], identifier)
+            if tpl[1] == "is":
+                if self.verbose:
+                    print(f">>>> is >>>> tpl: {tpl}, trg: {trg}")
+                template[f"{trg}"] = tpl[2]
+            elif tpl[1] == "load_from":
+                if isinstance(tpl[2], str):
+                    if self.verbose:
+                        print(f">>>> load_from, str >>>> tpl: {tpl}, trg: {trg}")
+                    if tpl[2] in orgmeta:
+                        template[f"{trg}"] = orgmeta[tpl[2]]
+                elif isinstance(tpl[2], list) and all(isinstance(x, str) for x in tpl[2]):
+                    res = []
+                    for entry in tpl[2]:
+                        if entry in orgmeta:
+                            res.append(orgmeta[entry])
+                    if len(res) != len(tpl[2]):
+                        raise ValueError(">>>> load_from, list >>>> not all values found!")
+                    template[f"{trg}"] = np.asarray(res, np.float64)
+                    # TODO::add position information
+                else:
+                    raise ValueError(f">>>> load_from >>>> tpl[2] not a str {tpl[2]}!")
+            elif tpl[1] == "unix_to_iso8601":
+                if isinstance(tpl[2], str):
+                    if self.verbose:
+                        print(f">>>> unix_to_iso8601, str >>>> tpl: {tpl}, trg: {trg}")
+                    if tpl[2] in orgmeta:
+                        template[f"{trg}"] = datetime.fromtimestamp(
+                            int(orgmeta[tpl[2]])).isoformat()
+                        # TODO::is this really a UNIX timestamp, what about the timezone?
+            elif tpl[1] == "concatenate":
+                if isinstance(tpl[2], list) and all(isinstance(x, str) for x in tpl[2]):
+                    res = ""
+                    for entry in tpl[2]:
+                        if entry in orgmeta:
+                            res += f"{entry}: {orgmeta[entry]}, "
+                        else:
+                            continue
+                    template[f"{trg}"] = res[0:len(res) - 2]  # strip the trailing ", "
+            else:
+                # unknown modifier keyword in the mapping table
+                print(f"Unknown modifier {tpl[1]}, entry not mapped!")
+        return template
+
+    def add_lens_event_data(self, orgmeta: fd.FlatDict,
+                            identifier: list, template: dict) -> dict:
+        """Map lens-specific Velox/FEI metadata onto NeXus NXlens_em instances."""
+        if (len(identifier) != 3) or (not all(isinstance(x, int) for x in identifier)):
+            raise ValueError(f"Argument identifier {identifier} needs three int values!")
+        trg = f"/ENTRY[entry{identifier[0]}]/measurement/event_data_em_set/EVENT_DATA_EM" \
+              f"[event_data_em{identifier[1]}]/em_lab/EBEAM_COLUMN[ebeam_column]"
+        lens_names = ["C1", "C2", "Diffraction", "Gun",
+                      "Intermediate", "MiniCondenser",
+                      "Objective", "Projector1", "Projector2"]
+        lens_idx = 1
+        for lens_name in lens_names:
+            toggle = False
+            if f"Optics/{lens_name}LensIntensity" in orgmeta:
+                template[f"{trg}/LENS_EM[lens_em{lens_idx}]/value"] \
+                    = orgmeta[f"Optics/{lens_name}LensIntensity"]
+                # TODO::unit?
+                toggle = True
+            if f"Optics/{lens_name}LensMode" in orgmeta:
+                template[f"{trg}/LENS_EM[lens_em{lens_idx}]/mode"] \
+                    = orgmeta[f"Optics/{lens_name}LensMode"]
+                toggle = True
+            if toggle:
+                template[f"{trg}/LENS_EM[lens_em{lens_idx}]/name"] \
+                    = lens_name
+                lens_idx += 1
+        # Optics/GunLensSetting
+        return template
+
     def normalize_imgs_content(self, obj: dict, template: dict) -> dict:
         """Map generic scanned images (e.g.
BF/DF) to NeXus.""" meta = fd.FlatDict(obj["metadata"], "/") + orgmeta = fd.FlatDict(obj["original_metadata"], "/") dims = get_axes_dims(obj["axes"]) if len(dims) != 2: raise ValueError(f"{obj['axes']}") @@ -238,7 +332,11 @@ def normalize_imgs_content(self, obj: dict, template: dict) -> dict: template[f"{trg}/image_twod/intensity"] \ = {"compress": np.asarray(obj["data"]), "strength": 1} # template[f"{trg}/image_twod/intensity/@units"] - # TODO::add metadata + self.add_various_event_metadata(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], template) + self.add_lens_event_data(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], template) + # TODO: add detector data self.id_mgn["event_img"] += 1 self.id_mgn["event"] += 1 return template @@ -246,6 +344,7 @@ def normalize_imgs_content(self, obj: dict, template: dict) -> dict: def normalize_adf_content(self, obj: dict, template: dict) -> dict: """Map relevant (high-angle) annular dark field images to NeXus.""" meta = fd.FlatDict(obj["metadata"], "/") + orgmeta = fd.FlatDict(obj["original_metadata"], "/") dims = get_axes_dims(obj["axes"]) if len(dims) != 2: raise ValueError(f"{obj['axes']}") @@ -274,8 +373,12 @@ def normalize_adf_content(self, obj: dict, template: dict) -> dict: template[f"{trg}/image_twod/intensity"] \ = {"compress": np.asarray(obj["data"]), "strength": 1} # template[f"{trg}/image_twod/intensity/@units"] + self.add_various_event_metadata(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], template) + self.add_lens_event_data(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], template) + # TODO: add detector data # TODO::coll. angles given in original_metadata map to half_angle_interval - # TODO::add metadata self.id_mgn["event_img"] += 1 self.id_mgn["event"] += 1 return template @@ -295,6 +398,7 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict: # can one map y, x, on j, i indices idx_map = {"y": "j", "x": "i"} meta = fd.FlatDict(obj["metadata"], "/") + orgmeta = fd.FlatDict(obj["original_metadata"], "/") dims = get_axes_dims(obj["axes"]) if len(dims) != 2: raise ValueError(f"{obj['axes']}") @@ -327,7 +431,10 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict: template[f"{trg}/image_twod/magnitude"] \ = {"compress": np.asarray(obj["data"]), "strength": 1} # template[f"{trg}/image_twod/magnitude/@units"] - # TODO::add metadata + self.add_various_event_metadata(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], template) + self.add_lens_event_data(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], template) self.id_mgn["event_img"] += 1 self.id_mgn["event"] += 1 return template @@ -335,6 +442,7 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict: def normalize_eds_spc_content(self, obj: dict, template: dict) -> dict: """Map relevant EDS spectrum/(a) to NeXus.""" meta = fd.FlatDict(obj["metadata"], "/") + orgmeta = fd.FlatDict(obj["original_metadata"], "/") dims = get_axes_dims(obj["axes"]) n_dims = None if dims == [('Energy', 0)]: @@ -387,6 +495,10 @@ def normalize_eds_spc_content(self, obj: dict, template: dict) -> dict: template[f"{trg}/intensity"] \ = {"compress": np.asarray(obj["data"]), "strength": 1} # template[f"{trg}/intensity/@long_name"] = "" + self.add_various_event_metadata(orgmeta, + [self.entry_id, self.id_mgn["event"], self.id_mgn["event_spc"]], template) + self.add_lens_event_data(orgmeta, + [self.entry_id, 
self.id_mgn["event"], self.id_mgn["event_spc"]], template) self.id_mgn["event_spc"] += 1 self.id_mgn["event"] += 1 return template diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py index 6dda4dd8e..547d8e376 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox_concepts.py @@ -30,57 +30,63 @@ # Concept names like Projector1Lens and Projector2Lens mean two different concept instances # of the same concept Projector*Lens in NeXus this would become lens_em1(NXlens_em) name: projector, and field named -NX_VELOX_TO_NX_EM = [] +# ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/LENS_EM[lens_em*]/name", "is", "C1"), +# ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/LENS_EM[lens_em*]/value", "load_from", "Optics/C1LensIntensity"), +# ("/ENTRY[entry*]/", "load_from", "Optics/C2LensIntensity") +# this can not work but has to be made explicit with an own function that is Velox MetadataSchema-version and NeXus NXem-schema-version-dependent for the lenses -("/ENTRY[entry*]/", "to_iso8601", "Acquisition/AcquisitionStartDatetime/DateTime"), -("/ENTRY[entry*]/", "load_from", "Acquisition/BeamType"), -("/ENTRY[entry*]/", "load_from", "Acquisition/SourceType"), -("/ENTRY[entry*]/", "load_from", "Core/MetadataDefinitionVersion"), -("/ENTRY[entry*]/", "load_from", "Core/MetadataSchemaVersion"), -("/ENTRY[entry*]/", "load_from", ["Detectors/Detector-*/CollectionAngleRange/begin", "Detectors/Detector-*/CollectionAngleRange/end"]), -("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/DetectorName"), -("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/DetectorType"), -("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/Enabled"), -("/ENTRY[entry*]/", "load_from", "Detectors/Detector-*/Inserted"), -("/ENTRY[entry*]/", "load_from", "Instrument/ControlSoftwareVersion"), -("/ENTRY[entry*]/", "load_from", "Instrument/InstrumentId"), -("/ENTRY[entry*]/", "load_from", "Instrument/InstrumentModel"), -("/ENTRY[entry*]/", "load_from", "Instrument/Manufacturer"), -("/ENTRY[entry*]/", "load_from", "Optics/AccelerationVoltage"), -("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Diameter"), -("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Enabled"), -("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Name"), -("/ENTRY[entry*]/", "load_from", "Optics/Apertures/Aperture-*/Type"), -("/ENTRY[entry*]/", "load_from", "Optics/BeamConvergence"), -("/ENTRY[entry*]/", "load_from", "Optics/C1LensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/C2LensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/CameraLength"), -("/ENTRY[entry*]/", "load_from", "Optics/Defocus"), -("/ENTRY[entry*]/", "load_from", "Optics/DiffractionLensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/EFTEMOn"), -("/ENTRY[entry*]/", "load_from", "Optics/ExtractorVoltage"), -("/ENTRY[entry*]/", "load_from", "Optics/Focus"), -("/ENTRY[entry*]/", "load_from", "Optics/FullScanFieldOfView/x"), -("/ENTRY[entry*]/", "load_from", "Optics/FullScanFieldOfView/y"), -("/ENTRY[entry*]/", "load_from", "Optics/GunLensSetting"), -("/ENTRY[entry*]/", "load_from", "Optics/HighMagnificationMode"), -("/ENTRY[entry*]/", "load_from", "Optics/IlluminationMode"), -("/ENTRY[entry*]/", "load_from", "Optics/IntermediateLensIntensity"), -("/ENTRY[entry*]/", 
"load_from", "Optics/LastMeasuredScreenCurrent"), -("/ENTRY[entry*]/", "load_from", "Optics/MiniCondenserLensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/NominalMagnification"), -("/ENTRY[entry*]/", "load_from", "Optics/ObjectiveLensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/ObjectiveLensMode"), -("/ENTRY[entry*]/", "load_from", "Optics/OperatingMode"), -("/ENTRY[entry*]/", "load_from", "Optics/ProbeMode"), -("/ENTRY[entry*]/", "load_from", "Optics/Projector1LensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/Projector2LensIntensity"), -("/ENTRY[entry*]/", "load_from", "Optics/ProjectorMode"), -("/ENTRY[entry*]/", "load_from", "Optics/SpotIndex"), -("/ENTRY[entry*]/", "load_from", "Optics/StemFocus"), -("/ENTRY[entry*]/", "load_from", "Optics/TemOperatingSubMode"), -("/ENTRY[entry*]/", "load_from", "Sample"), -("/ENTRY[entry*]/", "load_from", "Scan/DwellTime"), -("/ENTRY[entry*]/", "load_from", "Stage/AlphaTilt"), -("/ENTRY[entry*]/", "load_from", "Stage/BetaTilt"), -("/ENTRY[entry*]/", "load_from", ["Stage/Position/x", "Stage/Position/y", "Stage/Position/z"])] +NX_VELOX_TO_NX_EVENT_DATA_EM = [("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/start_time", "unix_to_iso8601", "Acquisition/AcquisitionStartDatetime/DateTime"), + ("/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type", "load_from", "Acquisition/SourceType"), + ("/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/probe", "is", "electron"), + ("", "ignore", "Core/MetadataDefinitionVersion"), + ("", "ignore", "Core/MetadataSchemaVersion"), + ("has to be loaded explicitly as not always equally relevant", "ignore", ["Detectors/Detector-*/CollectionAngleRange/begin", "Detectors/Detector-*/CollectionAngleRange/end"]), + ("", "ignore", "Detectors/Detector-*/DetectorName"), + ("", "ignore", "Detectors/Detector-*/DetectorType"), + ("", "ignore", "Detectors/Detector-*/Enabled"), + ("", "ignore", "Detectors/Detector-*/Inserted"), + ("/ENTRY[entry*]/measurement/em_lab/control_program/program", "is", "Not reported in original_metadata parsed from Velox EMD using rosettasciio"), + ("/ENTRY[entry*]/measurement/em_lab/control_program/program/@version", "load_from", "Instrument/ControlSoftwareVersion"), + ("/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/identifier", "load_from", "Instrument/InstrumentId"), + ("/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/model", "load_from", "Instrument/InstrumentModel"), + ("/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]/vendor", "load_from", "Instrument/Manufacturer"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", "load_from", "Optics/AccelerationVoltage"), + ("", "ignore", "Optics/Apertures/Aperture-*/Diameter"), + ("", "ignore", "Optics/Apertures/Aperture-*/Enabled"), + ("", "ignore", "Optics/Apertures/Aperture-*/Name"), + ("", "ignore", "Optics/Apertures/Aperture-*/Type"), + ("", "ignore", "Optics/BeamConvergence"), + ("", "ignore", "Optics/C1LensIntensity"), + ("", "ignore", "Optics/C2LensIntensity"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/camera_length", "load_from", "Optics/CameraLength"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/defocus", "load_from", "Optics/Defocus"), + ("", "ignore", 
"Optics/DiffractionLensIntensity"), + ("place in optical_system_em", "ignore", "Optics/EFTEMOn"), + ("place in electron_source in event data", "ignore", "Optics/ExtractorVoltage"), + ("place in optical_system_em", "ignore", "Optics/Focus"), + ("place in optical_system_em", "ignore", "Optics/FullScanFieldOfView/x"), + ("place in optical_system_em", "ignore", "Optics/FullScanFieldOfView/y"), + ("", "ignore", "Optics/GunLensSetting"), + ("place in optical_system_em", "ignore", "Optics/HighMagnificationMode"), + ("place in optical_system_em", "ignore", "Optics/IlluminationMode"), + ("", "ignore", "Optics/IntermediateLensIntensity"), + ("place in optical_system_em", "ignore", "Optics/LastMeasuredScreenCurrent"), + ("", "ignore", "Optics/MiniCondenserLensIntensity"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "load_from", "Optics/NominalMagnification"), + ("", "ignore", "Optics/ObjectiveLensIntensity"), + ("", "ignore", "Optics/ObjectiveLensMode"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/EBEAM_COLUMN[ebeam_column]/operation_mode", "concatenate", ["Optics/OperatingMode", + "Optics/TemOperatingSubMode"]), + ("", "ignore", "Optics/Projector1LensIntensity"), + ("", "ignore", "Optics/Projector2LensIntensity"), + ("", "ignore", "Optics/ProjectorMode"), + ("", "ignore", "Optics/SpotIndex"), + ("", "ignore", "Optics/StemFocus"), + ("", "ignore", "Sample"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/SCANBOX_EM[scanbox_em]/dwell_time", "load_from", "Scan/DwellTime"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/design", "load_from", "Stage/HolderType"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt1", "load_from", "Stage/AlphaTilt"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/tilt2", "load_from", "Stage/BetaTilt"), + ("/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]/position", "load_from", ["Stage/Position/x", + "Stage/Position/y", + "Stage/Position/z"])] From 84b473d248877efa3b19f570da660d4677723acd Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 23 Jan 2024 10:28:39 +0100 Subject: [PATCH 84/84] Hooked in the correct NeXus definitions branch to use for the em_refactoring feature branch --- pynxtools/definitions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pynxtools/definitions b/pynxtools/definitions index 999837671..804152347 160000 --- a/pynxtools/definitions +++ b/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 999837671373b962fed932829becd42acb7482f6 +Subproject commit 80415234701249690d86f34db5fd26fa4da6d154