FAIRmat-NFDI · mkuehbach · Jul 5, 2022 · Jul 4, 2022 · Jul 5, 2022
diff --git a/nexusparser/tools/dataconverter/readers/apm/reader.py b/nexusparser/tools/dataconverter/readers/apm/reader.py
diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_headers.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_headers.py
@@ -24,9 +24,8 @@
 
 import numpy as np
 
-# from readers.nx_apm_utils.aptfim_io_apt6_utils import np_uint16_to_string
 from nexusparser.tools.dataconverter.readers.apm.utils.aptfim_io_apt6_utils \
-    import string_to_typed_nparray
+    import np_uint16_to_string, string_to_typed_nparray
 
 
 class AptFileHeaderMetadata():

diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_reader.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_reader.py
@@ -119,21 +119,33 @@ def parse_file_structure(self):
                 keyword = np_uint16_to_string(
                     found_section['wcSectionType'][0])
 
+                print(keyword)
+                print(found_section)
                 assert keyword not in self.available_sections.keys(), \
                     'Found a duplicate of an already parsed section! Please \
                     contact the development team as we have never encountered \
                     an example of such a section duplication and here seems \
                     to be an example to inspect the matter.'
-                assert keyword in EXPECTED_SECTIONS.keys(), \
-                    'Found an unknown section, seems like an unknown/new \
-                    branch! Please contact the development team to enable us \
-                    to contact AMETEK and discuss the situation.'
-
-                metadata_section = EXPECTED_SECTIONS[keyword]
-                assert metadata_section.matches(found_section), \
-                    'Found an uninterpretable section! Please contact the \
-                    development team to help us fixing this.'
-                self.available_sections[keyword] = metadata_section
+
+                if keyword not in ['Delta Pulse', 'Epos ToF']:
+                    assert keyword in EXPECTED_SECTIONS.keys(), \
+                        'Found an unknown section, seems like an unknown/new \
+                        branch! Please contact the development team to enable us \
+                        to contact AMETEK and discuss the situation.'
+
+                    metadata_section = EXPECTED_SECTIONS[keyword]
+                    if metadata_section.matches(found_section) is True:
+                        # assert metadata_section.matches(found_section), \
+                        #     'Found an uninterpretable section! Please contact the \
+                        #     development team to help us fixing this.'
+                        self.available_sections[keyword] = metadata_section
+                else:
+                    print('WARNING:: Found an uninterpretable section!')
+                    print('WARNING:: This section was not be registered!')
+                    print('WARNING:: Please contact the development team!')
+                    print('WARNING::     to help us improving this!')
+                    print('WARNING:: Try to continue parsing though...!')
+                    print('llByteCount ' + str(found_section['llByteCount'][0]))
 
                 self.byte_offsets[keyword] = np.uint64(file_handle.tell())
                 if keyword == 'Position':

diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_sections_branches.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_sections_branches.py
@@ -688,9 +688,15 @@
 # EXPECTED_SECTIONS['Var44'].set_section_name('Var44')
 
 
-# deprecated sections
-# EXPECTED_SECTIONS['Vref'] = AptFileSectionMetadata()  # now 'Voltage'
-
+# deprecated sections or sections with detected inconsistencies across versions
+# Vref vs Voltage branch issue
+EXPECTED_SECTIONS['Vref'] = EXPECTED_SECTIONS['Voltage']
+EXPECTED_SECTIONS['Vref'].set_wc_data_unit('V')
+EXPECTED_SECTIONS['Vref'].set_accepted_units(['V'])
+
+# pulseDelta vs Delta Pulse issue
+# at least in one case a section Delta Pulse appeared
+# at least in one case a section Epos ToF appeared
 
 # other comments and issues
 # Need to check APSuite version and build number

diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_utils.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_apt6_utils.py
@@ -23,8 +23,6 @@
 # pylint: disable=E1101
 
 
-# from .nomad4exp_process_aptfim_utils import *
-
 import numpy as np
 
 

diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_rng_reader.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_rng_reader.py
@@ -149,7 +149,7 @@ def read(self):
                 self.rng['ions'][keyword].name = NxField(keyword, None)
                 self.rng['ions'][keyword].charge_state = \
                     NxField(np.int32(0), '')
-                # RNG files do not store charge state
+                # RNG files do not store charge state and isotopes explicitly
                 self.rng['ions'][keyword].isotope_vector = \
                     NxField(hashvector, None)
 

diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_rrng_reader.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_rrng_reader.py
@@ -27,7 +27,8 @@
 import numpy as np
 
 from nexusparser.tools.dataconverter.readers.apm.utils.aptfim_io_utils \
-    import NxField, NxIon, significant_range, create_isotope_vector, isotope_vector_to_dict_keyword
+    import NxField, NxIon, significant_range, create_isotope_vector, \
+    isotope_vector_to_dict_keyword
 
 
 def evaluate_rrng_range_line(i: int, line: str, ion_type_names: list) -> dict:
@@ -170,7 +171,7 @@ def read(self):
                 self.rrng['ions'][keyword].name = NxField(keyword, None)
                 self.rrng['ions'][keyword].charge_state = \
                     NxField(np.int32(0), '')
-                # RRNG files do not store charge state
+                # RRNG files do not store charge state and isotopes explicitly
                 self.rrng['ions'][keyword].isotope_vector = \
                     NxField(hashvector, None)
 

diff --git a/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_utils.py b/nexusparser/tools/dataconverter/readers/apm/utils/aptfim_io_utils.py
@@ -1,6 +1,11 @@
 #!/usr/bin/env python3
 """Set of utility tools for parsing file formats used by atom probe."""
 
+# Also convenience functions are included which translate human-readable ion
+# names into the isotope_vector description proposed by Kuehbach et al. in
+# DOI: 10.1017/S1431927621012241 to the human-readable ion names which are use
+# in P. Felfer et al.'s atom probe toolbox
+
 # -*- coding: utf-8 -*-
 #
 # Copyright The NOMAD Authors.
@@ -22,13 +27,17 @@
 
 # pylint: disable=E1101
 
+import re
+
 from typing import Tuple
 
 import mmap
 
 import numpy as np
 
 from ase.data import atomic_numbers
+from ase.data import chemical_symbols
+from ase.data.isotopes import download_isotope_data
 
 # restrict the number distinguished ion types
 MAX_NUMBER_OF_ION_SPECIES = 256
@@ -113,6 +122,105 @@ def create_isotope_vector(building_blocks: list) -> np.ndarray:
     return retval
 
 
+def charge_estimation_heuristics(ivec, mleft, mright, sign: str) -> np.int32:
+    """Estimate molecular ion charge based on isotopes and associated range."""
+    # estimate the charge of a molecular ion given its range
+    # assume molecular ion mass is additive based on individual isotope mass
+    # assume mass-to-charge-state-ratio interval [mleft, mright] is reasonably
+    # centered to make an integer estimation
+
+    # the below code is too simplistic because in general a molecular ion
+    # is the following 1d array
+    # (a_i)^El_i, a_i is a positive integer for an isotope, El an element
+    # 2* \sum_i=0^i=j (a_i)^El_i ) / delta_mass \approximately an int \in [1, 7]
+    # with j number of isotopes/atoms in the molecular ion
+    # the problem is that this is underconstraint equation for j > 1
+    # so especially for atoms with different isotope combinations and hydrogen
+    # or small Z element isotopes added there is uncertainty and missing clarity
+
+    # a test case
+    # ivec = np.array([0] * MAX_NUMBER_OF_ATOMS_PER_ION, dtype=np.uint16)
+    # ivec[0] = hash_isotope(75, 185-75)
+    # ivec[1] = hash_isotope(75, 187-75)
+    # ivec[2] = hash_isotope(1, 3-1)
+    # mleft = 186.2510
+    # mright = 186.6570
+    # sign = 'positive'
+
+    isotopes = download_isotope_data()
+    accumulated_mass = 0.
+    for hashvalue in ivec:
+        if hashvalue != 0:
+            protons, neutrons = unhash_isotope(int(hashvalue))
+            # get the mass of this isotope
+            # print('Isotope ' + str(protons) + ', ' + str(neutrons))
+            # print('Mass ' + str(isotopes[int(protons)][int(protons + neutrons)]['mass']))
+            accumulated_mass += isotopes[int(protons)][int(protons + neutrons)]['mass']
+        else:
+            break  # ivec is always sorted in descending order
+    # print('accumulated mass ' + str(accumulated_mass))
+    charge = np.int32(round(2. * accumulated_mass / (mleft + mright)))
+    assert charge >= 1 and charge <= 7, \
+        'charge estimated out of reasonable bounds!'
+    return charge
+
+
+def ascii_to_paraprobe_iontype(building_blocks: list) -> np.ndarray:
+    """Create a formatted isotope hashvalue list for paraprobe."""
+    # equivalent to translating iontype names from felfer 2 paraprobe notation
+    assert isinstance(building_blocks, list), \
+        'Building blocks needs to be a list !'
+
+    if building_blocks == []:  # special case unknown ion type
+        return np.array([0] * MAX_NUMBER_OF_ATOMS_PER_ION, dtype=np.uint16)
+
+    hashvector = []
+    for block in building_blocks:
+        assert isinstance(block, str), \
+            'block needs to be a string !'
+        # check if the given string represents at all an element
+        tmp = re.findall(r"([A-Z]{1})([a-z]{1})?", block)
+        assert tmp != [], \
+            'block does not seem to specify a string representing an element !'
+        element_name = tmp[0][0] + tmp[0][1]
+        # check if a preceeding isotope number is present
+        tmp = re.findall(r"^(\d+)", block)
+        if tmp == []:
+            mass_number = int(0)
+        else:
+            mass_number = int(tmp[0])
+        # check for eventual preceeding multiplier e.g. H2 meaning two H atoms
+        tmp = re.findall(r"(\d+)$", block)
+        if tmp == []:
+            multiplier = 1
+        else:
+            multiplier = int(tmp[0])
+
+        if element_name in atomic_numbers.keys():
+            proton_number = atomic_numbers[element_name]
+            if mass_number == 0:
+                neutron_number = 0
+            else:
+                neutron_number = mass_number - proton_number
+            for i in np.arange(0, multiplier):
+                hashvector.append(hash_isotope(proton_number, neutron_number))
+        else:
+            print('WARNING: Block does not specify a unique element name !')
+            print('WARNING: Importing user-defined iontypes not supported !')
+            # special case user_defined_type
+            return np.array([0] * MAX_NUMBER_OF_ATOMS_PER_ION, dtype=np.uint16)
+
+    assert len(hashvector) <= MAX_NUMBER_OF_ATOMS_PER_ION, \
+        'More than ' + MAX_NUMBER_OF_ATOMS_PER_ION \
+        + ' atoms in the molecular ion is currently not supported !'
+
+    hashvector = np.asarray(hashvector, np.uint16)
+    hashvector = np.sort(hashvector, kind='stable')[::-1]
+    retval = np.array([0] * MAX_NUMBER_OF_ATOMS_PER_ION, dtype=np.uint16)
+    retval[0:len(hashvector)] = hashvector
+    return retval
+
+
 def isotope_vector_to_dict_keyword(uint16_array: np.ndarray) -> str:
     """Create keyword for dictionary from isotope_vector."""
     lst = []
@@ -186,10 +294,38 @@ def get_unit(self):
 class NxIon():
     """Representative of a NeXus base class NXion."""
 
-    def __init__(self):
+    def __init__(self, *args, **kwargs):
         self.ion_type = NxField('', '')
-        self.isotope_vector = NxField(np.empty(0, np.uint16), '')
-        self.charge_state = NxField(np.int32(0), '')
+        self.isotope_vector = NxField(ascii_to_paraprobe_iontype([]), '')
+        if len(args) >= 1:
+            assert isinstance(args[0], list), 'args[0] needs to be a list !'
+            self.isotope_vector \
+                = NxField(ascii_to_paraprobe_iontype(args[0]), '')
+        elif 'isotope_vector' in kwargs.keys():
+            assert isinstance(kwargs['isotope_vector'], np.ndarray), \
+                'kwargs isotope_vector needs to be an np.ndarray !'
+            assert len(kwargs['isotope_vector']) \
+                == MAX_NUMBER_OF_ATOMS_PER_ION, \
+                'kwargs isotope_vector needs to have ' \
+                + str(MAX_NUMBER_OF_ATOMS_PER_ION) + ' entries !'
+            self.isotope_vector \
+                = NxField(np.asarray(kwargs['isotope_vector'], np.uint16), '')
+        # else:
+        #     assert True is False, \
+        #        'Give either a list of isotopes, \
+        #        or an isotope vector as a keyword argument !'
+        self.charge_state = NxField(np.int32(0), 'eV')
+        # if len(args) == 2:
+        #     assert isinstance(args[1], int), 'args[1] needs to be an integer !'
+        #    self.charge_state = NxField(np.int32(args[1], 'eV'))
+        if 'charge_state' in kwargs.keys():
+            assert isinstance(kwargs['charge_state'], int), \
+                'kwargs charge_state needs to be an int !'
+            assert kwargs['charge_state'] > -8, \
+                'kwargs charge_state needs to be at least -7 !'
+            assert kwargs['charge_state'] < +8, \
+                'kwargs charge_state needs to be at most +7 !'
+            self.charge_state = NxField(np.int32(kwargs['charge_state']), 'eV')
         self.name = NxField('', '')
         self.ranges = NxField(np.empty((0, 2), np.float64), 'amu')
 
@@ -205,11 +341,24 @@ def add_range(self, mqmin: np.float64, mqmax: np.float64):
 
     def get_human_readable_name(self):
         """Get human-readable name from isotop_vector."""
+        # equivalent to paraprobe 2 felfer notation
         # NEW ISSUE: how to display the isotope_vector in LaTeX notation?
-        return self.name.value
-
-
-# a = NxIon()
-# a.add_range(1., 2.)
-# a.add_range(2.2, 3.)
-# a.add_range(0.1, 0.99)
+        human_readable = ''
+        for hash_value in self.isotope_vector.value:
+            if hash_value > 0:
+                protons, neutrons = unhash_isotope(int(hash_value))
+                if neutrons > 0:
+                    human_readable += str(protons + neutrons) \
+                        + chemical_symbols[protons]
+                else:
+                    human_readable += chemical_symbols[protons]
+                human_readable += ' '
+            else:
+                break
+        if self.charge_state.value > 0:
+            human_readable += '+' * self.charge_state.value
+        elif self.charge_state.value < 0:
+            human_readable += '-' * -self.charge_state.value
+        else:
+            human_readable = human_readable[0:-1]
+        return human_readable
diff --git a/requirements.txt b/requirements.txt
@@ -16,6 +16,7 @@ pandas==1.3.5
 odfpy==1.4.1
 ase==3.19.0
 structlog==21.5.0
+flatdict==4.0.1
 
 # [nomad]
 nomad-lab>=1.1.1
diff --git a/tests/data/tools/dataconverter/readers/apm/Apm.NeXus.Apm.Example.1.ipynb b/tests/data/tools/dataconverter/readers/apm/Apm.NeXus.Apm.Example.1.ipynb
@@ -38,7 +38,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! pip install --upgrade nodejs && pip install ipywidgets h5py==3.5.0 h5glance==0.7 h5grove==0.0.14 jupyterlab[full]==3.2.9 jupyterlab_h5web[full]==1.3.0 punx==0.2.5 nexpy==0.14.1 silx[full]==1.0.0 && jupyter lab build"
+    "#! pip install --upgrade nodejs && pip install ipywidgets h5py==3.5.0 h5glance==0.7 h5grove==0.0.14 jupyterlab[full]==3.2.9 jupyterlab_h5web[full]==1.3.0 punx==0.2.5 nexpy==0.14.1 silx[full]==1.0.0 && jupyter lab build",
+    "#! pip install --upgrade nodejs && pip install ipykernel=6.15.0 sphinx=5.0.2"
    ]
   },
   {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -23,8 +23,6 @@
		# pylint: disable=E1101


		# from .nomad4exp_process_aptfim_utils import *

		import numpy as np


Expand Down