MPES reader changes from mpes refactoring by domna · Pull Request #203 · FAIRmat-NFDI/pynxtools · GitHub
[go: up one dir, main page]
Skip to content

MPES reader changes from mpes refactoring #203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
9f8fe41
Definitions upgraded to latest mpes-refactor
domna Jan 8, 2024
b86a5fe
Adapt vscode settings and update definitions, again
domna Jan 11, 2024
781826b
Updates definitions
domna Jan 17, 2024
26841e4
Updates config file
domna Jan 17, 2024
872fa05
Use nested config file structure
domna Jan 17, 2024
9a3246b
flatten_json cleaned up
domna Jan 17, 2024
4d469fa
Updates definitions
domna Jan 17, 2024
6e56b02
Renames DATA[data] to just data
domna Jan 18, 2024
e72f25e
Use beam_TYPE and source_TYPE
domna Jan 18, 2024
fa4e27f
Add flag to reatin link dicts for flatten_json
domna Jan 18, 2024
669dc52
Properly resolve upper/lower case group fields
domna Jan 18, 2024
faa2792
Fixes a lookup error when keys have been filled in template with a di…
sherjeelshabih Jan 18, 2024
7514c1d
Fixes the case where the reader sends back a template entry with the …
sherjeelshabih Jan 18, 2024
fc4b034
Correct coeffs path for raw data file
domna Jan 18, 2024
57a3900
Fix path in data dict test
domna Jan 18, 2024
c9142e9
Update mpes reference file
domna Jan 18, 2024
adb6926
Updates reference nexus log
domna Jan 18, 2024
7c07f35
Merge branch 'master' into mpes-reader-update
domna Jan 18, 2024
f890df0
Fix nexus regression file
domna Jan 18, 2024
d8a8b36
Don't use removeprefix to support py3.8
domna Jan 18, 2024
96e40a1
Move pressure_gauge to correct location
domna Jan 18, 2024
efb56e8
Corrects link to pressure_gauge
domna Jan 18, 2024
5b45780
Updates reference mpes test log
domna Jan 18, 2024
fa1db7d
Updates path for eln mapping
domna Jan 18, 2024
27fe1b8
Fixes eln mapping
domna Jan 18, 2024
343cd54
Skip test for xps reader
domna Jan 18, 2024
79d845b
Fix typos
domna Jan 18, 2024
9200ef3
Updates generated eln files
domna Jan 18, 2024
5f25fb2
Fix undocumented @units if key is documented
domna Jan 18, 2024
eda5262
Updates mpes reference file
domna Jan 18, 2024
9140ff3
Removed applied fields in calibrations
domna Jan 19, 2024
5ba4f66
Removed photon_energies from source
domna Jan 19, 2024
3a3f86f
Nest electronanalyser under instrument
domna Jan 19, 2024
31f71fe
Use momentum_resolution and spatial_resolution as named concepts
domna Jan 19, 2024
e780737
Fix eln mapping for spatial and momentum resolution in electronanalyser
domna Jan 19, 2024
44d94e7
Updates mpes reference file
domna Jan 19, 2024
689a0ee
Renamings
domna Jan 19, 2024
116f099
Renamings in NXsample
domna Jan 19, 2024
279ba0e
Properly deal with undocumented @units as discussed w/ @sherjeelshabih
domna Jan 19, 2024
3afb459
Updates mpes reference file
domna Jan 19, 2024
3b04d23
Correctly map chemical formula
domna Jan 19, 2024
73728f7
Adds additional mappings
domna Jan 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"[python]": {
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll": true,
"source.organizeImports": true
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
},
"editor.defaultFormatter": "charliermarsh.ruff"
},
Expand Down
36 changes: 31 additions & 5 deletions pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
"""Helper functions commonly used by the convert routine."""

import json
import re
import logging
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from typing import Any, Callable, List, Optional, Tuple, Union
Expand Down Expand Up @@ -386,11 +386,20 @@ def is_valid_data_field(value, nxdl_type, path):
return value


def path_in_data_dict(nxdl_path: str, data: dict) -> Tuple[bool, str]:
def path_in_data_dict(nxdl_path: str, hdf_path: str, data: dict) -> Tuple[bool, str]:
"""Checks if there is an accepted variation of path in the dictionary & returns the path."""
accepted_unfilled_key = None
for key in data.keys():
if nxdl_path == convert_data_converter_dict_to_nxdl_path(key):
if (
nxdl_path == convert_data_converter_dict_to_nxdl_path(key)
or convert_data_dict_path_to_hdf5_path(key) == hdf_path
):
if data[key] is None:
accepted_unfilled_key = key
continue
return True, key
if accepted_unfilled_key:
return True, accepted_unfilled_key
return False, None


Expand Down Expand Up @@ -435,7 +444,12 @@ def all_required_children_are_set(optional_parent_path, data, nxdl_root):
if (
nxdl_key[0 : nxdl_key.rfind("/")] == optional_parent_path
and is_node_required(nxdl_key, nxdl_root)
and data[key] is None
and data[
path_in_data_dict(
nxdl_key, convert_data_dict_path_to_hdf5_path(key), data
)[1]
]
is None
):
return False

Expand Down Expand Up @@ -497,7 +511,9 @@ def ensure_all_required_fields_exist(template, data, nxdl_root):
if entry_name == "@units":
continue
nxdl_path = convert_data_converter_dict_to_nxdl_path(path)
is_path_in_data_dict, renamed_path = path_in_data_dict(nxdl_path, data)
is_path_in_data_dict, renamed_path = path_in_data_dict(
nxdl_path, convert_data_dict_path_to_hdf5_path(path), data
)

renamed_path = path if renamed_path is None else renamed_path
if path in template["lone_groups"]:
Expand Down Expand Up @@ -529,6 +545,16 @@ def try_undocumented(data, nxdl_root: ET.Element):
nxdl_path = convert_data_converter_dict_to_nxdl_path(path)

if entry_name == "@units":
field_path = path.rsplit("/", 1)[0]
if field_path in data.get_documented() and path in data.undocumented:
field_requiredness = get_required_string(
nexus.get_node_at_nxdl_path(
nxdl_path=convert_data_converter_dict_to_nxdl_path(field_path),
elem=nxdl_root,
)
)
data[field_requiredness][path] = data.undocumented[path]
del data.undocumented[path]
continue

if entry_name[0] == "@" and "@" in nxdl_path:
Expand Down
43 changes: 30 additions & 13 deletions pynxtools/dataconverter/readers/mpes/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,20 @@
#
"""MPES reader implementation for the DataConverter."""
import errno
import json
import os
from functools import reduce
from typing import Any
from typing import Tuple
from typing import Any, Tuple

import h5py
import xarray as xr
import yaml

from pynxtools.dataconverter.readers.base.reader import BaseReader
from pynxtools.dataconverter.readers.utils import flatten_and_replace, FlattenSettings
from pynxtools.dataconverter.readers.utils import (
FlattenSettings,
flatten_and_replace,
parse_flatten_json,
)

DEFAULT_UNITS = {
"X": "step",
Expand Down Expand Up @@ -160,19 +162,35 @@ def iterate_dictionary(dic, key_string):
"Instrument": "INSTRUMENT[instrument]",
"Analyzer": "ELECTRONANALYSER[electronanalyser]",
"Manipulator": "MANIPULATOR[manipulator]",
"Beam": "BEAM[beam]",
"Beam": "beam_TYPE[beam]",
"unit": "@units",
"Sample": "SAMPLE[sample]",
"Source": "SOURCE[source]",
"Source": "source_TYPE[source]",
"User": "USER[user]",
"energy_resolution": "energy_resolution/resolution",
"momentum_resolution": "RESOLUTION[momentum_resolution]/resolution",
"temporal_resolution": "RESOLUTION[temporal_resolution]/resolution",
"spatial_resolution": "RESOLUTION[spatial_resolution]/resolution",
"sample_temperature": "temperature_sensor/value",
}

REPLACE_NESTED = {
"SOURCE[source]/Probe": "SOURCE[source]",
"SOURCE[source]/Pump": "SOURCE[source_pump]",
"BEAM[beam]/Probe": "BEAM[beam]",
"BEAM[beam]/Pump": "BEAM[beam_pump]",
"sample_history": "sample_history/description",
"SAMPLE[sample]/chemical_formula": "SAMPLE[sample]/SUBSTANCE[substance]/molecular_formula_hill",
"source_TYPE[source]/Probe": "source_TYPE[source_probe]",
"source_TYPE[source]/Pump": "source_TYPE[source_pump]",
"beam_TYPE[beam]/Probe": "beam_TYPE[beam_probe]",
"beam_TYPE[beam]/Pump": "beam_TYPE[beam_pump]",
"sample_history": "sample_history/notes",
"ELECTRONANALYSER[electronanalyser]/RESOLUTION[momentum_resolution]": (
"ELECTRONANALYSER[electronanalyser]/momentum_resolution"
),
"ELECTRONANALYSER[electronanalyser]/RESOLUTION[spatial_resolution]": (
"ELECTRONANALYSER[electronanalyser]/spatial_resolution"
),
"SAMPLE[sample]/gas_pressure": "INSTRUMENT[instrument]/pressure_gauge/value",
"SAMPLE[sample]/temperature": (
"INSTRUMENT[instrument]/MANIPULATOR[manipulator]/temperature_sensor/value"
),
}


Expand Down Expand Up @@ -208,8 +226,7 @@ def handle_h5_and_json_file(file_paths, objects):
if file_extension == ".h5":
x_array_loaded = h5_to_xarray(file_path)
elif file_extension == ".json":
with open(file_path, encoding="utf-8") as file:
config_file_dict = json.load(file)
config_file_dict = parse_flatten_json(file_path)
elif file_extension in [".yaml", ".yml"]:
with open(file_path, encoding="utf-8") as feln:
eln_data_dict = flatten_and_replace(
Expand Down
100 changes: 97 additions & 3 deletions pynxtools/dataconverter/readers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
# limitations under the License.
#
"""Utility functions for the NeXus reader classes."""
import json
import logging
from dataclasses import dataclass, replace
from typing import List, Any, Dict, Optional, Tuple
import re
from collections.abc import Mapping
import json
from dataclasses import dataclass, replace
from typing import Any, Dict, List, Optional, Tuple

import yaml

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -197,6 +199,83 @@ def parse_yml(
)


short_notation_regex = re.compile(r"\*\{([\w,]+)\}")


def flatten_json(
    json_data: Dict[str, Any],
    base_key: Optional[str] = None,
    replacement_key: Optional[str] = None,
    dont_flatten_link_dict: bool = False,
) -> Dict[str, Any]:
    """
    Flattens a json dict into a flat dictionary of absolute paths.

    Args:
        json_data (Dict[str, Any]): The dictionary read from the json file.
        base_key (Optional[str], optional):
            A base key to prefix to all keys.
            Defaults to None.
        replacement_key (Optional[str], optional):
            A replacement key which replaces all occurrences of * with this string.
            Defaults to None.
        dont_flatten_link_dict (bool):
            If true, the dict will not be flattened if it only contains a link key.
            Defaults to False.

    Returns:
        Dict[str, Any]: The flattened dict
    """
    # Keep {"link": ...} dicts intact (when requested) so downstream code can
    # still recognize them as link entries rather than flattened sub-paths.
    if (
        dont_flatten_link_dict
        and base_key is not None
        and len(json_data) == 1
        and "link" in json_data
    ):
        return {base_key: json_data}

    flattened_config = {}

    def update_config(key, value, rkey):
        # Recurse into sub-dicts, convert "@link:<path>" strings into
        # {"link": <path>} dicts, and store everything else as-is.
        if isinstance(value, dict):
            flattened_config.update(
                flatten_json(
                    value,
                    base_key=key,
                    replacement_key=rkey,
                    dont_flatten_link_dict=dont_flatten_link_dict,
                )
            )
        elif isinstance(value, str) and value.startswith("@link:"):
            flattened_config[key] = {"link": value[6:]}
        else:
            flattened_config[key] = value

    for key, value in json_data.items():
        if base_key is not None:
            key = f"{base_key}/{key}"

        if replacement_key is not None:
            key = key.replace("*", replacement_key)
            if isinstance(value, str):
                value = value.replace("*", replacement_key)

        expand_match = short_notation_regex.search(key)
        if replacement_key is None and expand_match is not None:
            # Short notation `*{a,b,...}` expands one entry into several,
            # substituting each listed name for the wildcard.
            expand_keys = expand_match.group(1).split(",")
            for ekey in expand_keys:
                rkey = key.replace(expand_match.group(0), ekey)

                # Bug fix: substitute into a per-iteration copy. Mutating
                # `value` in place consumed the "*" on the first expansion
                # key, so every later key inherited the FIRST substitution
                # instead of its own.
                evalue = (
                    value.replace("*", ekey) if isinstance(value, str) else value
                )

                update_config(rkey, evalue, ekey)
            continue

        update_config(key, value, None)
    return flattened_config


def parse_json(file_path: str) -> Dict[str, Any]:
"""Parses a metadata json file into a dictionary.

Expand All @@ -210,6 +289,21 @@ def parse_json(file_path: str) -> Dict[str, Any]:
return json.load(file)


def parse_flatten_json(file_path: str) -> Dict[str, Any]:
    """
    Reads a metadata json file and flattens the nested structure
    into a single-level dictionary keyed by absolute paths.

    Args:
        file_path (str): The file path of the json file.

    Returns:
        Dict[str, Any]:
            The flattened dictionary containing the data readout from the json.
    """
    nested_metadata = parse_json(file_path)
    return flatten_json(nested_metadata)


def handle_objects(objects: Tuple[Any]) -> Dict[str, Any]:
"""Handle objects and generate template entries from them"""
if objects is None:
Expand Down
2 changes: 1 addition & 1 deletion pynxtools/definitions
Submodule definitions updated 151 files
Loading
0