Section parsing for json_yml reader by domna · Pull Request #54 · FAIRmat-NFDI/pynxtools · GitHub
Section parsing for json_yml reader #54

Merged · 1 commit · Oct 20, 2022
12 changes: 6 additions & 6 deletions nexusutils/dataconverter/readers/hall/reader.py
@@ -17,12 +17,12 @@
#
"""Lake Shore Hall file reader implementation for the DataConverter."""
import re
-from typing import Any, List, TextIO, Dict
+from typing import Any, List, TextIO, Dict, Optional
import numpy as np
import pandas as pd

from nexusutils.dataconverter.readers.json_yml.reader import YamlJsonReader
-import nexusutils.dataconverter.readers.hall.helpers as helpers
+from nexusutils.dataconverter.readers.hall import helpers
from nexusutils.dataconverter.readers.utils import parse_json, parse_yml

# Replacement dict for section names
@@ -47,7 +47,7 @@
MEASUREMENT_KEYS = ["Contact Sets"]


-def split_add_key(fobj: TextIO, dic: dict, prefix: str, expr: str) -> None:
+def split_add_key(fobj: Optional[TextIO], dic: dict, prefix: str, expr: str) -> None:
"""Splits a key value pair and adds it to the dictionary.
It also checks for measurement headers and adds the full tabular data as a
pandas array to the dictionary.
@@ -61,7 +61,7 @@ def split_add_key(fobj: TextIO, dic: dict, prefix: str, expr: str) -> None:
key, *val = re.split(r"\s*[:|=]\s*", expr)
jval = "".join(val).strip()

-    if key in MEASUREMENT_KEYS:
+    if fobj is not None and key in MEASUREMENT_KEYS:
data = []
for line in fobj:
if not line.strip():
@@ -150,8 +150,8 @@ class HallReader(YamlJsonReader):
extensions = {
".txt": lambda fname: parse_txt(fname, "iso-8859-1"),
".json": parse_json,
".yml": lambda fname: parse_yml(fname, {}, {}),
".yaml": lambda fname: parse_yml(fname, {}, {}),
".yml": lambda fname: parse_yml(fname, None, None),
".yaml": lambda fname: parse_yml(fname, None, None),
}


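For illustration (not part of the diff): a minimal sketch of the key/value split that split_add_key performs, using the same regex as in the hunk above; the header line is a hypothetical Lake Shore entry. With fobj typed as Optional[TextIO], callers may now pass None (e.g. for YAML input), in which case the tabular-measurement branch is skipped entirely.

import re

expr = "Sample Type: van der Pauw"  # hypothetical header line
key, *val = re.split(r"\s*[:|=]\s*", expr)  # split pattern from split_add_key
print(key, "->", "".join(val).strip())  # Sample Type -> van der Pauw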
8 changes: 6 additions & 2 deletions nexusutils/dataconverter/readers/mpes/reader.py
@@ -28,7 +28,7 @@
import yaml

from nexusutils.dataconverter.readers.base.reader import BaseReader
-from nexusutils.dataconverter.readers.utils import flatten_and_replace
+from nexusutils.dataconverter.readers.utils import flatten_and_replace, FlattenSettings

DEFAULT_UNITS = {
"X": "step",
@@ -225,7 +225,11 @@ def handle_h5_and_json_file(file_paths, objects):
elif file_extension in [".yaml", ".yml"]:
with open(file_path) as feln:
eln_data_dict = flatten_and_replace(
-                yaml.safe_load(feln), CONVERT_DICT, REPLACE_NESTED
+                FlattenSettings(
+                    dic=yaml.safe_load(feln),
+                    convert_dict=CONVERT_DICT,
+                    replace_nested=REPLACE_NESTED
+                )
)

if objects is not None:
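For illustration (not part of the diff): a self-contained sketch of the new call pattern. The nested dict and the conversion mapping are stand-ins, not the reader's actual CONVERT_DICT/REPLACE_NESTED.

from nexusutils.dataconverter.readers.utils import FlattenSettings, flatten_and_replace

nested = {"Instrument": {"pressure": {"value": 1.0, "unit": "bar"}}}
flat = flatten_and_replace(
    FlattenSettings(dic=nested, convert_dict={"unit": "@units"}, replace_nested={})
)
# {'/ENTRY[entry]/Instrument/pressure': 1.0,
#  '/ENTRY[entry]/Instrument/pressure/@units': 'bar'}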
132 changes: 113 additions & 19 deletions nexusutils/dataconverter/readers/utils.py
@@ -16,21 +16,16 @@
# limitations under the License.
#
"""Utility functions for the NeXus reader classes."""
from dataclasses import dataclass, replace
from typing import List, Any, Dict, Optional
from collections.abc import Mapping
import json
import yaml


-def flatten_and_replace(
-        dic: Mapping,
-        convert_dict: dict,
-        replace_nested: dict,
-        parent_key: str = "/ENTRY[entry]",
-        sep: str = "/"
-) -> dict:
-    """Flatten a nested dictionary, and replace the keys with the appropriate
-    paths in the nxs file.
+@dataclass
+class FlattenSettings():
+    """Settings for flattening operations.

Args:
dic (dict): Dictionary to flatten
@@ -39,26 +34,117 @@
parent_key (str, optional):
Parent key of the dictionary. Defaults to "/ENTRY[entry]".
sep (str, optional): Separator for the keys. Defaults to "/".
"""
dic: Mapping
convert_dict: dict
replace_nested: dict
parent_key: str = "/ENTRY[entry]"
sep: str = "/"
is_in_section: bool = False


def is_section(val: Any) -> bool:
"""Checks whether a value is a section.

Args:
val (Any): A list or value.

Returns:
bool: True if val is a section.
"""
return isinstance(val, list) and len(val) > 0 and isinstance(val[0], dict)


def is_key_value_pair(val: Any) -> bool:
    """Checks whether a dict is a {value, unit} key-value pair."""
    if not isinstance(val, dict):
        return False

    if len(val) == 2 and "value" in val and "unit" in val:
        return True
    return False
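For illustration (not part of the diff): how the two predicates above classify typical ELN values, assuming they are imported from nexusutils.dataconverter.readers.utils.

assert is_section([{"step_name": "Post-annealing"}])  # list of dicts -> section
assert not is_section(["Ti", "Au"])  # list of scalars -> not a section
assert is_key_value_pair({"value": 1.0, "unit": "minute"})
assert not is_key_value_pair({"value": 1.0})  # no unit -> not a value-unit pair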


def uniquify_keys(ldic: list) -> List[Any]:
"""Uniquifys keys in a list of tuple lists containing key value pairs.

Args:
ldic (list): List of lists of length two, containing key value pairs.

Returns:
List[Any]: Uniquified list, where duplicate keys are appended with 1, 2, etc.
"""
dic: dict = {}
for key, val in ldic:
suffix = 0
sstr = "" if suffix == 0 else str(suffix)
while f"{key}{sstr}" in dic.keys():
sstr = "" if suffix == 0 else str(suffix)
suffix += 1

if is_key_value_pair(val):
dic[f"{key}{sstr}"] = val["value"]
dic[f"{key}{sstr}/@units"] = val["unit"]
continue
dic[f"{key}{sstr}"] = val

return list(map(list, dic.items()))
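For illustration (not part of the diff): the suffixing and unit handling of uniquify_keys on a hand-made input.

pairs = [
    ["step_name", "Post-annealing"],
    ["step_name", "Pre-treatment"],
    ["elapsed_time", {"value": 1.0, "unit": "minute"}],
]
print(uniquify_keys(pairs))
# [['step_name', 'Post-annealing'], ['step_name1', 'Pre-treatment'],
#  ['elapsed_time', 1.0], ['elapsed_time/@units', 'minute']]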


def parse_section(key: str, val: Any, settings: FlattenSettings) -> List[Any]:
"""Parse a section, i.e. an entry containing a list of entries.

Args:
key (str): The key which is currently being checked.
val (Any): The value at the current key.
settings (FlattenSettings): The flattening settings.

Returns:
List[Any]: A list of list tuples containing key, value pairs.
"""
if not is_section(val):
return [(key, val)]

groups: List[Any] = []
for group in val:
groups.extend(
flatten_and_replace(
replace(settings, dic=group, parent_key=key, is_in_section=True)
).items()
)

return uniquify_keys(groups)
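For illustration (not part of the diff): parse_section applied to a small section list, with default settings and empty conversion dicts.

settings = FlattenSettings(dic={}, convert_dict={}, replace_nested={})
section = [{"step_name": "Post-annealing"}, {"step_name": "Pre-treatment"}]
print(parse_section("/ENTRY[entry]/process_steps", section, settings))
# [['/ENTRY[entry]/process_steps/step_name', 'Post-annealing'],
#  ['/ENTRY[entry]/process_steps/step_name1', 'Pre-treatment']]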


def flatten_and_replace(settings: FlattenSettings) -> dict:
"""Flatten a nested dictionary, and replace the keys with the appropriate
paths in the nxs file.

Args:
settings (FlattenSettings): Settings dataclass for flattening the data.

Returns:
dict: Flattened dictionary
"""
items: List[Any] = []
-    for key, val in dic.items():
-        new_key = parent_key + sep + convert_dict.get(key, key)
+    for key, val in settings.dic.items():
+        new_key = settings.parent_key + settings.sep + settings.convert_dict.get(key, key)
        if isinstance(val, Mapping):
            items.extend(
-                flatten_and_replace(val, convert_dict, replace_nested, new_key, sep=sep)
+                flatten_and_replace(replace(settings, dic=val, parent_key=new_key))
                .items()
+                if not (settings.is_in_section and is_key_value_pair(val))
+                else [[new_key, val]]
            )
+            continue
+
+        for old, new in settings.replace_nested.items():
+            new_key = new_key.replace(old, new)
+
+        if new_key.endswith("/value"):
+            items.append((new_key[:-6], val))
        else:
-            for old, new in replace_nested.items():
-                new_key = new_key.replace(old, new)
-
-            if new_key.endswith("/value"):
-                items.append((new_key[:-6], val))
-            else:
-                items.append((new_key, val))
+            items.extend(parse_section(new_key, val, settings))
return dict(items)
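For illustration (not part of the diff): the pieces combined. A list of dicts is detected as a section, flattened per group, and uniquified, with {value, unit} pairs split into a value plus an @units attribute.

nested = {
    "process_steps": [
        {"elapsed_time": {"value": 1.0, "unit": "minute"}},
        {"elapsed_time": {"value": 5.0, "unit": "minute"}},
    ]
}
print(flatten_and_replace(FlattenSettings(dic=nested, convert_dict={}, replace_nested={})))
# {'/ENTRY[entry]/process_steps/elapsed_time': 1.0,
#  '/ENTRY[entry]/process_steps/elapsed_time/@units': 'minute',
#  '/ENTRY[entry]/process_steps/elapsed_time1': 5.0,
#  '/ENTRY[entry]/process_steps/elapsed_time1/@units': 'minute'}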


@@ -81,8 +167,16 @@ def parse_yml(
if replace_nested is None:
replace_nested = {}

convert_dict["unit"] = "@units"

with open(file_path) as file:
-        return flatten_and_replace(yaml.safe_load(file), convert_dict, replace_nested)
+        return flatten_and_replace(
+            FlattenSettings(
+                dic=yaml.safe_load(file),
+                convert_dict=convert_dict,
+                replace_nested=replace_nested
+            )
+        )


def parse_json(file_path: str) -> Dict[str, Any]:
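For illustration (not part of the diff): a sketch of parse_yml on a throwaway file. The new convert_dict["unit"] = "@units" line is what maps YAML unit keys onto NeXus @units attributes.

import os
import tempfile

from nexusutils.dataconverter.readers.utils import parse_yml

with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False) as tmp:
    tmp.write("sample:\n  temperature:\n    value: 300\n    unit: K\n")

print(parse_yml(tmp.name, None, None))
# {'/ENTRY[entry]/sample/temperature': 300,
#  '/ENTRY[entry]/sample/temperature/@units': 'K'}
os.unlink(tmp.name)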
33 changes: 33 additions & 0 deletions tests/data/dataconverter/readers/json_yml/eln_data_w_subsections.yaml
@@ -0,0 +1,33 @@
instrument:
amperemeter: Keithley 485 Autoranging Picoammeter
current_source: Keithley 220 Programmable current source
model: Lake Shore Hall Measurement System
software: Lake Shore Hall Measurement Software
switch_matrix: Keithley 7001 Switch System
voltmeter: Keithley 2182 Nanovoltmeter
samples:
final_sample:
- contacts_grafting:
- metal_stack:
- empirical_formula: Ti
- empirical_formula: Au
- empirical_formula: Ra
process_steps:
- elapsed_time:
unit: minute
value: 1.0
step_comment: Was there a post-annealing step?
step_name: Post-annealing
- elapsed_time:
unit: minute
value: 5.0
step_comment: "Was there a pre-treatment of the surface? \n"
step_name: Pre-treatment
- step_comment: Was an implantation made for contact formation?
step_name: implantation
- step_comment: Was a regrowth made for contact formation?
step_name: regrowth
layer:
- components:
- ../upload/raw/Substrate1.data.archive.yaml#data
epitaxy_type: Homoepitaxy
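For illustration (not part of the diff): a sketch of the flattened keys the section parsing should produce from the process_steps list above. The parent path is elided since the exact prefixes depend on the hall reader's conversion dictionaries; duplicate keys get numeric suffixes.

# .../process_steps/elapsed_time   -> 1.0   (plus .../elapsed_time/@units -> 'minute')
# .../process_steps/step_name      -> 'Post-annealing'
# .../process_steps/elapsed_time1  -> 5.0   (plus .../elapsed_time1/@units -> 'minute')
# .../process_steps/step_name1     -> 'Pre-treatment'
# .../process_steps/step_name2     -> 'implantation'
# .../process_steps/step_name3     -> 'regrowth'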
65 changes: 65 additions & 0 deletions tests/data/dataconverter/readers/mpes/eln_data.yaml
@@ -0,0 +1,65 @@
Instrument:
Analyzer:
energy_resolution: 110
momentum_resolution: 0.08
slow_axes: delay
spatial_resolution: 10
Beam:
Probe:
extent:
unit: m
value:
- 7.999999999999999e-05
- 7.999999999999999e-05
incident_energy:
unit: J
value: 3.4767232957799996e-18
incident_energy_spread:
unit: J
value: 1.7623942974e-20
incident_polarization:
- 1
- 1
- 0
- 0
pulse_duration:
unit: s
value: 2.0000000000000003e-14
Pump:
extent:
unit: m
value:
- 0.00022999999999999998
- 0.000265
incident_energy:
unit: J
value: 2.4833737826999997e-19
incident_energy_spread:
unit: J
value: 1.2817413071999999e-20
incident_polarization:
- 1
- -1
- 0
- 0
pulse_duration:
unit: s
value: 3.5e-14
Sample:
chemical_formula: WSe2
description: Sample
name: WSe2 Single Crystal
preparation_date: "2019-01-13T09:00:00+00:00"
pressure:
unit: bar
value: 5.0e-14
sample_history: Cleaved
temperature:
unit: °C
value: 26.850000000000023
User:
address: Faradayweg 4-6, 14915 Berlin
affiliation: Fritz Haber Institute of the Max Planck Society
email: maklar@fhi-berlin.mpg.de
name: Julian Maklar
role: Principal Investigator
20 changes: 20 additions & 0 deletions tests/dataconverter/test_convert.py
@@ -198,3 +198,23 @@ def test_mpes_writing(tmp_path):
with open(os.path.join(dirpath, 'Ref_nexus_mpes.log'), 'r') as logfile:
ref_log = logfile.readlines()
assert log == ref_log


def test_eln_data(tmp_path):
"""Check if the subsections in the eln_data.yml file work."""
dirpath = os.path.join(os.path.dirname(__file__), "../data/dataconverter/readers/mpes")
dataconverter.convert((os.path.join(dirpath, "xarray_saved_small_calibration.h5"),
os.path.join(dirpath, "config_file.json"),
os.path.join(dirpath, "eln_data.yaml")),
"mpes", "NXmpes",
os.path.join(tmp_path, "mpes.small_test.nxs"),
False, False)


def test_eln_data_subsections(tmp_path):
"""Check if the subsections in the eln_data.yml file work."""
dirpath = os.path.join(os.path.dirname(__file__), "../data/dataconverter/readers/json_yml")
dataconverter.convert((os.path.join(dirpath, "eln_data_w_subsections.yaml",),),
"hall", "NXroot",
os.path.join(tmp_path, "hall.nxs"),
False, False)