Dataconverter param file by sherjeelshabih · Pull Request #23 · FAIRmat-NFDI/pynxtools · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Dataconverter param file #23

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions nexusparser.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# NexusParser Parameter File - v0.0.1

dataconverter:
reader: mpes
output: output.nxs
nxdl: NXmpes
input-file: ['config_file.json', 'ELN_metadata_example.yaml', 'xarray_saved_small_calibration.h5']
51 changes: 43 additions & 8 deletions nexusparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@
# limitations under the License.
#

from typing import Iterable, Union
10000 import os
import pathlib
import numpy as np
from nomad.datamodel import EntryArchive
from nomad.parsing import MatchingParser
from nomad.parsing import Parser
# from . import metainfo # pylint: disable=unused-import
from nexusparser.tools import nexus as read_nexus
from nexusparser.metainfo import nexus
Expand Down Expand Up @@ -172,19 +175,28 @@ def add_log(params, logstr):
return logstr


class NexusParser(MatchingParser):
class NexusParser(Parser):
"""NesusParser doc

"""
def __init__(self):
super().__init__(
name='parsers/nexus', code_name='NEXUS', code_homepage='https://www.nexus.eu/',
mainfile_mime_re=r'(application/.*)|(text/.*)',
mainfile_name_re=(r'.*\.nxs'),
supported_compressions=['gz', 'bz2', 'xz']
)
super().__init__()
self.name = "parsers/nexus"
self.archive = None
self.nxroot = None
self.domain = 'ems'

def is_mainfile(  # pylint: disable=too-many-arguments
        self, filename: str, mime: str, buffer: bytes, decoded_buffer: str,
        compression: str = None) -> Union[bool, Iterable[str]]:
    """Decide whether the given file should be handled by this parser.

    A file is accepted when its extension is ``.nxs``, ``.yaml`` or ``.yml``
    and its leading bytes are either the HDF5 file signature or the header
    comment of a NexusParser parameter file.

    Only ``filename`` and ``buffer`` are inspected; the other arguments are
    accepted but unused here.

    Returns:
        True when both the extension and the leading bytes match,
        False otherwise.
    """
    if pathlib.Path(filename).suffix not in (".nxs", ".yaml", ".yml"):
        return False

    # Leading byte sequences that identify a supported file: the HDF5
    # signature and the first line of a dataconverter parameter file.
    known_prefixes = (
        b'\x89HDF\r\n\x1a\n',
        b"# NexusParser Parameter File -",
    )
    return any(buffer[:len(prefix)] == prefix for prefix in known_prefixes)

# def get_nomad_classname(self, xml_name, xml_type, suffix):
# """Get nomad classname from xml file
Expand Down Expand Up @@ -237,6 +249,29 @@ def parse(self, mainfile: str, archive: EntryArchive, logger=None, child_archive
self.archive.m_create(nexus.Nexus) # type: ignore[attr-defined] # pylint: disable=no-member
self.nxroot = self.archive.nexus

extension = pathlib.Path(mainfile).suffix
if extension in (".yaml", ".yml"):
base_dir = os.path.dirname(mainfile)
from nexusparser.tools.dataconverter.convert import convert, parse_params_file
with open(mainfile) as file:
conv_params = parse_params_file(file)

def check_path(path: str):
    """Validate a user-supplied path and return it unchanged.

    The path is rejected when it is absolute or contains ``..`` anywhere in
    it, so that it cannot point outside the directory it is joined onto.

    Args:
        path: A path string taken from the parameter YAML.

    Returns:
        The same ``path`` string when it passes the checks.

    Raises:
        ValueError: If the path is absolute or contains ``..``.
    """
    # NOTE: the ".." test is a plain substring check, so a file name such as
    # "a..b.h5" is rejected as well. This is stricter than necessary but safe.
    if os.path.isabs(path) or ".." in path:
        raise ValueError("The user provided an invalid path in the parameter YAML.")
    return path

if isinstance(conv_params["input_file"], list):
conv_params["input_file"] = [f"{base_dir}{os.sep}{check_path(file)}"
for file in conv_params["input_file"]]
else:
conv_params["input_file"] = (f"{base_dir}{os.sep}"
f"{check_path(conv_params['input_file'])}")
conv_params["output"] = f"{base_dir}{os.sep}{check_path(conv_params['output'])}"
convert(**conv_params)
mainfile = conv_params["output"]

nexus_helper = read_nexus.HandleNexus(logger, [mainfile])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't NOMAD try to parse the freshly generated new NeXus file anyway?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Nomad walk over the files doesn't win the race here. The user can press reprocess to have another entry for the NXS file. It's hard to enforce this and can lead to unexpected behavior.

I went with this additional feature in h5web in Nomad that allows us to show the NeXus file associated with the dataconverter parameter file. Here is the relevant MR: https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR/-/merge_requests/734

nexus_helper.process_nexus_master_file(self.nexus_populate)

Expand Down
38 changes: 34 additions & 4 deletions nexusparser/tools/dataconverter/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import xml.etree.ElementTree as ET

import click
import yaml

from nexusparser.tools.dataconverter.readers.base.reader import BaseReader
from nexusparser.tools.dataconverter import helpers
Expand Down Expand Up @@ -91,6 +92,8 @@ def convert(input_file: Tuple[str], # pylint: disable=too-many-arguments
return

# Setting up all the input data
if isinstance(input_file, str):
input_file = (input_file,)
bulletpoint = "\n\u2022 "
logger.info("Using %s reader to convert the given files: %s ",
reader,
Expand Down Expand Up @@ -125,6 +128,14 @@ def convert(input_file: Tuple[str], # pylint: disable=too-many-arguments
logger.info("The output file generated: %s", output)


def parse_params_file(params_file):
    """Parse the dataconverter parameters from a YAML parameter file.

    Args:
        params_file: An open, readable file object holding the YAML
            parameter file.

    Returns:
        The mapping stored under the top-level ``dataconverter`` key, with
        every ``-`` in the parameter names replaced by ``_`` so the result
        can be passed as keyword arguments (e.g. ``input-file`` becomes
        ``input_file``).

    Raises:
        KeyError: If the document has no ``dataconverter`` entry.
    """
    # SECURITY: the parameter file is supplied by the user. The full
    # yaml.Loader can instantiate arbitrary Python objects from tagged
    # nodes, so only the safe subset of YAML is accepted here.
    params = yaml.safe_load(params_file)['dataconverter']
    # Iterate over a snapshot of the keys because the dict is modified
    # while renaming.
    for param in list(params.keys()):
        params[param.replace("-", "_")] = params.pop(param)
    return params


@click.command()
@click.option(
'--input-file',
Expand All @@ -141,7 +152,7 @@ def convert(input_file: Tuple[str], # pylint: disable=too-many-arguments
@click.option(
'--nxdl',
default=None,
required=True,
required=False,
help='The name of the NXDL file to use without extension.'
)
@click.option(
Expand All @@ -155,20 +166,39 @@ def convert(input_file: Tuple[str], # pylint: disable=too-many-arguments
default=False,
help='Just print out the template generated from given NXDL file.'
)
@click.option( # pylint: disable=too-many-arguments
@click.option(
'--fair',
is_flag=True,
default=False,
help='Let the converter know to be stricter in checking the documentation.'
) # pylint: disable=too-many-arguments
@click.option(
'--params-file',
type=click.File('r'),
default=None,
help='Allows to pass a .yaml file with all the parameters the converter supports.'
)
def convert_cli(input_file: Tuple[str],
reader: str,
nxdl: str,
output: str,
generate_template: bool,
fair: bool):
fair: bool,
params_file: str):
"""The CLI entrypoint for the convert function"""
convert(input_file, reader, nxdl, output, generate_template, fair)
if params_file:
try:
convert(**parse_params_file(params_file))
except TypeError:
raise Exception(("Please make sure you have the following entries in your "
"parameter file:\n\n# NexusParser Parameter File - v0.0.1"
"\n\ndataconverter:\n\treader: value\n\tnxdl: value\n\tin"
"put-file: value"))
else:
if nxdl is None:
raise Exception("\nError: Please supply an NXDL file with the option:"
" --nxdl <path to NXDL>")
convert(input_file, reader, nxdl, output, generate_template, fair)


if __name__ == '__main__':
Expand Down
0