Avoid name conflict with BaseSection classes by lukaspie · Pull Request #453 · FAIRmat-NFDI/pynxtools · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Avoid name conflict with BaseSection classes #453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Install nomad
if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.12'}}"
run: |
uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git
uv pip install git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@fixes_resolve_variadic_name
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This must be changed back later.

- name: Install pynx
run: |
uv pip install ".[dev]"
Expand Down
24 changes: 14 additions & 10 deletions src/pynxtools/nomad/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import pynxtools.nomad.schema as nexus_schema
from pynxtools.nexus.nexus import HandleNexus
from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX
from pynxtools.nomad.utils import __rename_nx_to_nomad as rename_nx_to_nomad
from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad


def _to_group_name(nx_node: ET.Element):
Expand Down Expand Up @@ -93,6 +93,8 @@ def _to_section(
# no need to change section for quantities and attributes
return current

nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True)

# for groups, get the definition from the package
new_def = current.m_def.all_sub_sections[nomad_def_name]

Expand Down Expand Up @@ -218,7 +220,7 @@ def _populate_data(
"setting attribute attempt before creating quantity"
)
current.m_set_quantity_attribute(
metainfo_def, attr_name, attr_value, quantity=quantity
quantity.name, attr_name, attr_value
)
except Exception as e:
self._logger.warning(
Expand Down Expand Up @@ -292,26 +294,26 @@ def _populate_data(
try:
current.m_set(metainfo_def, field)
current.m_set_quantity_attribute(
metainfo_def, "m_nx_data_path", hdf_node.name, quantity=field
data_instance_name, "m_nx_data_path", hdf_node.name
)
current.m_set_quantity_attribute(
metainfo_def, "m_nx_data_file", self.nxs_fname, quantity=field
data_instance_name, "m_nx_data_file", self.nxs_fname
)
if field_stats is not None:
# TODO _add_additional_attributes function has created these nx_data_*
# attributes speculatively already so if the field_stats is None
# this will cause unpopulated attributes in the GUI
current.m_set_quantity_attribute(
metainfo_def, "nx_data_mean", field_stats[0], quantity=field
data_instance_name, "nx_data_mean", field_stats[0]
)
current.m_set_quantity_attribute(
metainfo_def, "nx_data_var", field_stats[1], quantity=field
data_instance_name, "nx_data_var", field_stats[1]
)
current.m_set_quantity_attribute(
metainfo_def, "nx_data_min", field_stats[2], quantity=field
data_instance_name, "nx_data_min", field_stats[2]
)
current.m_set_quantity_attribute(
metainfo_def, "nx_data_max", field_stats[3], quantity=field
data_instance_name, "nx_data_max", field_stats[3]
)
except Exception as e:
self._logger.warning(
Expand All @@ -333,7 +335,8 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613
hdf_path: str = hdf_info["hdf_path"]
hdf_node = hdf_info["hdf_node"]
if nx_def is not None:
nx_def = rename_nx_to_nomad(nx_def)
nx_def = rename_nx_for_nomad(nx_def)

if nx_path is None:
return

Expand Down Expand Up @@ -473,7 +476,8 @@ def parse(
child_archives: Dict[str, EntryArchive] = None,
) -> None:
self.archive = archive
self.nx_root = nexus_schema.NeXus()
self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member

self.archive.data = self.nx_root
self._logger = logger if logger else get_logger(__name__)
self._clear_class_refs()
Expand Down
22 changes: 12 additions & 10 deletions src/pynxtools/nomad/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@

from pynxtools import get_definitions_url
from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path
from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_to_nomad
from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_for_nomad

# __URL_REGEXP from
# https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
Expand All @@ -83,6 +83,7 @@
r"(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+"
r'(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
)

# noinspection HttpUrlsUsage
__XML_NAMESPACES = {"nx": "http://definition.nexusformat.org/nxdl/3.1"}

Expand Down Expand Up @@ -294,8 +295,6 @@ def __to_section(name: str, **kwargs) -> Section:
class nexus definition.
"""

# name = __rename_nx_to_nomad(name)

if name in __section_definitions:
section = __section_definitions[name]
section.more.update(**kwargs)
Expand Down Expand Up @@ -373,7 +372,7 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit
todo: account for more attributes of attribute, e.g., default, minOccurs
"""
for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES):
name = attribute.get("name") + "__attribute"
name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True)

nx_enum = __get_enumeration(attribute)
if nx_enum:
Expand Down Expand Up @@ -466,7 +465,8 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity:

# name
assert "name" in xml_attrs, "Expecting name to be present"
name = xml_attrs["name"] + "__field"

name = __rename_nx_for_nomad(xml_attrs["name"], is_field=True)

# type
nx_type = xml_attrs.get("type", "NX_CHAR")
Expand Down Expand Up @@ -549,21 +549,23 @@ def __create_group(xml_node: ET.Element, root_section: Section):
xml_attrs = group.attrib

assert "type" in xml_attrs, "Expecting type to be present"
nx_type = __rename_nx_to_nomad(xml_attrs["type"])
nx_type = __rename_nx_for_nomad(xml_attrs["type"])

nx_name = xml_attrs.get("name", nx_type)
group_section = Section(validate=VALIDATE, nx_kind="group", name=nx_name)
section_name = __rename_nx_for_nomad(nx_name, is_group=True)
group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name)

__attach_base_section(group_section, root_section, __to_section(nx_type))
__add_common_properties(group, group_section)

nx_name = xml_attrs.get(
"name", nx_type.replace(__REPLACEMENT_FOR_NX, "").upper()
)
subsection_name = __rename_nx_for_nomad(nx_name, is_group=True)
group_subsection = SubSection(
section_def=group_section,
nx_kind="group",
name=nx_name,
name=subsection_name,
repeats=__if_repeats(nx_name, xml_attrs.get("maxOccurs", "0")),
variable=__if_template(nx_name),
)
Expand Down Expand Up @@ -605,15 +607,15 @@ def __create_class_section(xml_node: ET.Element) -> Section:
nx_type = xml_attrs["type"]
nx_category = xml_attrs["category"]

nx_name = __rename_nx_to_nomad(nx_name)
nx_name = __rename_nx_for_nomad(nx_name)
class_section: Section = __to_section(
nx_name, nx_kind=nx_type, nx_category=nx_category
)

nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection])

if "extends" in xml_attrs:
nx_base_sec = __to_section(__rename_nx_to_nomad(xml_attrs["extends"]))
nx_base_sec = __to_section(__rename_nx_for_nomad(xml_attrs["extends"]))
class_section.base_sections = [nx_base_sec] + [
cls.m_def for cls in nomad_base_sec_cls
]
Expand Down
62 changes: 55 additions & 7 deletions src/pynxtools/nomad/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,64 @@

__REPLACEMENT_FOR_NX = ""

# Set of NeXus group names that cannot be used as-is because they are already defined as quantities in the BaseSection class.
UNALLOWED_GROUP_NAMES = {"name", "datetime", "lab_id", "description"}

def __rename_nx_to_nomad(name: str) -> Optional[str]:

def __rename_classes_in_nomad(nx_name: str) -> Optional[str]:
    """
    Resolve clashes between NeXus group names and reserved NOMAD names.

    Names listed in UNALLOWED_GROUP_NAMES are already taken by quantities of
    the BaseSection class (or by something higher up in the metainfo), so a
    group carrying one of them gets '__group' appended to avoid the collision.

    Args:
        nx_name (str): The original group name.

    Returns:
        Optional[str]: ``nx_name`` with '__group' appended if it is one of the
        UNALLOWED_GROUP_NAMES, otherwise ``nx_name`` unchanged.
    """
    if nx_name in UNALLOWED_GROUP_NAMES:
        return nx_name + "__group"
    return nx_name


def __rename_nx_for_nomad(
    name: str,
    is_group: bool = False,
    is_field: bool = False,
    is_attribute: bool = False,
) -> Optional[str]:
    """
    Rename an NXDL name for compatibility with NOMAD, applying specific rules
    based on the type of the NeXus concept (group, field, or attribute).

    The rules are applied in this order:

    - ``None`` is returned as ``None``, whichever flag is set.
    - ``NXobject`` is returned unchanged (no prefix replacement, no suffix).
    - NX-prefixed names (e.g., NXdata) have the leading 'NX' replaced by
      ``__REPLACEMENT_FOR_NX``.
    - Group names are passed to ``__rename_classes_in_nomad()``.
    - Fields and attributes have '__field' or '__attribute' appended,
      respectively.

    The flags are checked in the order group, field, attribute; only the first
    one that is set takes effect. With no flag set, only the prefix rule
    applies.

    Args:
        name (str): The NXDL name.
        is_group (bool): Whether the name represents a group.
        is_field (bool): Whether the name represents a field.
        is_attribute (bool): Whether the name represents an attribute.

    Returns:
        Optional[str]: The renamed NXDL name, or None if ``name`` is None.
    """
    # A missing name stays missing for every kind; appending a suffix to None
    # would raise a TypeError instead of honouring the documented contract.
    if name is None or name == "NXobject":
        return name

    # Rebind instead of returning here, so that NX-prefixed names still go
    # through the group/field/attribute handling below.
    if name.startswith("NX"):
        name = __REPLACEMENT_FOR_NX + name[2:]

    if is_group:
        name = __rename_classes_in_nomad(name)
    elif is_field:
        name += "__field"
    elif is_attribute:
        name += "__attribute"

    return name
Binary file added tests/data/nomad/NXlauetof.hdf5
Binary file not shown.
117 changes: 117 additions & 0 deletions tests/nomad/test_metainfo_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""This is a code that performs several tests on nexus tool"""

#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest

try:
from nomad.metainfo import Section
except ImportError:
pytest.skip("nomad not installed", allow_module_level=True)

from typing import Any

from pynxtools.nomad.schema import nexus_metainfo_package
from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad


@pytest.mark.parametrize(
    "path,value",
    [
        pytest.param("name", "nexus"),
        pytest.param("NXobject.name", "NXobject"),
        pytest.param(rename_nx_for_nomad("NXentry") + ".nx_kind", "group"),
        pytest.param(rename_nx_for_nomad("NXdetector") + ".real_time__field", "*"),
        pytest.param(rename_nx_for_nomad("NXentry") + ".DATA.nx_optional", True),
        pytest.param(rename_nx_for_nomad("NXentry") + ".DATA.nx_kind", "group"),
        pytest.param(
            rename_nx_for_nomad("NXdetector") + ".real_time__field.name",
            "real_time__field",
        ),
        pytest.param(
            rename_nx_for_nomad("NXdetector") + ".real_time__field.nx_type",
            "NX_NUMBER",
        ),
        pytest.param(
            rename_nx_for_nomad("NXdetector") + ".real_time__field.nx_units",
            "NX_TIME",
        ),
        pytest.param(rename_nx_for_nomad("NXarpes") + ".ENTRY.DATA.nx_optional", False),
        pytest.param(rename_nx_for_nomad("NXentry") + ".nx_category", "base"),
        pytest.param(
            rename_nx_for_nomad("NXdispersion_table")
            + ".refractive_index__field.nx_type",
            "NX_COMPLEX",
        ),
        pytest.param(
            rename_nx_for_nomad("NXdispersive_material")
            + ".ENTRY.dispersion_x."
            + "DISPERSION_TABLE.refractive_index__field.nx_type",
            "NX_COMPLEX",
        ),
        pytest.param(rename_nx_for_nomad("NXapm") + ".nx_category", "application"),
    ],
)
def test_assert_nexus_metainfo(path: str, value: Any):
    """
    Check that a dotted path resolves inside the NeXus metainfo package.

    Starting at ``nexus_metainfo_package``, every path segment is first looked
    up by ``name`` among the children of the current element. If no child
    matches, the segment is read as a plain attribute of the current element.

    Args:
        path: Dot-separated segment names to walk.
        value: Expected value at the end of the path. ``"*"`` only requires
            that the path exists.
    """
    current = nexus_metainfo_package
    for name in path.split("."):
        elements: list = []
        if name.endswith("__field"):
            # Names ending in "__field" are searched among quantities only.
            elements += getattr(current, "quantities", None) or []
        else:
            for container in ("section_definitions", "sub_sections", "attributes"):
                elements += getattr(current, container, None) or []
            elements += current.m_contents() or []

        for content in elements:
            if getattr(content, "name", None) == name:
                current = content  # type: ignore
                # A match that exposes ``sub_section`` is swapped for its
                # ``section_definition`` so the walk continues from there.
                if getattr(current, "sub_section", None):
                    current = current.section_definition
                break
        else:
            # No child carries this name: treat the segment as an attribute.
            current = getattr(current, name, None)

        if current is None:
            pytest.fail(f"{path} does not exist")

    if value == "*":
        assert current is not None, f"{path} does not exist"
    elif value is None:
        # NOTE: a segment resolving to None has already failed inside the loop,
        # so this branch cannot pass for any path as the test is written.
        assert current is None, f"{path} does exist"
    else:
        assert current == value, f"{path} has wrong value"

    if isinstance(current, Section):
        assert current.nx_kind is not None
        for base_section in current.all_base_sections:
            assert base_section.nx_kind == current.nx_kind
Loading
Loading
0