Optionality check in nyam2nxdl by RubelMozumder · Pull Request #126 · FAIRmat-NFDI/pynxtools · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Optionality check in nyam2nxdl #126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 13, 2023
5 changes: 4 additions & 1 deletion pynxtools/nyaml2nxdl/nyaml2nxdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@
compare_niac_and_my)


DEPTH_SIZE = " "
DEPTH_SIZE = 4 * " "

# NOTE: Some helpful links for the nyaml2nxdl converter:
# https://manual.nexusformat.org/nxdl_desc.html?highlight=optional


def generate_nxdl_or_retrieve_nxdl(yaml_file, out_xml_file, verbose):
Expand Down
16 changes: 9 additions & 7 deletions pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None):
Handle the doc fields found throughout the yaml file. This function also tries to keep
track of the intended indentation, e.g. for the doc block below.
* Topic name
DEscription of topic
Description of topic
"""

# Handling empty doc
Expand All @@ -280,14 +280,16 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None):
text = handle_mapping_char(text, -1, True)
if "\n" in text:
# To remove '\n' character as it will be added before text.
text = text.split('\n')
text = cleaning_empty_lines(text)
text = cleaning_empty_lines(text.split('\n'))
text_tmp = []
yaml_indent_n = len((depth + 1) * DEPTH_SIZE)
# Find indentaion in the first valid line with alphabet
# Find indentation in the first text line with alphabet
tmp_i = 0
while tmp_i != -1:
first_line_indent_n = 0
# Taking care of empty text without any character
if len(text) == 1 and text[0] == '':
break
for ch_ in text[tmp_i]:
if ch_ == ' ':
first_line_indent_n = first_line_indent_n + 1
Expand Down Expand Up @@ -538,8 +540,8 @@ def handle_dimension(self, depth, node, file_out):
and attributes of dim has been handled inside this function here.
"""
# pylint: disable=consider-using-f-string
possible_dim_attrs = ['ref', 'optional', 'recommended',
'required', 'incr', 'refindex']
possible_dim_attrs = ['ref', 'required',
'incr', 'refindex']
possible_dimemsion_attrs = ['rank']

# taking care of Dimension tag
Expand Down Expand Up @@ -851,7 +853,7 @@ def xmlparse(self, output_yml, xml_tree, depth, verbose):
sys.stdout.write(f'Attributes: {node.attrib}\n')
with open(output_yml, "a", encoding="utf-8") as file_out:
tag = remove_namespace_from_tag(node.tag)
if tag == ('definition'):
if tag == 'definition':
self.found_definition = True
self.handle_definition(node)
# Taking care of root level doc and symbols
Expand Down
106 changes: 78 additions & 28 deletions pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from pynxtools.nexus import nexus
from pynxtools.nyaml2nxdl.comment_collector import CommentCollector
from pynxtools.dataconverter.helpers import remove_namespace_from_tag
from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_yaml_escape_char_reverter_dict,
nx_name_type_resolving,
cleaning_empty_lines, LineLoader)
Expand Down Expand Up @@ -65,6 +66,7 @@
DEPTH_SIZE = " "
NX_UNIT_TYPES = nexus.get_nx_units()
COMMENT_BLOCKS: CommentCollector
CATEGORY = '' # Definition would be either 'base' or 'application'


def check_for_dom_comment_in_yaml():
Expand Down Expand Up @@ -117,9 +119,60 @@ def yml_reader(inputfile):
global DOM_COMMENT
if dom_cmnt_frm_yaml:
DOM_COMMENT = dom_cmnt_frm_yaml

if 'category' not in loaded_yaml.keys():
raise ValueError("All definitions should be either 'base' or 'application' category. "
"No category has been found.")
global CATEGORY
CATEGORY = loaded_yaml['category']
return loaded_yaml


def check_for_default_attribute_and_value(xml_element):
    """Add the implicit NXDL default attributes to the descendants of an element.

    The children of ``xml_element`` are visited recursively and modified in
    place. Depending on the module-level ``CATEGORY``:

    * 'group', 'field' and 'attribute' elements receive ``optional``
      ('true' for 'base', 'false' for 'application');
    * 'dim' elements receive ``required``
      ('false' for 'base', 'true' for 'application').

    A default is only written when the element has neither that attribute nor
    any of 'maxOccurs', 'minOccurs' or 'recommended'. Children whose tag is not
    a string (the comment nodes collected from the yaml file) are skipped.
    """

    # (category, defaults) pairs for every element except dimension elements
    element_defaults = (('base', {'optional': 'true'}),
                        ('application', {'optional': 'false'}))
    # (category, defaults) pairs for dimension elements
    dim_defaults = (('base', {'required': 'false'}),
                    ('application', {'required': 'true'}))

    # Tags (besides 'dim') that are eligible for a default attribute
    tags_with_optional = ('group', 'field', 'attribute')
    # Attributes whose presence suppresses writing any default
    occurrence_markers = ('maxOccurs', 'minOccurs', 'recommended')

    def apply_missing_defaults(node, defaults):
        for attr_name, attr_value in defaults.items():
            if attr_name in node.attrib:
                continue
            if any(marker in node.attrib for marker in occurrence_markers):
                continue
            node.set(attr_name, attr_value)

    for child in xml_element:
        # Comment nodes have a function, not a string, as their tag.
        if not isinstance(child.tag, str):
            continue
        tag = remove_namespace_from_tag(child.tag)

        if tag == 'dim':
            candidates = dim_defaults
        elif tag in tags_with_optional:
            candidates = element_defaults
        else:
            candidates = ()

        for category, defaults in candidates:
            if CATEGORY == category:
                apply_missing_defaults(child, defaults)

        check_for_default_attribute_and_value(child)


def yml_reader_nolinetag(inputfile):
"""
pyyaml based parsing of yaml file in python dict
Expand All @@ -132,7 +185,7 @@ def yml_reader_nolinetag(inputfile):
def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=False):
"""
Check for any attributes have been skipped or not.
NOTE: We should we should keep in mind about 'doc'
NOTE: We should keep in mind about 'doc'
"""
block_tag = ['enumeration']
if value:
Expand All @@ -154,20 +207,6 @@ def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=Fal
f"moment. The allowed attrbutes are {allowed_attr}")


def check_optionality_and_write(obj, opl_key, opl_val):
    """Translate an optionality key/value pair into attributes on ``obj``.

    * ``optional`` with a false value marks the element as required
      (``required="true"``); any other value writes nothing.
    * ``minOccurs`` is written unchanged unless its value is zero.
    * Every other key is ignored.

    ``opl_val`` may be a string or a native scalar such as ``False`` or ``0``.
    It is converted to lower-case text before comparing, so ``False`` and
    ``'false'`` (or ``0`` and ``'0'``) are handled identically.

    Args:
        obj: Element-like object providing ``set(name, value)``.
        opl_key: Name of the optionality keyword.
        opl_val: Value given for that keyword.
    """
    # Compare on the textual form: the value is stringified when written, so
    # it is not guaranteed to arrive as a str.
    normalised = str(opl_val).lower()

    if opl_key == 'optional':
        if normalised == 'false':
            obj.set('required', 'true')
    elif opl_key == 'minOccurs':
        # A minimum of zero is deliberately not written.
        if normalised != '0':
            obj.set(opl_key, str(opl_val))


def format_nxdl_doc(string):
"""NeXus format for doc string
"""
Expand Down Expand Up @@ -237,20 +276,19 @@ def xml_handle_exists(dct, obj, keyword, value):
"""
This function creates an 'exists' element instance, and appends it to an existing element
"""

line_number = f'__line__{keyword}'
assert value is not None, f'Line {dct[line_number]}: exists argument must not be None !'
if isinstance(value, list):
if len(value) == 2 and value[0] == 'min':
obj.set('minOccurs', str(value[1]))
elif len(value) == 2 and value[0] == 'max':
obj.set('maxOccurs', str(value[1]))
elif len(value) == 4 and value[0] == 'min' and value[2] == 'max':
if len(value) == 4 and value[0] == 'min' and value[2] == 'max':
obj.set('minOccurs', str(value[1]))
if str(value[3]) != 'infty':
obj.set('maxOccurs', str(value[3]))
else:
obj.set('maxOccurs', 'unbounded')
elif len(value) == 2 and value[0] == 'min':
obj.set('minOccurs', str(value[1]))
elif len(value) == 2 and value[0] == 'max':
obj.set('maxOccurs', str(value[1]))
elif len(value) == 4 and value[0] == 'max' and value[2] == 'min':
obj.set('minOccurs', str(value[3]))
if str(value[1]) != 'infty':
Expand All @@ -268,12 +306,14 @@ def xml_handle_exists(dct, obj, keyword, value):
f'entries either [min, <uint>] or [max, <uint>], or a list of four '
f'entries [min, <uint>, max, <uint>] !')
else:
# This clause handles 'optional' for all concepts except dimensions, where the
# 'required' key is allowed instead of the 'optional' key.
if value == 'optional':
obj.set('optional', 'true')
elif value == 'recommended':
obj.set('recommended', 'true')
elif value == 'required':
obj.set('required', 'true')
obj.set('optional', 'false')
else:
obj.set('minOccurs', '0')

Expand All @@ -300,7 +340,6 @@ def xml_handle_group(dct, obj, keyword, value, verbose=False):
raise ValueError("A group must have both value and name. Check for group.")
grp = ET.SubElement(obj, 'group')

# type come first
if l_bracket == 0 and r_bracket > 0:
grp.set('type', keyword_type)
if keyword_name:
Expand Down Expand Up @@ -364,7 +403,7 @@ def xml_handle_dimensions(dct, obj, keyword, value: dict):
incr:[...]'
"""

possible_dimension_attrs = ['rank']
possible_dimension_attrs = ['rank'] # nxdl attributes
line_number = f'__line__{keyword}'
line_loc = dct[line_number]
assert 'dim' in value.keys(), (f"Line {line_loc}: No dim as child of dimension has "
Expand All @@ -373,7 +412,7 @@ def xml_handle_dimensions(dct, obj, keyword, value: dict):
dims = ET.SubElement(obj, 'dimensions')
# Consider all the children under dimension to be dim elements and
# their attributes
# val_attrs = list(value.keys())

rm_key_list = []
rank = ''
for key, val in value.items():
Expand Down Expand Up @@ -418,7 +457,11 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb
function. please also read note in xml_handle_dimensions.
"""

possible_dim_attrs = ['ref', 'optional', 'recommended', 'required', 'incr', 'refindex']
possible_dim_attrs = ['ref', 'incr', 'refindex', 'required']

# Some attributes are often written under a wrong name, e.g. 'optional' where the
# correct dim attribute is 'required'. Each pair maps the wrong name to the correct one.
wrong_to_correct_attr = [('optional', 'required')]
header_line_number = f"__line__{keyword}"
dim_list = []
rm_key_list = []
Expand All @@ -431,7 +474,6 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb
for attr, vvalue in value.items():
if '__line__' in attr:
continue

line_number = f"__line__{attr}"
line_loc = value[line_number]
# dim comes in precedence
Expand Down Expand Up @@ -466,6 +508,11 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb
continue
cmnt_number = f'__line__{kkkey}'
cmnt_loc = vvalue[cmnt_number]
# Check whether any optional attributes added
for tuple_wng_crt in wrong_to_correct_attr:
if kkkey == tuple_wng_crt[0]:
raise ValueError(f"{cmnt_loc}: Attribute '{kkkey}' is prohibited, use "
f"'{tuple_wng_crt[1]}")
if kkkey == 'doc' and dim_list:
# doc comes as list of doc
for i, dim in enumerate(dim_list):
Expand Down Expand Up @@ -782,7 +829,6 @@ def xml_handle_fields(obj, keyword, value, line_annot, line_loc, verbose=False):
then the not empty keyword_name is a field!
This simple function will define a new node of xml tree
"""

# List of possible attributes of xml elements
allowed_attr = ['name', 'type', 'nameType', 'unit', 'minOccurs', 'long_name',
'axis', 'signal', 'deprecated', 'axes', 'exists',
Expand Down Expand Up @@ -1106,6 +1152,10 @@ def nyaml2nxdl(input_file: str, out_file, verbose: bool):
(lin_annot, line_loc) = post_comment.get_line_info()
xml_handle_comment(xml_root, lin_annot, line_loc)

# NOTE: Disabled by default; kept so the default-attribute pass can be re-enabled later.
default_attr = False
if default_attr:
check_for_default_attribute_and_value(xml_root)
pretty_print_xml(xml_root, out_file, def_cmnt_text)
if verbose:
sys.stdout.write('Parsed YAML to NXDL successfully\n')
11 changes: 7 additions & 4 deletions pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
# So the corresponding value is to skip them;
# also be careful about this order
import hashlib
import os
from yaml.composer import Composer
from yaml.constructor import Constructor

Expand Down Expand Up @@ -111,7 +110,6 @@ def cleaning_empty_lines(line_list):
"""
Cleaning up empty lines on top and bottom.
"""

if not isinstance(line_list, list):
line_list = line_list.split('\n') if '\n' in line_list else ['']

Expand All @@ -120,11 +118,18 @@ def cleaning_empty_lines(line_list):
if line_list[0].strip():
break
line_list = line_list[1:]
if len(line_list) == 0:
line_list.append('')
return line_list

# Cleaning bottom empty lines
while True:
if line_list[-1].strip():
break
line_list = line_list[0:-1]
if len(line_list) == 0:
line_list.append('')
return line_list

return line_list

Expand Down Expand Up @@ -215,7 +220,5 @@ def separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, sep_xml):
# If the yaml file does not contain any hash for nxdl then we may still have a last line.
if last_line:
yml_f_ob.write(last_line)
if not sha_hash:
os.remove(sep_xml)

return sha_hash
15 changes: 14 additions & 1 deletion tests/data/nyaml2nxdl/NXattributes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ NXellipsometry_base_draft(my_test_extends):
doc: attribute documentation
doc: documentation no. 2
experiment_identifier:
exists: required
exists: ['min', 3, 'max', 100]
doc: documentation no. 3
experiment_description:
exists: required
Expand All @@ -19,10 +19,23 @@ NXellipsometry_base_draft(my_test_extends):
program_name:
doc: documentation no. 4
program_version:
exists: ['min', 5]
doc: documentation no. 5
time_zone(NX_DATE_TIME):
exists: required
doc: documentation no. 6
definition_local:
exists: ['max', 5]
doc: documentation no. 7
\@version:
calibration_data(NX_NUMBER):
unit: NX_UNITLESS
doc: |
Calibration is performed on a reference surface (usually silicon wafer with well
defined oxide layer) at a number of angles, then in a straight through mode
(transmission in air).
dimensions:
rank: 3
dim: [[3, N_calibration_angles+1], [2, N_variables], [1, N_calibration_wavelength]]
dim_parameters:
required: ['true', 'true', 'true']
Loading
0