FAIRmat-NFDI · sanbrock · Jun 13, 2023 · Jun 9, 2023 · Jun 9, 2023 · Jun 13, 2023
diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl.py b/pynxtools/nyaml2nxdl/nyaml2nxdl.py
@@ -34,7 +34,10 @@
                                                             compare_niac_and_my)
 
 
-DEPTH_SIZE = "    "
+DEPTH_SIZE = 4 * " "
+
+# NOTE: Some helpful links for the nyaml2nxdl converter:
+# https://manual.nexusformat.org/nxdl_desc.html?highlight=optional
 
 
 def generate_nxdl_or_retrieve_nxdl(yaml_file, out_xml_file, verbose):

diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py b/pynxtools/nyaml2nxdl/nyaml2nxdl_backward_tools.py
@@ -270,7 +270,7 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None):
         Handle docs field along the yaml file. In this function we also tried to keep
         the track of intended indentation. E.g. the bollow doc block.
             * Topic name
-              DEscription of topic
+                Description of topic
         """
 
         # Handling empty doc
@@ -280,14 +280,16 @@ def handle_not_root_level_doc(self, depth, text, tag='doc', file_out=None):
             text = handle_mapping_char(text, -1, True)
         if "\n" in text:
             # To remove '\n' character as it will be added before text.
-            text = text.split('\n')
-            text = cleaning_empty_lines(text)
+            text = cleaning_empty_lines(text.split('\n'))
             text_tmp = []
             yaml_indent_n = len((depth + 1) * DEPTH_SIZE)
-            # Find indentaion in the first valid line with alphabet
+            # Find indentation in the first text line with alphabet
             tmp_i = 0
             while tmp_i != -1:
                 first_line_indent_n = 0
+                # Taking care of empty text without any character
+                if len(text) == 1 and text[0] == '':
+                    break
                 for ch_ in text[tmp_i]:
                     if ch_ == ' ':
                         first_line_indent_n = first_line_indent_n + 1
@@ -538,8 +540,8 @@ def handle_dimension(self, depth, node, file_out):
           and attributes of dim has been handled inside this function here.
         """
         # pylint: disable=consider-using-f-string
-        possible_dim_attrs = ['ref', 'optional', 'recommended',
-                              'required', 'incr', 'refindex']
+        possible_dim_attrs = ['ref', 'required',
+                              'incr', 'refindex']
         possible_dimemsion_attrs = ['rank']
 
         # taking care of Dimension tag
@@ -851,7 +853,7 @@ def xmlparse(self, output_yml, xml_tree, depth, verbose):
             sys.stdout.write(f'Attributes: {node.attrib}\n')
         with open(output_yml, "a", encoding="utf-8") as file_out:
             tag = remove_namespace_from_tag(node.tag)
-            if tag == ('definition'):
+            if tag == 'definition':
                 self.found_definition = True
                 self.handle_definition(node)
                 # Taking care of root level doc and symbols

diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py b/pynxtools/nyaml2nxdl/nyaml2nxdl_forward_tools.py
@@ -31,6 +31,7 @@
 
 from pynxtools.nexus import nexus
 from pynxtools.nyaml2nxdl.comment_collector import CommentCollector
+from pynxtools.dataconverter.helpers import remove_namespace_from_tag
 from pynxtools.nyaml2nxdl.nyaml2nxdl_helper import (get_yaml_escape_char_reverter_dict,
                                                     nx_name_type_resolving,
                                                     cleaning_empty_lines, LineLoader)
@@ -65,6 +66,7 @@
 DEPTH_SIZE = "    "
 NX_UNIT_TYPES = nexus.get_nx_units()
 COMMENT_BLOCKS: CommentCollector
+CATEGORY = ''  # Definition would be either 'base' or 'application'
 
 
 def check_for_dom_comment_in_yaml():
@@ -117,9 +119,60 @@ def yml_reader(inputfile):
     global DOM_COMMENT
     if dom_cmnt_frm_yaml:
         DOM_COMMENT = dom_cmnt_frm_yaml
+
+    if 'category' not in loaded_yaml.keys():
+        raise ValueError("All definitions should be either 'base' or 'application' category. "
+                         "No category has been found.")
+    global CATEGORY
+    CATEGORY = loaded_yaml['category']
     return loaded_yaml
 
 
+def check_for_default_attribute_and_value(xml_element):
+    """Recursively set per-category default attributes ('optional', or 'required' for dim) on
+    descendants of xml_element lacking them and lacking minOccurs/maxOccurs/recommended.
+    """
+
+    # base: default attributes and values for all base-class elements except the 'dim' element
+    base_attr_to_val = {'optional': 'true'}
+
+    # application: default attributes and values for all application-class elements except
+    # the 'dim' element
+    application_attr_to_val = {'optional': 'false'}
+
+    # Default attributes and values for the 'dim' element, per category
+    base_dim_attr_to_val = {'required': 'false'}
+    application_dim_attr_to_val = {'required': 'true'}
+
+    # Tags eligible for the default attributes and values
+    elegible_tag = ['group', 'field', 'attribute']
+
+    def set_default_attribute(xml_elem, default_attr_to_val):
+        for deflt_attr, deflt_val in default_attr_to_val.items():
+            if deflt_attr not in xml_elem.attrib \
+                and 'maxOccurs' not in xml_elem.attrib \
+                    and 'minOccurs' not in xml_elem.attrib \
+                        and 'recommended' not in xml_elem.attrib:
+                xml_elem.set(deflt_attr, deflt_val)
+
+    for child in list(xml_element):
+        # Skip children whose tag is not a string, e.g. comments collected from the yaml file.
+        if not isinstance(child.tag, str):
+            continue
+        tag = remove_namespace_from_tag(child.tag)
+
+        if tag == 'dim' and CATEGORY == 'base':
+            set_default_attribute(child, base_dim_attr_to_val)
+        if tag == 'dim' and CATEGORY == 'application':
+            set_default_attribute(child, application_dim_attr_to_val)
+        if tag in elegible_tag and CATEGORY == 'base':
+            set_default_attribute(child, base_attr_to_val)
+        if tag in elegible_tag and CATEGORY == 'application':
+
+            set_default_attribute(child, application_attr_to_val)
+        check_for_default_attribute_and_value(child)
+
+
 def yml_reader_nolinetag(inputfile):
     """
     pyyaml based parsing of yaml file in python dict
@@ -132,7 +185,7 @@ def yml_reader_nolinetag(inputfile):
 def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=False):
     """
         Check for any attributes have been skipped or not.
-        NOTE: We should we should keep in mind about 'doc'
+        NOTE: We should keep in mind about 'doc'
     """
     block_tag = ['enumeration']
     if value:
@@ -154,20 +207,6 @@ def check_for_skiped_attributes(component, value, allowed_attr=None, verbose=Fal
                                  f"moment. The allowed attrbutes are {allowed_attr}")
 
 
-def check_optionality_and_write(obj, opl_key, opl_val):
-    """
-    Taking care of optinality.
-    """
-    if opl_key == 'optional':
-        if opl_val == 'false':
-            obj.set('required', 'true')
-    elif opl_key == 'minOccurs':
-        if opl_val == '0':
-            pass
-        else:
-            obj.set(opl_key, str(opl_val))
-
-
 def format_nxdl_doc(string):
     """NeXus format for doc string
     """
@@ -237,20 +276,19 @@ def xml_handle_exists(dct, obj, keyword, value):
     """
     This function creates an 'exists' element instance, and appends it to an existing element
     """
-
     line_number = f'__line__{keyword}'
     assert value is not None, f'Line {dct[line_number]}: exists argument must not be None !'
     if isinstance(value, list):
-        if len(value) == 2 and value[0] == 'min':
-            obj.set('minOccurs', str(value[1]))
-        elif len(value) == 2 and value[0] == 'max':
-            obj.set('maxOccurs', str(value[1]))
-        elif len(value) == 4 and value[0] == 'min' and value[2] == 'max':
+        if len(value) == 4 and value[0] == 'min' and value[2] == 'max':
             obj.set('minOccurs', str(value[1]))
             if str(value[3]) != 'infty':
                 obj.set('maxOccurs', str(value[3]))
             else:
                 obj.set('maxOccurs', 'unbounded')
+        elif len(value) == 2 and value[0] == 'min':
+            obj.set('minOccurs', str(value[1]))
+        elif len(value) == 2 and value[0] == 'max':
+            obj.set('maxOccurs', str(value[1]))
         elif len(value) == 4 and value[0] == 'max' and value[2] == 'min':
             obj.set('minOccurs', str(value[3]))
             if str(value[1]) != 'infty':
@@ -268,12 +306,14 @@ def xml_handle_exists(dct, obj, keyword, value):
                              f'entries either [min, <uint>] or [max, <uint>], or a list of four '
                              f'entries [min, <uint>, max, <uint>] !')
     else:
+        # This clause handles 'optional' for every concept except dimension, where the
+        # 'required' key is allowed instead of the 'optional' key.
         if value == 'optional':
             obj.set('optional', 'true')
         elif value == 'recommended':
             obj.set('recommended', 'true')
         elif value == 'required':
-            obj.set('required', 'true')
+            obj.set('optional', 'false')
         else:
             obj.set('minOccurs', '0')
 
@@ -300,7 +340,6 @@ def xml_handle_group(dct, obj, keyword, value, verbose=False):
         raise ValueError("A group must have both value and name. Check for group.")
     grp = ET.SubElement(obj, 'group')
 
-    # type come first
     if l_bracket == 0 and r_bracket > 0:
         grp.set('type', keyword_type)
         if keyword_name:
@@ -364,7 +403,7 @@ def xml_handle_dimensions(dct, obj, keyword, value: dict):
             incr:[...]'
     """
 
-    possible_dimension_attrs = ['rank']
+    possible_dimension_attrs = ['rank']  # nxdl attributes
     line_number = f'__line__{keyword}'
     line_loc = dct[line_number]
     assert 'dim' in value.keys(), (f"Line {line_loc}: No dim as child of dimension has "
@@ -373,7 +412,7 @@ def xml_handle_dimensions(dct, obj, keyword, value: dict):
     dims = ET.SubElement(obj, 'dimensions')
     # Consider all the childs under dimension is dim element and
     # its attributes
-#    val_attrs = list(value.keys())
+
     rm_key_list = []
     rank = ''
     for key, val in value.items():
@@ -418,7 +457,11 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb
         function. please also read note in xml_handle_dimensions.
     """
 
-    possible_dim_attrs = ['ref', 'optional', 'recommended', 'required', 'incr', 'refindex']
+    possible_dim_attrs = ['ref', 'incr', 'refindex', 'required']
+
+    # Some attributes have a mistaken alias, e.g. 'optional' written where 'required' is the
+    # correct name. Each tuple is (wrong, correct); the wrong name is rejected with an error.
+    wrong_to_correct_attr = [('optional', 'required')]
     header_line_number = f"__line__{keyword}"
     dim_list = []
     rm_key_list = []
@@ -431,7 +474,6 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb
     for attr, vvalue in value.items():
         if '__line__' in attr:
             continue
-
         line_number = f"__line__{attr}"
         line_loc = value[line_number]
         # dim comes in precedence
@@ -466,6 +508,11 @@ def xml_handle_dim_from_dimension_dict(dct, dims_obj, keyword, value, rank, verb
                     continue
                 cmnt_number = f'__line__{kkkey}'
                 cmnt_loc = vvalue[cmnt_number]
+                # Reject prohibited attribute names and point to the correct one
+                for tuple_wng_crt in wrong_to_correct_attr:
+                    if kkkey == tuple_wng_crt[0]:
+                        raise ValueError(f"{cmnt_loc}: Attribute '{kkkey}' is prohibited, use "
+                                         f"'{tuple_wng_crt[1]}")
                 if kkkey == 'doc' and dim_list:
                     # doc comes as list of doc
                     for i, dim in enumerate(dim_list):
@@ -782,7 +829,6 @@ def xml_handle_fields(obj, keyword, value, line_annot, line_loc, verbose=False):
     then the not empty keyword_name is a field!
     This simple function will define a new node of xml tree
     """
-
     # List of possible attributes of xml elements
     allowed_attr = ['name', 'type', 'nameType', 'unit', 'minOccurs', 'long_name',
                     'axis', 'signal', 'deprecated', 'axes', 'exists',
@@ -1106,6 +1152,10 @@ def nyaml2nxdl(input_file: str, out_file, verbose: bool):
         (lin_annot, line_loc) = post_comment.get_line_info()
         xml_handle_comment(xml_root, lin_annot, line_loc)
 
+    # NOTE: Default-attribute injection is disabled; the call is kept in case it is needed later.
+    default_attr = False
+    if default_attr:
+        check_for_default_attribute_and_value(xml_root)
     pretty_print_xml(xml_root, out_file, def_cmnt_text)
     if verbose:
         sys.stdout.write('Parsed YAML to NXDL successfully\n')
diff --git a/pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py b/pynxtools/nyaml2nxdl/nyaml2nxdl_helper.py
@@ -28,7 +28,6 @@
 # So the corresponding value is to skip them and
 # and also carefull about this order
 import hashlib
-import os
 from yaml.composer import Composer
 from yaml.constructor import Constructor
 
@@ -111,7 +110,6 @@ def cleaning_empty_lines(line_list):
     """
         Cleaning up empty lines on top and bottom.
     """
-
     if not isinstance(line_list, list):
         line_list = line_list.split('\n') if '\n' in line_list else ['']
 
@@ -120,11 +118,18 @@ def cleaning_empty_lines(line_list):
         if line_list[0].strip():
             break
         line_list = line_list[1:]
+        if len(line_list) == 0:
+            line_list.append('')
+            return line_list
+
     # Clining bottom empty lines
     while True:
         if line_list[-1].strip():
             break
         line_list = line_list[0:-1]
+        if len(line_list) == 0:
+            line_list.append('')
+            return line_list
 
     return line_list
 
@@ -215,7 +220,5 @@ def separate_hash_yaml_and_nxdl(yaml_file, sep_yaml, sep_xml):
             # If the yaml fiile does not contain any hash for nxdl then we may have last line.
             if last_line:
                 yml_f_ob.write(last_line)
-    if not sha_hash:
-        os.remove(sep_xml)
 
     return sha_hash
diff --git a/tests/data/nyaml2nxdl/NXattributes.yaml b/tests/data/nyaml2nxdl/NXattributes.yaml
@@ -9,7 +9,7 @@ NXellipsometry_base_draft(my_test_extends):
       doc: attribute documentation
     doc: documentation no. 2
     experiment_identifier:
-      exists: required
+      exists: ['min', 3, 'max', 100]
       doc: documentation no. 3
     experiment_description:
       exists: required
@@ -19,10 +19,23 @@ NXellipsometry_base_draft(my_test_extends):
     program_name:
       doc: documentation no. 4
     program_version:
+      exists: ['min', 5]
       doc: documentation no. 5
     time_zone(NX_DATE_TIME):
       exists: required
       doc: documentation no. 6
     definition_local:
+      exists: ['max', 5]
       doc: documentation no. 7
       \@version:
+    calibration_data(NX_NUMBER):
+      unit: NX_UNITLESS
+      doc: |
+        Calibration is performed on a reference surface (usually silicon wafer with well
+        defined oxide layer) at a number of angles, then in a straight through mode
+        (transmission in air).
+      dimensions:
+        rank: 3
+        dim: [[3, N_calibration_angles+1], [2, N_variables], [1, N_calibration_wavelength]]
+        dim_parameters:
+          required: ['true', 'true', 'true']