Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes the types and removes bytes from NX_CHAR, as accepting bytes there causes failures #577

Open
wants to merge 13 commits into
base: nx_char_type
Choose a base branch
from
108 changes: 28 additions & 80 deletions src/pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from datetime import datetime, timezone
from enum import Enum
from functools import lru_cache
from typing import Any, Callable, List, Optional, Tuple, Union
from typing import Any, Callable, List, Optional, Tuple, Union, Sequence

import h5py
import lxml.etree as ET
Expand Down Expand Up @@ -215,7 +215,6 @@ def get_nxdl_name_for(xml_elem: ET._Element) -> Optional[str]:
The name of the element.
None if the xml element has no name or type attribute.
"""
""""""
if "name" in xml_elem.attrib:
return xml_elem.attrib["name"]
if "type" in xml_elem.attrib:
Expand Down Expand Up @@ -575,110 +574,59 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]:
return True, []


NUMPY_FLOAT_TYPES = (np.half, np.float16, np.single, np.double, np.longdouble)
NUMPY_INT_TYPES = (np.short, np.intc, np.int_)
NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint)
# NumPy integer and floating-point base types (as of NumPy 1.26.0)
np_int = (np.integer,)
np_float = (np.floating,)
# Not to be confused with `np.byte` and `np.ubyte`, which store
# a signed and an unsigned 8-bit integer, respectively.
np_bytes = (np.bytes_,)
np_char = (
np.str_,
np.bytes_,
np.chararray,
) # Only numpy Unicode string and Byte string
np_bool = (np.bool_,)
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle, np.complex_)
nx_char = (str, np.character)
nx_int = (int, np.integer)
nx_float = (float, np.floating)
nx_number = nx_int + nx_float

NEXUS_TO_PYTHON_DATA_TYPES = {
"ISO8601": (str,),
"NX_BINARY": (
bytes,
*np_bytes,
),
"NX_BOOLEAN": (bool, *np_bool),
"NX_CHAR": (str, *np_char),
"NX_BINARY": (bytes, bytearray, np.bytes_),
"NX_BOOLEAN": (bool, np.bool_),
"NX_CHAR": nx_char,
"NX_DATE_TIME": (str,),
"NX_FLOAT": (float, *np_float),
"NX_INT": (int, *np_int),
"NX_UINT": (
np.unsignedinteger,
np.uint,
),
"NX_NUMBER": (
int,
float,
*np_int,
*np_float,
"NX_FLOAT": nx_float,
"NX_INT": nx_int,
"NX_UINT": (np.unsignedinteger,),
"NX_NUMBER": nx_number,
"NX_POSINT": nx_int, # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (
complex,
np.complexfloating,
),
"NX_POSINT": (
int,
np.signedinteger,
), # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (complex, *np_complex),
"NX_CHAR_OR_NUMBER": nx_char + nx_number,
"NXDL_TYPE_UNAVAILABLE": (
str,
*np_char,
nx_char,
), # Defaults to a string if a type is not provided.
"NX_CHAR_OR_NUMBER": (
str,
int,
float,
*np_char,
*np_int,
*np_float,
),
}


def check_all_children_for_callable(
objects: Union[list, np.ndarray],
checker: Optional[Callable] = None,
accepted_types: Optional[tuple] = None,
objects: Union[list, np.ndarray], check_function: Optional[Callable] = None, *args
) -> bool:
"""Checks whether all objects in list or numpy array are validated
by given callable and types.
"""
if not isinstance(objects, np.ndarray):
objects = np.array(objects)

if checker is not None:
for obj in objects:
args = (obj, accepted_types) if accepted_types is not None else (obj,)
if not checker(*args):
return False
return True
if isinstance(objects, tuple):
return False
if isinstance(objects, list):
# Handles list and list of list
tmp_arr = np.array(objects)
elif isinstance(objects, np.ndarray):
tmp_arr = objects
if tmp_arr is not None:
return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types])

return False
return all([check_function(o, *args) for o in objects.flat])


def is_valid_data_type(value, accepted_types):
"""Checks whether the given value or its children are of an accepted type."""

if not isinstance(value, (list, np.ndarray)):
return isinstance(value, accepted_types)

return check_all_children_for_callable(objects=value, accepted_types=accepted_types)
return check_all_children_for_callable(value, isinstance, accepted_types)


def is_positive_int(value):
"""Checks whether the given value or its children are positive."""

def is_greater_than(num):
return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0

if isinstance(value, list):
return check_all_children_for_callable(objects=value, checker=is_greater_than)
return num > 0

return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0
return check_all_children_for_callable(
objects=value, check_function=is_greater_than
)


def convert_str_to_bool_safe(value: str) -> Optional[bool]:
Expand Down
18 changes: 4 additions & 14 deletions tests/dataconverter/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def fixture_filled_test_data(template, tmp_path):
"0",
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (<class 'int'>, <class 'float'>, <class 'numpy.integer'>, <class 'numpy.floating'>), as defined in the NXDL as NX_NUMBER."
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/number_value should be one of the following Python types: (<class 'int'>, <class 'numpy.integer'>, <class 'float'>, <class 'numpy.floating'>), as defined in the NXDL as NX_NUMBER."
),
id="str-instead-of-number",
),
Expand All @@ -413,7 +413,7 @@ def fixture_filled_test_data(template, tmp_path):
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one"
" of the following Python types: (<class 'str'>, <class 'numpy.str_'>, <class 'numpy.bytes_'>, <class 'numpy.chararray'>), as"
" of the following Python types: (<class 'str'>, <class 'numpy.character'>), as"
" defined in the NXDL as NX_CHAR."
),
id="wrong-type-ndarray-instead-of-char",
Expand Down Expand Up @@ -494,7 +494,7 @@ def fixture_filled_test_data(template, tmp_path):
),
(
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value should be one of the following Python types:"
" (<class 'str'>, <class 'numpy.str_'>, <class 'numpy.bytes_'>, <class 'numpy.chararray'>),"
" (<class 'str'>, <class 'numpy.character'>),"
" as defined in the NXDL as NX_CHAR."
),
id="int-instead-of-chars",
Expand All @@ -517,17 +517,6 @@ def fixture_filled_test_data(template, tmp_path):
(""),
id="array-of-bytes-chars",
),
# pytest.param(
# alter_dict(
# TEMPLATE,
# "/ENTRY[my_entry]/NXODD_name[nxodd_name]/char_value",
# np.char.chararray(["1", "2", "3"]),
# ),
# (""),
# id="numpy-chararray",
# ),
# TODO add test for array of char
# TODO add test for numpy array of char and chararray
pytest.param(
alter_dict(
TEMPLATE,
Expand Down Expand Up @@ -802,6 +791,7 @@ def format_error_message(msg: str) -> str:
"array-of-chars",
"array-of-bytes-chars",
"array-of-float-instead-of-float",
"numpy-chararray",
):
with caplog.at_level(logging.WARNING):
assert validate_dict_against("NXtest", data_dict)[0]
Expand Down