Skip to content

Commit

Permalink
Release/2.3.0 (#86)
Browse files Browse the repository at this point in the history
* Added history.

* Added support for subarrays in structured types.

* Added support for wildcards in structure expressions.

Co-authored-by: Ramon <p8u7wAPC5Pg9HYkkCkzA>
  • Loading branch information
ramonhagenaars committed Aug 28, 2022
1 parent 260da26 commit 389c5a1
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 16 deletions.
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# History

## 2.3.0 (2022-08-28)

- Added support for subarrays with shape expressions inside structure expressions.
- Added support for wildcards in structure expressions.

## 2.2.0 (2022-06-26)

- Added support for expressing "at least N dimensions".
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ True

```

Subarrays can be expressed with a shape expression between square brackets:
```python
>>> Structure["name: Int[3, 3]"]
Structure['name: Int[3, 3]']

```

### Record arrays
The recarray is a specialization of a structured array. You can use `RecArray`
to express them.
Expand Down
28 changes: 21 additions & 7 deletions USERDOCS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* [DTypes](#DTypes)
* [Structure expressions](#Structure-expressions)
* [Syntax](#Syntax-structure-expressions)
* [Subarrays](#Subarrays)
* [RecArray](#RecArray)
* [Examples](#Examples)
* [Similar projects](#Similar-projects)
Expand Down Expand Up @@ -212,8 +213,6 @@ False

```



#### Dimension breakdowns
A dimension can be broken down into more detail. We call this a **dimension breakdown**. This can be useful to clearly
describe what a dimension means. Example:
Expand Down Expand Up @@ -328,7 +327,6 @@ NDArray[Any, Floating]

```


### Structure expressions
You can denote the structure of a structured array using what we call a **structure expression**. This expression
- again a string - can be put into `Structure` and can then be used in an `NDArray`.
Expand Down Expand Up @@ -391,10 +389,12 @@ colons), but this is not included in the schema below.
```
structure-expression = <fields>
fields = <field>|<field>","<fields>
field = <field_name>":"<field_type>|"["<combined_field_names>"]:"<field_type>
combined_field_names = <field_name>","<field_name>|<field_name>","<combined_field_names>
field_type = <word>
field_name = <word>
field = <field-name>":"<field-type>|"["<combined-field-names>"]:"<field-type>
combined-field-names = <field-name>","<field-name>|<field-name>","<combined-field-names>
field-type = <word>|<word><field-subarray-shape>|<wildcard>
wildcard = "*"
field-subarray-shape = "["<shape-expression>"]"
field-name = <word>
word = <letter>|<word><underscore>|<word><number>
letter = <lletter>|<uletter>
uletter = "A"|"B"|"C"|"D"|"E"|"F"|"G"|"H"|"I"|"J"|"K"|"L"|"M"|"N"|"O"|"P"|"Q"|"R"|"S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z"
Expand All @@ -404,6 +404,20 @@ digit = "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
underscore = "_"
```

#### Subarrays
You can express the shape of a subarray using brackets after a type. You can use the full power of shape expressions.

```python
>>> from typing import Any
>>> import numpy as np
>>> from nptyping import NDArray, Structure

>>> arr = np.array([("x")], np.dtype([("x", "U10", (2, 2))]))
>>> isinstance(arr, NDArray[Any, Structure["x: Str[2, 2]"]])
True

```

### RecArray
The `RecArray` corresponds to [numpy.recarray](https://numpy.org/doc/stable/reference/generated/numpy.recarray.html).
It is an extension of `NDArray` and behaves similarly. A key difference is that with `RecArray`, the `Structure` OR
Expand Down
2 changes: 1 addition & 1 deletion nptyping/package_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
SOFTWARE.
"""
__title__ = "nptyping"
__version__ = "2.2.0"
__version__ = "2.3.0"
__author__ = "Ramon Hagenaars"
__author_email__ = "[email protected]"
__description__ = "Type hints for NumPy."
Expand Down
75 changes: 67 additions & 8 deletions nptyping/structure_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@
Dict,
List,
Mapping,
Type,
Union,
)

import numpy as np

from nptyping.error import InvalidStructureError
from nptyping.error import InvalidShapeError, InvalidStructureError
from nptyping.shape import Shape
from nptyping.shape_expression import (
check_shape,
normalize_shape_expression,
validate_shape_expression,
)
from nptyping.typing_ import StructureExpression

if TYPE_CHECKING:
Expand All @@ -59,6 +66,7 @@ def validate_structure_expression(
_validate_structure_expression_contains_no_multiple_field_names(
structure_expression
)
_validate_sub_array_expressions(structure_expression)


def check_structure(
Expand All @@ -77,17 +85,38 @@ def check_structure(
:return: True if the given dtype is valid with the given target.
"""
fields: Mapping[str, Any] = structured_dtype.fields or {} # type: ignore[assignment]

# Add the wildcard to the lexicon. We want to do this here to keep
# knowledge on wildcards in one place (this module).
type_per_name_with_wildcard = {**type_per_name, "*": object} # type: ignore[arg-type]

for name, dtype_tuple in fields.items():
dtype = dtype_tuple[0]
target_type_name = target.get_type(name)
check_type_name(target_type_name, type_per_name)
target_type = type_per_name[target_type_name]
if not issubclass(dtype.type, target_type):
target_type_shape_match = re.search(_REGEX_FIELD_SHAPE, target_type_name)
actual_type = dtype.type
if target_type_shape_match:
if not dtype.subdtype:
# the dtype does not contain a shape.
return False
actual_type = dtype.subdtype[0].type
target_type_shape = target_type_shape_match.group(1)
shape_corresponds = check_shape(dtype.shape, Shape[target_type_shape])
if not shape_corresponds:
return False
target_type_name = target_type_name.replace(
target_type_shape_match.group(0), ""
)
check_type_name(target_type_name, type_per_name_with_wildcard)
target_type = type_per_name_with_wildcard[target_type_name]
if not issubclass(actual_type, target_type):
return False
return True


def check_type_names(structure: "Structure", type_per_name: Dict[str, type]) -> None:
def check_type_names(
structure: "Structure", type_per_name: Dict[str, Type[object]]
) -> None:
"""
Check the given structure for any invalid type names in the given context
of type_per_name. Raises an InvalidStructureError if a type name is
Expand All @@ -100,14 +129,16 @@ def check_type_names(structure: "Structure", type_per_name: Dict[str, type]) ->
check_type_name(type_, type_per_name)


def check_type_name(type_name: str, type_per_name: Dict[str, type]) -> None:
def check_type_name(type_name: str, type_per_name: Dict[str, Type[object]]) -> None:
"""
Check if the given type_name is in type_per_name and raise a meaningful
error if not.
:param type_name: the key that is checked to be in type_per_name.
:param type_per_name: a dict that is looked in for type_name.
:return: None.
"""
# Remove any subarray stuff here.
type_name = type_name.split("[")[0]
if type_name not in type_per_name:
close_matches = get_close_matches(
type_name, type_per_name.keys(), 3, cutoff=0.4
Expand Down Expand Up @@ -135,7 +166,7 @@ def normalize_structure_expression(
structure_expression = re.sub(r"\s*", "", structure_expression)
type_to_names_dict = _create_type_to_names_dict(structure_expression)
normalized_structure_expression = _type_to_names_dict_to_str(type_to_names_dict)
return normalized_structure_expression.replace(",", ", ")
return normalized_structure_expression.replace(",", ", ").replace(" ", " ")


def create_name_to_type_dict(
Expand Down Expand Up @@ -188,6 +219,23 @@ def _validate_structure_expression_contains_no_multiple_field_names(
)


def _validate_sub_array_expressions(structure_expression: str) -> None:
# Validate that the given structure expression does not contain any shape
# expressions for sub arrays that are invalid.
for field_match in re.findall(_REGEX_FIELD, structure_expression):
field_type = field_match[0].split(_FIELD_TYPE_POINTER)[1]
type_shape_match = re.search(_REGEX_FIELD_SHAPE, field_type)
if type_shape_match:
type_shape = type_shape_match[1]
try:
validate_shape_expression(type_shape)
except InvalidShapeError as err:
raise InvalidStructureError(
f"'{structure_expression}' is not a valid structure"
f" expression; {str(err)}"
) from err


def _create_type_to_names_dict(
structure_expression: StructureExpression,
) -> Dict[str, List[str]]:
Expand All @@ -199,10 +247,17 @@ def _create_type_to_names_dict(
field_name_combination_match = re.match(
_REGEX_FIELD_NAMES_COMBINATION, field_name_combination
)
field_type_shape_match = re.search(_REGEX_FIELD_SHAPE, field_type)
if field_name_combination_match:
field_names = field_name_combination_match.group(2).split(_SEPARATOR)
else:
field_names = [field_name_combination]
if field_type_shape_match:
type_shape = field_type_shape_match.group(1)
normalized_type_shape = normalize_shape_expression(type_shape)
field_type = field_type.replace(
field_type_shape_match.group(0), f"[{normalized_type_shape}]"
)
names_per_type[field_type] += field_names
return {
field_type: sorted(names_per_type[field_type])
Expand Down Expand Up @@ -230,7 +285,11 @@ def _type_to_names_dict_to_str(type_to_names_dict: Dict[str, List[str]]) -> str:
_REGEX_FIELD_LEFT = rf"({_REGEX_FIELD_NAME}|{_REGEX_FIELD_NAMES_COMBINATION})"
_REGEX_FIELD_TYPE = r"(\s*[a-zA-Z]\w*\s*)"
_REGEX_FIELD_TYPE_WILDCARD = r"(\s*\*\s*)"
_REGEX_FIELD_RIGHT = rf"({_REGEX_FIELD_TYPE}|{_REGEX_FIELD_TYPE_WILDCARD})"
_REGEX_FIELD_SHAPE = r"\[([^\]]+)\]"
_REGEX_FIELD_SHAPE_MAYBE = rf"\s*({_REGEX_FIELD_SHAPE})?\s*"
_REGEX_FIELD_RIGHT = (
rf"({_REGEX_FIELD_TYPE}|{_REGEX_FIELD_TYPE_WILDCARD}){_REGEX_FIELD_SHAPE_MAYBE}"
)
_REGEX_FIELD_TYPE_POINTER = rf"(\s*{_FIELD_TYPE_POINTER}\s*)"
_REGEX_FIELD = (
rf"(\s*{_REGEX_FIELD_LEFT}{_REGEX_FIELD_TYPE_POINTER}{_REGEX_FIELD_RIGHT}\s*)"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ def test_isinstance_fails_if_structure_doesnt_match(self):
NDArray[Any, Structure["[name, age]: Str"]],
)

def test_isinstance_succeeds_if_structure_subarray_matches(self):
arr = np.array([("x")], np.dtype([("x", "U10", (2, 2))]))
self.assertIsInstance(arr, NDArray[Any, Structure["x: Str[2, 2]"]])

def test_isinstance_fails_if_structure_contains_invalid_types(self):
with self.assertRaises(InvalidStructureError) as err:
NDArray[Any, Structure["name: Str, age: Float, address: Address"]]
Expand Down
54 changes: 54 additions & 0 deletions tests/test_structure_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,44 @@ def test_check_structure_true(self):
structure2 = Structure["[a, b, c]: Integer"]
self.assertTrue(check_structure(dtype2, structure2, dtype_per_name))

dtype3 = np.dtype([("name", "U10")])
structure3 = Structure["name: *"]
self.assertTrue(check_structure(dtype3, structure3, dtype_per_name))

def test_check_structure_with_sub_array(self):
dtype = np.dtype([("x", "U10", (2, 2))])
structure = Structure["x: Str[2, 2]"]
self.assertTrue(check_structure(dtype, structure, dtype_per_name))

dtype2 = np.dtype([("x", "U10", (2, 2))])
structure2 = Structure["x: Int[2, 2]"]
self.assertFalse(
check_structure(dtype2, structure2, dtype_per_name),
"It should fail because of the type.",
)

dtype3 = np.dtype([("x", "U10", (3, 3))])
structure3 = Structure["x: Str[2, 2]"]
self.assertFalse(
check_structure(dtype3, structure3, dtype_per_name),
"It should fail because of the shape.",
)

dtype4 = np.dtype([("x", "U10", (2, 2)), ("y", "U10", (2, 2))])
structure4 = Structure["x: Str[2, 2], y: Str[2, 2]"]
self.assertTrue(check_structure(dtype4, structure4, dtype_per_name))

dtype5 = np.dtype([("x", "U10", (2, 2)), ("y", "U10", (2, 2))])
structure5 = Structure["[x, y]: Str[2, 2]"]
self.assertTrue(check_structure(dtype5, structure5, dtype_per_name))

dtype6 = np.dtype([("x", "U10")])
structure6 = Structure["x: Str[2, 2]"]
self.assertFalse(
check_structure(dtype6, structure6, dtype_per_name),
"It should fail because it doesn't contain a sub array at all.",
)

def test_check_structure_false(self):
dtype = np.dtype([("name", "U10"), ("age", "i4")])
structure = Structure["name: Str, age: UInt"]
Expand Down Expand Up @@ -66,8 +104,12 @@ def test_validate_structure_expression_success(self):
validate_structure_expression("a_123: t")
validate_structure_expression("abc: type")
validate_structure_expression("abc: *")
validate_structure_expression("abc: type[2, 2]")
validate_structure_expression("abc: type [*, ...]")
validate_structure_expression("abc: type, def: type")
validate_structure_expression("abc: type[*, 2, ...], def: type[2 ]")
validate_structure_expression("[abc, def]: type")
validate_structure_expression("[abc, def]: type[*, ...]")
validate_structure_expression("[abc, def]: type1, ghi: type2")
validate_structure_expression("[abc, def]: type1, [ghi, jkl]: type2")
validate_structure_expression(
Expand Down Expand Up @@ -104,6 +146,15 @@ def test_validate_structure_expression_fail(self):
validate_structure_expression("[a,b,]: type")
with self.assertRaises(InvalidStructureError):
validate_structure_expression("[,a,b]: type")
with self.assertRaises(InvalidStructureError):
validate_structure_expression("abc: type []")
with self.assertRaises(InvalidStructureError):
validate_structure_expression("a: t[]")
with self.assertRaises(InvalidStructureError) as err:
validate_structure_expression(
"a: t[*, 2, ...], b: t[not-a-valid-shape-expression]"
)
self.assertIn("not-a-valid-shape-expression", str(err.exception))
with self.assertRaises(InvalidStructureError) as err:
validate_structure_expression("a: t1, b: t2, c: t3, a: t4")
self.assertEqual(
Expand Down Expand Up @@ -138,6 +189,9 @@ def test_normalize_structure_expression(self):
"[a, b, d, e]: t1, c: t2",
normalize_structure_expression("[b, a]: t1, c: t2, [d, e]: t1"),
)
self.assertEqual(
"a: t[*, ...]", normalize_structure_expression(" a : t [ * , ... ]")
)

def test_create_name_to_type_dict(self):
output = create_name_to_type_dict("a: t1, b: t2, c: t1")
Expand Down

0 comments on commit 389c5a1

Please sign in to comment.