From 1c21c20704b5b18e06dedd84a84191da15f3424a Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Wed, 13 Oct 2021 00:26:32 +1100 Subject: [PATCH 1/4] functionality implemented for intervalarray.issubset and intervalarray.issuperset --- piso/__init__.py | 2 + piso/accessor.py | 14 +++++ piso/intervalarray.py | 45 ++++++++++++++++ tests/test_multiple_interval_array.py | 71 ++++++++++++++++++++++++ tests/test_single_interval_array.py | 77 +++++++++++++++++++++++++++ 5 files changed, 209 insertions(+) diff --git a/piso/__init__.py b/piso/__init__.py index f996cf9..c589f89 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -2,6 +2,8 @@ difference, intersection, isdisjoint, + issubset, + issuperset, symmetric_difference, union, ) diff --git a/piso/accessor.py b/piso/accessor.py index b22b3e4..0e0b8b2 100644 --- a/piso/accessor.py +++ b/piso/accessor.py @@ -125,6 +125,20 @@ def isdisjoint(self, *interval_arrays): *interval_arrays, ) + def issuperset(self, *interval_arrays, squeeze=False): + return intervalarray.issuperset( + self._interval_array, + *interval_arrays, + squeeze=squeeze, + ) + + def issubset(self, *interval_arrays, squeeze=False): + return intervalarray.issubset( + self._interval_array, + *interval_arrays, + squeeze=squeeze, + ) + def _register_accessors(): _register_accessor("piso", pd.IntervalIndex)(ArrayAccessor) diff --git a/piso/intervalarray.py b/piso/intervalarray.py index f98a2c3..64dd83e 100644 --- a/piso/intervalarray.py +++ b/piso/intervalarray.py @@ -119,3 +119,48 @@ def isdisjoint(interval_array, *interval_arrays): arr = arr[arr[:, 0].argsort()] result = np.all(arr[0, 1:] >= arr[1, :-1]) return result + + +def _create_is_super_or_sub(which): + + comparator_func = {"superset": sc.Stairs.ge, "subset": sc.Stairs.le}[which] + left_bound_comparator = {"superset": np.less_equal, "subset": np.greater_equal}[ + which + ] + right_bound_comparator = {"superset": np.greater_equal, "subset": np.less_equal}[ + which + ] + + def func(interval_array, *interval_arrays, squeeze=False): + _validate_array_of_intervals_arrays(interval_array, *interval_arrays) + + if interval_arrays: + stepfunction = _interval_x_to_stairs(interval_array).make_boolean() + + def _comp(ia): + return bool( + comparator_func( + stepfunction, + _interval_x_to_stairs(ia).make_boolean(), + ) + ) + + result = np.array([_comp(ia) for ia in interval_arrays]) + else: + assert len(interval_array) >= 2 + result = np.logical_and( + left_bound_comparator(interval_array[0].left, interval_array[1:].left), + right_bound_comparator( + interval_array[0].right, interval_array[1:].right + ), + ) + + if squeeze and len(result) == 1: + result = result[0] + return result + + return func + + +issuperset = _create_is_super_or_sub("superset") +issubset = _create_is_super_or_sub("subset") diff --git a/tests/test_multiple_interval_array.py b/tests/test_multiple_interval_array.py index 1d271a4..93c7096 100644 --- a/tests/test_multiple_interval_array.py +++ b/tests/test_multiple_interval_array.py @@ -1,3 +1,6 @@ +import operator + +import numpy as np import pandas as pd import pytest @@ -15,6 +18,8 @@ def get_accessor_method(self, function): piso_intervalarray.difference: self.piso.difference, piso_intervalarray.symmetric_difference: self.piso.symmetric_difference, piso_intervalarray.isdisjoint: self.piso.isdisjoint, + piso_intervalarray.issuperset: self.piso.issuperset, + piso_intervalarray.issubset: self.piso.issubset, }[function] @@ -25,6 +30,8 @@ def get_package_method(function): piso_intervalarray.symmetric_difference: piso.symmetric_difference, piso_intervalarray.difference: piso.difference, piso_intervalarray.isdisjoint: piso.isdisjoint, + piso_intervalarray.issuperset: piso.issuperset, + piso_intervalarray.issubset: piso.issubset, }[function] @@ -494,3 +501,67 @@ def test_isdisjoint(interval_index, tuples, expected, closed, date_type, how): ia3, interval_array, how=how, function=piso_intervalarray.isdisjoint ) assert result == expected + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "ia_makers, expected", + [ + ([make_ia1, make_ia2], True), + ([make_ia1, make_ia3], False), + ([make_ia1, make_ia2, make_ia3], np.array([True, False])), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_issuperset(interval_index, ia_makers, expected, closed, how): + ias = [make_ia(interval_index, closed) for make_ia in ia_makers] + result = perform_op( + *ias, + how=how, + function=piso_intervalarray.issuperset, + squeeze=True, + ) + equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq + assert equal_op(result, expected) + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "ia_makers, expected", + [ + ([make_ia2, make_ia1], True), + ([make_ia3, make_ia1], False), + ([make_ia2, make_ia1, make_ia3], np.array([True, False])), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_issubset(interval_index, ia_makers, expected, closed, how): + ias = [make_ia(interval_index, closed) for make_ia in ia_makers] + result = perform_op( + *ias, + how=how, + function=piso_intervalarray.issubset, + squeeze=True, + ) + equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq + assert equal_op(result, expected) diff --git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py index 5b7d5d4..ef47246 100644 --- a/tests/test_single_interval_array.py +++ b/tests/test_single_interval_array.py @@ -1,3 +1,6 @@ +import operator + +import numpy as np import pandas as pd import pytest @@ -14,6 +17,8 @@ def get_accessor_method(self, function): piso_intervalarray.intersection: self.piso.intersection, piso_intervalarray.symmetric_difference: self.piso.symmetric_difference, piso_intervalarray.isdisjoint: self.piso.isdisjoint, + piso_intervalarray.issuperset: self.piso.issuperset, + piso_intervalarray.issubset: self.piso.issubset, }[function] @@ -23,6 +28,8 @@ def get_package_method(function): piso_intervalarray.intersection: piso.intersection, piso_intervalarray.symmetric_difference: piso.symmetric_difference, piso_intervalarray.isdisjoint: piso_intervalarray.isdisjoint, + piso_intervalarray.issuperset: piso.issuperset, + piso_intervalarray.issubset: piso.issubset, }[function] @@ -480,3 +487,73 @@ def test_isdisjoint(interval_index, tuples, expected, closed, date_type, how): interval_array = map_to_dates(interval_array, date_type) result = perform_op(interval_array, how=how, function=piso_intervalarray.isdisjoint) assert result == expected + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "tuples, squeeze, expected", + [ + ([(1, 2), (1, 2)], True, True), + ([(1, 3), (0, 2)], True, False), + ([(1, 3), (1, 2), (0, 1)], True, np.array([True, False])), + ([(1, 2), (1, 2)], False, np.array([True])), + ([(1, 3), (0, 2)], False, np.array([False])), + ([(1, 3), (1, 2), (0, 1)], False, np.array([True, False])), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_issuperset(interval_index, tuples, squeeze, expected, closed, how): + interval_array = make_ia_from_tuples(interval_index, tuples, closed) + result = perform_op( + interval_array, + how=how, + function=piso_intervalarray.issuperset, + squeeze=squeeze, + ) + equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq + assert equal_op(result, expected) + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "tuples, squeeze, expected", + [ + ([(1, 2), (1, 2)], True, True), + ([(1, 3), (0, 2)], True, False), + ([(1, 3), (1, 4), (0, 1)], True, np.array([True, False])), + ([(1, 2), (1, 2)], False, np.array([True])), + ([(1, 3), (0, 2)], False, np.array([False])), + ([(1, 3), (1, 4), (0, 1)], False, np.array([True, False])), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_issubset(interval_index, tuples, squeeze, expected, closed, how): + interval_array = make_ia_from_tuples(interval_index, tuples, closed) + result = perform_op( + interval_array, + how=how, + function=piso_intervalarray.issubset, + squeeze=squeeze, + ) + equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq + assert equal_op(result, expected) From 98f15e8bb24ad09f4cb26e4737ac3ae3764e3482 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Thu, 14 Oct 2021 09:58:19 +1100 Subject: [PATCH 2/4] Added issuperset and issubset --- docs/reference/accessors.rst | 4 +- docs/reference/interval.rst | 4 +- docs/reference/package.rst | 4 +- docs/release_notes/index.rst | 19 +++- piso/accessor.py | 2 + piso/docstrings/accessor.py | 131 +++++++++++++++++++++-- piso/docstrings/interval.py | 104 +++++++++++++++++- piso/docstrings/intervalarray.py | 148 +++++++++++++++++++++++--- piso/interval.py | 29 +++++ piso/intervalarray.py | 46 +++----- tests/test_interval.py | 49 +++++++++ tests/test_multiple_interval_array.py | 30 +++--- tests/test_single_interval_array.py | 70 ------------ 13 files changed, 497 insertions(+), 143 deletions(-) diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index ce664b6..5927321 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -13,4 +13,6 @@ Accessors ArrayAccessor.intersection ArrayAccessor.difference ArrayAccessor.symmetric_difference - ArrayAccessor.isdisjoint \ No newline at end of file + ArrayAccessor.isdisjoint + ArrayAccessor.issuperset + ArrayAccessor.issubset \ No newline at end of file diff --git a/docs/reference/interval.rst b/docs/reference/interval.rst index bd67928..a8ec10c 100644 --- a/docs/reference/interval.rst +++ b/docs/reference/interval.rst @@ -12,4 +12,6 @@ Interval union intersection difference - symmetric_difference \ No newline at end of file + symmetric_difference + issuperset + issubset \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index fcd18c0..b0f4af1 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -15,4 +15,6 @@ Top level functions intersection difference symmetric_difference - isdisjoint \ No newline at end of file + isdisjoint + issuperset + issubset \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 56509a9..bc89392 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,19 +4,32 @@ Release notes ======================== -- added :meth:`piso.isdisjoint` method, and corresponding accessor method +Added the following methods -ADD UNRELEASED CHANGED ABOVE THIS LINE +- :meth:`piso.isdisjoint` +- :meth:`piso.issuperset` +- :meth:`piso.issubset` +- :meth:`ArrayAccessor.isdisjoint() ` +- :meth:`ArrayAccessor.issuperset() ` +- :meth:`ArrayAccessor.issubset() ` +- :meth:`piso.interval.issuperset` +- :meth:`piso.interval.issubset` + +ADD UNRELEASED CHANGES ABOVE THIS LINE **v0.1.0 2021-10-10** -The following methods (and corresponding accessor methods) are included in the initial release of `piso` +The following methods are included in the initial release of `piso` - :meth:`piso.register_accessors` - :meth:`piso.union` - :meth:`piso.intersection` - :meth:`piso.difference` - :meth:`piso.symmetric_difference` +- :meth:`ArrayAccessor.union() ` +- :meth:`ArrayAccessor.intersection() ` +- :meth:`ArrayAccessor.difference() ` +- :meth:`ArrayAccessor.symmetric_difference() ` - :meth:`piso.interval.union` - :meth:`piso.interval.intersection` - :meth:`piso.interval.difference` diff --git a/piso/accessor.py b/piso/accessor.py index 0e0b8b2..354c174 100644 --- a/piso/accessor.py +++ b/piso/accessor.py @@ -125,6 +125,7 @@ def isdisjoint(self, *interval_arrays): *interval_arrays, ) + @Appender(docstrings.issuperset_docstring, join="\n", indents=1) def issuperset(self, *interval_arrays, squeeze=False): return intervalarray.issuperset( self._interval_array, @@ -132,6 +133,7 @@ def issuperset(self, *interval_arrays, squeeze=False): squeeze=squeeze, ) + @Appender(docstrings.issubset_docstring, join="\n", indents=1) def issubset(self, *interval_arrays, squeeze=False): return intervalarray.issubset( self._interval_array, diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py index a44dcfb..dc6e5d8 100644 --- a/piso/docstrings/accessor.py +++ b/piso/docstrings/accessor.py @@ -288,6 +288,69 @@ False """ +issuperset_examples = """ +Examples +----------- + +>>> import pandas as pd +>>> import piso +>>> piso.register_accessors() + +>>> arr1 = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (3, 6), (7, 8), (10, 12)], +... ) +>>> arr2 = pd.arrays.IntervalArray.from_tuples( +... [(2, 5), (7, 8)], +... ) +>>> arr3 = pd.arrays.IntervalArray.from_tuples( +... [(3, 4), (10, 11)], +... ) + +>>> arr1.piso.issuperset(arr2) +True + +>>> arr1.piso.issuperset(arr2, squeeze=False) +array([ True]) + +>>> arr1.piso.issuperset(arr2, arr3) +array([ True, True]) + +>>> arr2.piso.issuperset(arr3) +False +""" + + +issubset_examples = """ +Examples +----------- + +>>> import pandas as pd +>>> import piso +>>> piso.register_accessors() + +>>> arr1 = pd.arrays.IntervalArray.from_tuples( +... [(2, 5), (7, 8)], +... ) +>>> arr2 = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (3, 6), (7, 8), (10, 12)], +... ) +>>> arr3 = pd.arrays.IntervalArray.from_tuples( +... [(3, 4), (10, 11)], +... ) + +>>> arr1.piso.issubset(arr2) +True + +>>> arr1.piso.issubset(arr2, squeeze=False) +array([ True]) + +>>> arr1.piso.issubset(arr2, arr3) +array([ True, False]) + +>>> arr1.piso.issubset(arr3) +False +""" + def join_params(list_of_param_strings): return "".join(list_of_param_strings).replace("\n\n", "\n") @@ -298,7 +361,7 @@ def join_params(list_of_param_strings): May contain zero or more arguments. """ -param_optional_args_difference = """ +param_optional_args_min_one = """ *interval_arrays : argument list of :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` Must contain at least one argument. """ @@ -312,8 +375,8 @@ def join_params(list_of_param_strings): """ param_squeeze = """ -squeeze : boolean, default True - If True, will try to coerce the return value to a pandas.Interval. +squeeze : boolean, default {default} + If True, will try to coerce the return value to a single pandas.Interval. If supplied, must be done so as a keyword argument. """ @@ -368,9 +431,6 @@ def join_params(list_of_param_strings): and the union of the sets in *interval_arrays*. This is equivalent to iteratively applying a set difference operation with each array in *interval_arrays* as the second operand. -Each of these array operands is assumed to contain disjoint intervals (and satisfy the definition of a set). Any array containing -overlaps between intervals will be mapped to one with disjoint intervals via a union operation. - {extra_desc} Parameters ---------- @@ -383,6 +443,30 @@ def join_params(list_of_param_strings): {examples} """ +is_super_sub_set_template = """ +Indicates whether a set is a {operation} of one, or more, other sets. + +The array elements of *interval_arrays*, and the interval array object the accessor belongs to +(an instance of :class:`pandas.IntervalIndex`, :class:`pandas.arrays.IntervalArray`) are considered to be the sets over which +the operation is performed. Each of these arrays is assumed to contain disjoint intervals (and satisfy the definition of a set). +Any array containing overlaps between intervals will be mapped to one with disjoint intervals via a union operation. + +The list *interval_arrays* must contain at least one element. The {operation} comparison is iteratively applied between +the interval array the accessor belongs to, and each array in *interval_arrays*. When *interval_arrays* contains multiple +interval arrays, the return type will be a numpy array. If it contains one interval array then the result can be coerced to +a single boolean using the *squeeze* parameter. + +Parameters +---------- +{params} + +Returns +---------- +:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + +{examples} +""" + array_return_type = ( ":class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`" ) @@ -390,7 +474,7 @@ def join_params(list_of_param_strings): union_params = join_params( [ param_optional_args, - param_squeeze, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -406,7 +490,7 @@ def join_params(list_of_param_strings): [ param_optional_args, param_min_overlaps, - param_squeeze, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -420,8 +504,8 @@ def join_params(list_of_param_strings): difference_params = join_params( [ - param_optional_args_difference, - param_squeeze, + param_optional_args_min_one, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -438,7 +522,7 @@ def join_params(list_of_param_strings): [ param_optional_args, param_min_overlaps, - param_squeeze, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -474,3 +558,28 @@ def join_params(list_of_param_strings): return_type="boolean", examples=isdisjoint_examples, ) + + +issuperset_params = join_params( + [ + param_optional_args_min_one, + param_squeeze.format(default="True"), + ] +) +issuperset_docstring = is_super_sub_set_template.format( + operation="superset", + params=issuperset_params, + examples=issuperset_examples, +) + +issubset_params = join_params( + [ + param_optional_args_min_one, + param_squeeze.format(default="True"), + ] +) +issubset_docstring = is_super_sub_set_template.format( + operation="subset", + params=issubset_params, + examples=issubset_examples, +) diff --git a/piso/docstrings/interval.py b/piso/docstrings/interval.py index 301f6ec..f326b96 100644 --- a/piso/docstrings/interval.py +++ b/piso/docstrings/interval.py @@ -150,8 +150,77 @@ Length: 2, closed: right, dtype: interval[float64] """ +issuperset_examples = """ +Examples +----------- + +>>> import pandas as pd +>>> import piso.interval + +>>> piso.interval.issuperset( +... pd.Interval(1, 4), +... pd.Interval(2, 4), +... ) +True + +>>> piso.interval.issuperset( +... pd.Interval(1, 4), +... pd.Interval(0, 3), +... ) +False + +>>> piso.interval.issuperset( +... pd.Interval(1, 4), +... pd.Interval(2, 4), +... pd.Interval(0, 3), +... ) +array([ True, False]) + +>>> piso.interval.issuperset( +... pd.Interval(0, 3), +... pd.Interval(0, 3), +... squeeze=False +... ) +array([ True]) +""" + + +issubset_examples = """ +Examples +----------- + +>>> import pandas as pd +>>> import piso.interval + +>>> piso.interval.issubset( +... pd.Interval(2, 4), +... pd.Interval(1, 4), +... ) +True + +>>> piso.interval.issubset( +... pd.Interval(2, 4), +... pd.Interval(0, 3), +... ) +False + +>>> piso.interval.issubset( +... pd.Interval(2, 4), +... pd.Interval(1, 4), +... pd.Interval(0, 3), +... ) +array([ True, False]) + +>>> piso.interval.issubset( +... pd.Interval(1, 4), +... pd.Interval(1, 4), +... squeeze=False +... ) +array([ True]) +""" + template_doc = """ -Performs the {operation} of two pandas.Intervals +Performs the {operation} of two :class:`pandas.Interval` Parameters ---------- @@ -160,7 +229,7 @@ interval2 : pandas.Interval the second operand squeeze : boolean, default True - If True, will try to coerce the return value to a pandas.Interval + If True, will try to coerce the return value to a :class:`pandas.Interval` Returns ---------- @@ -169,6 +238,7 @@ {examples} """ + union_docstring = template_doc.format(operation="union", examples=union_examples) intersection_docstring = template_doc.format( operation="intersection", examples=intersection_examples @@ -179,3 +249,33 @@ symmetric_difference_docstring = template_doc.format( operation="symmetric difference", examples=symmetric_difference_examples ) + + +is_sub_super_doc = """ +Indicates whether one :class:`pandas.Interval` is a {operation} of one, or more, others. + +Parameters +---------- +interval : :class:`pandas.Interval` + An interval, against which all other intervals belonging to *intervals* are compared. +*intervals : argument list of :class:`pandas.Interval` + Must contain at least one argument. +squeeze : boolean, default True + If True, will try to coerce the return value to a single boolean + +Returns +---------- +boolean, or :class:`numpy.ndarray` of booleans + +{examples} +""" + +issuperset_docstring = is_sub_super_doc.format( + operation="superset", + examples=issuperset_examples, +) + +issubset_docstring = is_sub_super_doc.format( + operation="subset", + examples=issubset_examples, +) diff --git a/piso/docstrings/intervalarray.py b/piso/docstrings/intervalarray.py index 6ca7231..92e5f36 100644 --- a/piso/docstrings/intervalarray.py +++ b/piso/docstrings/intervalarray.py @@ -272,6 +272,68 @@ """ +issuperset_examples = """ +Examples +----------- + +>>> import pandas as pd +>>> import piso + +>>> arr1 = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (3, 6), (7, 8), (10, 12)], +... ) +>>> arr2 = pd.arrays.IntervalArray.from_tuples( +... [(2, 5), (7, 8)], +... ) +>>> arr3 = pd.arrays.IntervalArray.from_tuples( +... [(3, 4), (10, 11)], +... ) + +>>> piso.issuperset(arr1, arr2) +True + +>>> piso.issuperset(arr1, arr2, squeeze=False) +array([ True]) + +>>> piso.issuperset(arr1, arr2, arr3) +array([ True, True]) + +>>> piso.issuperset(arr2, arr3) +False +""" + + +issubset_examples = """ +Examples +----------- + +>>> import pandas as pd +>>> import piso + +>>> arr1 = pd.arrays.IntervalArray.from_tuples( +... [(2, 5), (7, 8)], +... ) +>>> arr2 = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (3, 6), (7, 8), (10, 12)], +... ) +>>> arr3 = pd.arrays.IntervalArray.from_tuples( +... [(3, 4), (10, 11)], +... ) + +>>> piso.issubset(arr1, arr2) +True + +>>> piso.issubset(arr1, arr2, squeeze=False) +array([ True]) + +>>> piso.issubset(arr1, arr2, arr3) +array([ True, False]) + +>>> piso.issubset(arr1, arr3) +False +""" + + def join_params(list_of_param_strings): return "".join(list_of_param_strings).replace("\n\n", "\n") @@ -281,12 +343,22 @@ def join_params(list_of_param_strings): The first (and possibly only) operand to the {operation} operation. """ +param_interval_array_non_optional = """ +interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + The first operand to the {operation} operation. +""" + +param_interval_sub_super_set = """ +interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + The first operand to which all others are compared operation. +""" + param_optional_args = """ *interval_arrays : argument list of :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` May contain zero or more arguments. """ -param_optional_args_difference = """ +param_optional_args_min_one = """ *interval_arrays : argument list of :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` Must contain at least one argument. """ @@ -300,8 +372,8 @@ def join_params(list_of_param_strings): """ param_squeeze = """ -squeeze : boolean, default True - If True, will try to coerce the return value to a pandas.Interval. +squeeze : boolean, default {default} + If True, will try to coerce the return value to a single pandas.Interval. If supplied, must be done so as a keyword argument. """ @@ -355,9 +427,6 @@ def join_params(list_of_param_strings): multiple elements then the result is the set difference between *interval_array* and the union of the sets in *interval_arrays*. This is equivalent to iteratively applying a set difference operation with each array in *interval_arrays* as the second operand. -Each of these array operands is assumed to contain disjoint intervals (and satisfy the definition of a set). Any array containing -overlaps between intervals will be mapped to one with disjoint intervals via a union operation. - {extra_desc} Parameters ---------- @@ -370,6 +439,31 @@ def join_params(list_of_param_strings): {examples} """ +doc_is_sub_super_set_template = """ +Indicates whether a set is a {operation} of one, or more, other sets. + +The argument *interval_array* and the array elements of *interval_arrays* are all considered to be the sets for the purposes +of this set method. Each of these arrays is assumed to contain disjoint intervals (and satisfy the definition of a set). +Any array containing overlaps between intervals will be mapped to one with disjoint intervals via a union operation. + +The list *interval_arrays* must contain at least one element. The {operation} comparison is iteratively applied between +*interval_array* and each array in *interval_arrays*. When *interval_arrays* contains multiple interval arrays, the return +type will be a numpy array. If it contains one interval array then the result can be coerced to a single boolean using the +*squeeze* parameter. + +{extra_desc} +Parameters +---------- +{params} + +Returns +---------- +boolean, or :class:`numpy.ndarray` of boolean + +{examples} +""" + + array_return_type = ( ":class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`" ) @@ -378,7 +472,7 @@ def join_params(list_of_param_strings): [ param_interval_array.format(operation="union"), param_optional_args, - param_squeeze, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -395,7 +489,7 @@ def join_params(list_of_param_strings): param_interval_array.format(operation="intersection"), param_optional_args, param_min_overlaps, - param_squeeze, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -409,9 +503,9 @@ def join_params(list_of_param_strings): difference_params = join_params( [ - param_interval_array.format(operation="difference"), - param_optional_args_difference, - param_squeeze, + param_interval_array_non_optional.format(operation="difference"), + param_optional_args_min_one, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -429,7 +523,7 @@ def join_params(list_of_param_strings): param_interval_array.format(operation="symmetric difference"), param_optional_args, param_min_overlaps, - param_squeeze, + param_squeeze.format(default="False"), param_return_type, ] ) @@ -466,3 +560,33 @@ def join_params(list_of_param_strings): return_type="boolean", examples=isdisjoint_examples, ) + + +issuperset_params = join_params( + [ + param_interval_sub_super_set, + param_optional_args_min_one, + param_squeeze.format(default="True"), + ] +) +issuperset_docstring = doc_is_sub_super_set_template.format( + operation="superset", + extra_desc="", + params=issuperset_params, + examples=issuperset_examples, +) + + +issubset_params = join_params( + [ + param_interval_sub_super_set, + param_optional_args_min_one, + param_squeeze.format(default="True"), + ] +) +issubset_docstring = doc_is_sub_super_set_template.format( + operation="subset", + extra_desc="", + params=issubset_params, + examples=issubset_examples, +) diff --git a/piso/interval.py b/piso/interval.py index 4ac94db..619be29 100644 --- a/piso/interval.py +++ b/piso/interval.py @@ -1,3 +1,4 @@ +import numpy as np import pandas as pd import piso.docstrings.interval as docstrings @@ -109,3 +110,31 @@ def symmetric_difference(interval1, interval2, squeeze=True): closed=interval1.closed, ) return result + + +def _make_is_sub_or_superset(which, docstring): + + left_bound_comparator = {"super": np.less_equal, "sub": np.greater_equal}[which] + right_bound_comparator = {"super": np.greater_equal, "sub": np.less_equal}[which] + + @Appender(docstring, join="\n", indents=1) + def func(interval, *intervals, squeeze=True): + assert intervals + lefts = np.array([i.left for i in intervals]) + rights = np.array([i.right for i in intervals]) + + result = np.logical_and( + left_bound_comparator(interval.left, lefts), + right_bound_comparator(interval.right, rights), + ) + + if len(result) == 1 and squeeze: + result = result[0] + + return result + + return func + + +issuperset = _make_is_sub_or_superset("super", docstrings.issuperset_docstring) +issubset = _make_is_sub_or_superset("sub", docstrings.issubset_docstring) diff --git a/piso/intervalarray.py b/piso/intervalarray.py index 64dd83e..3c635aa 100644 --- a/piso/intervalarray.py +++ b/piso/intervalarray.py @@ -121,40 +121,26 @@ def isdisjoint(interval_array, *interval_arrays): return result -def _create_is_super_or_sub(which): +def _create_is_super_or_sub(which, docstring): comparator_func = {"superset": sc.Stairs.ge, "subset": sc.Stairs.le}[which] - left_bound_comparator = {"superset": np.less_equal, "subset": np.greater_equal}[ - which - ] - right_bound_comparator = {"superset": np.greater_equal, "subset": np.less_equal}[ - which - ] - - def func(interval_array, *interval_arrays, squeeze=False): - _validate_array_of_intervals_arrays(interval_array, *interval_arrays) - - if interval_arrays: - stepfunction = _interval_x_to_stairs(interval_array).make_boolean() - def _comp(ia): - return bool( - comparator_func( - stepfunction, - _interval_x_to_stairs(ia).make_boolean(), - ) + @Appender(docstring, join="\n", indents=1) + def func(interval_array, *interval_arrays, squeeze=True): + _validate_array_of_intervals_arrays(interval_array, *interval_arrays) + assert interval_arrays + stepfunction = _interval_x_to_stairs(interval_array).make_boolean() + + def _comp(ia): + return bool( + comparator_func( + stepfunction, + _interval_x_to_stairs(ia).make_boolean(), ) - - result = np.array([_comp(ia) for ia in interval_arrays]) - else: - assert len(interval_array) >= 2 - result = np.logical_and( - left_bound_comparator(interval_array[0].left, interval_array[1:].left), - right_bound_comparator( - interval_array[0].right, interval_array[1:].right - ), ) + result = np.array([_comp(ia) for ia in interval_arrays]) + if squeeze and len(result) == 1: result = result[0] return result @@ -162,5 +148,5 @@ def _comp(ia): return func -issuperset = _create_is_super_or_sub("superset") -issubset = _create_is_super_or_sub("subset") +issuperset = _create_is_super_or_sub("superset", docstrings.issuperset_docstring) +issubset = _create_is_super_or_sub("subset", docstrings.issubset_docstring) diff --git a/tests/test_interval.py b/tests/test_interval.py index b0fda74..d546f0e 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -1,3 +1,6 @@ +import operator + +import numpy as np import pandas as pd import pytest @@ -624,3 +627,49 @@ def test_symmetric_difference_closed_value_error(closed_values): ) with pytest.raises(ClosedValueError): piso_interval.symmetric_difference(*intervals) + + + + +@pytest.mark.parametrize( + "tuples, squeeze, expected", + [ + ([(1, 2), (1, 2)], True, True), + ([(1, 3), (0, 2)], True, False), + ([(1, 3), (1, 2), (0, 1)], True, np.array([True, False])), + ([(1, 2), (1, 2)], False, np.array([True])), + ([(1, 3), (0, 2)], False, np.array([False])), + ([(1, 3), (1, 2), (0, 1)], False, np.array([True, False])), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +def test_issuperset(tuples, squeeze, expected, closed): + intervals = [pd.Interval(*i, closed=closed) for i in tuples] + result = piso_interval.issuperset(*intervals, squeeze=squeeze) + equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq + assert equal_op(result, expected) + + +@pytest.mark.parametrize( + "tuples, squeeze, expected", + [ + ([(1, 2), (1, 2)], True, True), + ([(1, 3), (0, 2)], True, False), + ([(1, 3), (1, 4), (0, 1)], True, np.array([True, False])), + ([(1, 2), (1, 2)], False, np.array([True])), + ([(1, 3), (0, 2)], False, np.array([False])), + ([(1, 3), (1, 4), (0, 1)], False, np.array([True, False])), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +def test_issubset(tuples, squeeze, expected, closed): + intervals = [pd.Interval(*i, closed=closed) for i in tuples] + result = piso_interval.issubset(*intervals, squeeze=squeeze) + equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq + assert equal_op(result, expected) diff --git a/tests/test_multiple_interval_array.py b/tests/test_multiple_interval_array.py index 93c7096..24dc187 100644 --- a/tests/test_multiple_interval_array.py +++ b/tests/test_multiple_interval_array.py @@ -508,11 +508,14 @@ def test_isdisjoint(interval_index, tuples, expected, closed, date_type, how): [True, False], ) @pytest.mark.parametrize( - "ia_makers, expected", + "ia_makers, squeeze, expected", [ - ([make_ia1, make_ia2], True), - ([make_ia1, make_ia3], False), - ([make_ia1, make_ia2, make_ia3], np.array([True, False])), + ([make_ia1, make_ia2], True, True), + ([make_ia1, make_ia3], True, False), + ([make_ia1, make_ia2, make_ia3], True, np.array([True, False])), + ([make_ia1, make_ia2], False, np.array([True])), + ([make_ia1, make_ia3], False, np.array([False])), + ([make_ia1, make_ia2, make_ia3], False, np.array([True, False])), ], ) @pytest.mark.parametrize( @@ -523,13 +526,13 @@ def test_isdisjoint(interval_index, tuples, expected, closed, date_type, how): "how", ["supplied", "accessor", "package"], ) -def test_issuperset(interval_index, ia_makers, expected, closed, how): +def test_issuperset(interval_index, ia_makers, squeeze, expected, closed, how): ias = [make_ia(interval_index, closed) for make_ia in ia_makers] result = perform_op( *ias, how=how, function=piso_intervalarray.issuperset, - squeeze=True, + squeeze=squeeze, ) equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq assert equal_op(result, expected) @@ -540,11 +543,14 @@ def test_issuperset(interval_index, ia_makers, expected, closed, how): [True, False], ) @pytest.mark.parametrize( - "ia_makers, expected", + "ia_makers, squeeze, expected", [ - ([make_ia2, make_ia1], True), - ([make_ia3, make_ia1], False), - ([make_ia2, make_ia1, make_ia3], np.array([True, False])), + ([make_ia2, make_ia1], True, True), + ([make_ia3, make_ia1], True, False), + ([make_ia2, make_ia1, make_ia3], True, np.array([True, False])), + ([make_ia2, make_ia1], False, np.array([True])), + ([make_ia3, make_ia1], False, np.array([False])), + ([make_ia2, make_ia1, make_ia3], False, np.array([True, False])), ], ) @pytest.mark.parametrize( @@ -555,13 +561,13 @@ def test_issuperset(interval_index, ia_makers, expected, closed, how): "how", ["supplied", "accessor", "package"], ) -def test_issubset(interval_index, ia_makers, expected, closed, how): +def test_issubset(interval_index, ia_makers, squeeze, expected, closed, how): ias = [make_ia(interval_index, closed) for make_ia in ia_makers] result = perform_op( *ias, how=how, function=piso_intervalarray.issubset, - squeeze=True, + squeeze=squeeze, ) equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq assert equal_op(result, expected) diff --git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py index ef47246..d1ad43c 100644 --- a/tests/test_single_interval_array.py +++ b/tests/test_single_interval_array.py @@ -487,73 +487,3 @@ def test_isdisjoint(interval_index, tuples, expected, closed, date_type, how): interval_array = map_to_dates(interval_array, date_type) result = perform_op(interval_array, how=how, function=piso_intervalarray.isdisjoint) assert result == expected - - -@pytest.mark.parametrize( - "interval_index", - [True, False], -) -@pytest.mark.parametrize( - "tuples, squeeze, expected", - [ - ([(1, 2), (1, 2)], True, True), - ([(1, 3), (0, 2)], True, False), - ([(1, 3), (1, 2), (0, 1)], True, np.array([True, False])), - ([(1, 2), (1, 2)], False, np.array([True])), - ([(1, 3), (0, 2)], False, np.array([False])), - ([(1, 3), (1, 2), (0, 1)], False, np.array([True, False])), - ], -) -@pytest.mark.parametrize( - "closed", - ["left", "right"], -) -@pytest.mark.parametrize( - "how", - ["supplied", "accessor", "package"], -) -def test_issuperset(interval_index, tuples, squeeze, expected, closed, how): - interval_array = make_ia_from_tuples(interval_index, tuples, closed) - result = perform_op( - interval_array, - how=how, - function=piso_intervalarray.issuperset, - squeeze=squeeze, - ) - equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq - assert equal_op(result, expected) - - -@pytest.mark.parametrize( - "interval_index", - [True, False], -) -@pytest.mark.parametrize( - "tuples, squeeze, expected", - [ - ([(1, 2), (1, 2)], True, True), - ([(1, 3), (0, 2)], True, False), - ([(1, 3), (1, 4), (0, 1)], True, np.array([True, False])), - ([(1, 2), (1, 2)], False, np.array([True])), - ([(1, 3), (0, 2)], False, np.array([False])), - ([(1, 3), (1, 4), (0, 1)], False, np.array([True, False])), - ], -) -@pytest.mark.parametrize( - "closed", - ["left", "right"], -) -@pytest.mark.parametrize( - "how", - ["supplied", "accessor", "package"], -) -def test_issubset(interval_index, tuples, squeeze, expected, closed, how): - interval_array = make_ia_from_tuples(interval_index, tuples, closed) - result = perform_op( - interval_array, - how=how, - function=piso_intervalarray.issubset, - squeeze=squeeze, - ) - equal_op = np.array_equal if isinstance(expected, np.ndarray) else operator.eq - assert equal_op(result, expected) From de163114a4b453f676176deab48cd7e24f9a7a4e Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Thu, 14 Oct 2021 10:07:48 +1100 Subject: [PATCH 3/4] black reformat --- tests/test_interval.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_interval.py b/tests/test_interval.py index d546f0e..2927308 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -629,8 +629,6 @@ def test_symmetric_difference_closed_value_error(closed_values): piso_interval.symmetric_difference(*intervals) - - @pytest.mark.parametrize( "tuples, squeeze, expected", [ From 7992cf6cc31faebf6024d810f435caee303d8057 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Thu, 14 Oct 2021 10:17:45 +1100 Subject: [PATCH 4/4] linting --- tests/test_single_interval_array.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py index d1ad43c..bdb91dd 100644 --- a/tests/test_single_interval_array.py +++ b/tests/test_single_interval_array.py @@ -1,6 +1,3 @@ -import operator - -import numpy as np import pandas as pd import pytest