diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index 9682181..ce664b6 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -12,4 +12,5 @@ Accessors ArrayAccessor.union ArrayAccessor.intersection ArrayAccessor.difference - ArrayAccessor.symmetric_difference \ No newline at end of file + ArrayAccessor.symmetric_difference + ArrayAccessor.isdisjoint \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index bfc4708..fcd18c0 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -14,4 +14,5 @@ Top level functions union intersection difference - symmetric_difference \ No newline at end of file + symmetric_difference + isdisjoint \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 7873be7..56509a9 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,9 +4,13 @@ Release notes ======================== +- added :meth:`piso.isdisjoint` method, and corresponding accessor method + +ADD UNRELEASED CHANGES ABOVE THIS LINE + **v0.1.0 2021-10-10** -The following methods are included in the initial release of `piso` +The following methods (and corresponding accessor methods) are included in the initial release of `piso` - :meth:`piso.register_accessors` - :meth:`piso.union` @@ -17,3 +21,4 @@ The following methods are included in the initial release of `piso` - :meth:`piso.interval.intersection` - :meth:`piso.interval.difference` - :meth:`piso.interval.symmetric_difference` + diff --git a/piso/__init__.py b/piso/__init__.py index f9f67f2..f996cf9 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -1,4 +1,10 @@ -from piso.intervalarray import difference, intersection, symmetric_difference, union +from piso.intervalarray import ( + difference, + intersection, + isdisjoint, + symmetric_difference, + union, +) def register_accessors(): diff --git a/piso/accessor.py b/piso/accessor.py 
@Appender(docstrings.isdisjoint_docstring, join="\n", indents=1)
def isdisjoint(self, *interval_arrays):
    # The user-facing docstring is supplied by the Appender decorator, so the
    # notes here are deliberately plain comments rather than a docstring.
    #
    # Accessor flavour of isdisjoint: forwards the array held in
    # self._interval_array (presumably the array this accessor was created
    # for - confirm against the accessor constructor) as the first argument,
    # followed by any additional arrays supplied by the caller.
    own_array = self._interval_array
    return intervalarray.isdisjoint(own_array, *interval_arrays)
# reST text substituted for the {return_type} placeholder in the "Returns"
# section of the docstring templates for operations that return an array.
array_return_type = " or ".join(
    [
        ":class:`pandas.IntervalIndex`",
        ":class:`pandas.arrays.IntervalArray`",
    ]
)
# Assemble the accessor docstring for isdisjoint: a one-sentence summary is
# placed in front of the shared template body, which is then filled in with
# the isdisjoint-specific parameters, return type and examples.
isdisjoint_doc = (
    "\nIndicates whether one, or more, sets are disjoint or not.\n" + template_doc
)

# Only the optional positional arrays are documented as parameters here.
isdisjoint_params = join_params([param_optional_args])

isdisjoint_docstring = isdisjoint_doc.format(
    examples=isdisjoint_examples,
    return_type="boolean",
    params=isdisjoint_params,
    extra_desc="",
)
# Template used by the set operations: the generic summary sentence (with an
# {operation} placeholder) followed by the shared template body.
operation_template_doc = "\nPerforms a set {operation} operation.\n" + template_doc
@Appender(docstrings.isdisjoint_docstring, join="\n", indents=1)
def isdisjoint(interval_array, *interval_arrays):
    # The user-facing docstring is attached by the Appender decorator above,
    # so implementation notes are kept as comments to leave it untouched.
    _validate_array_of_intervals_arrays(interval_array, *interval_arrays)

    if interval_arrays:
        # Several arrays: each array is one set. The arrays are disjoint when
        # the maximum of the combined step function does not exceed 1.
        # NOTE(review): presumably _make_stairs counts how many arrays cover
        # each point - confirm against its definition.
        stairs = _make_stairs(interval_array, *interval_arrays)
        return bool(stairs.max() <= 1)

    if len(interval_array) == 0:
        # No intervals at all: nothing can overlap.
        return True

    # Single array: each interval is one set. Order the intervals by their
    # left endpoint (keeping every right endpoint paired with its left one),
    # after which it is enough to compare neighbours: the intervals are
    # pairwise disjoint iff each one starts no earlier than the previous
    # one ends. Without this ordering, input such as [(3, 4), (1, 2)] would
    # wrongly be reported as overlapping.
    lefts = interval_array.left.values
    rights = interval_array.right.values
    order = lefts.argsort()
    lefts, rights = lefts[order], rights[order]

    # ">=" treats intervals that merely share an endpoint as disjoint, which
    # is the correct reading for intervals closed on exactly one side.
    return bool(np.all(lefts[1:] >= rights[:-1]))
def map_to_dates(interval_array, date_type):
    """Rebuild *interval_array* with date-like endpoints.

    Every endpoint ``x`` is turned into the timestamp ``2021-10-{x}`` and then
    converted according to *date_type*:

    - ``"numpy"``: result of ``Timestamp.to_numpy()``
    - ``"datetime"``: result of ``Timestamp.to_pydatetime()``
    - ``"timedelta"``: offset of the timestamp from ``2021-10-1``
    - anything else (including ``"timestamp"`` and ``None``): the
      :class:`pandas.Timestamp` itself

    The result is built with ``from_arrays`` called on *interval_array*, and
    the ``closed`` attribute of the input is carried over to the result.
    """

    def make_date(x):
        ts = pd.Timestamp(f"2021-10-{x}")
        if date_type == "numpy":
            return ts.to_numpy()
        if date_type == "datetime":
            return ts.to_pydatetime()
        if date_type == "timedelta":
            return ts - pd.Timestamp("2021-10-1")
        return ts

    return interval_array.from_arrays(
        interval_array.left.map(make_date),
        interval_array.right.map(make_date),
        # from_arrays defaults to closed="right"; pass the original value so
        # left-closed inputs stay left-closed after the mapping.
        closed=interval_array.closed,
    )
def map_to_dates(interval_array, date_type):
    """Rebuild *interval_array* with date-like endpoints.

    Every endpoint ``x`` is turned into the timestamp ``2021-10-{x}`` and then
    converted according to *date_type*:

    - ``"numpy"``: result of ``Timestamp.to_numpy()``
    - ``"datetime"``: result of ``Timestamp.to_pydatetime()``
    - ``"timedelta"``: offset of the timestamp from ``2021-10-1``
    - anything else (including ``"timestamp"`` and ``None``): the
      :class:`pandas.Timestamp` itself

    The result is built with ``from_arrays`` called on *interval_array*, and
    the ``closed`` attribute of the input is carried over to the result.
    """

    def make_date(x):
        ts = pd.Timestamp(f"2021-10-{x}")
        if date_type == "numpy":
            return ts.to_numpy()
        if date_type == "datetime":
            return ts.to_pydatetime()
        if date_type == "timedelta":
            return ts - pd.Timestamp("2021-10-1")
        return ts

    return interval_array.from_arrays(
        interval_array.left.map(make_date),
        interval_array.right.map(make_date),
        # from_arrays defaults to closed="right"; pass the original value so
        # left-closed inputs stay left-closed after the mapping.
        closed=interval_array.closed,
    )
([(1, 2), (2, 3)], True), + ([(1, 2), (3, 4)], True), + ([(1, 3), (2, 4)], False), + ([(1, 4), (2, 3)], False), + ([(1, 2), (2, 3), (3, 4)], True), + ([(1, 2), (3, 4), (5, 6)], True), + ([(1, 3), (2, 4), (5, 6)], False), + ([(1, 4), (2, 3), (5, 6)], False), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right"], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_isdisjoint(interval_index, tuples, expected, closed, date_type, how): + + interval_array = make_ia_from_tuples(interval_index, tuples, closed) + interval_array = map_to_dates(interval_array, date_type) + result = perform_op(interval_array, how=how, function=piso_intervalarray.isdisjoint) + assert result == expected