diff --git a/docs/img/powered_by_staircase.svg b/docs/img/powered_by_staircase.svg index cc793e6..9bd46ee 100644 --- a/docs/img/powered_by_staircase.svg +++ b/docs/img/powered_by_staircase.svg @@ -2,9 +2,9 @@ + + inkscape:pagecheckerboard="0" + showguides="true" + inkscape:guide-bbox="true"> + + @@ -95,115 +104,119 @@ inkscape:label="Layer 1" inkscape:groupmode="layer" id="layer1" - transform="translate(377.75726,326.24246)"> + transform="translate(632.60692,319.18132)"> + width="802.06549" + height="142.67062" + x="-631.41364" + y="-318.31769" /> + id="g1223" + transform="translate(-259.36723,77.589223)"> + + + + + + + + + + + + + + id="path7928" + d="M 3.7846419,-314.6503 H 133.02875" + style="fill:none;stroke:#828282;stroke-width:2.05538;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:2.8;stroke-dasharray:none;stroke-opacity:1" + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> + id="path7930" + d="M -80.584528,-273.74018 H -4.0089881" + style="fill:none;stroke:#828282;stroke-width:1.58243;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> + id="path7932" + d="M 2.7262919,-314.7503 H 135.46667" + style="fill:#828282;fill-opacity:1;stroke:#828282;stroke-width:0.683;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:2.8;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker7938)" + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> - - - - - - + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> - - - - - powered by + id="tspan1373">powered by + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/docs/index.rst b/docs/index.rst index 239418e..209c838 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,7 +20,7 @@ Pandas Interval Set Operations: methods for set operations, analytics, lookups a .. image:: img/powered_by_staircase.svg :target: https://www.staircase.dev - :width: 200 + :width: 300 :alt: powered_by_staircase :align: center diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index 0ad0a59..43a3545 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -19,4 +19,5 @@ Accessors ArrayAccessor.coverage ArrayAccessor.complement ArrayAccessor.contains - ArrayAccessor.get_indexer \ No newline at end of file + ArrayAccessor.split + ArrayAccessor.adjacency_matrix \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index 59f084c..b9272d8 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -21,6 +21,7 @@ Top level functions coverage complement contains - get_indexer + split lookup - join \ No newline at end of file + join + adjacency_matrix \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 30150bc..c37c3bf 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -5,6 +5,20 @@ Release notes ======================== +**v0.7.0 2021-11-20** + +Added the following methods + +- :func:`piso.split` +- :func:`piso.adjacency_matrix` +- :meth:`ArrayAccessor.split() ` +- :meth:`ArrayAccessor.adjacency_matrix() ` + +Removed the following methods + +- removed :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer` + + **v0.6.0 2021-11-05** The following methods were extended to accommodate intervals with *closed = "both"* or *"neither"* @@ -12,7 +26,7 @@ The following methods were extended to accommodate intervals with *closed = "bot - :func:`piso.contains` (and :meth:`ArrayAccessor.contains() `) - :func:`piso.get_indexer` (and 
:meth:`ArrayAccessor.get_indexer() `) - :func:`piso.lookup` -- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.get_indexer() `) +- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.isdisjoint() `) **v0.5.0 2021-11-02** @@ -34,7 +48,7 @@ Added the following methods - :func:`piso.lookup` - :func:`piso.get_indexer` -- :meth:`ArrayAccessor.get_indexer() ` +- :meth:`ArrayAccessor.get_indexer` **v0.3.0 2021-10-23** diff --git a/piso/__init__.py b/piso/__init__.py index 87dee33..cefa917 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -1,13 +1,14 @@ +from piso.graph import adjacency_matrix from piso.intervalarray import ( complement, contains, coverage, difference, - get_indexer, intersection, isdisjoint, issubset, issuperset, + split, symmetric_difference, union, ) diff --git a/piso/accessor.py b/piso/accessor.py index 4fd5e03..0813c5c 100644 --- a/piso/accessor.py +++ b/piso/accessor.py @@ -3,7 +3,7 @@ import pandas as pd import piso.docstrings.accessor as docstrings -from piso import intervalarray +from piso import graph, intervalarray from piso._decorators import Appender @@ -155,13 +155,6 @@ def complement(self, domain=None): domain, ) - @Appender(docstrings.get_indexer_docstring, join="\n", indents=1) - def get_indexer(self, x): - return intervalarray.get_indexer( - self._interval_array, - x, - ) - @Appender(docstrings.contains_docstring, join="\n", indents=1) def contains(self, x, include_index=True): return intervalarray.contains( @@ -170,6 +163,21 @@ def contains(self, x, include_index=True): include_index, ) + @Appender(docstrings.split_docstring, join="\n", indents=1) + def split(self, x): + return intervalarray.split( + self._interval_array, + x, + ) + + @Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1) + def adjacency_matrix(self, edges="intersect", include_index=True): + return graph.adjacency_matrix( + self._interval_array, + edges=edges, + include_index=include_index, + ) + def _register_accessors(): _register_accessor("piso", 
pd.IntervalIndex)(ArrayAccessor) diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py index d349427..e182740 100644 --- a/piso/docstrings/accessor.py +++ b/piso/docstrings/accessor.py @@ -1,3 +1,5 @@ +from piso.graph import adjacency_matrix + union_examples = """ Examples ----------- @@ -544,7 +546,7 @@ def join_params(list_of_param_strings): """ Indicates whether one, or more, sets are disjoint or not. -*interval_array* must be left-closed or right-closed if *interval_arrays is non-empty. +*interval_array* must be left-closed or right-closed if \\*interval_arrays is non-empty. If no arguments are provided then this restriction does not apply. """ + template_doc @@ -782,3 +784,111 @@ def join_params(list_of_param_strings): >>> pd.IntervalIndex.from_tuples([(0,2)]).piso.contains(1, include_index=False) array([[ True]]) """ + + +split_docstring = """ +Given a set of intervals, and break points, splits the intervals into pieces wherever +they overlap a break point. + +The intervals are contained in the object the accessor belongs to. They may be left-closed, +right-closed, both, or neither, and contain overlapping intervals. + +Parameters +---------- +x : scalar, or array-like of scalars + Values in *x* should belong to the same domain as the intervals in *interval_array*. + May contain duplicates and be unsorted. + +Returns +---------- +:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + Return type will be the same type as the object the accessor belongs to. + +Examples +----------- + +>>> import pandas as pd +>>> import piso +>>> piso.register_accessors() + +>>> arr = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (2, 5)], +... ) + +>>> arr.piso.split(3) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> arr.piso.split([3,3,3,3]) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> arr = pd.IntervalIndex.from_tuples( +... 
[(0, 4), (2, 5)], closed="neither", +... ) + +>>> arr.piso.split([1, 6, 4]) +IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)], + closed='neither', + dtype='interval[float64]') +""" + + +adjacency_matrix_docstring = """ +Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph. + +The set of nodes correspond to intervals and the edges are defined by the relationship +defined by the *edges* parameter. + +Note that the diagonal is defined with False values by default. + +Parameters +---------- +edges : {"intersect", "disjoint"}, default "intersect" + Defines the relationship that edges between nodes represent. +include_index : bool, default True + If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned. + If False then a :class:`numpy.ndarray` is returned. + +Returns +------- +:class:`pandas.DataFrame` or :class:`numpy.ndarray` + Boolean valued, symmetrical, with False along diagonal. + +Examples +--------- + +>>> import pandas as pd +>>> import piso +>>> piso.register_accessors() + +>>> arr = pd.arrays.IntervalArray.from_tuples( +... [(0,4), (3,6), (5, 7), (8,9), (9,10)], +... closed="both", +... 
) + +>>> arr.piso.adjacency_matrix() + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] +[0, 4] False True False False False +[3, 6] True False True False False +[5, 7] False True False False False +[8, 9] False False False False True +[9, 10] False False False True False + +>>> arr.piso.adjacency_matrix(include_index=False) +array([[False, True, False, False, False], + [ True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, True], + [False, False, False, True, False]]) + +>>> arr.piso.adjacency_matrix(edges="disjoint") + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] +[0, 4] False False True True True +[3, 6] False False False True True +[5, 7] True False False True True +[8, 9] True True True False False +[9, 10] True True True False False +""" diff --git a/piso/docstrings/intervalarray.py b/piso/docstrings/intervalarray.py index c37beba..1b5fdcf 100644 --- a/piso/docstrings/intervalarray.py +++ b/piso/docstrings/intervalarray.py @@ -791,3 +791,50 @@ def join_params(list_of_param_strings): >>> piso.contains(pd.IntervalIndex.from_tuples([(0,2)]), 1, include_index=False) array([[ True]]) """ + +split_docstring = """ +Given a set of intervals, and break points, splits the intervals into pieces wherever +they overlap a break point. + +Parameters +---------- +interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + Contains the (possibly overlapping) intervals. May be left-closed, right-closed, both, or neither. +x : scalar, or array-like of scalars + Values in *x* should belong to the same domain as the intervals in *interval_array*. + May contain duplicates and be unsorted. + +Returns +---------- +:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + Return type will be the same type as *interval_array* + +Examples +----------- + +>>> import pandas as pd +>>> import piso + +>>> arr = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (2, 5)], +... 
) + +>>> piso.split(arr, 3) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> piso.split(arr, [3,3,3,3]) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> arr = pd.IntervalIndex.from_tuples( +... [(0, 4), (2, 5)], closed="neither", +... ) + +>>> piso.split(arr, [1, 6, 4]) +IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)], + closed='neither', + dtype='interval[float64]') +""" diff --git a/piso/graph.py b/piso/graph.py new file mode 100644 index 0000000..39144df --- /dev/null +++ b/piso/graph.py @@ -0,0 +1,89 @@ +import numpy as np +import pandas as pd +from pandas.core.indexes import interval + + +def adjacency_matrix(interval_array, edges="intersect", include_index=True): + """ + Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph. + + The set of nodes correspond to intervals and the edges are defined by the relationship + defined by the *edges* parameter. + + Note that the diagonal is defined with False values by default. + + Parameters + ---------- + interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex` + Contains the intervals. + edges : {"intersect", "disjoint"}, default "intersect" + Defines the relationship that edges between nodes represent. + include_index : bool, default True + If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned. + If False then a :class:`numpy.ndarray` is returned. + + Returns + ------- + :class:`pandas.DataFrame` or :class:`numpy.ndarray` + Boolean valued, symmetrical, with False along diagonal. + + Examples + --------- + + >>> import pandas as pd + >>> import piso + + >>> arr = pd.arrays.IntervalArray.from_tuples( + ... [(0,4), (3,6), (5, 7), (8,9), (9,10)], + ... closed="both", + ... 
) + + >>> piso.adjacency_matrix(arr) + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] + [0, 4] False True False False False + [3, 6] True False True False False + [5, 7] False True False False False + [8, 9] False False False False True + [9, 10] False False False True False + + >>> piso.adjacency_matrix(arr, include_index=False) + array([[False, True, False, False, False], + [ True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, True], + [False, False, False, True, False]]) + + >>> piso.adjacency_matrix(arr, edges="disjoint") + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] + [0, 4] False False True True True + [3, 6] False False False True True + [5, 7] True False False True True + [8, 9] True True True False False + [9, 10] True True True False False + """ + if edges == "intersect": + result = _adj_mat_intersection(interval_array) + elif edges == "disjoint": + result = ~_adj_mat_intersection(interval_array, fill_diagonal=False) + else: + raise ValueError(f"Invalid value for edges parameter: {edges}") + + if include_index: + result = pd.DataFrame(result, index=interval_array, columns=interval_array) + + return result + + +def _adj_mat_intersection(interval_array, fill_diagonal=True): + result = np.greater.outer( + interval_array.right, interval_array.left + ) & np.less.outer(interval_array.left, interval_array.right) + if interval_array.closed == "both": + result = ( + result + | np.equal.outer(interval_array.right, interval_array.left) + | np.equal.outer(interval_array.left, interval_array.right) + ) + if fill_diagonal: + np.fill_diagonal(result, False) + return result diff --git a/piso/intervalarray.py b/piso/intervalarray.py index f5b873d..692360d 100644 --- a/piso/intervalarray.py +++ b/piso/intervalarray.py @@ -219,10 +219,21 @@ def contains(interval_array, x, include_index=True): return result -@Appender(docstrings.get_indexer_docstring, join="\n", indents=1) -def get_indexer(interval_array, x): - if not 
isdisjoint(interval_array): - raise ValueError("get_indexer method is only valid for disjoint intervals.") - ia_length = len(interval_array) - contain_matrix = contains(interval_array, x, include_index=False) - return (np.linspace(1, ia_length, ia_length).dot(contain_matrix) - 1).astype(int) +@Appender(docstrings.split_docstring, join="\n", indents=1) +def split(interval_array, x): + # x = pd.Series(x).values + x = pd.Series(sorted(set(x))).values # converting to numpy array will not work + contained = contains(interval_array.set_closed("neither"), x, include_index=False) + breakpoints = np.concatenate( + ( + np.expand_dims(interval_array.left.values, 1), + pd.DataFrame(np.broadcast_to(x, contained.shape)).where(contained).values, + np.expand_dims(interval_array.right.values, 1), + ), + axis=1, + ) + lefts = breakpoints[:, :-1] + rights = breakpoints[:, 1:] + return interval_array.from_arrays( + lefts[~np.isnan(lefts)], rights[~np.isnan(rights)], closed=interval_array.closed + ) diff --git a/piso/ndframe.py b/piso/ndframe.py index 7aa0163..d792363 100644 --- a/piso/ndframe.py +++ b/piso/ndframe.py @@ -14,7 +14,7 @@ def lookup(frame_or_series, x): raise ValueError("DataFrame or Series must be indexed by an IntervalIndex") if not hasattr(x, "__len__"): x = np.array(x, ndmin=1) - indexer = intervalarray.get_indexer(frame_or_series.index, x) + indexer = frame_or_series.index.get_indexer(x) result = frame_or_series.copy().iloc[indexer].set_axis(x) set_nan = indexer == -1 if set_nan.any(): @@ -52,7 +52,7 @@ def _get_indexers(*dfs): ) tiling_index = pd.IntervalIndex.from_breaks(sorted(set(breaks))) lookups = tiling_index.left if closed == "left" else tiling_index.right - indexers = [intervalarray.get_indexer(df.index, lookups) for df in dfs] + indexers = [df.index.get_indexer(lookups) for df in dfs] return tiling_index, indexers diff --git a/pyproject.toml b/pyproject.toml index 00f1e53..27a2100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ 
build-backend = "poetry.masonry.api" [tool.poetry] name = "piso" -version = "0.6.0" +version = "0.7.0" description = "Pandas Interval Set Operations: methods for set operations, analytics, lookups and joins on pandas' Interval, IntervalArray and IntervalIndex" readme = "README.md" authors = ["Riley Clement "] diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..cc7f4a7 --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,291 @@ +import numpy as np +import pandas as pd +import pytest + +import piso +import piso.graph as piso_graph +from piso import register_accessors + +register_accessors() + + +def get_accessor_method(self, function): + return { + piso_graph.adjacency_matrix: self.piso.adjacency_matrix, + }[function] + + +def get_package_method(function): + return { + piso_graph.adjacency_matrix: piso.adjacency_matrix, + }[function] + + +def perform_op(*args, how, function, **kwargs): + # how = "supplied, accessor, or package" + if how == "accessor": + self, *args = args + return get_accessor_method(self, function)(*args, **kwargs) + elif how == "package": + return get_package_method(function)(*args, **kwargs) + else: + return function(*args, **kwargs) + + +def map_to_dates(obj, date_type): + def make_date(x): + ts = pd.to_datetime(x, unit="d", origin="2021-09-30") + if date_type == "numpy": + return ts.to_numpy() + if date_type == "datetime": + return ts.to_pydatetime() + if date_type == "timedelta": + return ts - pd.Timestamp("2021-10-1") + return ts + + if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)): + return obj.from_arrays( + obj.left.map(make_date), + obj.right.map(make_date), + obj.closed, + ) + elif isinstance(obj, list): + return [make_date(x) for x in obj] + + +@pytest.mark.parametrize( + "closed", + ["left", "right", "neither"], +) +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + 
"date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_intersects_1( + closed, interval_index, include_index, date_type, how +): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed=closed, + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, True, False, False, False], + [True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="intersect", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_intersects_2(interval_index, include_index, date_type, how): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed="both", + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, True, False, False, False], + [True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, True], + [False, False, False, True, False], 
+ ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="intersect", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "closed", + ["left", "right", "neither"], +) +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_disjoint_1( + closed, interval_index, include_index, date_type, how +): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed=closed, + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, False, True, True, True], + [False, False, False, True, True], + [True, False, False, True, True], + [True, True, True, False, True], + [True, True, True, True, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="disjoint", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", 
"package"], +) +def test_adjacency_matrix_disjoint_2(interval_index, include_index, date_type, how): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed="both", + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, False, True, True, True], + [False, False, False, True, True], + [True, False, False, True, True], + [True, True, True, False, False], + [True, True, True, False, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="disjoint", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "closed", + ["left", "right", "both", "neither"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_edges_exception(closed, how): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed=closed, + ) + with pytest.raises(ValueError): + perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="not_an_option", + ) diff --git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py index ccbbf3a..7cd41e4 100644 --- a/tests/test_single_interval_array.py +++ b/tests/test_single_interval_array.py @@ -19,8 +19,8 @@ def get_accessor_method(self, function): piso_intervalarray.issubset: self.piso.issubset, piso_intervalarray.coverage: self.piso.coverage, piso_intervalarray.complement: self.piso.complement, - piso_intervalarray.get_indexer: self.piso.get_indexer, piso_intervalarray.contains: self.piso.contains, + piso_intervalarray.split: self.piso.split, 
}[function] @@ -34,8 +34,8 @@ def get_package_method(function): piso_intervalarray.issubset: piso.issubset, piso_intervalarray.coverage: piso.coverage, piso_intervalarray.complement: piso.complement, - piso_intervalarray.get_indexer: piso.get_indexer, piso_intervalarray.contains: piso.contains, + piso_intervalarray.split: piso.split, }[function] @@ -80,6 +80,16 @@ def make_ia3(interval_index, closed): return ia3 +def make_ia4(interval_index, closed): + ia4 = pd.arrays.IntervalArray.from_tuples( + [(1, 4), (2, 5), (3, 6)], + closed=closed, + ) + if interval_index: + ia4 = pd.IntervalIndex(ia4) + return ia4 + + def make_ia_from_tuples(interval_index, tuples, closed): klass = pd.IntervalIndex if interval_index else pd.arrays.IntervalArray return klass.from_tuples(tuples, closed=closed) @@ -440,9 +450,9 @@ def test_symmetric_difference_min_overlaps_all_2( ) -def map_to_dates(interval_array, date_type): +def map_to_dates(obj, date_type): def make_date(x): - ts = pd.Timestamp(f"2021-10-{x}") + ts = pd.to_datetime(x, unit="d", origin="2021-09-30") if date_type == "numpy": return ts.to_numpy() if date_type == "datetime": @@ -451,11 +461,14 @@ def make_date(x): return ts - pd.Timestamp("2021-10-1") return ts - return interval_array.from_arrays( - interval_array.left.map(make_date), - interval_array.right.map(make_date), - interval_array.closed, - ) + if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)): + return obj.from_arrays( + obj.left.map(make_date), + obj.right.map(make_date), + obj.closed, + ) + elif isinstance(obj, list): + return [make_date(x) for x in obj] @pytest.mark.parametrize( @@ -660,60 +673,6 @@ def test_complement(interval_index, domain, expected_tuples, closed, how): ) -@pytest.mark.parametrize( - "interval_index", - [True, False], -) -@pytest.mark.parametrize( - "x, closed, expected", - [ - (3, "left", 0), - (4, "left", -1), - (3, "right", -1), - (4, "right", 0), - (3, "both", 0), - (4, "both", 0), - (3, "neither", -1), - (4, "neither", -1), - 
([3, 9, 12], "left", np.array([0, 1, -1])), - ([3, 9, 12], "right", np.array([-1, 1, -1])), - ([3, 9, 12], "both", np.array([0, 1, -1])), - ([3, 9, 12], "neither", np.array([-1, 1, -1])), - ], -) -@pytest.mark.parametrize( - "how", - ["supplied", "accessor", "package"], -) -def test_get_indexer(interval_index, x, closed, expected, how): - ia = make_ia3(interval_index, closed) - result = perform_op( - ia, - x, - how=how, - function=piso_intervalarray.get_indexer, - ) - if hasattr(expected, "__len__"): - assert all(result == expected) - else: - assert result == expected - - -@pytest.mark.parametrize( - "how", - ["supplied", "accessor", "package"], -) -def test_get_indexer_exception(how): - ia = make_ia1(True, "left") - with pytest.raises(ValueError): - perform_op( - ia, - 1, - how=how, - function=piso_intervalarray.get_indexer, - ) - - @pytest.mark.parametrize( "interval_index", [True, False], @@ -774,3 +733,52 @@ def test_contains(interval_index, x, closed, expected, how, include_index): else: expected_result = np.array(expected) assert (result == expected_result).all() + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "x, expected_tuples", + [ + ([4], [(1, 4), (2, 4), (4, 5), (3, 4), (4, 6)]), + ([3.5], [(1, 3.5), (3.5, 4), (2, 3.5), (3.5, 5), (3, 3.5), (3.5, 6)]), + ([3, 4], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]), + ([0, 3, 4, 7], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]), + ([0], [(1, 4), (2, 5), (3, 6)]), + ([4, 4], [(1, 4), (2, 4), (4, 5), (3, 4), (4, 6)]), + ([4, 3], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right", "both", "neither"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +def test_split(interval_index, x, expected_tuples, closed, how, date_type): + ia = 
make_ia4(interval_index, closed) + ia = map_to_dates(ia, date_type) + + expected = make_ia_from_tuples(False, expected_tuples, closed) + expected = map_to_dates(expected, date_type) + x = map_to_dates(x, date_type) + + result = perform_op( + ia, + x, + how=how, + function=piso_intervalarray.split, + ) + assert_interval_array_equal( + result, + expected, + interval_index, + )