From e8cf59c32839b1567db16a35e3fd2344db463506 Mon Sep 17 00:00:00 2001 From: Venaturum Date: Sat, 6 Nov 2021 00:29:56 +1100 Subject: [PATCH 1/5] utilising pandas.IntervalIndex.get_indexer (#41) * utilising pandas.IntervalIndex.get_indexer * removing get_indexer * removed references to get_indexer in tests --- docs/reference/accessors.rst | 3 +- docs/reference/package.rst | 1 - docs/release_notes/index.rst | 1 + piso/__init__.py | 1 - piso/accessor.py | 7 ---- piso/intervalarray.py | 9 ----- piso/ndframe.py | 4 +-- tests/test_single_interval_array.py | 56 ----------------------------- 8 files changed, 4 insertions(+), 78 deletions(-) diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index 0ad0a59..a1c76e4 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -18,5 +18,4 @@ Accessors ArrayAccessor.issubset ArrayAccessor.coverage ArrayAccessor.complement - ArrayAccessor.contains - ArrayAccessor.get_indexer \ No newline at end of file + ArrayAccessor.contains \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index 59f084c..073fd94 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -21,6 +21,5 @@ Top level functions coverage complement contains - get_indexer lookup join \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 7419e52..eb2bc60 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,6 +4,7 @@ Release notes ======================== +- removed :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer ADD UNRELEASED CHANGES ABOVE THIS LINE diff --git a/piso/__init__.py b/piso/__init__.py index 87dee33..9215231 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -3,7 +3,6 @@ contains, coverage, difference, - get_indexer, intersection, isdisjoint, issubset, diff --git a/piso/accessor.py b/piso/accessor.py index 4fd5e03..bd3d6ad 100644 --- 
a/piso/accessor.py +++ b/piso/accessor.py @@ -155,13 +155,6 @@ def complement(self, domain=None): domain, ) - @Appender(docstrings.get_indexer_docstring, join="\n", indents=1) - def get_indexer(self, x): - return intervalarray.get_indexer( - self._interval_array, - x, - ) - @Appender(docstrings.contains_docstring, join="\n", indents=1) def contains(self, x, include_index=True): return intervalarray.contains( diff --git a/piso/intervalarray.py b/piso/intervalarray.py index f5b873d..5fd9cdd 100644 --- a/piso/intervalarray.py +++ b/piso/intervalarray.py @@ -217,12 +217,3 @@ def contains(interval_array, x, include_index=True): if include_index: return pd.DataFrame(result, index=interval_array, columns=x) return result - - -@Appender(docstrings.get_indexer_docstring, join="\n", indents=1) -def get_indexer(interval_array, x): - if not isdisjoint(interval_array): - raise ValueError("get_indexer method is only valid for disjoint intervals.") - ia_length = len(interval_array) - contain_matrix = contains(interval_array, x, include_index=False) - return (np.linspace(1, ia_length, ia_length).dot(contain_matrix) - 1).astype(int) diff --git a/piso/ndframe.py b/piso/ndframe.py index 7aa0163..d792363 100644 --- a/piso/ndframe.py +++ b/piso/ndframe.py @@ -14,7 +14,7 @@ def lookup(frame_or_series, x): raise ValueError("DataFrame or Series must be indexed by an IntervalIndex") if not hasattr(x, "__len__"): x = np.array(x, ndmin=1) - indexer = intervalarray.get_indexer(frame_or_series.index, x) + indexer = frame_or_series.index.get_indexer(x) result = frame_or_series.copy().iloc[indexer].set_axis(x) set_nan = indexer == -1 if set_nan.any(): @@ -52,7 +52,7 @@ def _get_indexers(*dfs): ) tiling_index = pd.IntervalIndex.from_breaks(sorted(set(breaks))) lookups = tiling_index.left if closed == "left" else tiling_index.right - indexers = [intervalarray.get_indexer(df.index, lookups) for df in dfs] + indexers = [df.index.get_indexer(lookups) for df in dfs] return tiling_index, indexers diff 
--git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py index ccbbf3a..be27ff1 100644 --- a/tests/test_single_interval_array.py +++ b/tests/test_single_interval_array.py @@ -19,7 +19,6 @@ def get_accessor_method(self, function): piso_intervalarray.issubset: self.piso.issubset, piso_intervalarray.coverage: self.piso.coverage, piso_intervalarray.complement: self.piso.complement, - piso_intervalarray.get_indexer: self.piso.get_indexer, piso_intervalarray.contains: self.piso.contains, }[function] @@ -34,7 +33,6 @@ def get_package_method(function): piso_intervalarray.issubset: piso.issubset, piso_intervalarray.coverage: piso.coverage, piso_intervalarray.complement: piso.complement, - piso_intervalarray.get_indexer: piso.get_indexer, piso_intervalarray.contains: piso.contains, }[function] @@ -660,60 +658,6 @@ def test_complement(interval_index, domain, expected_tuples, closed, how): ) -@pytest.mark.parametrize( - "interval_index", - [True, False], -) -@pytest.mark.parametrize( - "x, closed, expected", - [ - (3, "left", 0), - (4, "left", -1), - (3, "right", -1), - (4, "right", 0), - (3, "both", 0), - (4, "both", 0), - (3, "neither", -1), - (4, "neither", -1), - ([3, 9, 12], "left", np.array([0, 1, -1])), - ([3, 9, 12], "right", np.array([-1, 1, -1])), - ([3, 9, 12], "both", np.array([0, 1, -1])), - ([3, 9, 12], "neither", np.array([-1, 1, -1])), - ], -) -@pytest.mark.parametrize( - "how", - ["supplied", "accessor", "package"], -) -def test_get_indexer(interval_index, x, closed, expected, how): - ia = make_ia3(interval_index, closed) - result = perform_op( - ia, - x, - how=how, - function=piso_intervalarray.get_indexer, - ) - if hasattr(expected, "__len__"): - assert all(result == expected) - else: - assert result == expected - - -@pytest.mark.parametrize( - "how", - ["supplied", "accessor", "package"], -) -def test_get_indexer_exception(how): - ia = make_ia1(True, "left") - with pytest.raises(ValueError): - perform_op( - ia, - 1, - how=how, - 
function=piso_intervalarray.get_indexer, - ) - - @pytest.mark.parametrize( "interval_index", [True, False], From 3e0e8cca1d6a0331ab415f0b3c9fe191463b574f Mon Sep 17 00:00:00 2001 From: Venaturum Date: Sat, 6 Nov 2021 21:58:52 +1100 Subject: [PATCH 2/5] added piso.split (+ tests + docs) (#42) * added piso.split (+ tests + docs) * erroneous import closes #36 --- docs/reference/accessors.rst | 3 +- docs/reference/package.rst | 4 +- docs/release_notes/index.rst | 11 ++-- piso/__init__.py | 1 + piso/accessor.py | 7 +++ piso/docstrings/accessor.py | 52 ++++++++++++++++++- piso/docstrings/intervalarray.py | 47 +++++++++++++++++ piso/intervalarray.py | 20 ++++++++ tests/test_single_interval_array.py | 78 ++++++++++++++++++++++++++--- 9 files changed, 210 insertions(+), 13 deletions(-) diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index a1c76e4..20272e7 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -18,4 +18,5 @@ Accessors ArrayAccessor.issubset ArrayAccessor.coverage ArrayAccessor.complement - ArrayAccessor.contains \ No newline at end of file + ArrayAccessor.contains + ArrayAccessor.split \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index 073fd94..191170a 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -21,5 +21,7 @@ Top level functions coverage complement contains + split lookup - join \ No newline at end of file + join + \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index eb2bc60..8b3b78c 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,7 +4,12 @@ Release notes ======================== -- removed :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer +Added the following methods + +- :func:`piso.split` +- :meth:`ArrayAccessor.split() ` + +- removed :func:`piso.get_indexer` in favour of 
:meth:`pandas.IntervalIndex.get_indexer` ADD UNRELEASED CHANGES ABOVE THIS LINE @@ -15,7 +20,7 @@ The following methods were extended to accommodate intervals with *closed = "bot - :func:`piso.contains` (and :meth:`ArrayAccessor.contains() `) - :func:`piso.get_indexer` (and :meth:`ArrayAccessor.get_indexer() `) - :func:`piso.lookup` -- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.get_indexer() `) +- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.isdisjoint() `) **v0.5.0 2021-11-02** @@ -37,7 +42,7 @@ Added the following methods - :func:`piso.lookup` - :func:`piso.get_indexer` -- :meth:`ArrayAccessor.get_indexer() ` +- :meth:`ArrayAccessor.get_indexer` **v0.3.0 2021-10-23** diff --git a/piso/__init__.py b/piso/__init__.py index 9215231..a1449d8 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -7,6 +7,7 @@ isdisjoint, issubset, issuperset, + split, symmetric_difference, union, ) diff --git a/piso/accessor.py b/piso/accessor.py index bd3d6ad..172b6fc 100644 --- a/piso/accessor.py +++ b/piso/accessor.py @@ -163,6 +163,13 @@ def contains(self, x, include_index=True): include_index, ) + @Appender(docstrings.split_docstring, join="\n", indents=1) + def split(self, x): + return intervalarray.split( + self._interval_array, + x, + ) + def _register_accessors(): _register_accessor("piso", pd.IntervalIndex)(ArrayAccessor) diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py index d349427..7efa377 100644 --- a/piso/docstrings/accessor.py +++ b/piso/docstrings/accessor.py @@ -544,7 +544,7 @@ def join_params(list_of_param_strings): """ Indicates whether one, or more, sets are disjoint or not. -*interval_array* must be left-closed or right-closed if *interval_arrays is non-empty. +*interval_array* must be left-closed or right-closed if \\*interval_arrays is non-empty. If no arguments are provided then this restriction does not apply. 
""" + template_doc @@ -782,3 +782,53 @@ def join_params(list_of_param_strings): >>> pd.IntervalIndex.from_tuples([(0,2)]).piso.contains(1, include_index=False) array([[ True]]) """ + + +split_docstring = """ +Given a set of intervals, and break points, splits the intervals into pieces wherever +they overlap a break point. + +The intervals are contained in the object the accessor belongs to. They may be left-closed, +right-closed, both, or neither, and contain overlapping intervals. + +Parameters +---------- +x : scalar, or array-like of scalars + Values in *x* should belong to the same domain as the intervals in *interval_array*. + May contain duplicates and be unsorted. + +Returns +---------- +:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + Return type will be the same type as the object the accessor belongs to. + +Examples +----------- + +>>> import pandas as pd +>>> import piso +>>> piso.register_accessors() + +>>> arr = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (2, 5)], +... ) + +>>> arr.piso.split(3) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> arr.piso.split([3,3,3,3]) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> arr = pd.IntervalIndex.from_tuples( +... [(0, 4), (2, 5)], closed="neither", +... ) + +>>> arr.piso.split([1, 6, 4]) +IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)], + closed='neither', + dtype='interval[float64]') +""" diff --git a/piso/docstrings/intervalarray.py b/piso/docstrings/intervalarray.py index c37beba..1b5fdcf 100644 --- a/piso/docstrings/intervalarray.py +++ b/piso/docstrings/intervalarray.py @@ -791,3 +791,50 @@ def join_params(list_of_param_strings): >>> piso.contains(pd.IntervalIndex.from_tuples([(0,2)]), 1, include_index=False) array([[ True]]) """ + +split_docstring = """ +Given a set of intervals, and break points, splits the intervals into pieces wherever +they overlap a break point. 
+ +Parameters +---------- +interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + Contains the (possibly overlapping) intervals. May be left-closed, right-closed, both, or neither. +x : scalar, or array-like of scalars + Values in *x* should belong to the same domain as the intervals in *interval_array*. + May contain duplicates and be unsorted. + +Returns +---------- +:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` + Return type will be the same type as *interval_array* + +Examples +----------- + +>>> import pandas as pd +>>> import piso + +>>> arr = pd.arrays.IntervalArray.from_tuples( +... [(0, 4), (2, 5)], +... ) + +>>> piso.split(arr, 3) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> piso.split(arr, [3,3,3,3]) + +[(0, 3], (3, 4], (2, 3], (3, 5]] +Length: 4, closed: right, dtype: interval[int64] + +>>> arr = pd.IntervalIndex.from_tuples( +... [(0, 4), (2, 5)], closed="neither", +... 
) + +>>> piso.split(arr, [1, 6, 4]) +IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)], + closed='neither', + dtype='interval[float64]') +""" diff --git a/piso/intervalarray.py b/piso/intervalarray.py index 5fd9cdd..692360d 100644 --- a/piso/intervalarray.py +++ b/piso/intervalarray.py @@ -217,3 +217,23 @@ def contains(interval_array, x, include_index=True): if include_index: return pd.DataFrame(result, index=interval_array, columns=x) return result + + +@Appender(docstrings.split_docstring, join="\n", indents=1) +def split(interval_array, x): + # x = pd.Series(x).values + x = pd.Series(sorted(set(x))).values # converting to numpy array will not work + contained = contains(interval_array.set_closed("neither"), x, include_index=False) + breakpoints = np.concatenate( + ( + np.expand_dims(interval_array.left.values, 1), + pd.DataFrame(np.broadcast_to(x, contained.shape)).where(contained).values, + np.expand_dims(interval_array.right.values, 1), + ), + axis=1, + ) + lefts = breakpoints[:, :-1] + rights = breakpoints[:, 1:] + return interval_array.from_arrays( + lefts[~np.isnan(lefts)], rights[~np.isnan(rights)], closed=interval_array.closed + ) diff --git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py index be27ff1..7cd41e4 100644 --- a/tests/test_single_interval_array.py +++ b/tests/test_single_interval_array.py @@ -20,6 +20,7 @@ def get_accessor_method(self, function): piso_intervalarray.coverage: self.piso.coverage, piso_intervalarray.complement: self.piso.complement, piso_intervalarray.contains: self.piso.contains, + piso_intervalarray.split: self.piso.split, }[function] @@ -34,6 +35,7 @@ def get_package_method(function): piso_intervalarray.coverage: piso.coverage, piso_intervalarray.complement: piso.complement, piso_intervalarray.contains: piso.contains, + piso_intervalarray.split: piso.split, }[function] @@ -78,6 +80,16 @@ def make_ia3(interval_index, closed): return ia3 +def make_ia4(interval_index, closed): + ia4 = 
pd.arrays.IntervalArray.from_tuples( + [(1, 4), (2, 5), (3, 6)], + closed=closed, + ) + if interval_index: + ia4 = pd.IntervalIndex(ia4) + return ia4 + + def make_ia_from_tuples(interval_index, tuples, closed): klass = pd.IntervalIndex if interval_index else pd.arrays.IntervalArray return klass.from_tuples(tuples, closed=closed) @@ -438,9 +450,9 @@ def test_symmetric_difference_min_overlaps_all_2( ) -def map_to_dates(interval_array, date_type): +def map_to_dates(obj, date_type): def make_date(x): - ts = pd.Timestamp(f"2021-10-{x}") + ts = pd.to_datetime(x, unit="d", origin="2021-09-30") if date_type == "numpy": return ts.to_numpy() if date_type == "datetime": @@ -449,11 +461,14 @@ def make_date(x): return ts - pd.Timestamp("2021-10-1") return ts - return interval_array.from_arrays( - interval_array.left.map(make_date), - interval_array.right.map(make_date), - interval_array.closed, - ) + if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)): + return obj.from_arrays( + obj.left.map(make_date), + obj.right.map(make_date), + obj.closed, + ) + elif isinstance(obj, list): + return [make_date(x) for x in obj] @pytest.mark.parametrize( @@ -718,3 +733,52 @@ def test_contains(interval_index, x, closed, expected, how, include_index): else: expected_result = np.array(expected) assert (result == expected_result).all() + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "x, expected_tuples", + [ + ([4], [(1, 4), (2, 4), (4, 5), (3, 4), (4, 6)]), + ([3.5], [(1, 3.5), (3.5, 4), (2, 3.5), (3.5, 5), (3, 3.5), (3.5, 6)]), + ([3, 4], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]), + ([0, 3, 4, 7], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]), + ([0], [(1, 4), (2, 5), (3, 6)]), + ([4, 4], [(1, 4), (2, 4), (4, 5), (3, 4), (4, 6)]), + ([4, 3], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]), + ], +) +@pytest.mark.parametrize( + "closed", + ["left", "right", "both", "neither"], +) 
+@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +def test_split(interval_index, x, expected_tuples, closed, how, date_type): + ia = make_ia4(interval_index, closed) + ia = map_to_dates(ia, date_type) + + expected = make_ia_from_tuples(False, expected_tuples, closed) + expected = map_to_dates(expected, date_type) + x = map_to_dates(x, date_type) + + result = perform_op( + ia, + x, + how=how, + function=piso_intervalarray.split, + ) + assert_interval_array_equal( + result, + expected, + interval_index, + ) From 4763aa83e8b35d8e9083c556c998497104ec59af Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Wed, 10 Nov 2021 17:28:23 +1100 Subject: [PATCH 3/5] DOC UPDATE --- docs/img/powered_by_staircase.svg | 466 +++++++++++++++++++++++------- docs/index.rst | 2 +- 2 files changed, 357 insertions(+), 111 deletions(-) diff --git a/docs/img/powered_by_staircase.svg b/docs/img/powered_by_staircase.svg index cc793e6..9bd46ee 100644 --- a/docs/img/powered_by_staircase.svg +++ b/docs/img/powered_by_staircase.svg @@ -2,9 +2,9 @@ + + inkscape:pagecheckerboard="0" + showguides="true" + inkscape:guide-bbox="true"> + + @@ -95,115 +104,119 @@ inkscape:label="Layer 1" inkscape:groupmode="layer" id="layer1" - transform="translate(377.75726,326.24246)"> + transform="translate(632.60692,319.18132)"> + width="802.06549" + height="142.67062" + x="-631.41364" + y="-318.31769" /> + id="g1223" + transform="translate(-259.36723,77.589223)"> + + + + + + + + + + + + + + id="path7928" + d="M 3.7846419,-314.6503 H 133.02875" + style="fill:none;stroke:#828282;stroke-width:2.05538;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:2.8;stroke-dasharray:none;stroke-opacity:1" + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> + id="path7930" + d="M -80.584528,-273.74018 H -4.0089881" 
+ style="fill:none;stroke:#828282;stroke-width:1.58243;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> + id="path7932" + d="M 2.7262919,-314.7503 H 135.46667" + style="fill:#828282;fill-opacity:1;stroke:#828282;stroke-width:0.683;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:2.8;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker7938)" + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> - - - - - - + inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png" + inkscape:export-xdpi="50" + inkscape:export-ydpi="50" /> - - - - - powered by + id="tspan1373">powered by + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/index.rst b/docs/index.rst index 239418e..209c838 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,7 +20,7 @@ Pandas Interval Set Operations: methods for set operations, analytics, lookups a .. 
image:: img/powered_by_staircase.svg :target: https://www.staircase.dev - :width: 200 + :width: 300 :alt: powered_by_staircase :align: center From f3acc049028b28fe8ac98e9af573d2b3d814c0ec Mon Sep 17 00:00:00 2001 From: Venaturum Date: Sat, 20 Nov 2021 12:22:07 +1100 Subject: [PATCH 4/5] adjacency_matrix methods added (GH25) (#43) * adjacency_matrix methods added (GH25) * extra tests closes #25 --- docs/reference/accessors.rst | 3 +- docs/reference/package.rst | 2 +- docs/release_notes/index.rst | 2 + piso/__init__.py | 1 + piso/accessor.py | 10 +- piso/docstrings/accessor.py | 60 ++++++++ piso/graph.py | 89 +++++++++++ tests/test_graph.py | 291 +++++++++++++++++++++++++++++++++++ 8 files changed, 455 insertions(+), 3 deletions(-) create mode 100644 piso/graph.py create mode 100644 tests/test_graph.py diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index 20272e7..43a3545 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -19,4 +19,5 @@ Accessors ArrayAccessor.coverage ArrayAccessor.complement ArrayAccessor.contains - ArrayAccessor.split \ No newline at end of file + ArrayAccessor.split + ArrayAccessor.adjacency_matrix \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index 191170a..b9272d8 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -24,4 +24,4 @@ Top level functions split lookup join - \ No newline at end of file + adjacency_matrix \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 8b3b78c..e8f0550 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -7,7 +7,9 @@ Release notes Added the following methods - :func:`piso.split` +- :func:`piso.adjacency_matrix` - :meth:`ArrayAccessor.split() ` +- :meth:`ArrayAccessor.adjacency_matrix() ` - removed :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer` diff --git a/piso/__init__.py 
b/piso/__init__.py index a1449d8..cefa917 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -1,3 +1,4 @@ +from piso.graph import adjacency_matrix from piso.intervalarray import ( complement, contains, diff --git a/piso/accessor.py b/piso/accessor.py index 172b6fc..0813c5c 100644 --- a/piso/accessor.py +++ b/piso/accessor.py @@ -3,7 +3,7 @@ import pandas as pd import piso.docstrings.accessor as docstrings -from piso import intervalarray +from piso import graph, intervalarray from piso._decorators import Appender @@ -170,6 +170,14 @@ def split(self, x): x, ) + @Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1) + def adjacency_matrix(self, edges="intersect", include_index=True): + return graph.adjacency_matrix( + self._interval_array, + edges=edges, + include_index=include_index, + ) + def _register_accessors(): _register_accessor("piso", pd.IntervalIndex)(ArrayAccessor) diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py index 7efa377..e182740 100644 --- a/piso/docstrings/accessor.py +++ b/piso/docstrings/accessor.py @@ -1,3 +1,5 @@ +from piso.graph import adjacency_matrix + union_examples = """ Examples ----------- @@ -832,3 +834,61 @@ def join_params(list_of_param_strings): closed='neither', dtype='interval[float64]') """ + + +adjacency_matrix_docstring = """ +Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph. + +The set of nodes correspond to intervals and the edges are defined by the relationship +defined by the *edges* parameter. + +Note that the diagonal is defined with False values by default. + +Parameters +---------- +edges : {"intersect", "disjoint"}, default "intersect" + Defines the relationship that edges between nodes represent. +include_index : bool, default True + If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned. + If False then a :class:`numpy.ndarray` is returned. 
+ +Returns +------- +:class:`pandas.DataFrame` or :class:`numpy.ndarray` + Boolean valued, symmetrical, with False along diagonal. + +Examples +--------- + +>>> import pandas as pd +>>> import piso +>>> piso.register_accessors() + +>>> arr = pd.arrays.IntervalArray.from_tuples( +... [(0,4), (3,6), (5, 7), (8,9), (9,10)], +... closed="both", +... ) + +>>> arr.piso.adjacency_matrix() + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] +[0, 4] False True False False False +[3, 6] True False True False False +[5, 7] False True False False False +[8, 9] False False False False True +[9, 10] False False False True False + +>>> arr.piso.adjacency_matrix(include_index=False) +array([[False, True, False, False, False], + [ True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, True], + [False, False, False, True, False]]) + +>>> arr.piso.adjacency_matrix(edges="disjoint") + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] +[0, 4] False False True True True +[3, 6] False False False True True +[5, 7] True False False True True +[8, 9] True True True False False +[9, 10] True True True False False +""" diff --git a/piso/graph.py b/piso/graph.py new file mode 100644 index 0000000..39144df --- /dev/null +++ b/piso/graph.py @@ -0,0 +1,89 @@ +import numpy as np +import pandas as pd +from pandas.core.indexes import interval + + +def adjacency_matrix(interval_array, edges="intersect", include_index=True): + """ + Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph. + + The set of nodes correspond to intervals and the edges are defined by the relationship + defined by the *edges* parameter. + + Note that the diagonal is defined with False values by default. + + Parameters + ---------- + interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex` + Contains the intervals. 
+ edges : {"intersect", "disjoint"}, default "intersect" + Defines the relationship that edges between nodes represent. + include_index : bool, default True + If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned. + If False then a :class:`numpy.ndarray` is returned. + + Returns + ------- + :class:`pandas.DataFrame` or :class:`numpy.ndarray` + Boolean valued, symmetrical, with False along diagonal. + + Examples + --------- + + >>> import pandas as pd + >>> import piso + + >>> arr = pd.arrays.IntervalArray.from_tuples( + ... [(0,4), (3,6), (5, 7), (8,9), (9,10)], + ... closed="both", + ... ) + + >>> piso.adjacency_matrix(arr) + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] + [0, 4] False True False False False + [3, 6] True False True False False + [5, 7] False True False False False + [8, 9] False False False False True + [9, 10] False False False True False + + >>> piso.adjacency_matrix(arr, include_index=False) + array([[False, True, False, False, False], + [ True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, True], + [False, False, False, True, False]]) + + >>> piso.adjacency_matrix(arr, edges="disjoint") + [0, 4] [3, 6] [5, 7] [8, 9] [9, 10] + [0, 4] False False True True True + [3, 6] False False False True True + [5, 7] True False False True True + [8, 9] True True True False False + [9, 10] True True True False False + """ + if edges == "intersect": + result = _adj_mat_intersection(interval_array) + elif edges == "disjoint": + result = ~_adj_mat_intersection(interval_array, fill_diagonal=False) + else: + raise ValueError(f"Invalid value for edges parameter: {edges}") + + if include_index: + result = pd.DataFrame(result, index=interval_array, columns=interval_array) + + return result + + +def _adj_mat_intersection(interval_array, fill_diagonal=True): + result = np.greater.outer( + interval_array.right, interval_array.left + ) & np.less.outer(interval_array.left, interval_array.right) + if 
interval_array.closed == "both": + result = ( + result + | np.equal.outer(interval_array.right, interval_array.left) + | np.equal.outer(interval_array.left, interval_array.right) + ) + if fill_diagonal: + np.fill_diagonal(result, False) + return result diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..cc7f4a7 --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,291 @@ +import numpy as np +import pandas as pd +import pytest + +import piso +import piso.graph as piso_graph +from piso import register_accessors + +register_accessors() + + +def get_accessor_method(self, function): + return { + piso_graph.adjacency_matrix: self.piso.adjacency_matrix, + }[function] + + +def get_package_method(function): + return { + piso_graph.adjacency_matrix: piso.adjacency_matrix, + }[function] + + +def perform_op(*args, how, function, **kwargs): + # how = "supplied, accessor, or package" + if how == "accessor": + self, *args = args + return get_accessor_method(self, function)(*args, **kwargs) + elif how == "package": + return get_package_method(function)(*args, **kwargs) + else: + return function(*args, **kwargs) + + +def map_to_dates(obj, date_type): + def make_date(x): + ts = pd.to_datetime(x, unit="d", origin="2021-09-30") + if date_type == "numpy": + return ts.to_numpy() + if date_type == "datetime": + return ts.to_pydatetime() + if date_type == "timedelta": + return ts - pd.Timestamp("2021-10-1") + return ts + + if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)): + return obj.from_arrays( + obj.left.map(make_date), + obj.right.map(make_date), + obj.closed, + ) + elif isinstance(obj, list): + return [make_date(x) for x in obj] + + +@pytest.mark.parametrize( + "closed", + ["left", "right", "neither"], +) +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], 
+) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_intersects_1( + closed, interval_index, include_index, date_type, how +): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed=closed, + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, True, False, False, False], + [True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="intersect", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_intersects_2(interval_index, include_index, date_type, how): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed="both", + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, True, False, False, False], + [True, False, True, False, False], + [False, True, False, False, False], + [False, False, False, False, True], + [False, False, False, True, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + 
function=piso_graph.adjacency_matrix, + edges="intersect", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "closed", + ["left", "right", "neither"], +) +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_disjoint_1( + closed, interval_index, include_index, date_type, how +): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed=closed, + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, False, True, True, True], + [False, False, False, True, True], + [True, False, False, True, True], + [True, True, True, False, True], + [True, True, True, True, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="disjoint", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "interval_index", + [True, False], +) +@pytest.mark.parametrize( + "include_index", + [True, False], +) +@pytest.mark.parametrize( + "date_type", + ["timestamp", "numpy", "datetime", "timedelta", None], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_disjoint_2(interval_index, 
include_index, date_type, how): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed="both", + ) + if interval_index: + interval_array = pd.IntervalIndex(interval_array) + + if date_type: + interval_array = map_to_dates(interval_array, date_type) + + expected = np.array( + [ + [False, False, True, True, True], + [False, False, False, True, True], + [True, False, False, True, True], + [True, True, True, False, False], + [True, True, True, False, False], + ] + ) + + result = perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="disjoint", + include_index=include_index, + ) + if include_index: + expected = pd.DataFrame(expected, columns=interval_array, index=interval_array) + pd.testing.assert_frame_equal(result, expected) + else: + assert np.array_equal(result, expected) + + +@pytest.mark.parametrize( + "closed", + ["left", "right", "both", "neither"], +) +@pytest.mark.parametrize( + "how", + ["supplied", "accessor", "package"], +) +def test_adjacency_matrix_edges_exception(closed, how): + interval_array = pd.arrays.IntervalArray.from_tuples( + [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)], + closed=closed, + ) + with pytest.raises(ValueError): + perform_op( + interval_array, + how=how, + function=piso_graph.adjacency_matrix, + edges="not_an_option", + ) From a9520b89187a1e36a3dcd621d2dfdcc5d3467d27 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Sat, 20 Nov 2021 12:31:12 +1100 Subject: [PATCH 5/5] v0.7.0 --- docs/release_notes/index.rst | 8 +++++++- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index e8f0550..8ab41f7 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,6 +4,10 @@ Release notes ======================== +ADD UNRELEASED CHANGES ABOVE THIS LINE + +**v0.7.0 2021-11-20** + Added the following methods - :func:`piso.split` @@ -11,9 +15,11 @@ 
Added the following methods - :meth:`ArrayAccessor.split() <piso.accessor.ArrayAccessor.split>` - :meth:`ArrayAccessor.adjacency_matrix() <piso.accessor.ArrayAccessor.adjacency_matrix>` +Removed the following methods + - removed :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer` -ADD UNRELEASED CHANGES ABOVE THIS LINE + + **v0.6.0 2021-11-05** diff --git a/pyproject.toml b/pyproject.toml index 00f1e53..27a2100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "poetry.masonry.api" [tool.poetry] name = "piso" -version = "0.6.0" +version = "0.7.0" description = "Pandas Interval Set Operations: methods for set operations, analytics, lookups and joins on pandas' Interval, IntervalArray and IntervalIndex" readme = "README.md" authors = ["Riley Clement "]