Merge branch 'master' into release

staircase-dev · Nov 20, 2021 · 43f44e4 · 43f44e4
2 parents 0ac48ad + a9520b8
commit 43f44e4
Show file tree

Hide file tree

Showing 15 changed files with 1,026 additions and 199 deletions.
diff --git a/docs/img/powered_by_staircase.svg b/docs/img/powered_by_staircase.svg
diff --git a/docs/index.rst b/docs/index.rst
@@ -20,7 +20,7 @@ Pandas Interval Set Operations: methods for set operations, analytics, lookups a
 
 .. image:: img/powered_by_staircase.svg
    :target: https://www.staircase.dev
-   :width: 200
+   :width: 300
    :alt: powered_by_staircase
    :align: center
 

diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst
@@ -19,4 +19,5 @@ Accessors
    ArrayAccessor.coverage
    ArrayAccessor.complement
    ArrayAccessor.contains
-   ArrayAccessor.get_indexer
+   ArrayAccessor.split
+   ArrayAccessor.adjacency_matrix
diff --git a/docs/reference/package.rst b/docs/reference/package.rst
@@ -21,6 +21,7 @@ Top level functions
    coverage
    complement
    contains
-   get_indexer
+   split
    lookup
-   join
+   join
+   adjacency_matrix
diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst
@@ -5,14 +5,28 @@ Release notes
 ========================
 
 
+**v0.7.0 2021-11-20**
+
+Added the following methods
+
+- :func:`piso.split`
+- :func:`piso.adjacency_matrix`
+- :meth:`ArrayAccessor.split() <piso.accessor.ArrayAccessor.split>`
+- :meth:`ArrayAccessor.adjacency_matrix() <piso.accessor.ArrayAccessor.adjacency_matrix>`
+
+Removed the following methods
+
+- :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer`
+
+
 **v0.6.0 2021-11-05**
 
 The following methods were extended to accommodate intervals with *closed = "both"* or *"neither"*
 
 - :func:`piso.contains` (and :meth:`ArrayAccessor.contains() <piso.accessor.ArrayAccessor.contains>`)
 - :func:`piso.get_indexer` (and :meth:`ArrayAccessor.get_indexer() <piso.accessor.ArrayAccessor.get_indexer>`)
 - :func:`piso.lookup`
-- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.get_indexer() <piso.accessor.ArrayAccessor.get_indexer>`)
+- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.isdisjoint() <piso.accessor.ArrayAccessor.isdisjoint>`)
 
 **v0.5.0 2021-11-02**
 
@@ -34,7 +48,7 @@ Added the following methods
 
 - :func:`piso.lookup`
 - :func:`piso.get_indexer`
-- :meth:`ArrayAccessor.get_indexer() <piso.accessor.ArrayAccessor.get_indexer>`
+- :meth:`ArrayAccessor.get_indexer`
 
 
 **v0.3.0 2021-10-23**

diff --git a/piso/__init__.py b/piso/__init__.py
@@ -1,13 +1,14 @@
+from piso.graph import adjacency_matrix
 from piso.intervalarray import (
     complement,
     contains,
     coverage,
     difference,
-    get_indexer,
     intersection,
     isdisjoint,
     issubset,
     issuperset,
+    split,
     symmetric_difference,
     union,
 )

diff --git a/piso/accessor.py b/piso/accessor.py
@@ -3,7 +3,7 @@
 import pandas as pd
 
 import piso.docstrings.accessor as docstrings
-from piso import intervalarray
+from piso import graph, intervalarray
 from piso._decorators import Appender
 
 
@@ -155,13 +155,6 @@ def complement(self, domain=None):
             domain,
         )
 
-    @Appender(docstrings.get_indexer_docstring, join="\n", indents=1)
-    def get_indexer(self, x):
-        return intervalarray.get_indexer(
-            self._interval_array,
-            x,
-        )
-
     @Appender(docstrings.contains_docstring, join="\n", indents=1)
     def contains(self, x, include_index=True):
         return intervalarray.contains(
@@ -170,6 +163,21 @@ def contains(self, x, include_index=True):
             include_index,
         )
 
+    @Appender(docstrings.split_docstring, join="\n", indents=1)
+    def split(self, x):
+        # Docstring text is supplied through the Appender decorator above, so no
+        # inline docstring is written here.  The method simply forwards the wrapped
+        # intervals, together with the break points, to the module-level function.
+        intervals = self._interval_array
+        return intervalarray.split(intervals, x)
+
+    @Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1)
+    def adjacency_matrix(self, edges="intersect", include_index=True):
+        # Docstring text is supplied through the Appender decorator above, so no
+        # inline docstring is written here.  The options are passed by keyword so
+        # they cannot be confused with the positional interval argument.
+        options = {"edges": edges, "include_index": include_index}
+        return graph.adjacency_matrix(self._interval_array, **options)
+
 
 def _register_accessors():
     _register_accessor("piso", pd.IntervalIndex)(ArrayAccessor)

diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py
@@ -1,3 +1,5 @@
+from piso.graph import adjacency_matrix
+
 union_examples = """
 Examples
 -----------
@@ -544,7 +546,7 @@ def join_params(list_of_param_strings):
     """
 Indicates whether one, or more, sets are disjoint or not.
 
-*interval_array* must be left-closed or right-closed if *interval_arrays is non-empty.
+*interval_array* must be left-closed or right-closed if \\*interval_arrays is non-empty.
 If no arguments are provided then this restriction does not apply.
 """
     + template_doc
@@ -782,3 +784,111 @@ def join_params(list_of_param_strings):
 >>> pd.IntervalIndex.from_tuples([(0,2)]).piso.contains(1, include_index=False)
 array([[ True]])
 """
+
+
+split_docstring = """
+Given a set of intervals, and break points, splits the intervals into pieces wherever
+they overlap a break point.
+
+The intervals are contained in the object the accessor belongs to.  They may be left-closed,
+right-closed, both, or neither, and may overlap one another.
+
+Parameters
+----------
+x : scalar, or array-like of scalars
+    Values in *x* should belong to the same domain as the intervals in the object the accessor belongs to.
+    May contain duplicates and be unsorted.
+
+Returns
+----------
+:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+    Return type will be the same type as the object the accessor belongs to.
+
+Examples
+-----------
+
+>>> import pandas as pd
+>>> import piso
+>>> piso.register_accessors()
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+...     [(0, 4), (2, 5)],
+... )
+
+>>> arr.piso.split(3)
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> arr.piso.split([3,3,3,3])
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> arr = pd.IntervalIndex.from_tuples(
+...     [(0, 4), (2, 5)], closed="neither",
+... )
+
+>>> arr.piso.split([1, 6, 4])
+IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)],
+              closed='neither',
+              dtype='interval[float64]')
+"""
+
+
+adjacency_matrix_docstring = """
+Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.
+
+The nodes correspond to intervals, and the edges are determined by the relationship
+specified by the *edges* parameter.
+
+Note that the diagonal is defined with False values by default.
+
+Parameters
+----------
+edges : {"intersect", "disjoint"}, default "intersect"
+    Defines the relationship that edges between nodes represent.
+include_index : bool, default True
+    If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
+    If False then a :class:`numpy.ndarray` is returned.
+
+Returns
+-------
+:class:`pandas.DataFrame` or :class:`numpy.ndarray`
+    Boolean valued, symmetrical, with False along diagonal.
+
+Examples
+---------
+
+>>> import pandas as pd
+>>> import piso
+>>> piso.register_accessors()
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+...    [(0,4), (3,6), (5, 7), (8,9), (9,10)],
+...    closed="both",
+... )
+
+>>> arr.piso.adjacency_matrix()
+         [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
+[0, 4]    False    True   False   False    False
+[3, 6]     True   False    True   False    False
+[5, 7]    False    True   False   False    False
+[8, 9]    False   False   False   False     True
+[9, 10]   False   False   False    True    False
+
+>>> arr.piso.adjacency_matrix(include_index=False)
+array([[False,  True, False, False, False],
+       [ True, False,  True, False, False],
+       [False,  True, False, False, False],
+       [False, False, False, False,  True],
+       [False, False, False,  True, False]])
+
+>>> arr.piso.adjacency_matrix(edges="disjoint")
+         [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
+[0, 4]    False   False    True    True     True
+[3, 6]    False   False   False    True     True
+[5, 7]     True   False   False    True     True
+[8, 9]     True    True    True   False    False
+[9, 10]    True    True    True   False    False
+"""
diff --git a/piso/docstrings/intervalarray.py b/piso/docstrings/intervalarray.py
@@ -791,3 +791,50 @@ def join_params(list_of_param_strings):
 >>> piso.contains(pd.IntervalIndex.from_tuples([(0,2)]), 1, include_index=False)
 array([[ True]])
 """
+
+split_docstring = """
+Given a set of intervals, and break points, splits the intervals into pieces wherever
+they overlap a break point.
+
+Parameters
+----------
+interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+    Contains the (possibly overlapping) intervals.  May be left-closed, right-closed, both, or neither.
+x : scalar, or array-like of scalars
+    Values in *x* should belong to the same domain as the intervals in *interval_array*.
+    May contain duplicates and be unsorted.
+
+Returns
+----------
+:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+    Return type will be the same type as *interval_array*
+
+Examples
+-----------
+
+>>> import pandas as pd
+>>> import piso
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+...     [(0, 4), (2, 5)],
+... )
+
+>>> piso.split(arr, 3)
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> piso.split(arr, [3,3,3,3])
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> arr = pd.IntervalIndex.from_tuples(
+...     [(0, 4), (2, 5)], closed="neither",
+... )
+
+>>> piso.split(arr, [1, 6, 4])
+IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)],
+              closed='neither',
+              dtype='interval[float64]')
+"""
diff --git a/piso/graph.py b/piso/graph.py
@@ -0,0 +1,89 @@
+import numpy as np
+import pandas as pd
+from pandas.core.indexes import interval
+
+
+def adjacency_matrix(interval_array, edges="intersect", include_index=True):
+    """
+    Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.
+
+    The nodes correspond to intervals, and the edges are determined by the relationship
+    specified by the *edges* parameter.
+
+    Note that the diagonal is always filled with False values, that is, no interval
+    is treated as adjacent to itself, whichever relationship is chosen.
+
+    Parameters
+    ----------
+    interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
+        Contains the intervals.
+    edges : {"intersect", "disjoint"}, default "intersect"
+        Defines the relationship that edges between nodes represent.
+    include_index : bool, default True
+        If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
+        If False then a :class:`numpy.ndarray` is returned.
+
+    Returns
+    -------
+    :class:`pandas.DataFrame` or :class:`numpy.ndarray`
+        Boolean valued, symmetrical, with False along diagonal.
+
+    Raises
+    ------
+    ValueError
+        If *edges* is neither "intersect" nor "disjoint".
+
+    Examples
+    ---------
+
+    >>> import pandas as pd
+    >>> import piso
+
+    >>> arr = pd.arrays.IntervalArray.from_tuples(
+    ...    [(0,4), (3,6), (5, 7), (8,9), (9,10)],
+    ...    closed="both",
+    ... )
+
+    >>> piso.adjacency_matrix(arr)
+             [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
+    [0, 4]    False    True   False   False    False
+    [3, 6]     True   False    True   False    False
+    [5, 7]    False    True   False   False    False
+    [8, 9]    False   False   False   False     True
+    [9, 10]   False   False   False    True    False
+
+    >>> piso.adjacency_matrix(arr, include_index=False)
+    array([[False,  True, False, False, False],
+           [ True, False,  True, False, False],
+           [False,  True, False, False, False],
+           [False, False, False, False,  True],
+           [False, False, False,  True, False]])
+
+    >>> piso.adjacency_matrix(arr, edges="disjoint")
+             [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
+    [0, 4]    False   False    True    True     True
+    [3, 6]    False   False   False    True     True
+    [5, 7]     True   False   False    True     True
+    [8, 9]     True    True    True   False    False
+    [9, 10]    True    True    True   False    False
+    """
+    if edges == "intersect":
+        result = _adj_mat_intersection(interval_array)
+    elif edges == "disjoint":
+        result = ~_adj_mat_intersection(interval_array, fill_diagonal=False)
+        # An empty interval such as (3, 3] does not intersect itself, so negation
+        # alone would leave True on its diagonal entry.  Clear the diagonal
+        # explicitly to honour the documented "False along diagonal" contract.
+        np.fill_diagonal(result, False)
+    else:
+        raise ValueError(f"Invalid value for edges parameter: {edges}")
+
+    if include_index:
+        result = pd.DataFrame(result, index=interval_array, columns=interval_array)
+
+    return result
+
+
+def _adj_mat_intersection(interval_array, fill_diagonal=True):
+    """
+    Builds the boolean matrix whose (i, j) entry indicates whether intervals
+    i and j of *interval_array* intersect.
+
+    Parameters
+    ----------
+    interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
+        Contains the intervals.
+    fill_diagonal : bool, default True
+        If True the diagonal of the matrix is overwritten with False.
+
+    Returns
+    -------
+    :class:`numpy.ndarray`
+        Boolean valued, square, one row and one column per interval.
+    """
+    lefts, rights = interval_array.left, interval_array.right
+    # Strict endpoint comparison: i and j overlap when i ends after j starts
+    # and i starts before j ends.
+    ends_after_start = np.greater.outer(rights, lefts)
+    starts_before_end = np.less.outer(lefts, rights)
+    overlaps = ends_after_start & starts_before_end
+    if interval_array.closed == "both":
+        # With both endpoints closed, intervals that merely touch also intersect.
+        touching = np.equal.outer(rights, lefts) | np.equal.outer(lefts, rights)
+        overlaps = overlaps | touching
+    if fill_diagonal:
+        np.fill_diagonal(overlaps, False)
+    return overlaps