Skip to content

Commit

Permalink
Merge branch 'master' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
venaturum committed Nov 20, 2021
2 parents 0ac48ad + a9520b8 commit 43f44e4
Show file tree
Hide file tree
Showing 15 changed files with 1,026 additions and 199 deletions.
466 changes: 356 additions & 110 deletions docs/img/powered_by_staircase.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Pandas Interval Set Operations: methods for set operations, analytics, lookups a

.. image:: img/powered_by_staircase.svg
:target: https://www.staircase.dev
:width: 200
:width: 300
:alt: powered_by_staircase
:align: center

Expand Down
3 changes: 2 additions & 1 deletion docs/reference/accessors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ Accessors
ArrayAccessor.coverage
ArrayAccessor.complement
ArrayAccessor.contains
ArrayAccessor.get_indexer
ArrayAccessor.split
ArrayAccessor.adjacency_matrix
5 changes: 3 additions & 2 deletions docs/reference/package.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Top level functions
coverage
complement
contains
get_indexer
split
lookup
join
join
adjacency_matrix
18 changes: 16 additions & 2 deletions docs/release_notes/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,28 @@ Release notes
========================


**v0.7.0 2021-11-20**

Added the following methods

- :func:`piso.split`
- :func:`piso.adjacency_matrix`
- :meth:`ArrayAccessor.split() <piso.accessor.ArrayAccessor.split>`
- :meth:`ArrayAccessor.adjacency_matrix() <piso.accessor.ArrayAccessor.adjacency_matrix>`

Removed the following methods

- :func:`piso.get_indexer` and :meth:`ArrayAccessor.get_indexer`, in favour of :meth:`pandas.IntervalIndex.get_indexer`


**v0.6.0 2021-11-05**

The following methods were extended to accommodate intervals with *closed = "both"* or *"neither"*

- :func:`piso.contains` (and :meth:`ArrayAccessor.contains() <piso.accessor.ArrayAccessor.contains>`)
- :func:`piso.get_indexer` (and :meth:`ArrayAccessor.get_indexer() <piso.accessor.ArrayAccessor.get_indexer>`)
- :func:`piso.lookup`
- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.get_indexer() <piso.accessor.ArrayAccessor.get_indexer>`)
- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.isdisjoint() <piso.accessor.ArrayAccessor.isdisjoint>`)

**v0.5.0 2021-11-02**

Expand All @@ -34,7 +48,7 @@ Added the following methods

- :func:`piso.lookup`
- :func:`piso.get_indexer`
- :meth:`ArrayAccessor.get_indexer() <piso.accessor.ArrayAccessor.get_indexer>`
- :meth:`ArrayAccessor.get_indexer`


**v0.3.0 2021-10-23**
Expand Down
3 changes: 2 additions & 1 deletion piso/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from piso.graph import adjacency_matrix
from piso.intervalarray import (
complement,
contains,
coverage,
difference,
get_indexer,
intersection,
isdisjoint,
issubset,
issuperset,
split,
symmetric_difference,
union,
)
Expand Down
24 changes: 16 additions & 8 deletions piso/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd

import piso.docstrings.accessor as docstrings
from piso import intervalarray
from piso import graph, intervalarray
from piso._decorators import Appender


Expand Down Expand Up @@ -155,13 +155,6 @@ def complement(self, domain=None):
domain,
)

@Appender(docstrings.get_indexer_docstring, join="\n", indents=1)
def get_indexer(self, x):
    # Thin accessor wrapper: hand the wrapped intervals and the query
    # value(s) to the array-level implementation and return its result
    # unchanged.  The user-facing documentation is supplied through the
    # Appender decorator, so no inline docstring is written here.
    intervals = self._interval_array
    return intervalarray.get_indexer(intervals, x)

@Appender(docstrings.contains_docstring, join="\n", indents=1)
def contains(self, x, include_index=True):
return intervalarray.contains(
Expand All @@ -170,6 +163,21 @@ def contains(self, x, include_index=True):
include_index,
)

@Appender(docstrings.split_docstring, join="\n", indents=1)
def split(self, x):
    # Thin accessor wrapper: forward the wrapped intervals together with the
    # break point(s) *x* to the array-level ``split`` and return its result
    # unchanged.  The user-facing documentation is supplied through the
    # Appender decorator, so no inline docstring is written here.
    intervals = self._interval_array
    return intervalarray.split(intervals, x)

@Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1)
def adjacency_matrix(self, edges="intersect", include_index=True):
    # Thin accessor wrapper: the wrapped intervals become the first argument
    # of the graph-level function, and both options are passed by keyword
    # exactly as received.  The user-facing documentation is supplied through
    # the Appender decorator, so no inline docstring is written here.
    options = {"edges": edges, "include_index": include_index}
    return graph.adjacency_matrix(self._interval_array, **options)


def _register_accessors():
_register_accessor("piso", pd.IntervalIndex)(ArrayAccessor)
Expand Down
112 changes: 111 additions & 1 deletion piso/docstrings/accessor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from piso.graph import adjacency_matrix

union_examples = """
Examples
-----------
Expand Down Expand Up @@ -544,7 +546,7 @@ def join_params(list_of_param_strings):
"""
Indicates whether one, or more, sets are disjoint or not.
*interval_array* must be left-closed or right-closed if *interval_arrays is non-empty.
*interval_array* must be left-closed or right-closed if \\*interval_arrays is non-empty.
If no arguments are provided then this restriction does not apply.
"""
+ template_doc
Expand Down Expand Up @@ -782,3 +784,111 @@ def join_params(list_of_param_strings):
>>> pd.IntervalIndex.from_tuples([(0,2)]).piso.contains(1, include_index=False)
array([[ True]])
"""


# Documentation text for ``ArrayAccessor.split``.  It is written at column 0
# because the accessor module attaches it with ``Appender(..., indents=1)``.
split_docstring = """
Given a set of intervals, and break points, splits the intervals into pieces wherever
they overlap a break point.

The intervals are contained in the object the accessor belongs to.  They may be left-closed,
right-closed, both, or neither, and contain overlapping intervals.

Parameters
----------
x : scalar, or array-like of scalars
    Values in *x* should belong to the same domain as the intervals in the object
    the accessor belongs to.  May contain duplicates and be unsorted.

Returns
-------
:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
    Return type will be the same type as the object the accessor belongs to.

Examples
--------

>>> import pandas as pd
>>> import piso
>>> piso.register_accessors()

>>> arr = pd.arrays.IntervalArray.from_tuples(
...     [(0, 4), (2, 5)],
... )

>>> arr.piso.split(3)
<IntervalArray>
[(0, 3], (3, 4], (2, 3], (3, 5]]
Length: 4, closed: right, dtype: interval[int64]

>>> arr.piso.split([3,3,3,3])
<IntervalArray>
[(0, 3], (3, 4], (2, 3], (3, 5]]
Length: 4, closed: right, dtype: interval[int64]

>>> arr = pd.IntervalIndex.from_tuples(
...     [(0, 4), (2, 5)], closed="neither",
... )

>>> arr.piso.split([1, 6, 4])
IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)],
              closed='neither',
              dtype='interval[float64]')
"""


# Documentation text for ``ArrayAccessor.adjacency_matrix``.  It is written at
# column 0 because the accessor module attaches it with
# ``Appender(..., indents=1)``.  The accessor method takes only ``edges`` and
# ``include_index``; the intervals come from the object the accessor belongs
# to, so the examples must not pass the array as an argument.
adjacency_matrix_docstring = """
Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.

The set of nodes correspond to intervals and the edges are defined by the relationship
defined by the *edges* parameter.  The intervals are contained in the object the
accessor belongs to.

Note that the diagonal is defined with False values by default.

Parameters
----------
edges : {"intersect", "disjoint"}, default "intersect"
    Defines the relationship that edges between nodes represent.
include_index : bool, default True
    If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
    If False then a :class:`numpy.ndarray` is returned.

Returns
-------
:class:`pandas.DataFrame` or :class:`numpy.ndarray`
    Boolean valued, symmetrical, with False along diagonal.

Examples
--------

>>> import pandas as pd
>>> import piso
>>> piso.register_accessors()

>>> arr = pd.arrays.IntervalArray.from_tuples(
...    [(0,4), (3,6), (5, 7), (8,9), (9,10)],
...    closed="both",
... )

>>> arr.piso.adjacency_matrix()
         [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
[0, 4]    False    True   False   False    False
[3, 6]     True   False    True   False    False
[5, 7]    False    True   False   False    False
[8, 9]    False   False   False   False     True
[9, 10]   False   False   False    True    False

>>> arr.piso.adjacency_matrix(include_index=False)
array([[False,  True, False, False, False],
       [ True, False,  True, False, False],
       [False,  True, False, False, False],
       [False, False, False, False,  True],
       [False, False, False,  True, False]])

>>> arr.piso.adjacency_matrix(edges="disjoint")
         [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
[0, 4]    False   False    True    True     True
[3, 6]    False   False   False    True     True
[5, 7]     True   False   False    True     True
[8, 9]     True    True    True   False    False
[9, 10]    True    True    True   False    False
"""
47 changes: 47 additions & 0 deletions piso/docstrings/intervalarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,3 +791,50 @@ def join_params(list_of_param_strings):
>>> piso.contains(pd.IntervalIndex.from_tuples([(0,2)]), 1, include_index=False)
array([[ True]])
"""

# Documentation text describing the top-level ``piso.split`` function.
# NOTE(review): the place where this string is attached to the function is
# outside this view - confirm before relying on the exact layout.
split_docstring = """
Given a set of intervals, and break points, splits the intervals into pieces wherever
they overlap a break point.

Parameters
----------
interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
    Contains the (possibly overlapping) intervals.  May be left-closed, right-closed, both, or neither.
x : scalar, or array-like of scalars
    Values in *x* should belong to the same domain as the intervals in *interval_array*.
    May contain duplicates and be unsorted.

Returns
-------
:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
    Return type will be the same type as *interval_array*

Examples
--------

>>> import pandas as pd
>>> import piso

>>> arr = pd.arrays.IntervalArray.from_tuples(
...     [(0, 4), (2, 5)],
... )

>>> piso.split(arr, 3)
<IntervalArray>
[(0, 3], (3, 4], (2, 3], (3, 5]]
Length: 4, closed: right, dtype: interval[int64]

>>> piso.split(arr, [3,3,3,3])
<IntervalArray>
[(0, 3], (3, 4], (2, 3], (3, 5]]
Length: 4, closed: right, dtype: interval[int64]

>>> arr = pd.IntervalIndex.from_tuples(
...     [(0, 4), (2, 5)], closed="neither",
... )

>>> piso.split(arr, [1, 6, 4])
IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)],
              closed='neither',
              dtype='interval[float64]')
"""
89 changes: 89 additions & 0 deletions piso/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import numpy as np
import pandas as pd
from pandas.core.indexes import interval


def adjacency_matrix(interval_array, edges="intersect", include_index=True):
    """
    Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.

    The set of nodes correspond to intervals and the edges are defined by the relationship
    defined by the *edges* parameter.

    Note that the diagonal is always defined with False values, i.e. an interval
    is never considered to have an edge to itself.

    Parameters
    ----------
    interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
        Contains the intervals.
    edges : {"intersect", "disjoint"}, default "intersect"
        Defines the relationship that edges between nodes represent.
    include_index : bool, default True
        If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
        If False then a :class:`numpy.ndarray` is returned.

    Returns
    -------
    :class:`pandas.DataFrame` or :class:`numpy.ndarray`
        Boolean valued, symmetrical, with False along diagonal.

    Raises
    ------
    ValueError
        If *edges* is neither "intersect" nor "disjoint".

    Examples
    --------

    >>> import pandas as pd
    >>> import piso

    >>> arr = pd.arrays.IntervalArray.from_tuples(
    ...    [(0,4), (3,6), (5, 7), (8,9), (9,10)],
    ...    closed="both",
    ... )

    >>> piso.adjacency_matrix(arr)
             [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
    [0, 4]    False    True   False   False    False
    [3, 6]     True   False    True   False    False
    [5, 7]    False    True   False   False    False
    [8, 9]    False   False   False   False     True
    [9, 10]   False   False   False    True    False

    >>> piso.adjacency_matrix(arr, include_index=False)
    array([[False,  True, False, False, False],
           [ True, False,  True, False, False],
           [False,  True, False, False, False],
           [False, False, False, False,  True],
           [False, False, False,  True, False]])

    >>> piso.adjacency_matrix(arr, edges="disjoint")
             [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
    [0, 4]    False   False    True    True     True
    [3, 6]    False   False   False    True     True
    [5, 7]     True   False   False    True     True
    [8, 9]     True    True    True   False    False
    [9, 10]    True    True    True   False    False
    """
    if edges == "intersect":
        result = _adj_mat_intersection(interval_array)
    elif edges == "disjoint":
        result = ~_adj_mat_intersection(interval_array, fill_diagonal=False)
        # Negation alone only clears the diagonal when every interval
        # intersects itself.  An empty interval such as (2, 2] does not, so
        # the diagonal is cleared explicitly to honour the documented
        # "False along diagonal" contract.
        np.fill_diagonal(result, False)
    else:
        raise ValueError(f"Invalid value for edges parameter: {edges}")

    if include_index:
        result = pd.DataFrame(result, index=interval_array, columns=interval_array)

    return result


def _adj_mat_intersection(interval_array, fill_diagonal=True):
    """
    Return a square boolean array whose (i, j) entry is True when interval i
    and interval j intersect.

    Parameters
    ----------
    interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
        Contains the intervals.
    fill_diagonal : bool, default True
        If True the diagonal is overwritten with False.  If False the diagonal
        is left as computed by the endpoint comparisons.

    Returns
    -------
    :class:`numpy.ndarray`
    """
    # Work on plain NumPy arrays so the ``outer`` ufunc calls below do not
    # depend on pandas' ufunc dispatch for Index objects.
    left = np.asarray(interval_array.left)
    right = np.asarray(interval_array.right)

    # Strict comparison: the two intervals share more than a single endpoint.
    result = np.greater.outer(right, left) & np.less.outer(left, right)
    if interval_array.closed == "both":
        # With both endpoints closed, intervals touching at an endpoint
        # also intersect.
        result |= np.equal.outer(right, left) | np.equal.outer(left, right)
    if fill_diagonal:
        np.fill_diagonal(result, False)
    return result
Loading

0 comments on commit 43f44e4

Please sign in to comment.