Skip to content

Commit

Permalink
Merge branch 'master' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
venaturum committed Oct 22, 2021
2 parents 31340fe + 512bb16 commit 1a40cb6
Show file tree
Hide file tree
Showing 11 changed files with 417 additions and 5 deletions.
4 changes: 3 additions & 1 deletion docs/reference/accessors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ Accessors
ArrayAccessor.symmetric_difference
ArrayAccessor.isdisjoint
ArrayAccessor.issuperset
ArrayAccessor.issubset
ArrayAccessor.issubset
ArrayAccessor.coverage
ArrayAccessor.complement
5 changes: 4 additions & 1 deletion docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@ This page gives an overview of all public `piso` functionality. Classes and fun

package
accessors
interval
interval

.. automodule:: piso
:undoc-members:
4 changes: 3 additions & 1 deletion docs/reference/package.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ Top level functions
symmetric_difference
isdisjoint
issuperset
issubset
issubset
coverage
complement
10 changes: 10 additions & 0 deletions docs/release_notes/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ Release notes
========================


**v0.3.0 2021-10-23**

Added the following methods

- :meth:`piso.coverage`
- :meth:`piso.complement`
- :meth:`ArrayAccessor.coverage() <piso.accessor.ArrayAccessor.coverage>`
- :meth:`ArrayAccessor.complement() <piso.accessor.ArrayAccessor.complement>`


**v0.2.0 2021-10-15**

Added the following methods
Expand Down
2 changes: 2 additions & 0 deletions piso/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from piso.intervalarray import (
complement,
coverage,
difference,
intersection,
isdisjoint,
Expand Down
14 changes: 14 additions & 0 deletions piso/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,20 @@ def issubset(self, *interval_arrays, squeeze=False):
squeeze=squeeze,
)

@Appender(docstrings.coverage_docstring, join="\n", indents=1)
def coverage(self, domain=None):
    # No inline docstring on purpose: the user-facing documentation is
    # attached by the Appender decorator from docstrings.coverage_docstring.
    # Delegate to the module-level implementation, passing the interval array
    # held by this accessor together with the caller's (optional) domain.
    held_intervals = self._interval_array
    return intervalarray.coverage(held_intervals, domain)

@Appender(docstrings.complement_docstring, join="\n", indents=1)
def complement(self, domain=None):
    # No inline docstring on purpose: the user-facing documentation is
    # attached by the Appender decorator from docstrings.complement_docstring.
    # Delegate to the module-level implementation, passing the interval array
    # held by this accessor together with the caller's (optional) domain.
    held_intervals = self._interval_array
    return intervalarray.complement(held_intervals, domain)


def _register_accessors():
_register_accessor("piso", pd.IntervalIndex)(ArrayAccessor)
Expand Down
103 changes: 103 additions & 0 deletions piso/docstrings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,106 @@ def join_params(list_of_param_strings):
params=issubset_params,
examples=issubset_examples,
)


coverage_docstring = """
Calculates the fraction of a domain covered by a collection of intervals.
The intervals are contained in the array object the accessor belongs to.
The (possibly overlapping) intervals may cover none, part, or all of the domain.
Parameters
----------
domain : :py:class:`tuple`, :class:`pandas.Interval`, :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`, optional
Specifies the domain over which to calculate the "coverage". If *domain* is `None`,
then the domain is considered to be the extremities of the intervals contained in the interval array the accessor belongs to.
If *domain* is a tuple then it should specify lower and upper bounds, and be equivalent to a
:class:`pandas.Interval`. If *domain* is a :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
then the intervals it contains define a possibly disconnected domain.
Returns
----------
float
a number between 0 and 1, representing the fraction of the domain covered.
Examples
-----------
>>> import pandas as pd
>>> import piso
>>> piso.register_accessors()
>>> arr1 = pd.arrays.IntervalArray.from_tuples(
... [(0, 4), (3, 5), (7, 8)],
... )
>>> arr1.piso.coverage()
0.75
>>> arr1.piso.coverage((0, 10))
0.6
>>> arr1.piso.coverage(pd.Interval(-10, 10))
0.3
>>> domain = pd.arrays.IntervalArray.from_tuples(
... [(4,6), (7, 9)],
... )
>>> arr1.piso.coverage(domain)
0.5
"""

complement_docstring = """
Calculates the complement of a collection of intervals (in an array) over some domain.
Equivalent to the set difference of the domain and the intervals in the array that the accessor
belongs to.
Parameters
----------
domain : :py:class:`tuple`, :class:`pandas.Interval`, :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`, optional
Specifies the domain over which to calculate the "complement". If *domain* is `None`,
then the domain is considered to be the extremities of the intervals contained in the interval array
that the accessor belongs to. If *domain* is a tuple then it should specify lower and upper bounds, and be equivalent to a
:class:`pandas.Interval`. If *domain* is a :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
then the intervals it contains define a possibly disconnected domain.
Returns
----------
:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
The return type will be the same as the interval array object the accessor belongs to.
Examples
-----------
>>> import pandas as pd
>>> import piso
>>> piso.register_accessors()
>>> arr1 = pd.arrays.IntervalArray.from_tuples(
... [(0, 4), (3, 5), (7, 8)],
... )
>>> arr1.piso.complement()
<IntervalArray>
[(5, 7]]
Length: 1, closed: right, dtype: interval[int64]
>>> arr1.piso.complement((-5, 10))
<IntervalArray>
[(-5, 0], (5, 7], (8, 10]]
Length: 3, closed: right, dtype: interval[int64]
>>> arr1.piso.complement(pd.Interval(-5, 6))
<IntervalArray>
[(-5, 0], (5, 6]]
Length: 2, closed: right, dtype: interval[int64]
>>> domain = pd.arrays.IntervalArray.from_tuples(
... [(-5,-2), (7,10)],
... )
>>> arr1.piso.complement(domain)
<IntervalArray>
[(-5, -2], (8, 10]]
Length: 2, closed: right, dtype: interval[int64]
"""
103 changes: 103 additions & 0 deletions piso/docstrings/intervalarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,106 @@ def join_params(list_of_param_strings):
params=issubset_params,
examples=issubset_examples,
)


coverage_docstring = """
Calculates the fraction of a domain covered by a collection of intervals.
Parameters
----------
interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
Contains the (possibly overlapping) intervals which partially or wholly cover the domain.
domain : :py:class:`tuple`, :class:`pandas.Interval`, :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`, optional
Specifies the domain over which to calculate the "coverage". If *domain* is `None`,
then the domain is considered to be the extremities of the intervals contained in *interval_array*.
If *domain* is a tuple then it should specify lower and upper bounds, and be equivalent to a
:class:`pandas.Interval`. If *domain* is a :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
then the intervals it contains define a possibly disconnected domain.
Returns
----------
float
a number between 0 and 1, representing the fraction of the domain covered.
Examples
-----------
>>> import pandas as pd
>>> import piso
>>> arr1 = pd.arrays.IntervalArray.from_tuples(
... [(0, 4), (3, 5), (7, 8)],
... )
>>> piso.coverage(arr1)
0.75
>>> piso.coverage(arr1, (0, 10))
0.6
>>> piso.coverage(arr1, pd.Interval(-10, 10))
0.3
>>> domain = pd.arrays.IntervalArray.from_tuples(
... [(4,6), (7, 9)],
... )
>>> piso.coverage(arr1, domain)
0.5
"""


complement_docstring = """
Calculates the complement of a collection of intervals (in an array) over some domain.
Equivalent to the set difference of the domain and the intervals in the array.
Parameters
----------
interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
Contains the (possibly overlapping) intervals.
domain : :py:class:`tuple`, :class:`pandas.Interval`, :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`, optional
Specifies the domain over which to calculate the "complement". If *domain* is `None`,
then the domain is considered to be the extremities of the intervals contained in *interval_array*.
If *domain* is a tuple then it should specify lower and upper bounds, and be equivalent to a
:class:`pandas.Interval`. If *domain* is a :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
then the intervals it contains define a possibly disconnected domain.
Returns
----------
:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
The return type will be the same as *interval_array*.
Examples
-----------
>>> import pandas as pd
>>> import piso
>>> arr1 = pd.arrays.IntervalArray.from_tuples(
... [(0, 4), (3, 5), (7, 8)],
... )
>>> piso.complement(arr1)
<IntervalArray>
[(5, 7]]
Length: 1, closed: right, dtype: interval[int64]
>>> piso.complement(arr1, (-5, 10))
<IntervalArray>
[(-5, 0], (5, 7], (8, 10]]
Length: 3, closed: right, dtype: interval[int64]
>>> piso.complement(arr1, pd.Interval(-5, 6))
<IntervalArray>
[(-5, 0], (5, 6]]
Length: 2, closed: right, dtype: interval[int64]
>>> domain = pd.arrays.IntervalArray.from_tuples(
... [(-5,-2), (7,10)],
... )
>>> piso.complement(arr1, domain)
<IntervalArray>
[(-5, -2], (8, 10]]
Length: 2, closed: right, dtype: interval[int64]
"""
45 changes: 44 additions & 1 deletion piso/intervalarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def intersection(
*interval_arrays,
min_overlaps="all",
squeeze=False,
return_type="infer"
return_type="infer",
):
_validate_array_of_intervals_arrays(interval_array, *interval_arrays)
klass = _get_return_type(interval_array, return_type)
Expand Down Expand Up @@ -150,3 +150,46 @@ def _comp(ia):

issuperset = _create_is_super_or_sub("superset", docstrings.issuperset_docstring)
issubset = _create_is_super_or_sub("subset", docstrings.issubset_docstring)


def _get_domain_tuple(interval_array, domain):
if domain is None and len(interval_array) > 0:
domain = (interval_array.left.min(), interval_array.right.max())
elif domain is None and len(interval_array) == 0:
domain = (0, 1) # dummy domain to ensure no failure
elif isinstance(domain, tuple):
if len(domain) != 2:
raise ValueError(
f"If domain parameter is tuple then it must have length 2. Supplied argument has length {len(domain)}."
)
elif isinstance(domain, pd.Interval):
domain = (domain.left, domain.right)
else:
raise ValueError(
"The domain parameter must be either a 2-tuple, pandas.Interval, or None."
)
return domain


@Appender(docstrings.coverage_docstring, join="\n", indents=1)
def coverage(interval_array, domain=None):
    # No inline docstring on purpose: the user-facing documentation is
    # attached by the Appender decorator from docstrings.coverage_docstring.
    #
    # Convert the intervals with the module's stairs helper and make the
    # result boolean (presumably so overlapping intervals are not counted
    # more than once - confirm against the step-function library).
    covered = _interval_x_to_stairs(interval_array).make_boolean()

    if not isinstance(domain, (pd.IntervalIndex, pd.arrays.IntervalArray)):
        # None, tuple or pandas.Interval: reduce to (lower, upper) bounds and
        # take the mean of the step function clipped to those bounds.
        bounds = _get_domain_tuple(interval_array, domain)
        return covered.clip(*bounds).mean()

    # Interval-array domain (possibly disconnected): convert it the same way
    # and take the mean of the step function restricted to it.
    domain_stairs = _interval_x_to_stairs(domain)
    return covered.where(domain_stairs).mean()


@Appender(docstrings.complement_docstring, join="\n", indents=1)
def complement(interval_array, domain=None):
    # No inline docstring on purpose: the user-facing documentation is
    # attached by the Appender decorator from docstrings.complement_docstring.
    #
    # Convert the intervals with the module's stairs helper and invert the
    # result.
    inverted = _interval_x_to_stairs(interval_array).invert()

    if isinstance(domain, (pd.IntervalIndex, pd.arrays.IntervalArray)):
        # Interval-array domain (possibly disconnected): restrict to it.
        restricted = inverted.where(_interval_x_to_stairs(domain))
    else:
        # None, tuple or pandas.Interval: clip to (lower, upper) bounds.
        # NOTE(review): for an empty interval_array with domain=None the
        # helper returns the placeholder bounds (0, 1), so the complement is
        # taken over that placeholder - confirm this is the intended result.
        bounds = _get_domain_tuple(interval_array, domain)
        restricted = inverted.clip(*bounds)

    # Fill missing values with 0 before converting back to the same array
    # class that was passed in.
    return _boolean_stairs_to_interval_array(
        restricted.fillna(0),
        interval_array.__class__,
    )
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "piso"
version = "0.2.0"
version = "0.3.0"
description = "Pandas Interval Set Operations: methods for set operations for pandas' Interval, IntervalArray and IntervalIndex"
readme = "README.md"
authors = ["Riley Clement <[email protected]>"]
Expand Down
Loading

0 comments on commit 1a40cb6

Please sign in to comment.