Skip to content

Commit 0bc188a

Browse files
authored
Merge pull request #287 from mkopec87/feature/update-pandas
Add pandas and numpy >= 2 compatibility
2 parents ac79d21 + 150710a commit 0bc188a

File tree

15 files changed

+117
-62
lines changed

15 files changed

+117
-62
lines changed

.github/workflows/build.yml

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
strategy:
1111
matrix:
1212
os: [ubuntu-latest]
13-
python: ['3.8', '3.9', '3.10', '3.11']
13+
python: ['3.9', '3.10', '3.11', '3.12']
1414
runs-on: ${{ matrix.os }}
1515

1616
steps:
@@ -35,21 +35,23 @@ jobs:
3535
- name: Test with pytest
3636
run: |
3737
pytest -m "not spark"
38-
38+
3939
test_spark:
4040
strategy:
4141
matrix:
4242
include:
43-
# - SPARK_VERSION: "2.4.8"
44-
# HADOOP_VERSION: "2.7"
45-
# JAVA_VERSION: "8"
46-
# python: "3.7"
47-
# os: ubuntu-latest
4843
- SPARK_VERSION: "3.3.2"
4944
HADOOP_VERSION: "3"
5045
JAVA_VERSION: "11"
51-
python: "3.8"
46+
python: "3.9"
5247
os: ubuntu-latest
48+
dependency_constraints: '"pandas<2" "numpy<2"'
49+
- SPARK_VERSION: "3.5.4"
50+
HADOOP_VERSION: "3"
51+
JAVA_VERSION: "11"
52+
python: "3.12"
53+
os: ubuntu-latest
54+
dependency_constraints: '"pandas>=2" "numpy>=2"'
5355
runs-on: ${{ matrix.os }}
5456
name: ${{ matrix.os }}, Spark ${{ matrix.SPARK_VERSION}}, Python ${{ matrix.python }}
5557

@@ -67,10 +69,9 @@ jobs:
6769
/home/runner/work/spark.tgz
6870
~/.cache/pip
6971
key: ${{ runner.os }}-spark-${{ matrix.SPARK_VERSION }}-hadoop${{ matrix.HADOOP_VERSION }}-java${{ matrix.JAVA_VERSION }}-${{ hashFiles('**/pyproject.toml') }}
70-
- name: Install dependencies
72+
- name: Install pip and setuptools
7173
run: |
7274
python -m pip install --upgrade pip setuptools
73-
pip install -e .[test]
7475
- name: Download spark
7576
if: steps.cache-spark.outputs.cache-hit != 'true'
7677
env:
@@ -93,6 +94,12 @@ jobs:
9394
# https://github.com/python-poetry/poetry/issues/6792
9495
pip3 install "pypandoc<1.8"
9596
pip install "pyspark==${SPARK_VERSION}"
97+
- name: Install Spark-related dependency versions
98+
run: |
99+
pip install ${{ matrix.dependency_constraints }}
100+
- name: Install project dependencies
101+
run: |
102+
pip install -e .[test]
96103
- name: Test with pytest (spark-specific)
97104
env:
98105
BUILD_DIR: "/home/runner/work/" #${{ github.workspace }}
@@ -106,15 +113,15 @@ jobs:
106113
107114
examples:
108115
runs-on: ubuntu-latest
109-
needs:
116+
needs:
110117
- test
111118
- test_spark
112119
steps:
113120
- uses: actions/checkout@v2
114-
- name: Set up Python 3.8
121+
- name: Set up Python 3.9
115122
uses: actions/setup-python@v1
116123
with:
117-
python-version: 3.8
124+
python-version: 3.9
118125
- name: Install dependencies
119126
run: |
120127
python -m pip install --upgrade pip
@@ -130,17 +137,17 @@ jobs:
130137
python flight_delays.py
131138
cd synthetic_data_streams
132139
python hyperplane.py
133-
- uses: actions/upload-artifact@v2
140+
- uses: actions/upload-artifact@v4
134141
with:
135142
name: synthetic-report
136143
path: examples/test_data_report.html
137144
if-no-files-found: error
138-
- uses: actions/upload-artifact@v2
145+
- uses: actions/upload-artifact@v4
139146
with:
140147
name: flight-delays-report
141148
path: examples/flight_delays_report.html
142149
if-no-files-found: error
143-
- uses: actions/upload-artifact@v2
150+
- uses: actions/upload-artifact@v4
144151
with:
145152
name: hyperplane-1-report
146153
path: examples/synthetic_data_streams/reports/hyperplane_1.html

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,5 @@ docs/build
147147
# Developer's playground
148148
/playground/
149149
.ruff_cache/
150+
151+
notebooks/report.html

docs/source/developing.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ For this you'll need to install our test requirements:
2727
.. code-block:: bash
2828
2929
cd popmon/
30-
pip install -r requirements-test.txt
31-
python setup.py test
30+
pip install -e .[test]
31+
pytest
3232
3333
That's it!
3434

popmon/analysis/functions.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def expanding_mean(df, shift: int = 1):
8383
:param int shift: size of shift. default is 1.
8484
:return: df with expanding means of columns
8585
"""
86-
return df.shift(shift).expanding().mean()
86+
return df.shift(shift).expanding().mean(numeric_only=True)
8787

8888

8989
def expanding_std(df, shift: int = 1):
@@ -95,7 +95,7 @@ def expanding_std(df, shift: int = 1):
9595
:param int shift: size of shift. default is 1.
9696
:return: df with expanding std of columns
9797
"""
98-
return df.shift(shift).expanding().std()
98+
return df.shift(shift).expanding().std(numeric_only=True)
9999

100100

101101
def expanding_apply(df, func, shift: int = 1, *args, **kwargs):
@@ -123,7 +123,7 @@ def rolling_std(df, window, shift: int = 1):
123123
:param int window: size of rolling window.
124124
:return: df with rolling std of columns
125125
"""
126-
return df.shift(shift).rolling(window).std()
126+
return df.shift(shift).rolling(window).std(numeric_only=True)
127127

128128

129129
def rolling_mean(df, window, shift: int = 1):
@@ -136,7 +136,7 @@ def rolling_mean(df, window, shift: int = 1):
136136
:param int window: size of rolling window.
137137
:return: df with rolling mean of columns
138138
"""
139-
return df.shift(shift).rolling(window).mean()
139+
return df.shift(shift).rolling(window).mean(numeric_only=True)
140140

141141

142142
def rolling_apply(df, window, func, shift: int = 1, *args, **kwargs):

popmon/analysis/profiling/profiles.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,7 @@ def replace(bl):
186186
if len(bin_labels) == 0 or len(bin_labels) > 4 or np.sum(bin_entries) == 0:
187187
return np.nan
188188
if not np.all([isinstance(bl, (bool, np.bool_)) for bl in bin_labels]):
189-
if not np.all(
190-
[isinstance(bl, (str, np.str_, np.string_)) for bl in bin_labels]
191-
):
189+
if not np.all([isinstance(bl, (str, np.str_, np.bytes_)) for bl in bin_labels]):
192190
return np.nan
193191
# all strings from hereon
194192
n_true = (bin_labels == "True").sum() + (bin_labels == "true").sum()

popmon/analysis/profiling/pull_calculator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1717
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1818
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19-
19+
from functools import partial
2020

2121
import numpy as np
2222
import pandas as pd
@@ -233,8 +233,8 @@ def __init__(
233233
:param kwargs: (dict, optional): residual kwargs passed on to mean and std functions
234234
"""
235235
super().__init__(
236-
np.mean,
237-
np.std,
236+
partial(pd.DataFrame.mean, numeric_only=True),
237+
partial(pd.DataFrame.std, numeric_only=True, ddof=0),
238238
reference_key,
239239
assign_to_key,
240240
store_key,

popmon/notebooks/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2023 ING Analytics Wholesale Banking
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
# this software and associated documentation files (the "Software"), to deal in
5+
# the Software without restriction, including without limitation the rights to
6+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
# the Software, and to permit persons to whom the Software is furnished to do so,
8+
# subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all
11+
# copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

popmon/resources.py

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,37 +20,30 @@
2020

2121
# Resources lookup file for popmon
2222
import json
23-
import pathlib
23+
from importlib import resources
2424

2525
from jinja2 import Environment, FileSystemLoader
26-
from pkg_resources import resource_filename
2726

28-
import popmon
27+
from popmon import notebooks, test_data, visualization
2928

3029
# data files that are shipped with popmon.
31-
_DATA = {
32-
_.name: _
33-
for _ in pathlib.Path(resource_filename(popmon.__name__, "test_data")).glob("*")
34-
}
30+
_DATA = {_.name: _ for _ in resources.files(test_data).iterdir()}
3531

3632
# Tutorial notebooks
3733
_NOTEBOOK = {
38-
_.name: _
39-
for _ in pathlib.Path(resource_filename(popmon.__name__, "notebooks")).glob(
40-
"*.ipynb"
41-
)
34+
p.name: p for p in resources.files(notebooks).iterdir() if p.suffix == ".ipynb"
4235
}
4336

4437
# Resource types
4538
_RESOURCES = {"data": _DATA, "notebook": _NOTEBOOK}
4639

4740
# Environment for visualization templates' directory
48-
_TEMPLATES_ENV = Environment(
49-
loader=FileSystemLoader(
50-
resource_filename(popmon.__name__, "visualization/templates")
51-
),
52-
autoescape=True,
53-
)
41+
ref = resources.files(visualization) / "templates"
42+
with resources.as_file(ref) as templates_dir_path:
43+
_TEMPLATES_ENV = Environment(
44+
loader=FileSystemLoader(templates_dir_path),
45+
autoescape=True,
46+
)
5447
_TEMPLATES_ENV.filters["fmt_metric"] = lambda x: x.replace("_", " ")
5548

5649

popmon/test_data/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2023 ING Analytics Wholesale Banking
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
# this software and associated documentation files (the "Software"), to deal in
5+
# the Software without restriction, including without limitation the rights to
6+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
# the Software, and to permit persons to whom the Software is furnished to do so,
8+
# subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all
11+
# copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ keywords = [
1717
"ipython"
1818
]
1919
readme = "README.rst"
20-
requires-python = ">=3.7"
20+
requires-python = ">=3.9"
2121
authors = [{name = "ING Analytics Wholesale Banking", email = "[email protected]"}]
2222
license = {type = "MIT", file = "LICENSE"}
2323
dependencies = [
2424
"numpy>=1.18.0",
25-
"pandas>=0.25.1,<2",
25+
"pandas>=0.25.1",
2626
"scipy>=1.5.2",
27-
"histogrammar>=1.0.32",
27+
"histogrammar>=1.0.34",
2828
"phik",
2929
"jinja2",
3030
"tqdm",

0 commit comments

Comments
 (0)