Skip to content

Commit

Permalink
bump to 0.29.0; bump scipy and pandas dependencies; numpy is still below 2.0, but that is next
Browse files Browse the repository at this point in the history
  • Loading branch information
CamDavidsonPilon committed Jun 26, 2024
1 parent fb5ad90 commit 56ff1fa
Show file tree
Hide file tree
Showing 9 changed files with 20 additions and 16 deletions.
7 changes: 5 additions & 2 deletions lifelines/fitters/coxph_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3222,8 +3222,9 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p
cumulative_hazard = pd.DataFrame()

for stratum, stratified_X in df.groupby(self.strata):
print(self.params_)
log_lambdas_ = anp.array(
[0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(2, self.n_breakpoints + 2)]
[0] + [self.params_.loc[self._strata_labeler(stratum, i)].iloc[0] for i in range(2, self.n_breakpoints + 2)]
)
lambdas_ = np.exp(log_lambdas_)

Expand All @@ -3237,7 +3238,9 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p
return cumulative_hazard

else:
log_lambdas_ = np.array([0] + [self.params_[param][0] for param in self._fitted_parameter_names if param != "beta_"])
log_lambdas_ = np.array(
[0] + [self.params_.loc[param].iloc[0] for param in self._fitted_parameter_names if param != "beta_"]
)
lambdas_ = np.exp(log_lambdas_)

Xs = self.regressors.transform_df(df)
Expand Down
2 changes: 1 addition & 1 deletion lifelines/fitters/npmle.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def reconstruct_survival_function(

# First backfill at events between known observations
# Second fill all events _outside_ known obs with running_sum
return full_dataframe.combine_first(df).bfill().fillna(running_sum).clip(lower=0.0)
return full_dataframe.combine_first(df).astype(float).bfill().fillna(running_sum).clip(lower=0.0)


def npmle_compute_confidence_intervals(left, right, mle_, alpha=0.05, samples=1000):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _add_penalty(self, params, neg_ll):
coef_penalty = 0
if self.penalizer > 0:
for i in range(params_stacked.shape[1]):
if not self._cols_to_not_penalize[i]:
if not self._cols_to_not_penalize.iloc[i]:
coef_penalty = coef_penalty + (params_stacked[:, i]).var()

return neg_ll + self.penalizer * coef_penalty
Expand Down
4 changes: 2 additions & 2 deletions lifelines/generate_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from scipy import stats
from scipy.optimize import newton
from scipy.integrate import cumtrapz
from scipy.integrate import cumulative_trapezoid

random = np.random

Expand Down Expand Up @@ -308,7 +308,7 @@ def cumulative_integral(fx, x):
fx: (n,d) numpy array, what you want to integral of
x: (n,) numpy array, location to integrate over.
"""
return cumtrapz(fx.T, x, initial=0).T
return cumulative_trapezoid(fx.T, x, initial=0).T


def construct_survival_curves(hazard_rates, timelines):
Expand Down
3 changes: 2 additions & 1 deletion lifelines/tests/test_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ def test_cumulative_density_ci_is_ordered_correctly(self, positive_sample_lifeti
if not hasattr(fitter, "confidence_interval_cumulative_density_"):
continue
lower, upper = f"{fitter.label}_lower_0.95", f"{fitter.label}_upper_0.95"
print(fitter.confidence_interval_cumulative_density_)
assert np.all(
(fitter.confidence_interval_cumulative_density_[upper] - fitter.confidence_interval_cumulative_density_[lower])
>= 0
Expand Down Expand Up @@ -2008,7 +2009,7 @@ def test_joblib_serialization(self, rossi, regression_models):
def test_fit_will_accept_object_dtype_as_event_col(self, regression_models_sans_strata_model, rossi):
# issue #638
rossi["arrest"] = rossi["arrest"].astype(object)
rossi["arrest"].iloc[0] = None
rossi.loc[0, "arrest"] = None

assert rossi["arrest"].dtype == object
rossi = rossi.dropna()
Expand Down
2 changes: 1 addition & 1 deletion lifelines/tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def test_survival_table_from_events_at_risk_column():
1.0,
]
df = utils.survival_table_from_events(df["T"], df["E"])
assert list(df["at_risk"][1:]) == expected # skip the first event as that is the birth time, 0.
assert list(df["at_risk"].loc[1:]) == expected # skip the first event as that is the birth time, 0.


def test_survival_table_to_events_casts_to_float():
Expand Down
10 changes: 5 additions & 5 deletions lifelines/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def _group_event_table_by_intervals(event_table, intervals) -> pd.DataFrame:

intervals = np.arange(0, event_max + bin_width, bin_width)

event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True)).agg(
event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True), observed=False).agg(
{"removed": ["sum"], "observed": ["sum"], "censored": ["sum"], "at_risk": ["max"]}
)
# convert columns from multiindex
Expand Down Expand Up @@ -648,7 +648,7 @@ def datetimes_to_durations(
the units of time to use. See Pandas 'freq'. Default 'D' for days.
dayfirst: bool, optional (default=False)
see Pandas `to_datetime`
na_values : list, optional
na_values : list[str], optional
list of values to recognize as NA/NaN. Ex: ['', 'NaT']
format:
see Pandas `to_datetime`
Expand Down Expand Up @@ -679,7 +679,7 @@ def datetimes_to_durations(
start_times = pd.Series(start_times).copy()
end_times = pd.Series(end_times).copy()

C = ~(pd.isnull(end_times).values | end_times.isin(na_values or [""]))
C = ~(pd.isnull(end_times).values | end_times.astype(str).isin(na_values or [""]))
end_times[~C] = fill_date_
start_times_ = pd.to_datetime(start_times, dayfirst=dayfirst, format=format)
end_times_ = pd.to_datetime(end_times, dayfirst=dayfirst, errors="coerce", format=format)
Expand Down Expand Up @@ -1464,7 +1464,7 @@ def expand(df, cvs):
cv = cv.sort_values([id_col, duration_col])
cvs = cv.pipe(remove_redundant_rows).pipe(transform_cv_to_long_format).groupby(id_col, sort=True)

long_form_df = long_form_df.groupby(id_col, group_keys=False, sort=True).apply(expand, cvs=cvs)
long_form_df = long_form_df.groupby(id_col, group_keys=False, sort=True)[long_form_df.columns].apply(expand, cvs=cvs)
return long_form_df.reset_index(drop=True)


Expand Down Expand Up @@ -1506,7 +1506,7 @@ def covariates_from_event_matrix(df, id_col) -> pd.DataFrame:
"""
df = df.set_index(id_col)
df = df.fillna(np.inf)
df = df.stack(dropna=False).reset_index()
df = df.stack(future_stack=True).reset_index()
df.columns = [id_col, "event", "duration"]
df["_counter"] = 1
return (
Expand Down
2 changes: 1 addition & 1 deletion lifelines/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

__version__ = "0.28.0"
__version__ = "0.29.0"
4 changes: 2 additions & 2 deletions reqs/base-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
numpy>=1.14.0,<2.0
scipy>=1.2.0
pandas>=1.2.0
scipy>=1.7.0
pandas>=2.1

This comment has been minimized.

Copy link
@enryH

enryH Jul 1, 2024

Is there a reason why 2.1 and not 2 ?

This comment has been minimized.

Copy link
@CamDavidsonPilon

CamDavidsonPilon Jul 1, 2024

Author Owner

Yea, it was the future_stack=True in pandas 2.1

(which btw, feels like the strangest way to introduce forward compatibility)

This comment has been minimized.

Copy link
@enryH

enryH Jul 2, 2024

I see. I agree with the change update in 2.1. (https://pandas.pydata.org/docs/whatsnew/v2.1.0.html#new-implementation-of-dataframe-stack). I use stack myself quite a bit, but the default behaviour did not change? So did you use it to have Series including NAs which were present in the original DataFrame? I would just be interested to see the use case for inspiration.

fyi: I noticed your lower limit as it forced colab to install a newer version of pandas when I installed a package with lifelines as a dependency.

matplotlib>=3.0
autograd>=1.5
autograd-gamma>=0.3
Expand Down

0 comments on commit 56ff1fa

Please sign in to comment.