diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py
index d6972a78e..c2ac8a63e 100644
--- a/lifelines/fitters/coxph_fitter.py
+++ b/lifelines/fitters/coxph_fitter.py
@@ -3222,8 +3222,8 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p
 
             cumulative_hazard = pd.DataFrame()
             for stratum, stratified_X in df.groupby(self.strata):
                 log_lambdas_ = anp.array(
-                    [0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(2, self.n_breakpoints + 2)]
+                    [0] + [self.params_.loc[self._strata_labeler(stratum, i)].iloc[0] for i in range(2, self.n_breakpoints + 2)]
                 )
                 lambdas_ = np.exp(log_lambdas_)
 
@@ -3237,7 +3237,9 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p
 
             return cumulative_hazard
         else:
-            log_lambdas_ = np.array([0] + [self.params_[param][0] for param in self._fitted_parameter_names if param != "beta_"])
+            log_lambdas_ = np.array(
+                [0] + [self.params_.loc[param].iloc[0] for param in self._fitted_parameter_names if param != "beta_"]
+            )
             lambdas_ = np.exp(log_lambdas_)
 
             Xs = self.regressors.transform_df(df)
diff --git a/lifelines/fitters/npmle.py b/lifelines/fitters/npmle.py
index 53bd25a52..abc57dd53 100644
--- a/lifelines/fitters/npmle.py
+++ b/lifelines/fitters/npmle.py
@@ -291,7 +291,7 @@ def reconstruct_survival_function(
 
     # First backfill at events between known observations
     # Second fill all events _outside_ known obs with running_sum
-    return full_dataframe.combine_first(df).bfill().fillna(running_sum).clip(lower=0.0)
+    return full_dataframe.combine_first(df).astype(float).bfill().fillna(running_sum).clip(lower=0.0)
 
 
 def npmle_compute_confidence_intervals(left, right, mle_, alpha=0.05, samples=1000):
diff --git a/lifelines/fitters/piecewise_exponential_regression_fitter.py b/lifelines/fitters/piecewise_exponential_regression_fitter.py
index 6643bed70..77b282931 100644
--- a/lifelines/fitters/piecewise_exponential_regression_fitter.py
+++ b/lifelines/fitters/piecewise_exponential_regression_fitter.py
@@ -66,7 +66,7 @@ def _add_penalty(self, params, neg_ll):
         coef_penalty = 0
         if self.penalizer > 0:
             for i in range(params_stacked.shape[1]):
-                if not self._cols_to_not_penalize[i]:
+                if not self._cols_to_not_penalize.iloc[i]:
                     coef_penalty = coef_penalty + (params_stacked[:, i]).var()
 
         return neg_ll + self.penalizer * coef_penalty
diff --git a/lifelines/generate_datasets.py b/lifelines/generate_datasets.py
index 7694b6444..4f92e77a9 100644
--- a/lifelines/generate_datasets.py
+++ b/lifelines/generate_datasets.py
@@ -5,7 +5,7 @@
 from scipy import stats
 from scipy.optimize import newton
-from scipy.integrate import cumtrapz
+from scipy.integrate import cumulative_trapezoid
 
 random = np.random
 
@@ -308,7 +308,7 @@ def cumulative_integral(fx, x):
     fx: (n,d) numpy array, what you want to integral of
     x: (n,) numpy array, location to integrate over.
     """
-    return cumtrapz(fx.T, x, initial=0).T
+    return cumulative_trapezoid(fx.T, x, initial=0).T
 
 
 def construct_survival_curves(hazard_rates, timelines):
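The pandas-facing edits above share one cause: pandas 2.x deprecated `Series.__getitem__` silently falling back from label to positional lookup, which is exactly what `self.params_[label][0]` and `self._cols_to_not_penalize[i]` relied on, so each call site now spells out `.loc` for labels and `.iloc` for positions. (The `cumtrapz` change is the SciPy analogue: the old alias was deprecated in favor of `cumulative_trapezoid` and removed in SciPy 1.14.) A minimal sketch of the indexing distinction, using a toy Series rather than lifelines' real `params_` labels:

import pandas as pd

# Toy stand-in for a fitted params_ Series; the index labels here are made up.
params = pd.Series(
    [0.1, 0.2, 0.3],
    index=pd.MultiIndex.from_tuples(
        [("lambda_1_", "_intercept"), ("lambda_2_", "_intercept"), ("beta_", "age")]
    ),
)

params["lambda_1_"][0]           # deprecated: label lookup, then positional fallback
params.loc["lambda_1_"].iloc[0]  # explicit: .loc for labels, .iloc for positions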
""" - return cumtrapz(fx.T, x, initial=0).T + return cumulative_trapezoid(fx.T, x, initial=0).T def construct_survival_curves(hazard_rates, timelines): diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index b7c9c2969..7f4e510e5 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -490,6 +490,7 @@ def test_cumulative_density_ci_is_ordered_correctly(self, positive_sample_lifeti if not hasattr(fitter, "confidence_interval_cumulative_density_"): continue lower, upper = f"{fitter.label}_lower_0.95", f"{fitter.label}_upper_0.95" + print(fitter.confidence_interval_cumulative_density_) assert np.all( (fitter.confidence_interval_cumulative_density_[upper] - fitter.confidence_interval_cumulative_density_[lower]) >= 0 @@ -2008,7 +2009,7 @@ def test_joblib_serialization(self, rossi, regression_models): def test_fit_will_accept_object_dtype_as_event_col(self, regression_models_sans_strata_model, rossi): # issue #638 rossi["arrest"] = rossi["arrest"].astype(object) - rossi["arrest"].iloc[0] = None + rossi.loc[0, "arrest"] = None assert rossi["arrest"].dtype == object rossi = rossi.dropna() diff --git a/lifelines/tests/utils/test_utils.py b/lifelines/tests/utils/test_utils.py index cab3ca0a6..743793707 100644 --- a/lifelines/tests/utils/test_utils.py +++ b/lifelines/tests/utils/test_utils.py @@ -347,7 +347,7 @@ def test_survival_table_from_events_at_risk_column(): 1.0, ] df = utils.survival_table_from_events(df["T"], df["E"]) - assert list(df["at_risk"][1:]) == expected # skip the first event as that is the birth time, 0. + assert list(df["at_risk"].loc[1:]) == expected # skip the first event as that is the birth time, 0. def test_survival_table_to_events_casts_to_float(): diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index 9b1767f31..3723a9355 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -556,7 +556,7 @@ def _group_event_table_by_intervals(event_table, intervals) -> pd.DataFrame: intervals = np.arange(0, event_max + bin_width, bin_width) - event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True)).agg( + event_table = event_table.groupby(pd.cut(event_table["event_at"], intervals, include_lowest=True), observed=False).agg( {"removed": ["sum"], "observed": ["sum"], "censored": ["sum"], "at_risk": ["max"]} ) # convert columns from multiindex @@ -648,7 +648,7 @@ def datetimes_to_durations( the units of time to use. See Pandas 'freq'. Default 'D' for days. dayfirst: bool, optional (default=False) see Pandas `to_datetime` - na_values : list, optional + na_values : list[str], optional list of values to recognize as NA/NaN. 
diff --git a/lifelines/version.py b/lifelines/version.py
index ca9e20db6..112869107 100644
--- a/lifelines/version.py
+++ b/lifelines/version.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-__version__ = "0.28.0"
+__version__ = "0.29.0"
diff --git a/reqs/base-requirements.txt b/reqs/base-requirements.txt
index 816cf6453..862b2ca0a 100644
--- a/reqs/base-requirements.txt
+++ b/reqs/base-requirements.txt
@@ -1,6 +1,6 @@
 numpy>=1.14.0,<2.0
-scipy>=1.2.0
-pandas>=1.2.0
+scipy>=1.7.0
+pandas>=2.1
 matplotlib>=3.0
 autograd>=1.5
 autograd-gamma>=0.3
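The `pandas>=2.1` floor matches the APIs used above (`future_stack`, the `observed` deprecation, stricter dtype handling in `bfill`), and `scipy>=1.7.0` covers `cumulative_trapezoid`. The `rossi.loc[0, "arrest"] = None` test fix is worth a final illustration: under Copy-on-Write semantics (opt-in in pandas 2.x, the default in 3.0), chained assignment writes to a temporary object and is lost. A toy sketch, not the actual rossi dataset:

import pandas as pd

pd.set_option("mode.copy_on_write", True)  # opt in to CoW on pandas 2.x

df = pd.DataFrame({"arrest": [1, 0, 1]}, dtype=object)  # hypothetical frame
df["arrest"].iloc[0] = None   # chained assignment: hits a temporary copy, lost
df.loc[0, "arrest"] = None    # single .loc call: writes through to df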