def price_eval(
    x: DataFrame,
    model: Any,
    feature_names: Iterable,
    burning_cost_pct: Union[float, int] = 0.8,
    threshold: Union[float, int] = 0.5,
) -> DataFrame:
    """Score every candidate price of one entity against its original price.

    Args:
        x: Candidate frame of a single entity. Its first column holds the
            candidate prices and its ``'orig_premium'`` column the original price.
        model: Object exposing a callable ``predict_proba``; column 1 of its
            output is used as the prediction for each row.
        feature_names: Columns handed to the model. Any iterable of column
            names is accepted; a plain string is passed to pandas unchanged.
        burning_cost_pct: Fraction of the original price subtracted as cost.
        threshold: Minimum prediction for a row's profit to count as "actual".

    Returns:
        DataFrame with one row per candidate and the columns ``price``,
        ``orig_price``, ``pred``, ``orig_pred``, ``profit``, ``profit_orig``,
        ``act_profit`` and ``act_profit_orig``.

    Raises:
        ValueError: If ``model`` has no callable ``predict_proba``.
    """
    if not (hasattr(model, 'predict_proba') and callable(model.predict_proba)):
        raise ValueError("Model has no predict_proba() method.")

    # Materialise the iterable once: it is used for two lookups, and pandas
    # would treat a tuple as a single (missing) column key.
    features = feature_names if isinstance(feature_names, str) else list(feature_names)

    price_name = x.columns[0]
    # Read the price column on its own; converting the whole frame would
    # degrade the prices to object dtype whenever another column is non-numeric.
    prices = x.iloc[:, 0].to_numpy()
    old_price = x['orig_premium'].to_numpy()

    pred = model.predict_proba(x[features])[:, 1]
    # Same rows, but with the original price sitting in the price column.
    x_orig = x.drop(price_name, axis=1).rename({'orig_premium': price_name}, axis=1)
    pred_orig = model.predict_proba(x_orig[features])[:, 1]

    # Algebraically equal to pred * prices * (1 - burning_cost_pct * old_price / prices)
    # but free of the division, so a candidate price of 0 no longer yields NaN.
    profit = pred * (prices - burning_cost_pct * old_price)
    profit_orig = pred_orig * old_price * (1 - burning_cost_pct)

    # Profit is only realised for rows whose prediction reaches the threshold.
    act_profit = profit * (pred >= threshold)
    act_profit_orig = profit_orig * (pred_orig >= threshold)

    scores = np.column_stack(
        (prices, old_price, pred, pred_orig, profit, profit_orig, act_profit, act_profit_orig)
    )
    return DataFrame(
        scores,
        columns=['price', 'orig_price', 'pred', 'orig_pred', 'profit', 'profit_orig', 'act_profit', 'act_profit_orig'],
    )


def eval_candidate(
    df: Union[Series, DataFrame],
    model: Any,
    feature_names: Iterable,
    burning_cost_pct: Union[float, int] = 0.8,
    threshold: Union[float, int] = 0.5,
) -> Series:
    """Apply :func:`price_eval` to every element of ``df``.

    Args:
        df: Container whose ``apply`` yields one candidate frame per call
            (the package tests pass the Series returned by ``candidates``).
        model: Forwarded to :func:`price_eval`.
        feature_names: Forwarded to :func:`price_eval`.
        burning_cost_pct: Forwarded to :func:`price_eval`.
        threshold: Forwarded to :func:`price_eval`.

    Returns:
        The result of ``df.apply``: one evaluation frame per element.
    """
    # Materialise once so a one-shot iterable survives repeated price_eval calls.
    features = feature_names if isinstance(feature_names, str) else list(feature_names)
    return df.apply(
        lambda frame: price_eval(frame, model, features, burning_cost_pct=burning_cost_pct, threshold=threshold)
    )
@jit(cache=True, nopython=True)
def gen_prices(
    price: Union[int, float], lower_bound: Union[int, float], upper_upper: Union[int, float], step: float
) -> np.ndarray:
    """Return ``price`` scaled by every multiplier in ``np.arange(lower_bound, upper_upper, step)``."""
    multipliers = np.arange(lower_bound, upper_upper, step)
    return price * multipliers


@jit(cache=True, nopython=True)
def filter_candidates(
    prices: np.ndarray,
    minimum: Union[None, int, float] = None,
    maximum: Union[None, int, float] = None,
    frac_min: Union[int, float] = 1,
    frac_max: Union[int, float] = 1,
) -> np.ndarray:
    """Keep the prices lying in ``[frac_min * minimum, frac_max * maximum]``.

    Args:
        prices: Candidate prices.
        minimum: Lower reference value; ``None`` means the smallest candidate.
        maximum: Upper reference value; ``None`` means the largest candidate.
        frac_min: Multiplier applied to the lower reference value.
        frac_max: Multiplier applied to the upper reference value.

    Returns:
        The candidates inside the (inclusive) bounds.

    Raises:
        ValueError: If no candidate is left.
    """
    # Compare with None instead of testing truthiness so that an explicit
    # bound of 0 is honoured rather than silently replaced by the data range.
    if minimum is None:
        min_ = np.min(prices)
    else:
        min_ = minimum
    if maximum is None:
        max_ = np.max(prices)
    else:
        max_ = maximum
    bound_filtered = prices[(prices >= frac_min * min_) & (prices <= frac_max * max_)]
    if len(bound_filtered) == 0:
        raise ValueError('Filters is too restrictive: no candidates left.')
    return bound_filtered


def gen_potential_prices(
    entity: Series,
    price_name: str,
    lower_bound: Union[float, int] = 0.25,
    upper_upper: Union[float, int] = 2.05,
    step: Union[float, int] = 0.05,
    decimals: int = 2,
    filter_minimum: Union[None, str, int, float] = None,
    filter_maximum: Union[None, str, int, float] = None,
    filter_frac_min: Union[int, float] = 1,
    filter_frac_max: Union[int, float] = 1,
    dtypes: Union[None, dict] = None,
) -> DataFrame:
    """Build the candidate-price frame of a single entity.

    Args:
        entity: One row of the source data.
        price_name: Label of the price inside ``entity``.
        lower_bound: First price multiplier.
        upper_upper: End of the multiplier range (passed to ``np.arange``).
        step: Multiplier increment.
        decimals: Number of decimals the candidate prices are rounded to.
        filter_minimum: Lower reference for filtering: a number, or the label
            of the ``entity`` field holding it. ``None`` disables it.
        filter_maximum: Upper reference, same conventions as ``filter_minimum``.
        filter_frac_min: Multiplier applied to the lower reference.
        filter_frac_max: Multiplier applied to the upper reference.
        dtypes: Optional column -> dtype mapping applied to the result; the
            ``'orig_premium'`` column receives the dtype listed for ``price_name``.

    Returns:
        DataFrame whose first column (named ``price_name``) holds the rounded
        candidates, followed by the entity's fields repeated on every row with
        the original price renamed to ``'orig_premium'``.

    Raises:
        ValueError: If no candidate price is generated or survives filtering.
    """
    prices = gen_prices(entity[price_name], lower_bound, upper_upper, step)
    # `is not None` so that a bound of 0 still triggers filtering.
    if filter_minimum is not None or filter_maximum is not None:
        # A string names the entity field that carries the bound.
        if isinstance(filter_minimum, str):
            filter_minimum = entity[filter_minimum]
        if isinstance(filter_maximum, str):
            filter_maximum = entity[filter_maximum]
        prices = filter_candidates(
            prices, minimum=filter_minimum, maximum=filter_maximum, frac_min=filter_frac_min, frac_max=filter_frac_max
        )
    n_candidates = len(prices)
    if n_candidates == 0:
        # Previously surfaced as an opaque ValueError raised by np.vstack.
        raise ValueError('No candidate prices generated: check lower_bound, upper_upper and step.')

    repeated_entity = np.vstack([entity.to_numpy()] * n_candidates)
    table = np.column_stack((np.round(prices, decimals), repeated_entity))
    columns = [price_name] + ['orig_premium' if label == price_name else label for label in entity.index]
    df_c = DataFrame(table, columns=columns)
    if dtypes:
        df_c['orig_premium'] = df_c['orig_premium'].astype(dtypes[price_name])
        # NOTE(review): this also casts the candidate prices to the dtype of the
        # original price column, so an integer price column drops the decimals
        # of the candidates - confirm this is intended.
        df_c = df_c.astype(dtypes)
    return df_c


def candidates(
    df: DataFrame,
    price_name: str,
    lower_bound: Union[float, int] = 0.25,
    upper_upper: Union[float, int] = 2.05,
    step: Union[float, int] = 0.05,
    decimals: int = 2,
    filter_minimum: Union[None, str, int, float] = None,
    filter_maximum: Union[None, str, int, float] = None,
    filter_frac_min: Union[int, float] = 1.0,
    filter_frac_max: Union[int, float] = 1.0,
) -> Series:
    """Generate a candidate-price frame for every row of ``df``.

    All keyword parameters are forwarded to :func:`gen_potential_prices`,
    together with the column dtypes of ``df``.

    Returns:
        The row-wise ``df.apply`` result: one candidate frame per input row.

    Raises:
        ValueError: If the price column contains missing or zero values.
    """
    price_column = df[price_name]
    if price_column.isnull().any() or (price_column == 0).any():
        raise ValueError(f"Dataframe contains empty or zero values in price column '{price_name}'!")

    # Loop-invariant: compute the dtype mapping once instead of once per row.
    dtypes = df.dtypes.to_dict()
    return df.apply(
        lambda row: gen_potential_prices(
            entity=row,
            price_name=price_name,
            lower_bound=lower_bound,
            upper_upper=upper_upper,
            step=step,
            decimals=decimals,
            filter_minimum=filter_minimum,
            filter_maximum=filter_maximum,
            filter_frac_min=filter_frac_min,
            filter_frac_max=filter_frac_max,
            dtypes=dtypes,
        ),
        axis=1,
    )
@jit(cache=True, nopython=True)
def max_profit(x: np.ndarray) -> np.ndarray:
    """Select the single best row of a 2-D evaluation array.

    Rows are ranked by the second-to-last column (largest wins); ties are
    broken by column 2 (largest wins) and then by column 0 (smallest wins).
    With the 8-column layout ``price, orig_price, pred, orig_pred, profit,
    profit_orig, act_profit, act_profit_orig`` these are ``act_profit``,
    ``pred`` and ``price``.

    Returns:
        A 2-D array holding the winning row.
    """
    best = x[x[:, -2] == x[:, -2].max()]
    if best.shape[0] > 1:
        best = best[best[:, 2] == best[:, 2].max()]
    if best.shape[0] > 1:
        best = best[best[:, 0] == best[:, 0].min()]
    # Rows still tied here agree on all three keys (e.g. duplicate rounded
    # prices); keep one so callers always get exactly one row per entity.
    return best[:1]


@jit(cache=True, nopython=True)
def max_conversion(x: np.ndarray, threshold: Union[float, int] = 0.5) -> np.ndarray:
    """Select one row of an evaluation array, favouring conversion.

    Column meaning: 0 = price, 1 = orig_price, 2 = pred, 3 = orig_pred.

    * The reference row is the candidate whose price is within 0.01 of the
      original price, or the :func:`max_profit` row if there is none.
    * If every ``orig_pred`` reaches ``threshold`` the reference row is kept.
    * Otherwise the :func:`max_profit` row among candidates whose ``pred``
      reaches ``threshold`` is returned, or the reference row if none does.

    Returns:
        A 2-D array holding the chosen row.
    """
    orig = x[np.abs(x[:, 0] - x[:, 1].max()) < 0.01]
    if orig.shape[0] == 0:
        orig = max_profit(x)
    # Use the caller's threshold here as well (this comparison used to be
    # hard-coded to 0.5 and ignored the parameter).
    if x[:, 3].min() >= threshold:
        choice = orig
    else:
        converted = x[x[:, 2] >= threshold]
        if converted.shape[0] == 0:
            choice = orig
        else:
            choice = max_profit(converted)
    # Several candidates can sit within 0.01 of the original price; return one.
    return choice[:1]


def maximize(
    df: DataFrame, method: Literal['profit', 'conversion'] = 'profit', threshold: Union[float, int] = 0.5
) -> DataFrame:
    """Reduce each evaluation frame in ``df`` to its selected row.

    Args:
        df: Collection of evaluation frames, one per entity (the package tests
            pass a Series of DataFrames).
        method: ``'profit'`` uses :func:`max_profit`, ``'conversion'`` uses
            :func:`max_conversion`.
        threshold: Passed to :func:`max_conversion`; unused for ``'profit'``.

    Returns:
        DataFrame with one row per element of ``df``, indexed like ``df``.

    Raises:
        ValueError: If ``method`` is neither ``'profit'`` nor ``'conversion'``.
    """
    if method == 'profit':
        chosen = [DataFrame(max_profit(frame.to_numpy(dtype=float)), columns=frame.columns) for frame in df]
    elif method == 'conversion':
        chosen = [
            DataFrame(max_conversion(frame.to_numpy(dtype=float), threshold=threshold), columns=frame.columns)
            for frame in df
        ]
    else:
        raise ValueError('method should be one of ["profit", "conversion"]')
    result = concat(chosen)
    # One selected row per entity, so the input index can be reused directly.
    result.index = df.index
    return result
def dynamic_price(
    df: DataFrame,
    price_name: str,
    model: Any,
    feature_names: Iterable,
    burning_cost_pct: Union[float, int] = 0.8,
    threshold: Union[float, int] = 0.5,
    method: Literal['profit', 'conversion'] = 'profit',
    lower_bound: Union[float, int] = 0.25,
    upper_upper: Union[float, int] = 2.05,
    step: Union[float, int] = 0.05,
    decimals: int = 2,
    filter_minimum: Union[None, str, int, float] = None,
    filter_maximum: Union[None, str, int, float] = None,
    filter_frac_min: Union[float, int] = 1,
    filter_frac_max: Union[float, int] = 1,
) -> DataFrame:
    """Run the full pricing pipeline: generate, evaluate, then select.

    The price-grid and filter arguments go to ``candidates``; ``model``,
    ``feature_names``, ``burning_cost_pct`` and ``threshold`` go to
    ``eval_candidate``; ``method`` and ``threshold`` go to ``maximize``.

    Returns:
        Whatever ``maximize`` returns for the evaluated candidates.
    """
    # Step 1: candidate prices for every row of the input.
    generated = candidates(
        df,
        price_name=price_name,
        lower_bound=lower_bound,
        upper_upper=upper_upper,
        step=step,
        decimals=decimals,
        filter_minimum=filter_minimum,
        filter_maximum=filter_maximum,
        filter_frac_min=filter_frac_min,
        filter_frac_max=filter_frac_max,
    )
    # Step 2: score each candidate with the model.
    evaluated = eval_candidate(
        generated,
        model,
        feature_names,
        burning_cost_pct=burning_cost_pct,
        threshold=threshold,
    )
    # Step 3: keep the selected candidate per entity.
    return maximize(evaluated, method=method, threshold=threshold)
_FEATURE_NAMES = ['feature1', 'feature2']


class MockModel:
    """Stand-in model whose ``predict_proba`` returns random values."""

    @staticmethod
    def predict_proba(x):
        return np.random.rand(len(x), 2)


class MockModel2:
    """Stand-in model that only offers ``predict`` (no ``predict_proba``)."""

    @staticmethod
    def predict(x):
        return np.random.rand(len(x), 2)


def _priced_frame():
    """Two candidate rows that already carry an ``orig_premium`` column."""
    return DataFrame({'price': [100, 150], 'orig_premium': [80, 100], 'feature1': [1, 2], 'feature2': [3, 4]})


def test_price_eval():
    """price_eval returns one 8-column row per candidate."""
    scored = price_eval(_priced_frame(), MockModel(), _FEATURE_NAMES, 0.8, 0.5)
    assert isinstance(scored, DataFrame)
    assert scored.shape == (2, 8)


def test_price_eval2():
    """A model without predict_proba is rejected."""
    with pytest.raises(ValueError):
        price_eval(_priced_frame(), MockModel2(), _FEATURE_NAMES, 0.8, 0.5)


def test_eval_candidate():
    """eval_candidate yields one evaluation per input row."""
    raw = DataFrame({'price': [100, 150], 'feature1': [1, 2], 'feature2': [3, 4]})
    evaluated = eval_candidate(candidates(raw, 'price'), MockModel(), _FEATURE_NAMES, 0.8, 0.5)
    assert isinstance(evaluated, Series)
    assert len(evaluated) == len(raw)
# Prices produced by scaling 100 with the multipliers 0.5, 0.6, ..., 1.9.
_GRID = [50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0, 170.0, 180.0, 190.0]
# The part of the grid lying between 80 and 150 (both inclusive).
_GRID_80_150 = [80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0]


def _expected_frame(prices, orig_premium, foo, bar):
    """Expected candidate frame: the prices plus the entity repeated per row."""
    n_rows = len(prices)
    return DataFrame(
        {
            'price': prices,
            'orig_premium': [orig_premium] * n_rows,
            'foo': [foo] * n_rows,
            'bar': [bar] * n_rows,
        }
    )


def _assert_candidates_rejected(df):
    """candidates() must raise ValueError for this input."""
    with pytest.raises(ValueError):
        candidates(
            df,
            price_name='price',
            lower_bound=0.5,
            upper_upper=2.0,
            step=0.1,
            decimals=2,
            filter_maximum=275,
        )


def test_gen_prices():
    """The grid is price times each multiplier."""
    assert np.allclose(gen_prices(100, 0.5, 2.0, 0.1), np.array(_GRID))


def test_filter_candidates():
    """Explicit bounds combined with fractions."""
    kept = filter_candidates(np.array(_GRID), minimum=80, maximum=300, frac_min=1, frac_max=0.5)
    assert np.allclose(kept, np.array(_GRID_80_150))


def test_filter_candidates2():
    """Without bounds nothing is removed."""
    kept = filter_candidates(np.array(_GRID))
    assert np.allclose(kept, np.array(_GRID))


def test_filter_candidates3():
    """Bounds that exclude everything raise."""
    with pytest.raises(ValueError):
        filter_candidates(np.array(_GRID), minimum=200)


def test_gen_potential_prices():
    """Numeric filter bounds with explicit dtypes."""
    entity = Series({'price': 100, 'foo': True, 'bar': 'text'})
    expected_df = _expected_frame(_GRID_80_150, 100, True, 'text')
    produced = gen_potential_prices(
        entity, 'price', 0.5, 2.0, 0.1, 2, 160, 300, 0.5, 0.5, dtypes=expected_df.dtypes.to_dict()
    )
    assert produced.equals(expected_df)


def test_gen_potential_prices2():
    """No filtering at all."""
    entity = Series({'price': 100, 'foo': True, 'bar': 'text'})
    expected_df = _expected_frame(_GRID, 100, True, 'text')
    produced = gen_potential_prices(entity, 'price', 0.5, 2.0, 0.1, 2, dtypes=expected_df.dtypes.to_dict())
    assert produced.equals(expected_df)


def test_gen_potential_prices3():
    """Filter bounds given as entity field names."""
    entity = Series({'price': 100.0, 'foo': 300, 'bar': 80})
    expected_df = _expected_frame(_GRID_80_150, 100.0, 300.0, 80.0)
    produced = gen_potential_prices(entity, 'price', 0.5, 2.0, 0.1, 2, 'bar', 'foo', 1, 0.5)
    assert produced.equals(expected_df)


def test_candidates():
    """One candidate frame per row, cast back to the source dtypes."""
    df = DataFrame({'price': [100, 150], 'foo': [True, None], 'bar': [-2.0, 'text']})
    casts = {'price': 'int64', 'foo': 'object', 'bar': 'object'}
    expected_series = Series(
        [
            _expected_frame(np.arange(50, 200, 10), 100, True, -2.0).astype(casts),
            _expected_frame(np.arange(75, 275, 15), 150, None, 'text').astype(casts),
        ]
    )
    results = candidates(
        df,
        price_name='price',
        lower_bound=0.5,
        upper_upper=2.0,
        step=0.1,
        decimals=2,
        filter_maximum=275,
    )
    for position, produced in enumerate(results):
        assert produced.equals(expected_series.iloc[position])


def test_candidates2():
    """A missing price is rejected."""
    _assert_candidates_rejected(DataFrame({'price': [None, 150], 'foo': [True, None], 'bar': [-2.0, 'text']}))


def test_candidates3():
    """A zero price is rejected."""
    _assert_candidates_rejected(DataFrame({'price': [150, 0], 'foo': [True, None], 'bar': [-2.0, 'text']}))
def _evaluation_frame(price=(95.0, 100.0, 105.0), pred=(0.71, 0.68, 0.28), orig_pred=(0.72, 0.57, 0.56)):
    """Three-candidate evaluation frame; only the varying columns are parameters."""
    return DataFrame(
        {
            'price': list(price),
            'orig_price': [100.0, 100.0, 100.0],
            'pred': list(pred),
            'orig_pred': list(orig_pred),
            'profit': [10.68, 13.68, 7.14],
            'profit_orig': [14.59, 11.55, 11.30],
            'act_profit': [10.68, 13.68, 0.0],
            'act_profit_orig': [14.59, 11.55, 11.30],
        }
    )


def test_max_profit():
    """The row with the largest second-to-last column wins."""
    rows = np.array([[100, 80, 0.7, 0.6], [150, 100, 0.8, 0.5], [120, 90, 0.6, 0.7]])
    assert np.array_equal(max_profit(rows), np.array([[150.0, 100.0, 0.8, 0.5]]))


def test_max_profit2():
    """Full ties fall back to the smallest price."""
    rows = np.array([[100, 80, 0.7, 0.6], [150, 100, 0.7, 0.5], [120, 90, 0.7, 0.7]])
    assert np.array_equal(max_profit(rows), np.array([[100, 80, 0.7, 0.6]]))


def test_max_conversion():
    """All original predictions pass: the original-price row is kept."""
    frame = _evaluation_frame()
    expected_result = np.array([[100.0, 100.0, 0.68, 0.57, 13.68, 11.55, 13.68, 11.55]])
    assert np.array_equal(max_conversion(frame.to_numpy(dtype=float)), expected_result)


def test_max_conversion2():
    """No candidate at the original price and one original prediction below 0.5."""
    frame = _evaluation_frame(price=(95.0, 101.0, 105.0), orig_pred=(0.72, 0.49, 0.56))
    expected_result = np.array([[101.0, 100.0, 0.68, 0.49, 13.68, 11.55, 13.68, 11.55]])
    assert np.array_equal(max_conversion(frame.to_numpy(dtype=float)), expected_result)


def test_max_conversion3():
    """Nothing converts: fall back to the original-price row."""
    frame = _evaluation_frame(pred=(0.49, 0.49, 0.28), orig_pred=(0.49, 0.49, 0.49))
    expected_result = np.array([[100.0, 100.0, 0.49, 0.49, 13.68, 11.55, 13.68, 11.55]])
    assert np.array_equal(max_conversion(frame.to_numpy(dtype=float)), expected_result)


def test_maximize_profit():
    """maximize(method='profit') returns one 8-column row per entity."""
    result = maximize(Series([_evaluation_frame()]), method='profit')
    assert isinstance(result, DataFrame)
    assert result.shape == (1, 8)


def test_maximize_conversion():
    """maximize(method='conversion') returns one 8-column row per entity."""
    result = maximize(Series([_evaluation_frame()]), method='conversion')
    assert isinstance(result, DataFrame)
    assert result.shape == (1, 8)
def test_maximize_invalid_method():
    """An unknown method name is rejected with ValueError."""
    frame = DataFrame({'price': [100, 150], 'orig_price': [80, 100], 'pred': [0.7, 0.8], 'profit': [0.6, 0.5]})
    with pytest.raises(ValueError):
        maximize(frame, method='invalid_method')


class MockModel:
    """Stand-in model whose ``predict_proba`` returns random values."""

    @staticmethod
    def predict_proba(x):
        return np.random.rand(len(x), 2)


def test_dynamic_price(monkeypatch):
    """The full pipeline returns one 8-column row per input row."""
    source = DataFrame({'price': [100, 150], 'feature1': [1, 2], 'feature2': [3, 4]})

    result = dynamic_price(
        source,
        price_name='price',
        model=MockModel(),
        feature_names=['feature1', 'feature2'],
        burning_cost_pct=0.8,
        threshold=0.5,
    )

    assert isinstance(result, DataFrame)
    assert result.shape == (2, 8)