Skip to content

Commit

Permalink
Adding pricing module
Browse files Browse the repository at this point in the history
  • Loading branch information
alexmindset committed May 8, 2024
1 parent a68434d commit 2624ffb
Show file tree
Hide file tree
Showing 10 changed files with 666 additions and 2 deletions.
1 change: 1 addition & 0 deletions insolver/pricing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .pricing import dynamic_price
42 changes: 42 additions & 0 deletions insolver/pricing/evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import numpy as np
from pandas import DataFrame, Series
from typing import Union, Any, Iterable


def price_eval(
    x: DataFrame,
    model: Any,
    feature_names: Iterable,
    burning_cost_pct: Union[float, int] = 0.8,
    threshold: Union[float, int] = 0.5,
) -> DataFrame:
    """Score every candidate price row of ``x`` with a probability model.

    The first column of ``x`` is read as the candidate price and the
    ``orig_premium`` column as the original price. ``model.predict_proba`` is
    called twice: once on the candidate rows and once on a variant of ``x`` in
    which the candidate price column is replaced by the original price.

    Args:
        x: Candidate rows. Must contain ``orig_premium`` and every column listed in ``feature_names``.
        model: Object with a callable ``predict_proba``; column 1 of its output is used as the probability.
        feature_names: Names of the columns passed to the model. Any iterable of names is accepted; a single
            string is forwarded unchanged.
        burning_cost_pct: Fraction of the original price that is subtracted as cost in the profit formula.
        threshold: Minimal probability for a row's profit to be counted in the ``act_*`` columns.

    Returns:
        Float DataFrame with the columns ``price``, ``orig_price``, ``pred``, ``orig_pred``, ``profit``,
        ``profit_orig``, ``act_profit`` and ``act_profit_orig`` (one row per row of ``x``).

    Raises:
        ValueError: If ``model`` has no callable ``predict_proba``.
    """
    if not (hasattr(model, 'predict_proba') and callable(model.predict_proba)):
        raise ValueError("Model has no predict_proba() method.")

    # Materialise arbitrary iterables (tuple, Index, generator) so that pandas treats them as a column list.
    features = feature_names if isinstance(feature_names, str) else list(feature_names)

    price_name = x.columns[0]
    # Read only the price column: converting the whole frame would yield an object array as soon as any
    # other column is non-numeric, and that object dtype would leak into every derived column.
    prices = x.iloc[:, 0].to_numpy()
    old_price = x['orig_premium'].to_numpy()
    pred = model.predict_proba(x[features])[:, 1]

    # Same rows, but priced at the original premium (``drop`` already returns a new frame).
    x_orig = x.drop(price_name, axis=1).rename({'orig_premium': price_name}, axis=1)
    pred_orig = model.predict_proba(x_orig[features])[:, 1]

    profit = pred * prices * (1 - burning_cost_pct * old_price / prices)
    profit_orig = pred_orig * old_price * (1 - burning_cost_pct)

    # Profit only counts when the predicted probability reaches the threshold.
    act_profit = profit * (pred >= threshold)
    act_profit_orig = profit_orig * (pred_orig >= threshold)

    columns = ['price', 'orig_price', 'pred', 'orig_pred', 'profit', 'profit_orig', 'act_profit', 'act_profit_orig']
    values = np.column_stack(
        (prices, old_price, pred, pred_orig, profit, profit_orig, act_profit, act_profit_orig)
    )
    return DataFrame(values, columns=columns)


def eval_candidate(
    df: DataFrame,
    model: Any,
    feature_names: Iterable,
    burning_cost_pct: Union[float, int] = 0.8,
    threshold: Union[float, int] = 0.5,
) -> Series:
    """Run ``price_eval`` on every item that ``df.apply`` hands over.

    Args:
        df: Container of candidate frames.
            NOTE(review): annotated as DataFrame, but the accompanying test feeds the output of
            ``candidates`` and expects a Series back - confirm the intended type.
        model: Forwarded to ``price_eval``.
        feature_names: Forwarded to ``price_eval``.
        burning_cost_pct: Forwarded to ``price_eval``.
        threshold: Forwarded to ``price_eval``.

    Returns:
        Whatever ``df.apply`` builds from the individual ``price_eval`` results.
    """

    def _evaluate(candidate_frame):
        return price_eval(
            candidate_frame,
            model,
            feature_names,
            burning_cost_pct=burning_cost_pct,
            threshold=threshold,
        )

    return df.apply(_evaluate)
95 changes: 95 additions & 0 deletions insolver/pricing/generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import numpy as np
from pandas import DataFrame, Series
from numba import jit
from typing import Union


@jit(cache=True, nopython=True)
def gen_prices(
    price: Union[int, float], lower_bound: Union[int, float], upper_upper: Union[int, float], step: float
) -> np.ndarray:
    """Scale ``price`` by each multiplier in ``np.arange(lower_bound, upper_upper, step)``.

    Args:
        price: Base price to scale.
        lower_bound: First multiplier.
        upper_upper: End of the multiplier range (exclusive, as in ``np.arange``).
        step: Distance between consecutive multipliers.

    Returns:
        Array of ``price * multiplier`` values.
    """
    return price * np.arange(lower_bound, upper_upper, step)


@jit(cache=True, nopython=True)
def filter_candidates(
    prices: np.ndarray,
    minimum: Union[None, int, float] = None,
    maximum: Union[None, int, float] = None,
    frac_min: Union[int, float] = 1,
    frac_max: Union[int, float] = 1,
) -> np.ndarray:
    """Keep the prices lying inside ``[frac_min * minimum, frac_max * maximum]``.

    Args:
        prices: Candidate prices.
        minimum: Lower reference value; when ``None`` the smallest element of ``prices`` is used.
        maximum: Upper reference value; when ``None`` the largest element of ``prices`` is used.
        frac_min: Multiplier applied to the lower reference value.
        frac_max: Multiplier applied to the upper reference value.

    Returns:
        The elements of ``prices`` inside the (inclusive) bounds.

    Raises:
        ValueError: If no price satisfies the bounds.
    """
    # Explicit ``is None`` checks: a legitimate bound of 0 must not be mistaken for "no bound given".
    if minimum is None:
        min_ = np.min(prices)
    else:
        min_ = minimum
    if maximum is None:
        max_ = np.max(prices)
    else:
        max_ = maximum

    bound_filtered = prices[(prices >= frac_min * min_) & (prices <= frac_max * max_)]
    if len(bound_filtered) == 0:
        raise ValueError('Filters is too restrictive: no candidates left.')
    return bound_filtered


def gen_potential_prices(
    entity: Series,
    price_name: str,
    lower_bound: Union[float, int] = 0.25,
    upper_upper: Union[float, int] = 2.05,
    step: Union[float, int] = 0.05,
    decimals: int = 2,
    filter_minimum: Union[None, str, int, float] = None,
    filter_maximum: Union[None, str, int, float] = None,
    filter_frac_min: Union[int, float] = 1,
    filter_frac_max: Union[int, float] = 1,
    dtypes: Union[None, dict] = None,
) -> DataFrame:
    """Build a frame with one row per candidate price for a single entity.

    Candidate prices are produced by ``gen_prices`` from ``entity[price_name]`` and, when at least one
    filter bound is given, narrowed by ``filter_candidates``. Each output row repeats the entity's values;
    the candidate price is put in the first column under ``price_name`` and the entity's own price is kept
    under ``orig_premium``.

    Args:
        entity: One record; must contain ``price_name``.
        price_name: Label of the price field in ``entity``.
        lower_bound: First price multiplier.
        upper_upper: End of the multiplier range (exclusive).
        step: Multiplier increment.
        decimals: Number of decimals the candidate prices are rounded to.
        filter_minimum: Lower filter bound; a string is resolved as a field of ``entity``.
        filter_maximum: Upper filter bound; a string is resolved as a field of ``entity``.
        filter_frac_min: Multiplier applied to the lower bound.
        filter_frac_max: Multiplier applied to the upper bound.
        dtypes: Optional column -> dtype mapping applied to the result; ``orig_premium`` receives the dtype
            registered for ``price_name``.

    Returns:
        DataFrame of candidate rows.
    """
    prices = gen_prices(entity[price_name], lower_bound, upper_upper, step)

    # ``is not None`` instead of truthiness, so that a bound of 0 still triggers filtering.
    if filter_minimum is not None or filter_maximum is not None:
        if isinstance(filter_minimum, str):
            filter_minimum = entity[filter_minimum]
        if isinstance(filter_maximum, str):
            filter_maximum = entity[filter_maximum]
        prices = filter_candidates(
            prices, minimum=filter_minimum, maximum=filter_maximum, frac_min=filter_frac_min, frac_max=filter_frac_max
        )

    # One copy of the entity's values per candidate price, with the rounded candidate price prepended.
    repeated_entity = np.vstack([entity.to_numpy()] * len(prices))
    c_df = np.column_stack((np.round(prices, decimals), repeated_entity))
    columns = [price_name, *('orig_premium' if name == price_name else name for name in entity.index)]
    df_c = DataFrame(c_df, columns=columns)

    if dtypes:
        # ``orig_premium`` is absent from ``dtypes``, so it is cast separately to the price dtype.
        df_c['orig_premium'] = df_c['orig_premium'].astype(dtypes[price_name])
        df_c = df_c.astype(dtypes)
    return df_c


def candidates(
    df: DataFrame,
    price_name: str,
    lower_bound: Union[float, int] = 0.25,
    upper_upper: Union[float, int] = 2.05,
    step: Union[float, int] = 0.05,
    decimals: int = 2,
    filter_minimum: Union[None, str, int, float] = None,
    filter_maximum: Union[None, str, int, float] = None,
    filter_frac_min: Union[int, float] = 1.0,
    filter_frac_max: Union[int, float] = 1.0,
) -> Series:
    """Generate the candidate-price frame of every row of ``df``.

    Args:
        df: Source data; ``df[price_name]`` must contain neither nulls nor zeros.
        price_name: Name of the price column.
        lower_bound: First price multiplier.
        upper_upper: End of the multiplier range (exclusive).
        step: Multiplier increment.
        decimals: Number of decimals the candidate prices are rounded to.
        filter_minimum: Lower filter bound (value or column name), forwarded to ``gen_potential_prices``.
        filter_maximum: Upper filter bound (value or column name), forwarded to ``gen_potential_prices``.
        filter_frac_min: Multiplier applied to the lower bound.
        filter_frac_max: Multiplier applied to the upper bound.

    Returns:
        Result of the row-wise ``df.apply`` over ``gen_potential_prices``.

    Raises:
        ValueError: If the price column contains null or zero values.
    """
    price_column = df[price_name]
    if price_column.isnull().any() or (price_column == 0).any():
        raise ValueError(f"Dataframe contains empty or zero values in price column '{price_name}'!")

    # The dtype mapping is identical for every row, so build it once instead of once per row.
    dtypes = df.dtypes.to_dict()

    return df.apply(
        lambda row: gen_potential_prices(
            entity=row,
            price_name=price_name,
            lower_bound=lower_bound,
            upper_upper=upper_upper,
            step=step,
            decimals=decimals,
            filter_minimum=filter_minimum,
            filter_maximum=filter_maximum,
            filter_frac_min=filter_frac_min,
            filter_frac_max=filter_frac_max,
            dtypes=dtypes,
        ),
        axis=1,
    )
46 changes: 46 additions & 0 deletions insolver/pricing/optimization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numpy as np
from pandas import DataFrame, concat
from numba import jit
from typing import Union, Literal


@jit(cache=True, nopython=True)
def max_profit(x: np.ndarray) -> np.ndarray:
    """Select the single best candidate row of ``x``.

    Rows are ranked by the second-to-last column (``act_profit`` in the column layout produced by
    ``price_eval``). Ties are broken by the highest value in column 2 (``pred``) and then by the lowest
    value in column 0 (``price``). Rows that are still tied afterwards are equivalent on all three
    criteria, so the first of them is returned.

    Args:
        x: 2-D array of evaluated candidates.

    Returns:
        2-D array with at most one row. Exactly one row per entity is what ``maximize`` relies on when it
        re-attaches the original index.
    """
    best = x[x[:, -2] == x[:, -2].max()]  # [:, -2] for act_profit
    if best.shape[0] > 1:
        best = best[best[:, 2] == best[:, 2].max()]  # [:, 2] for pred
        if best.shape[0] > 1:
            best = best[best[:, 0] == best[:, 0].min()]  # [:, 0] for price
    # Fully tied rows (e.g. duplicated candidates after rounding) would otherwise all be returned.
    return best[:1]


@jit(cache=True, nopython=True)
def max_conversion(x: np.ndarray, threshold: Union[float, int] = 0.5) -> np.ndarray:
    """Pick the candidate row favouring conversion over profit.

    The row whose price (column 0) lies within 0.01 of the original price (maximum of column 1) is the
    default choice; when no such row exists, ``max_profit(x)`` is used instead. If the prediction at the
    original price (column 3) already reaches ``threshold``, that default is kept. Otherwise the most
    profitable row among those whose prediction (column 2) reaches ``threshold`` is chosen, falling back
    to the default when there is none.

    Args:
        x: 2-D array of evaluated candidates.
        threshold: Probability from which a row counts as converted.

    Returns:
        2-D array holding the chosen row(s).
    """
    # NOTE(review): more than one row can match the 0.01 tolerance; callers expecting one row should verify.
    orig = x[np.abs(x[:, 0] - x[:, 1].max()) < 0.01]  # [:, 0] for price, [:, 1] for orig_price
    if orig.shape[0] == 0:
        orig = max_profit(x)
    # Use the caller's threshold here as well; the literal 0.5 silently ignored non-default thresholds.
    if x[:, 3].min() >= threshold:  # [:, 3] for orig_pred
        choice = orig
    else:
        converted = x[x[:, 2] >= threshold]  # [:, 2] for pred
        if converted.shape[0] == 0:
            choice = orig
        else:
            choice = max_profit(converted)
    return choice


def maximize(
    df: DataFrame, method: Literal['profit', 'conversion'] = 'profit', threshold: Union[float, int] = 0.5
) -> DataFrame:
    """Reduce every candidate frame in ``df`` to its selected row(s) and stack the results.

    Args:
        df: Container whose items expose ``to_numpy`` and ``columns`` (the evaluated candidate frames).
        method: ``'profit'`` selects with ``max_profit``; ``'conversion'`` selects with ``max_conversion``.
        threshold: Passed to ``max_conversion``; not used by the ``'profit'`` method.

    Returns:
        Concatenation of the per-item selections, re-indexed with ``df.index``.

    Raises:
        ValueError: If ``method`` is neither ``'profit'`` nor ``'conversion'``.
    """
    if method == 'profit':

        def _select(frame):
            values = frame.to_numpy(dtype=float)
            return DataFrame(max_profit(values), columns=frame.columns)

    elif method == 'conversion':

        def _select(frame):
            values = frame.to_numpy(dtype=float)
            return DataFrame(max_conversion(values, threshold=threshold), columns=frame.columns)

    else:
        raise ValueError('method should be one of ["profit", "conversion"]')

    selected = df.apply(_select)
    stacked = concat(selected.to_numpy())
    stacked.index = df.index
    return stacked
44 changes: 44 additions & 0 deletions insolver/pricing/pricing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from pandas import DataFrame
from typing import Any, Literal, Union, Iterable

from .generation import candidates
from .evaluation import eval_candidate
from .optimization import maximize


def dynamic_price(
    df: DataFrame,
    price_name: str,
    model: Any,
    feature_names: Iterable,
    burning_cost_pct: Union[float, int] = 0.8,
    threshold: Union[float, int] = 0.5,
    method: Literal['profit', 'conversion'] = 'profit',
    lower_bound: Union[float, int] = 0.25,
    upper_upper: Union[float, int] = 2.05,
    step: Union[float, int] = 0.05,
    decimals: int = 2,
    filter_minimum: Union[None, str, int, float] = None,
    filter_maximum: Union[None, str, int, float] = None,
    filter_frac_min: Union[float, int] = 1,
    filter_frac_max: Union[float, int] = 1,
) -> DataFrame:
    """Chain candidate generation, evaluation and selection.

    The three stages are ``candidates`` -> ``eval_candidate`` -> ``maximize``.

    Args:
        df: Input data, handed to ``candidates``.
        price_name: Name of the price column in ``df``.
        model: Model handed to ``eval_candidate``.
        feature_names: Model feature columns, handed to ``eval_candidate``.
        burning_cost_pct: Handed to ``eval_candidate``.
        threshold: Handed to both ``eval_candidate`` and ``maximize``.
        method: Selection method handed to ``maximize``.
        lower_bound: Handed to ``candidates``.
        upper_upper: Handed to ``candidates``.
        step: Handed to ``candidates``.
        decimals: Handed to ``candidates``.
        filter_minimum: Handed to ``candidates``.
        filter_maximum: Handed to ``candidates``.
        filter_frac_min: Handed to ``candidates``.
        filter_frac_max: Handed to ``candidates``.

    Returns:
        The DataFrame returned by ``maximize``.
    """
    # Stage 1: expand the input into candidate prices.
    generated = candidates(
        df,
        price_name=price_name,
        lower_bound=lower_bound,
        upper_upper=upper_upper,
        step=step,
        decimals=decimals,
        filter_minimum=filter_minimum,
        filter_maximum=filter_maximum,
        filter_frac_min=filter_frac_min,
        filter_frac_max=filter_frac_max,
    )
    # Stage 2: score the candidates with the model.
    evaluated = eval_candidate(
        generated,
        model,
        feature_names,
        burning_cost_pct=burning_cost_pct,
        threshold=threshold,
    )
    # Stage 3: select according to the requested method.
    return maximize(evaluated, method=method, threshold=threshold)
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ h2o>=3.46.0.1
statsmodels>=0.13.1
lime>=0.2.0.1
dill>=0.3.4
numba>=0.58.0

# insolver.report
jinja2>= 3.1.3
Jinja2>= 3.1.3
ydata-profiling==4.7.0

# insolver.serving
flask>=2.3.2
Flask>=2.3.2
fastapi>=0.109.1
uvicorn[standard]>=0.13.3
pydantic>=2
Expand Down
52 changes: 52 additions & 0 deletions tests/test_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pytest
import numpy as np
from pandas import DataFrame, Series
from insolver.pricing.evaluation import price_eval, eval_candidate
from insolver.pricing.generation import candidates


# Mock model class with predict_proba method
class MockModel:
    """Stand-in model that offers ``predict_proba`` and returns random numbers."""

    @staticmethod
    def predict_proba(x):
        """Return a ``(len(x), 2)`` array drawn with ``np.random.rand``."""
        n_rows = len(x)
        return np.random.rand(n_rows, 2)


class MockModel2:
    """Stand-in model that only offers ``predict`` (it has no ``predict_proba``)."""

    @staticmethod
    def predict(x):
        """Return a ``(len(x), 2)`` array drawn with ``np.random.rand``."""
        n_rows = len(x)
        return np.random.rand(n_rows, 2)


# Test price_eval function
def test_price_eval():
    """price_eval yields a DataFrame with one row per input row and eight columns."""
    frame = DataFrame(
        {
            'price': [100, 150],
            'orig_premium': [80, 100],
            'feature1': [1, 2],
            'feature2': [3, 4],
        }
    )
    outcome = price_eval(frame, MockModel(), ['feature1', 'feature2'], 0.8, 0.5)
    assert isinstance(outcome, DataFrame)
    assert outcome.shape == (2, 8)


def test_price_eval2():
    """price_eval rejects a model that lacks predict_proba with ValueError."""
    frame = DataFrame(
        {
            'price': [100, 150],
            'orig_premium': [80, 100],
            'feature1': [1, 2],
            'feature2': [3, 4],
        }
    )
    model_without_proba = MockModel2()
    with pytest.raises(ValueError):
        price_eval(frame, model_without_proba, ['feature1', 'feature2'], 0.8, 0.5)


# Test eval_candidate function
def test_eval_candidate():
    """eval_candidate returns a Series with one entry per input row."""
    frame = DataFrame({'price': [100, 150], 'feature1': [1, 2], 'feature2': [3, 4]})
    generated = candidates(frame, 'price')
    outcome = eval_candidate(generated, MockModel(), ['feature1', 'feature2'], 0.8, 0.5)
    assert isinstance(outcome, Series)
    assert len(outcome) == len(frame)
Loading

0 comments on commit 2624ffb

Please sign in to comment.