Adding pricing module

MindSetLib · May 8, 2024 · 2624ffb · 2624ffb
1 parent a68434d
commit 2624ffb
Show file tree

Hide file tree

Showing 10 changed files with 666 additions and 2 deletions.
diff --git a/insolver/pricing/__init__.py b/insolver/pricing/__init__.py
@@ -0,0 +1 @@
+from .pricing import dynamic_price
diff --git a/insolver/pricing/evaluation.py b/insolver/pricing/evaluation.py
@@ -0,0 +1,42 @@
+import numpy as np
+from pandas import DataFrame, Series
+from typing import Union, Any, Iterable
+
+
+def price_eval(
+    x: DataFrame,
+    model: Any,
+    feature_names: Iterable,
+    burning_cost_pct: Union[float, int] = 0.8,
+    threshold: Union[float, int] = 0.5,
+) -> DataFrame:
+    if not (hasattr(model, 'predict_proba') and callable(model.predict_proba)):
+        raise ValueError("Model has no predict_proba() method.")
+    prices = x.to_numpy()[:, 0]
+    old_price = x['orig_premium'].to_numpy()
+    pred = model.predict_proba(x[feature_names])[:, 1]
+    price_name = x.columns[0]
+    x_orig = x.copy().drop(price_name, axis=1).rename({'orig_premium': price_name}, axis=1)
+    pred_orig = model.predict_proba(x_orig[feature_names])[:, 1]
+
+    profit = pred * prices * (1 - burning_cost_pct * old_price / prices)
+    profit_orig = pred_orig * old_price * (1 - burning_cost_pct)
+
+    act_profit = profit * (pred >= threshold)
+    act_profit_orig = profit_orig * (pred_orig >= threshold)
+
+    df = DataFrame(np.dstack((prices, old_price, pred, pred_orig, profit, profit_orig, act_profit, act_profit_orig))[0])
+    df.columns = ['price', 'orig_price', 'pred', 'orig_pred', 'profit', 'profit_orig', 'act_profit', 'act_profit_orig']
+    return df
+
+
+def eval_candidate(
+    df: DataFrame,
+    model: Any,
+    feature_names: Iterable,
+    burning_cost_pct: Union[float, int] = 0.8,
+    threshold: Union[float, int] = 0.5,
+) -> Series:
+    return df.apply(
+        lambda x: price_eval(x, model, feature_names, burning_cost_pct=burning_cost_pct, threshold=threshold)
+    )
diff --git a/insolver/pricing/generation.py b/insolver/pricing/generation.py
@@ -0,0 +1,95 @@
+import numpy as np
+from pandas import DataFrame, Series
+from numba import jit
+from typing import Union
+
+
+@jit(cache=True, nopython=True)
+def gen_prices(
+    price: Union[int, float], lower_bound: Union[int, float], upper_upper: Union[int, float], step: float
+) -> np.ndarray:
+    percents = np.arange(lower_bound, upper_upper, step)
+    prices = price * percents
+    return prices
+
+
+@jit(cache=True, nopython=True)
+def filter_candidates(
+    prices: np.ndarray,
+    minimum: Union[None, int, float] = None,
+    maximum: Union[None, int, float] = None,
+    frac_min: Union[int, float] = 1,
+    frac_max: Union[int, float] = 1,
+) -> np.ndarray:
+    min_ = np.min(prices) if not minimum else minimum
+    max_ = np.max(prices) if not maximum else maximum
+    bound_filtered = prices[(prices >= frac_min * min_) & (prices <= frac_max * max_)]
+    if len(bound_filtered) == 0:
+        raise ValueError('Filters is too restrictive: no candidates left.')
+    else:
+        return bound_filtered
+
+
+def gen_potential_prices(
+    entity: Series,
+    price_name: str,
+    lower_bound: Union[float, int] = 0.25,
+    upper_upper: Union[float, int] = 2.05,
+    step: Union[float, int] = 0.05,
+    decimals: int = 2,
+    filter_minimum: Union[None, str, int, float] = None,
+    filter_maximum: Union[None, str, int, float] = None,
+    filter_frac_min: Union[int, float] = 1,
+    filter_frac_max: Union[int, float] = 1,
+    dtypes: Union[None, dict] = None,
+) -> DataFrame:
+    prices = gen_prices(entity[price_name], lower_bound, upper_upper, step)
+    if filter_minimum or filter_maximum:
+        if isinstance(filter_minimum, str):
+            filter_minimum = entity[filter_minimum]
+        if isinstance(filter_maximum, str):
+            filter_maximum = entity[filter_maximum]
+        prices_filtered = filter_candidates(
+            prices, minimum=filter_minimum, maximum=filter_maximum, frac_min=filter_frac_min, frac_max=filter_frac_max
+        )
+    else:
+        prices_filtered = prices
+    c_df = np.column_stack((np.round(prices_filtered, decimals), np.vstack([entity.to_numpy()] * len(prices_filtered))))
+    df_c = DataFrame(c_df, columns=[price_name, *[x if x != price_name else 'orig_premium' for x in entity.index]])
+    if dtypes:
+        df_c['orig_premium'] = df_c['orig_premium'].astype(dtypes[price_name])
+        df_c = df_c.astype(dtypes)
+    return df_c
+
+
+def candidates(
+    df: DataFrame,
+    price_name: str,
+    lower_bound: Union[float, int] = 0.25,
+    upper_upper: Union[float, int] = 2.05,
+    step: Union[float, int] = 0.05,
+    decimals: int = 2,
+    filter_minimum: Union[None, str, int, float] = None,
+    filter_maximum: Union[None, str, int, float] = None,
+    filter_frac_min: Union[int, float] = 1.0,
+    filter_frac_max: Union[int, float] = 1.0,
+) -> Series:
+    if (df[price_name].isnull().sum() == 0) & ((df[price_name] == 0).sum() == 0):
+        return df.apply(
+            lambda x: gen_potential_prices(
+                entity=x,
+                price_name=price_name,
+                lower_bound=lower_bound,
+                upper_upper=upper_upper,
+                step=step,
+                decimals=decimals,
+                filter_minimum=filter_minimum,
+                filter_maximum=filter_maximum,
+                filter_frac_min=filter_frac_min,
+                filter_frac_max=filter_frac_max,
+                dtypes=df.dtypes.to_dict(),
+            ),
+            axis=1,
+        )
+    else:
+        raise ValueError(f"Dataframe contains empty or zero values in price column '{price_name}'!")
diff --git a/insolver/pricing/optimization.py b/insolver/pricing/optimization.py
@@ -0,0 +1,46 @@
+import numpy as np
+from pandas import DataFrame, concat
+from numba import jit
+from typing import Union, Literal
+
+
+@jit(cache=True, nopython=True)
+def max_profit(x: np.ndarray) -> np.ndarray:
+    max_profit_ = x[x[:, -2] == x[:, -2].max()]  # [:,-2] for profit
+    if max_profit_.shape[0] > 1:
+        max_profit_ = max_profit_[max_profit_[:, 2] == max_profit_[:, 2].max()]  # [:,2] for pred
+        if max_profit_.shape[0] > 1:
+            max_profit_ = max_profit_[max_profit_[:, 0] == max_profit_[:, 0].min()]  # [:,0] for price
+    return max_profit_
+
+
+@jit(cache=True, nopython=True)
+def max_conversion(x: np.ndarray, threshold: Union[float, int] = 0.5) -> np.ndarray:
+    orig = x[np.abs(x[:, 0] - x[:, 1].max()) < 0.01]  # [:, 0] for price, [:, 1] for orig_price
+    if orig.shape[0] == 0:
+        orig = max_profit(x)
+    if x[:, 3].min() >= 0.5:  # [:, 3] for orig_pred
+        choice = orig
+    else:
+        converted = x[x[:, 2] >= threshold]
+        if converted.shape[0] == 0:
+            choice = orig
+        else:
+            choice = max_profit(converted)
+    return choice
+
+
+def maximize(
+    df: DataFrame, method: Literal['profit', 'conversion'] = 'profit', threshold: Union[float, int] = 0.5
+) -> DataFrame:
+    if method == 'profit':
+        res = df.apply(lambda x: DataFrame(max_profit(x.to_numpy(dtype=float)), columns=x.columns))
+    elif method == 'conversion':
+        res = df.apply(
+            lambda x: DataFrame(max_conversion(x.to_numpy(dtype=float), threshold=threshold), columns=x.columns)
+        )
+    else:
+        raise ValueError('method should be one of ["profit", "conversion"]')
+    result = concat(res.to_numpy())
+    result.index = df.index
+    return result
diff --git a/insolver/pricing/pricing.py b/insolver/pricing/pricing.py
@@ -0,0 +1,44 @@
+from pandas import DataFrame
+from typing import Any, Literal, Union, Iterable
+
+from .generation import candidates
+from .evaluation import eval_candidate
+from .optimization import maximize
+
+
+def dynamic_price(
+    df: DataFrame,
+    price_name: str,
+    model: Any,
+    feature_names: Iterable,
+    burning_cost_pct: Union[float, int] = 0.8,
+    threshold: Union[float, int] = 0.5,
+    method: Literal['profit', 'conversion'] = 'profit',
+    lower_bound: Union[float, int] = 0.25,
+    upper_upper: Union[float, int] = 2.05,
+    step: Union[float, int] = 0.05,
+    decimals: int = 2,
+    filter_minimum: Union[None, str, int, float] = None,
+    filter_maximum: Union[None, str, int, float] = None,
+    filter_frac_min: Union[float, int] = 1,
+    filter_frac_max: Union[float, int] = 1,
+) -> DataFrame:
+    _df = eval_candidate(
+        candidates(
+            df,
+            price_name=price_name,
+            lower_bound=lower_bound,
+            upper_upper=upper_upper,
+            step=step,
+            decimals=decimals,
+            filter_minimum=filter_minimum,
+            filter_maximum=filter_maximum,
+            filter_frac_min=filter_frac_min,
+            filter_frac_max=filter_frac_max,
+        ),
+        model,
+        feature_names,
+        burning_cost_pct=burning_cost_pct,
+        threshold=threshold,
+    )
+    return maximize(_df, method=method, threshold=threshold)
diff --git a/requirements.txt b/requirements.txt
@@ -14,13 +14,14 @@ h2o>=3.46.0.1
 statsmodels>=0.13.1
 lime>=0.2.0.1
 dill>=0.3.4
+numba>=0.58.0
 
 # insolver.report
-jinja2>= 3.1.3
+Jinja2>= 3.1.3
 ydata-profiling==4.7.0
 
 # insolver.serving
-flask>=2.3.2
+Flask>=2.3.2
 fastapi>=0.109.1
 uvicorn[standard]>=0.13.3
 pydantic>=2

diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
@@ -0,0 +1,52 @@
+import pytest
+import numpy as np
+from pandas import DataFrame, Series
+from insolver.pricing.evaluation import price_eval, eval_candidate
+from insolver.pricing.generation import candidates
+
+
+# Mock model class with predict_proba method
+class MockModel:
+    @staticmethod
+    def predict_proba(x):
+        return np.random.rand(len(x), 2)
+
+
+class MockModel2:
+    @staticmethod
+    def predict(x):
+        return np.random.rand(len(x), 2)
+
+
+# Test price_eval function
+def test_price_eval():
+    x = DataFrame({'price': [100, 150], 'orig_premium': [80, 100], 'feature1': [1, 2], 'feature2': [3, 4]})
+    model = MockModel()
+    feature_names = ['feature1', 'feature2']
+    burning_cost_pct = 0.8
+    threshold = 0.5
+    result = price_eval(x, model, feature_names, burning_cost_pct, threshold)
+    assert isinstance(result, DataFrame)
+    assert result.shape == (2, 8)
+
+
+def test_price_eval2():
+    x = DataFrame({'price': [100, 150], 'orig_premium': [80, 100], 'feature1': [1, 2], 'feature2': [3, 4]})
+    model = MockModel2()
+    feature_names = ['feature1', 'feature2']
+    burning_cost_pct = 0.8
+    threshold = 0.5
+    with pytest.raises(ValueError):
+        price_eval(x, model, feature_names, burning_cost_pct, threshold)
+
+
+# Test eval_candidate function
+def test_eval_candidate():
+    df = DataFrame({'price': [100, 150], 'feature1': [1, 2], 'feature2': [3, 4]})
+    model = MockModel()
+    feature_names = ['feature1', 'feature2']
+    burning_cost_pct = 0.8
+    threshold = 0.5
+    result = eval_candidate(candidates(df, 'price'), model, feature_names, burning_cost_pct, threshold)
+    assert isinstance(result, Series)
+    assert len(result) == len(df)