forked from thomasdouenne/beliefs_climate_policies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_predictions_ols_regression.py
53 lines (35 loc) · 2.17 KB
/
test_predictions_ols_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
# TODO: careful - go to the number of adults
# TODO: compute the probability for a probit model
# TODO: deal with people that consume both gas and domestic fuel
from __future__ import division
import numpy as np
from model_reforms_data.prepare_dataset_housing import prepare_dataset_housing
from model_reforms_data.regression_feedback import compute_gains_losses_housing, \
regress_ols_housing_expenditures_increase
def compute_probability_to_win_from_ols_regression(df_hh, df_estimation):
    """Predict the increase in housing expenditures of each household by OLS.

    A regression is fitted on ``df_estimation`` through
    ``regress_ols_housing_expenditures_increase`` and its coefficients are
    applied to the matching columns of ``df_hh``. Despite the function name,
    no probability is computed here: the only output is the linear
    prediction.

    Parameters
    ----------
    df_hh : pandas.DataFrame
        Households to predict for. Must hold the columns 'hh_income',
        'natural_gas', 'domestic_fuel' and 'accommodation_size', plus any
        other regressor named in the fitted parameters. It is modified in
        place: the columns 'hh_income_2', 'Intercept',
        'natural_gas:accommodation_size', 'domestic_fuel:accommodation_size'
        and 'predicted_housing_expenditures_increase' are added.
    df_estimation : object
        Estimation sample, forwarded unchanged to
        ``regress_ols_housing_expenditures_increase``.

    Returns
    -------
    pandas.DataFrame
        The same ``df_hh`` object, with the prediction stored in
        'predicted_housing_expenditures_increase'.

    Raises
    ------
    KeyError
        If a regressor of the fitted model is not a column of ``df_hh``.
    """
    results_regressions = regress_ols_housing_expenditures_increase(df_estimation)

    # Build the derived regressors under the names used by the fitted model
    # (the 'a:b' names presumably mirror formula interaction terms - confirm
    # against the regression specification).
    df_hh['hh_income_2'] = df_hh['hh_income'] ** 2
    df_hh['Intercept'] = 1
    df_hh['natural_gas:accommodation_size'] = df_hh['natural_gas'] * df_hh['accommodation_size']
    df_hh['domestic_fuel:accommodation_size'] = df_hh['domestic_fuel'] * df_hh['accommodation_size']

    # params is expected to be a pandas Series indexed by regressor name.
    # Reading (name, coefficient) pairs straight from it avoids the former
    # to_frame().T / [0] lookup, which only worked while the Series was
    # unnamed (so that the transposed row happened to be labelled 0).
    df_hh['predicted_housing_expenditures_increase'] = 0
    for var, coefficient in results_regressions.params.items():
        df_hh['predicted_housing_expenditures_increase'] += df_hh[var] * coefficient

    return df_hh
if __name__ == "__main__":
df_hh = prepare_dataset_housing('bdf')
df_estimation = prepare_dataset_housing('enl')
df_hh = compute_gains_losses_housing(df_hh)
df_estimation = compute_gains_losses_housing(df_estimation)
df_hh['winner'] = 0 + 1 * (df_hh['housing_expenditures_increase'] < 50 * df_hh['nb_beneficiaries'])
df_hh = compute_probability_to_win_from_ols_regression(df_hh, df_estimation)
df_hh['predicted_winner'] = 0 + 1 * (df_hh['predicted_housing_expenditures_increase'] < 50 * df_hh['nb_beneficiaries'])
df_hh['mistake'] = \
1 * ((df_hh['winner'] - df_hh['predicted_winner']) != 0)
print "Share of mistakes:", (float(len(df_hh.query('mistake == 1'))) / len(df_hh))
print "Probability predict that a loser wins:", float(len(df_hh.query('winner == 0').query('predicted_winner == 1'))) / len(df_hh.query('winner == 0'))
print "Probability predict that a winner loses:", float(len(df_hh.query('winner == 1').query('predicted_winner == 0'))) / len(df_hh.query('winner == 1'))