forked from thomasdouenne/beliefs_climate_policies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
consistency_beliefs_losses.py
130 lines (98 loc) · 6.04 KB
/
consistency_beliefs_losses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-
from __future__ import division
import pandas as pd
import seaborn
import statsmodels.formula.api as smf
import numpy as np
from model_reforms_data.prepare_dataset import prepare_dataset
from model_reforms_data.gains_losses_data import compute_gains_losses
from model_reforms_data.standardize_data_bdf_ptc import variables_names_bdf_to_ptc, \
create_new_variables_bdf_ptc, compute_gain_net_uc, compare_objective_subjective_beliefs_gain, \
impute_barycentre_in_bins, impute_average_bdf_in_bins, extrapolate_distribution_bcp_from_bdf, \
save_dataframes_kernel_density, compute_effort_rate_decile
seaborn.set_palette(seaborn.color_palette([u'blue', u'red']))
""" Load samples - BdF/BCP """
df_bdf = prepare_dataset()
df_ptc = pd.read_csv(r'code/survey_prepared.csv', sep = ";", decimal = ',')
#try:
# df_ptc = pd.read_csv(r'../code/survey_prepared.csv')
#except:
# df_ptc = pd.read_csv(r'..\code\survey_prepared.csv')
df_bdf = compute_gains_losses(df_bdf)
df_bdf = compute_gain_net_uc(df_bdf)
df_bdf = variables_names_bdf_to_ptc(df_bdf)
#(df_bdf, df_ptc) = create_new_variables_bdf_ptc(df_bdf, df_ptc)
#df_bdf['weight'] = 1
#df_ptc['weight'] = 1
df_ptc['gain_taxe_carbone_echelle'] = df_ptc['gain_echelle']
""" Define policy to study """ # chauffage, fuel ou taxe_carbone
energy = 'taxe_carbone'
""" Define sub-samples to study """ # Put None to variable, value or sign to get full sample
variable = 'gaz'
sign = '=='
value = None
try:
df_bdf = df_bdf.query('{0} {1} {2}'.format(variable, sign, value))
df_ptc = df_ptc.query('{0} {1} {2}'.format(variable, sign, value))
print "### We have selected a sub-sample from the original datasets:"
except:
print "### Estimation on full samples:"
""" Choose which function to perform """ # True to compute and display results, False otherwise
plot_step_distribution = False
plot_kde = False
save_kde_data = True
test_imputation_methods = False # The two methods provide quite similar results
regress = False
number_winners = False
effort_rate = True
prop_constrained = False
""" Compare distributions BdF/BCP """ # pdf and CDF net gains of both samples (step function or Kernel density estimation)
if plot_step_distribution == True:
df_to_plot = compare_objective_subjective_beliefs_gain(df_bdf, df_ptc, energy, True)
if plot_kde == True:
extrapolate_distribution_bcp_from_bdf(df_bdf, df_ptc, energy, bw_size = 0.3, vector = True)
if save_kde_data == True:
save_dataframes_kernel_density(df_bdf, df_ptc)
""" Imputate numerical values for net gain for BCP """ # Two different imputation methods: provide very similar results
df_ptc = impute_barycentre_in_bins(df_bdf, df_ptc)
df_ptc = impute_average_bdf_in_bins(df_bdf, df_ptc)
if test_imputation_methods == True:
print "Sample means:", df_ptc['gain_net_numeric_uc_{}'.format(energy)].mean(), \
df_ptc['gain_net_numeric_barycentre_uc_{}'.format(energy)].mean()
for i in range(-6,6):
print i, "Average BdF: ", df_ptc.query('gain_{0}_echelle == {1}'.format(energy, i))['gain_net_numeric_uc_{}'.format(energy)].mean(), \
"/ Barycentre method:", df_ptc.query('gain_{0}_echelle == {1}'.format(energy, i))['gain_net_numeric_barycentre_uc_{}'.format(energy)].mean()
""" Regress expected net gain on various households characteristics """
if regress == True:
data = df_ptc
data['revenu_2'] = data['revenu'] ** 2
data['rev_tot_2'] = data['rev_tot'] ** 2
ols_net_gain = smf.ols(formula = 'gain_net_numeric_uc_{} ~ \
revenu + revenu_2 + rev_tot + rev_tot_2 + fioul + gaz + taille_menage + surface + \
age_18_24 + age_25_34 + age_35_49 + age_50_64'.format(energy),
data = data).fit() # To do : add more variables
print ols_net_gain.summary()
# Results are barely sensitive to choice of imputation method for numeric net gain
""" Number of winners and average expected net gains"""
if number_winners == True:
print float((df_ptc['gain_net_numeric_uc_fuel'] * df_ptc['weight']).sum()) / (df_ptc['weight'] * (1-1*np.isnan(df_ptc['gain_fuel_echelle']))).sum()
print float((df_ptc['gain_net_numeric_uc_chauffage'] * df_ptc['weight']).sum()) / (df_ptc['weight'] * (1-1*np.isnan(df_ptc['gain_chauffage_echelle']))).sum()
print float((df_ptc['gain_net_numeric_uc_taxe_carbone'] * df_ptc['weight']).sum()) / df_ptc['weight'].sum()
print float((df_bdf['gain_net_numeric_uc_fuel'] * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
print float((df_bdf['gain_net_numeric_uc_chauffage'] * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
print float((df_bdf['gain_net_numeric_uc_taxe_carbone'] * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
print float(((df_ptc['gain_net_numeric_uc_fuel'] > 0) * df_ptc['weight']).sum()) / df_ptc['weight'].sum()
print float(((df_ptc['gain_net_numeric_uc_chauffage'] > 0) * df_ptc['weight']).sum()) / df_ptc['weight'].sum()
print float(((df_ptc['gain_net_numeric_uc_taxe_carbone'] > 0) * df_ptc['weight']).sum()) / df_ptc['weight'].sum()
print float(((df_bdf['gain_net_numeric_uc_fuel'] > 0) * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
print float(((df_bdf['gain_net_numeric_uc_chauffage'] > 0) * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
print float(((df_bdf['gain_net_numeric_uc_taxe_carbone'] > 0) * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
""" Effort rate decile """
if effort_rate == True:
compute_effort_rate_decile(df_bdf, energy)
""" Proportion of households more constrained than average """
if prop_constrained == True:
mean_transport_expenditure_increase = (df_bdf['transport_expenditures_increase'] * df_bdf['weight']).sum() / df_bdf['weight'].sum()
print float(((df_bdf['transport_expenditures_increase'] < mean_transport_expenditure_increase) * df_bdf['weight']).sum()) / df_bdf['weight'].sum()
mean_housing_expenditure_increase = (df_bdf['transport_expenditures_increase'] * df_bdf['weight']).sum() / df_bdf['weight'].sum()
print float(((df_bdf['housing_expenditures_increase'] < mean_housing_expenditure_increase) * df_bdf['weight']).sum()) / df_bdf['weight'].sum()