-
Notifications
You must be signed in to change notification settings - Fork 0
/
Core.py
42 lines (37 loc) · 1.83 KB
/
Core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def featStat(featureName, constrain, plotType):
    """Plot a feature's distribution next to its relation to ``winPlacePerc``.

    Reads the module-level ``train`` DataFrame and draws a two-panel figure
    with matplotlib/seaborn. The left panel shows the distribution of the
    strictly positive values of ``featureName``; the right panel shows box
    plots relating the feature to ``winPlacePerc``.

    Side effect: sets ``plt.rcParams['figure.figsize']`` to ``(15, 5)``.

    Args:
        featureName: Name of the column of ``train`` to inspect.
        constrain: Exclusive upper bound applied to the values shown in the
            left panel only; ``None`` disables the filter.
        plotType: ``'hist'`` draws a 50-bin histogram on the left and, on the
            right, box plots of ``winPlacePerc`` grouped by bins of the
            feature (20 evenly spaced edges from 0 to the 99th percentile,
            plus a final bin up to the maximum). ``'count'`` draws a count
            plot on the left and, on the right, one box per distinct value,
            with every value above the 99th percentile merged into a single
            ``"<q99+1>+"`` bucket. Any other value draws no panels.

    Returns:
        None. The figure is drawn on the current matplotlib figure.
    """
    feat = train[featureName][train[featureName] > 0]
    data = train[[featureName, 'winPlacePerc']].copy()
    # Integer-truncated 99th percentile; used to keep outliers from
    # stretching the right-hand panel.
    q99 = int(data[featureName].quantile(0.99))
    plt.rcParams['figure.figsize'] = 15, 5

    if constrain is not None:
        feat = feat[feat < constrain]

    if plotType == 'hist':
        plt.subplot(1, 2, 1)
        feat.hist(bins=50)
        plt.title(featureName)

        n = 20
        cut_range = np.linspace(0, q99, n)
        feat_max = data[featureName].max()
        # Only add the tail bin when it is non-empty: appending a maximum
        # equal to q99 would duplicate the last edge and make pd.cut raise.
        if feat_max > q99:
            cut_range = np.append(cut_range, feat_max)
        bin_labels = [
            "{:.0f}-{:.0f}".format(a_, b_)
            for a_, b_ in zip(cut_range[:-1], cut_range[1:])
        ]
        data[featureName] = pd.cut(
            data[featureName],
            cut_range,
            labels=bin_labels,
            include_lowest=True,
        )

        ax = plt.subplot(1, 2, 2)
        sns.boxplot(x="winPlacePerc", y=featureName, data=data, ax=ax, color="#2196F3")
        ax.set_xlabel('winPlacePerc', size=14, color="#263238")
        ax.set_ylabel(featureName, size=14, color="#263238")
        plt.gca().xaxis.grid(True)
    elif plotType == 'count':
        plt.subplot(1, 2, 1)
        # Pass the series by keyword: newer seaborn treats the first
        # positional argument as ``data`` rather than ``x``.
        sns.countplot(x=feat, color="#2196F3")

        plt.subplot(1, 2, 2)
        overflow_value = q99 + 1
        overflow_label = str(overflow_value) + "+"
        is_overflow = data[featureName] > q99

        # Collapse everything above q99 into one bucket so it sorts last.
        data.loc[is_overflow, featureName] = overflow_value
        x_order = np.sort(data[featureName].dropna().unique()).tolist()

        # Relabel the bucket only if it exists; otherwise the last entry is a
        # genuine value and renaming it would drop its box from the plot.
        if is_overflow.any():
            x_order[-1] = overflow_label
            # Cast to object first so the string label can live beside the
            # numeric values, then assign through a single .loc call (chained
            # indexing may write to a temporary copy and be lost).
            data[featureName] = data[featureName].astype(object)
            data.loc[is_overflow, featureName] = overflow_label

        ax = sns.boxplot(x=featureName, y='winPlacePerc', data=data, color="#2196F3", order=x_order)
        ax.set_xlabel(featureName, size=14, color="#263238")
        ax.set_ylabel('WinPlacePerc', size=14, color="#263238")

    plt.tight_layout()