-
Notifications
You must be signed in to change notification settings - Fork 275
/
flatiron_stats.py
46 lines (31 loc) · 1.26 KB
/
flatiron_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#flatiron_stats
import numpy as np
import scipy.stats as stats
def welch_t(a, b):
    """Return the absolute value of Welch's t statistic for two samples.

    Both samples must expose ``mean()``, ``var(ddof=...)`` and ``size``
    (presumably NumPy arrays -- confirm against callers).

    Args:
        a: First sample.
        b: Second sample.

    Returns:
        ``|mean(a) - mean(b)| / sqrt(var(a)/n_a + var(b)/n_b)``, where the
        variances are the unbiased sample variances.
    """
    mean_gap = a.mean() - b.mean()
    # ddof=1 makes the variance divisor N - 1 (unbiased sample variance)
    # instead of the default N.
    sq_err_a = a.var(ddof=1) / a.size
    sq_err_b = b.var(ddof=1) / b.size
    std_err = np.sqrt(sq_err_a + sq_err_b)
    # The sign is discarded, so the result does not depend on argument order.
    return np.abs(mean_gap / std_err)
def welch_df(a, b):
    """Return the effective degrees of freedom for Welch's t-test.

    Implements the Welch-Satterthwaite approximation from the unbiased
    sample variances (``ddof=1``) and the sizes of the two samples.

    Args:
        a: First sample; must expose ``var(ddof=...)`` and ``size``.
        b: Second sample; same requirements as ``a``.

    Returns:
        ``(v_a/n_a + v_b/n_b)**2 /
        ((v_a/n_a)**2/(n_a - 1) + (v_b/n_b)**2/(n_b - 1))``.
    """
    size_a, size_b = a.size, b.size
    # Per-sample variance of the mean: unbiased variance over sample size.
    term_a = a.var(ddof=1) / size_a
    term_b = b.var(ddof=1) / size_b
    top = (term_a + term_b) ** 2
    bottom = term_a ** 2 / (size_a - 1) + term_b ** 2 / (size_b - 1)
    return top / bottom
def p_value_welch_ttest(a, b, two_sided=False):
    """Return the p-value of Welch's t-test for two samples.

    The statistic is obtained from ``welch_t`` and the degrees of freedom
    from ``welch_df``; the p-value is the upper-tail probability of the
    Student t distribution at the absolute value of the statistic.

    Args:
        a: First sample.
        b: Second sample.
        two_sided: If False (default), return the one-sided (upper-tail)
            p-value. If True, return twice that value, i.e. the two-sided
            p-value.

    Returns:
        The one-sided or two-sided p-value.
    """
    t = welch_t(a, b)
    df = welch_df(a, b)
    # The survival function is mathematically 1 - cdf, but it is evaluated
    # directly, so very small tail probabilities are not rounded to 0.0 the
    # way ``1 - stats.t.cdf(...)`` rounds them for large statistics.
    p = stats.t.sf(np.abs(t), df)
    return 2 * p if two_sided else p