-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
from typing import Type | ||
import pandas as pd | ||
import numpy as np | ||
import statsmodels.api as sm | ||
import statsmodels.formula.api as smf | ||
from plotnine import * | ||
import patchworklib as pw | ||
|
||
def dgplots(results: sm.regression.linear_model.RegressionResultsWrapper) -> None:
    """
    Draw a 2x2 panel of regression diagnostic plots for a fitted linear model.

    The panel contains, clockwise from top-left:
      1. residuals vs predicted values (with a smoother),
      2. a normal Q-Q plot of the residuals,
      3. a location-scale plot (sqrt of |standardised residuals| vs predicted),
      4. Cook's distance per observation, with the conventional 4/n cut-off.

    Parameters
    ----------
    results : statsmodels RegressionResultsWrapper
        A fitted model, e.g. the return value of ``smf.ols(...).fit()``.
        NOTE: the argument is a fitted-results *instance*, so the annotation is
        the class itself, not ``Type[...]``.

    Raises
    ------
    TypeError
        If ``results`` is not a statsmodels regression results object.
    """
    # Guard clause: fail fast on anything that is not a fitted model.
    if not isinstance(results, sm.regression.linear_model.RegressionResultsWrapper):
        raise TypeError("Please provide a model fit.")

    # Influence measures are expensive to compute — do it once and reuse
    # (the original called get_influence() twice).
    influence = results.get_influence()

    residuals = results.resid.rename("residuals")
    predicted_values = results.fittedvalues.rename("predicted_values")
    # sqrt(|standardised residuals|), the y-axis of the location-scale plot
    std_resid = pd.Series(
        np.sqrt(np.abs(influence.resid_studentized_internal))
    ).rename("std_resid")
    cooks_d = pd.Series(influence.cooks_distance[0]).rename("cooks_d")
    leverage = pd.Series(influence.hat_matrix_diag).rename("leverage")
    obs = pd.Series(range(len(residuals))).rename("obs")
    n_obs = len(obs.index)

    # combine the Series into one DataFrame for plotnine
    model_values = (
        residuals.to_frame()
        .join(predicted_values)
        .join(std_resid)
        .join(cooks_d)
        .join(leverage)
        .join(obs)
    )
    # add the total number of observations
    model_values["n_obs"] = n_obs

    # residuals vs fitted
    p1 = (
        ggplot(model_values, aes(x="predicted_values", y="residuals"))
        + geom_point()
        + geom_smooth(se=False, colour="red")
        + labs(title="Residuals plot")
        + xlab("predicted values")
        + ylab("residuals")
        + theme_bw()
    )

    # normal Q-Q plot of the residuals
    p2 = (
        ggplot(model_values, aes(sample="residuals"))
        + stat_qq()
        + stat_qq_line(colour="blue")
        + labs(title="Q-Q plot")
        + xlab("theoretical quantiles")
        + ylab("sample quantiles")
        + theme_bw()
    )

    # location-scale plot
    p3 = (
        ggplot(model_values, aes(x="predicted_values", y="std_resid"))
        + geom_point()
        + geom_smooth(se=False, colour="red")
        + labs(title="Location-Scale plot")
        + xlab("predicted values")
        + ylab(u"\u221A"'|standardised residuals|')
        + theme_bw()
    )

    # Cook's distance, with the conventional 4/n threshold as a dashed line
    p4 = (
        ggplot(model_values, aes(x="obs", y="cooks_d"))
        + geom_point()
        + geom_segment(aes(xend="obs", yend=0), colour="blue")
        + geom_hline(aes(yintercept=0))
        + geom_hline(aes(yintercept=4 / n_obs), colour="blue", linetype="dashed")
        + labs(title="Influential points")
        + xlab("observation")
        + ylab('cook\'s d')
        + theme_bw()
    )

    p1 = pw.load_ggplot(p1, figsize=(3, 2))
    p2 = pw.load_ggplot(p2, figsize=(3, 2))
    p3 = pw.load_ggplot(p3, figsize=(3, 2))
    p4 = pw.load_ggplot(p4, figsize=(3, 2))

    # Assemble the 2x2 panel. Renamed from `dgplots` so the local no longer
    # shadows this function's own name.
    panel = (p1 | p2) / (p3 | p4)
    # NOTE(review): savefig()'s return value is passed through here although the
    # signature says -> None — confirm patchworklib's savefig return type.
    return panel.savefig()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Originally developed by Emer Jones (MRC Cognition and Brain Sciences Unit, Cambridge University) | ||
# adapted by M van Rongen (Nov 2022) | ||
|
||
def pwr_f2_test(u=None, v=None, f2=None, sig_level=None, power=None):
    """
    Power analysis for the general linear model (Python port of R's
    ``pwr::pwr.f2.test``).

    Arguments:
        u: numerator degrees of freedom (#number of groups/parameters - 1)
        v: denominator degrees of freedom (#number of observations - #number of groups/parameters)
        f2: Cohen's f2 effect size value
        sig_level: significance level we're working with
        power: desired power of test

    Only four out of the five parameters must be specified; the missing fifth
    value is solved for numerically with Brent's method.

    Prints a human-readable summary and returns a dict with all five values
    plus the implied number of observations (previously the function returned
    ``None``; the dict return is a backward-compatible addition).

    Raises:
        ValueError: if more than one parameter is left unspecified.
    """
    unknowns = sum(x is None for x in (u, v, f2, sig_level, power))
    if unknowns > 1:
        raise ValueError("Only one of u, v, f2, sig_level and power may be omitted.")

    def _power(u_, v_, f2_, sig_):
        # Power = P(noncentral F > critical F); noncentrality = f2 * (u + v + 1).
        return 1 - ncfdtr(u_, v_, f2_ * (u_ + v_ + 1), f.isf(q=sig_, dfn=u_, dfd=v_))

    if power is None:
        power = _power(u, v, f2, sig_level)
    elif u is None:
        u = brenth(lambda x: _power(x, v, f2, sig_level) - power, 1 + 1e-10, 100)
    elif v is None:
        v = brenth(lambda x: _power(u, x, f2, sig_level) - power, 1 + 1e-10, 1e9)
    elif f2 is None:
        f2 = brenth(lambda x: _power(u, v, x, sig_level) - power, 1e-7, 1e7)
    elif sig_level is None:
        # BUG FIX: the upper bracket was ``1 - 1e10`` (about -10 billion), an
        # invalid bracket for a probability, so solving for sig_level always
        # failed. Significance levels live in the open interval (0, 1).
        sig_level = brenth(lambda x: _power(u, v, f2, x) - power, 1e-10, 1 - 1e-10)

    num_obs = int(ceil(u)) + int(ceil(v)) + 1
    print("Power analysis results: \n u is: {},\n v is: {},\n f2 is: {},\n sig_level is: {},\n power is: {},\n num_obs is: {}".format(u, v, f2, sig_level, power, num_obs))
    return {
        "u": u,
        "v": v,
        "f2": f2,
        "sig_level": sig_level,
        "power": power,
        "num_obs": num_obs,
    }
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Sitemap: https://cambiotraining.github.io/corestats/sitemap.xml |
Large diffs are not rendered by default.