From 66e6780b5beed1a38b32f83c946404986be1087c Mon Sep 17 00:00:00 2001 From: Gavin Rhys Lloyd Date: Thu, 27 Jun 2024 14:57:34 +0100 Subject: [PATCH] anova update - fix Type I SS - prevent dropping of columns - fix column selection for output tables --- R/anova_class.R | 50 +++++++++++++++++++++---------------- tests/testthat/test-anova.R | 29 +++++++++++++++++++-- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/R/anova_class.R b/R/anova_class.R index 5c0c5a3..595c820 100644 --- a/R/anova_class.R +++ b/R/anova_class.R @@ -75,31 +75,31 @@ setMethod(f="model_apply", { X=D$data var_names=all.vars(M$formula) - + # attempt to detect within factors within=which(var_names %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2]) - + if (length(within)>0) { var_names_ex=var_names[-within] } else { var_names_ex=var_names } - + y=D$sample_meta[var_names[2:length(var_names)]] - + # set the contrasts O=options('contrasts') # keep the old ones options(contrasts = c("contr.sum","contr.poly")) - + output=apply(X,2,function(x) { temp=y temp[[var_names[1]]]=x - + # check number levels temp2=na.omit(temp) s=sapply(var_names_ex[2:length(var_names)],function(x) summary(temp2[[x]])) n=nrow(temp2) - + # check for alias columns dona=FALSE if (all(unlist(s)>2)) { # check we have enough levels @@ -111,7 +111,7 @@ setMethod(f="model_apply", } else { dona=TRUE } - + # check for enough samples temp3=temp temp3[[var_names[1]]]=rnorm(nrow(y)) @@ -120,50 +120,58 @@ setMethod(f="model_apply", if (n<(p+1)) { dona=TRUE } - + if (dona) { # missing values have probably prevented one or more combinations of levels being present # use some fake data to generate the output table then replace all the values with NA temp[[var_names[1]]]=rnorm(nrow(y)) LM=lm(formula=M$formula,data=temp) - A=car::Anova(LM,type=M$ss_type) + if (M$ss_type=='I') { + A=anova(LM) + } else { + A=car::Anova(LM,type=M$ss_type) + } A[!is.na(A)]=NA return(A) } - + LM=lm(formula=M$formula,data=temp) - A=car::Anova(LM,type=M$ss_type) + if (M$ss_type=='I') { + A=anova(LM) + } else { + A=car::Anova(LM,type=M$ss_type) + } return(A) }) - + f_statistic=sapply(output,function(x){ x$`F value` }) f_statistic=as.data.frame(t(f_statistic)) colnames(f_statistic)=rownames(output[[1]]) - f_statistic=f_statistic[,2:(ncol(f_statistic)-1),drop=FALSE] - - + f_statistic=f_statistic[,colnames(y),drop=FALSE] + + p_value=sapply(output,function(x){ x$`Pr(>F)` }) p_value=as.data.frame(t(p_value)) colnames(p_value)=rownames(output[[1]]) - p_value=p_value[,2:(ncol(p_value)-1),drop=FALSE] - + p_value=p_value[,colnames(y),drop=FALSE] + # fdr correct the p.values for (k in 1:ncol(p_value)) { p_value[, k]=p.adjust(p_value[ ,k],M$mtc) } - + # populate the object M$f_statistic=f_statistic M$p_value=p_value M$significant=as.data.frame(p_value