From 66e6780b5beed1a38b32f83c946404986be1087c Mon Sep 17 00:00:00 2001
From: Gavin Rhys Lloyd <grlloyd@users.noreply.github.com>
Date: Thu, 27 Jun 2024 14:57:34 +0100
Subject: [PATCH] anova update

- fix Type I SS
- prevent dropping of columns
- fix column selection for output tables
---
 R/anova_class.R             | 50 +++++++++++++++++++++----------------
 tests/testthat/test-anova.R | 29 +++++++++++++++++++--
 2 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/R/anova_class.R b/R/anova_class.R
index 5c0c5a3..595c820 100644
--- a/R/anova_class.R
+++ b/R/anova_class.R
@@ -75,31 +75,31 @@ setMethod(f="model_apply",
     {
         X=D$data
         var_names=all.vars(M$formula)
-
+        
         # attempt to detect within factors
         within=which(var_names %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2])
-
+        
         if (length(within)>0) {
             var_names_ex=var_names[-within]
         } else {
             var_names_ex=var_names
         }
-
+        
         y=D$sample_meta[var_names[2:length(var_names)]]
-
+        
         # set the contrasts
         O=options('contrasts') # keep the old ones
         options(contrasts = c("contr.sum","contr.poly"))
-
+        
         output=apply(X,2,function(x) {
             temp=y
             temp[[var_names[1]]]=x
-
+            
             # check number levels
             temp2=na.omit(temp)
             s=sapply(var_names_ex[2:length(var_names)],function(x) summary(temp2[[x]]))
             n=nrow(temp2)
-
+            
             # check for alias columns
             dona=FALSE
             if (all(unlist(s)>2)) { # check we have enough levels
@@ -111,7 +111,7 @@ setMethod(f="model_apply",
             } else {
                 dona=TRUE
             }
-
+            
             # check for enough samples
             temp3=temp
             temp3[[var_names[1]]]=rnorm(nrow(y))
@@ -120,50 +120,58 @@ setMethod(f="model_apply",
             if (n<(p+1)) {
                 dona=TRUE
             }
-
+            
             if (dona) {
                 # missing values have probably prevented one or more combinations of levels being present
                 # use some fake data to generate the output table then replace all the values with NA
                 temp[[var_names[1]]]=rnorm(nrow(y))
                 LM=lm(formula=M$formula,data=temp)
-                A=car::Anova(LM,type=M$ss_type)
+                if (M$ss_type=='I') {
+                    A=anova(LM)  
+                } else {
+                    A=car::Anova(LM,type=M$ss_type)
+                }
                 A[!is.na(A)]=NA
                 return(A)
             }
-
+            
             LM=lm(formula=M$formula,data=temp)
-            A=car::Anova(LM,type=M$ss_type)
+            if (M$ss_type=='I') {
+                A=anova(LM)  
+            } else {
+                A=car::Anova(LM,type=M$ss_type)
+            }
             return(A)
         })
-
+        
         f_statistic=sapply(output,function(x){
             x$`F value`
         })
         f_statistic=as.data.frame(t(f_statistic))
         colnames(f_statistic)=rownames(output[[1]])
-        f_statistic=f_statistic[,2:(ncol(f_statistic)-1),drop=FALSE]
-
-
+        f_statistic=f_statistic[,colnames(y),drop=FALSE]
+        
+        
         p_value=sapply(output,function(x){
             x$`Pr(>F)`
         })
         p_value=as.data.frame(t(p_value))
         colnames(p_value)=rownames(output[[1]])
-        p_value=p_value[,2:(ncol(p_value)-1),drop=FALSE]
-
+        p_value=p_value[,colnames(y),drop=FALSE]
+        
         # fdr correct the p.values
         for (k in 1:ncol(p_value)) {
             p_value[, k]=p.adjust(p_value[ ,k],M$mtc)
         }
-
+        
         # populate the object
         M$f_statistic=f_statistic
         M$p_value=p_value
         M$significant=as.data.frame(p_value<M$alpha)
-
+        
         # reset the contrasts
         options(O)
-
+        
         return(M)
     }
 )
diff --git a/tests/testthat/test-anova.R b/tests/testthat/test-anova.R
index 3a6a742..51c6956 100644
--- a/tests/testthat/test-anova.R
+++ b/tests/testthat/test-anova.R
@@ -1,15 +1,40 @@
 # testanova class
-test_that('anova 1way',{
+test_that('anova 1way TYPE 1',{
   # DatasetExperiment
   D=iris_DatasetExperiment()
   # method
-  ME=ANOVA(formula=y~Species)
+  ME=ANOVA(formula=y~Species,ss_type = 'I')
   ME=model_apply(ME,D)
   # expect all true
   expect_true(all(ME$significant[,1]))
 
 })
 
+# testanova class
+test_that('anova 1way type 2',{
+    # DatasetExperiment
+    D=iris_DatasetExperiment()
+    # method
+    ME=ANOVA(formula=y~Species,ss_type='II')
+    ME=model_apply(ME,D)
+    # expect all true
+    expect_true(all(ME$significant[,1]))
+    
+})
+
+# testanova class
+test_that('anova 1way type 3',{
+    # DatasetExperiment
+    D=iris_DatasetExperiment()
+    # method
+    ME=ANOVA(formula=y~Species,ss_type='III')
+    ME=model_apply(ME,D)
+    # expect all true
+    expect_true(all(ME$significant[,1]))
+    
+})
+
+
 test_that('anova 2way',{
   set.seed('57475')
   # DatasetExperiment