Merge pull request #57 from yufongpeng/release-0.7.1

Bug fix for FullModel/export user fn only
yufongpeng · Jan 18, 2023 · 5e107d5 · 5e107d5 · yufongpeng · Jan 18, 2023
2 parents 5ac8260 + ad1d79f
commit 5e107d5
Show file tree

Hide file tree

Showing 11 changed files with 134 additions and 90 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "AnovaBase"
 uuid = "946dddda-6a23-4b48-8e70-8e60d9b8d680"
 authors = ["Yu-Fong Peng <[email protected]>"]
-version = "0.7.0"
+version = "0.7.1"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -16,4 +16,4 @@ Distributions = "0.23, 0.24, 0.25"
 Reexport = "0.2, 1"
 StatsBase = "0.33"
 StatsModels = "0.6"
-julia = "1.6, 1.7, 1.8"
+julia = "1.6, 1.7, 1.8"
diff --git a/README.md b/README.md
@@ -23,9 +23,9 @@ Use the following packages for different models:
 |Packages for models|Packages for ANOVA|Models|Fitted by|
 |:-----------------:|:----------------:|:----:|:-------:|
 |[GLM.jl](https://juliastats.org/GLM.jl/stable/)|[AnovaGLM.jl](https://github.com/yufongpeng/AnovaGLM.jl)|`TableRegressionModel{<: LinearModel}`|`GLM.lm` or `fit(LinearModel, ...)`|
-|||`TableRegressionModel{<: GeneralizedLinearModel}`|`GLM.glm` or `fit(GeneralizedLinearModel, ...)`|
+| | |`TableRegressionModel{<: GeneralizedLinearModel}`|`GLM.glm` or `fit(GeneralizedLinearModel, ...)`|
 |[MixedModels.jl](https://juliastats.org/MixedModels.jl/stable/)|[AnovaMixedModels.jl](https://github.com/yufongpeng/AnovaMixedModels.jl)|`LinearMixedModel`|`AnovaMixedModels.lme` or `fit(LinearMixedModel, ...)`|
-|||`GeneralizedLinearMixedModel`|`AnovaGLM.glme` or `fit(GeneralizedLinearMixedModel, ...)`|
+| | |`GeneralizedLinearMixedModel`|`AnovaGLM.glme` or `fit(GeneralizedLinearMixedModel, ...)`|
 |[FixedEffectModels.jl](https://github.com/FixedEffects/FixedEffectModels.jl)|[AnovaFixedEffectModels.jl](https://github.com/yufongpeng/AnovaFixedEffectModels.jl) |`TableRegressionModel{<: FixedEffectModel}`|`AnovaFixedEffectModels.lfe`|
 
 ## TO DO

diff --git a/docs/src/Algorithm_AnovaGLM.md b/docs/src/Algorithm_AnovaGLM.md
@@ -81,14 +81,14 @@ Calculate the the upper factor of Cholesky factorization of $\boldsymbol \Sigma^
 The included facrors are defined as follows:
 ```math
 \begin{aligned}
-    \mathcal{B}_j &= \{k \in [1, l]\, |\, k \text{ is not an interaction term of }p_j \text{ and other terms}\}\\\\
+    \mathcal{B}_j &= \{k \in \mathcal{P}\, |\, k \text{ is not an interaction term of }p_j \text{ and other terms}\}\\\\
     \mathcal{M}_j &= \mathcal{B}_j \cup \{p_j\}
 \end{aligned}
 ```
 Define two vectors of index sets $\mathbf J$ and $\mathbf K$ where 
 ```math
 \begin{aligned}
-    J_j &= \{i \in [1, m]\, |\, id_X(i) \text{ is an interaction term of }p_j \text{ and other terms}\}\\\\
+    J_j &= \{i \in \mathcal{C}\, |\, id_X(i) \text{ is an interaction term of }p_j \text{ and other terms}\}\\\\
     K_j &= J_j \cup I_j
 \end{aligned}
 ```

diff --git a/docs/src/AnovaBase.md b/docs/src/AnovaBase.md
@@ -14,7 +14,7 @@ AnovaBase.nestedmodels(::RegressionModel)
 ## ANOVA
 ```@docs
 AnovaBase.AnovaResult
-AnovaBase.anova(::Type{<: GoodnessOfFit}, ::RegressionModel)
+AnovaBase.anova(::Type{<: GoodnessOfFit}, ::AnovaModel)
 ```
 
 ## Attributes
@@ -40,7 +40,7 @@ AnovaBase.canonicalgoodnessoffit
 ```@docs
 AnovaBase.dof_residual(aov::AnovaResult)
 AnovaBase.predictors(::RegressionModel)
-AnovaBase.anovatable(::AnovaResult{<: FullModel})
+AnovaBase.anovatable(::AnovaResult)
 ```
 
 ## Developer utility
@@ -49,6 +49,8 @@ AnovaBase.ftest_nested
 AnovaBase.lrt_nested
 AnovaBase.dof_asgn
 AnovaBase.prednames
+AnovaBase.has_intercept
+AnovaBase.any_not_aliased_with_1
 AnovaBase.getterms
 AnovaBase.isinteract
 AnovaBase.select_super_interaction

diff --git a/docs/src/AnovaGLM.md b/docs/src/AnovaGLM.md
@@ -2,16 +2,9 @@
 ```@meta
 CurrentModule = AnovaGLM
 ```
-
-## ANOVA
 ```@docs
-AnovaGLM.anova(::Val{:AnovaGLM})
+AnovaGLM.anova(::Type{<: GoodnessOfFit}, ::Vararg{TableRegressionModel{<: Union{LinearModel, GeneralizedLinearModel}}})
 anova_lm
 anova_glm
-```
-
-## Models
-```@docs
-AnovaGLM.nestedmodels(::Val{:AnovaGLM})
-GLM.glm(::FormulaTerm, ::DataFrame, ::Binomial, ::Link, ::Vararg{Any})
+AnovaGLM.nestedmodels(<: TableRegressionModel{<: LinearModel})
 ```
diff --git a/src/AnovaBase.jl b/src/AnovaBase.jl
@@ -14,21 +14,14 @@ export
     anova, nestedmodels, 
 
     # GoodnessOfFit
-    GoodnessOfFit, FTest, LikelihoodRatioTest, LRT, canonicalgoodnessoffit, 
+    GoodnessOfFit, FTest, LikelihoodRatioTest, LRT,
 
     # Attributes
     dof, dof_residual, deviance, nobs, formula,
-    teststat, pval, anova_test, anova_type, 
-
-    # Utils
-    ftest_nested, lrt_nested, dof_asgn, _diff, _diffn,
-    predictors, getterms, isinteract, 
-    select_super_interaction, select_sub_interaction, 
-    select_not_super_interaction, select_not_sub_interaction, 
-    subformula, extract_contrasts, clear_schema, 
+    teststat, pval, anova_test, anova_type,
 
     # IO
-    prednames, testname, anovatable, AnovaTable
+    prednames, anovatable, AnovaTable
 
 # Test 
 """
@@ -55,12 +48,14 @@ struct LikelihoodRatioTest <: GoodnessOfFit end
 @doc doc_lrt
 const LRT = LikelihoodRatioTest
 
+include("term.jl")
+
 """
     abstract type AnovaModel{M, N} end
 
 An abstract type as super type of any models for ANOVA.
 """
-abstract type AnovaModel{M, N} end
+abstract type AnovaModel{M, N} <: StatisticalModel end
 """
     NestedModels{M, N} <: AnovaModel{M, N}
 
@@ -85,7 +80,7 @@ NestedModels{M}(model::T...) where {M, T <: Tuple} = throw(ArgumentError("Some m
 
 A wrapper of full model for conducting ANOVA.
 * `M` is a type of regression model.
-* `N` is the number of models.
+* `N` is the number of predictors.
 
 # Fields
 * `model`: a regression model.
@@ -98,11 +93,6 @@ struct FullModel{M, N} <: AnovaModel{M, N}
     type::Int
 end
 
-has_intercept(f::MatrixTerm) = has_intercept(f.terms)
-has_intercept(f::TupleTerm) = has_intercept(first(f))
-has_intercept(::InterceptTerm{H}) where H = H
-has_intercept(::AbstractTerm) = false
-
 """
     FullModel(model::RegressionModel, type::Int, null::Bool, test_intercept::Bool)
 
@@ -115,7 +105,7 @@ Create a `FullModel` with several model-specific parameters.
 """
 function FullModel(model::RegressionModel, type::Int, null::Bool, test_intercept::Bool)
     err1 = ArgumentError("Invalid set of model specification for ANOVA; not enough variables provided.")
-    #err2 = ArgumentError("Invalid set of model specification for ANOVA; try adding variables without zeros.")
+    #err2 = ArgumentError("Invalid set of model specification for ANOVA; all coefficents are aliased with 1.")
     preds = predictors(model)
     pred_id = collect(eachindex(preds))
     has_intercept(preds) || popfirst!(pred_id)
@@ -125,11 +115,15 @@ function FullModel(model::RegressionModel, type::Int, null::Bool, test_intercept
         null || popfirst!(pred_id)
     elseif type ≡ 2
         # ~ 0 + A + A & B + A & ..., all terms are related to A, ~ A as null
-        # ~ 1 + A..., all terms are related to 1, ~ 1 as null
         null || isempty(select_not_super_interaction(preds, first(pred_id))) && popfirst!(pred_id)
+        # ~ 1 + A..., all terms are aliased with 1, ~ 1 as null
+        null || any_not_aliased_with_1(preds) || filter!(!=(1), pred_id)
     elseif type ≡ 3
+        # ~ 1 + A + B + C... when !null and all aliased with 1, ~ 1 as null
+        null || any_not_aliased_with_1(preds) || filter!(!=(1), pred_id)
         # ~ 1
         null || length(pred_id) ≡ 1 && throw(err1)
+
     else
         throw(ArgumentError("Invalid type of ANOVA"))
     end
@@ -177,7 +171,6 @@ function_arg_error(fn, type::AbstractString) = ErrorException("Arguments are val
 
 include("fit.jl")
 include("attr.jl")
-include("term.jl")
 include("io.jl")
 include("interface.jl")
 

diff --git a/src/attr.jl b/src/attr.jl
@@ -5,8 +5,8 @@
 
 Number of observations.
 """
-nobs(aov::AnovaResult) = nobs(aov.anovamodel.model)
-nobs(aov::AnovaResult{<: NestedModels}) = nobs(first(aov.anovamodel.model))
+nobs(aov::AnovaResult) = round(Int, nobs(aov.anovamodel.model))
+nobs(aov::AnovaResult{<: NestedModels}) = round(Int, nobs(first(aov.anovamodel.model)))
 
 """
     dof(aov::AnovaResult)

diff --git a/src/interface.jl b/src/interface.jl
@@ -14,6 +14,7 @@ end
 
 # implement drop1/add1 in R?
 """
+    anova(Test::Type{<: GoodnessOfFit}, <anovamodel>; <keyword arguments>)
     anova(<models>...; test::Type{<: GoodnessOfFit}, <keyword arguments>)
     anova(Test::Type{<: GoodnessOfFit}, <model>; <keyword arguments>)
     anova(Test::Type{<: GoodnessOfFit}, <models>...; <keyword arguments>)
@@ -22,9 +23,13 @@ Analysis of variance.
 
 Return `AnovaResult{M, Test, N}`. See [`AnovaResult`](@ref) for details.
 
-* `models`: model objects. If mutiple models are provided, they should be nested, fitted with the same data and the last one is the most complex.
+* `anovamodel`: a `AnovaModel`.
+* `models`: `RegressionModel`(s). If mutiple models are provided, they should be nested, fitted with the same data and the last one is the most complex.
 * `Test`: test statistics for goodness of fit. Available tests are [`LikelihoodRatioTest`](@ref) (`LRT`) and [`FTest`](@ref).
 """
+function anova(Test::Type{T}, anovamodel::S; kwargs...) where {T <: GoodnessOfFit, S <: AnovaModel}
+    throw(function_arg_error(anova, "::Type{$T}, ::$S"))
+end
 function anova(Test::Type{T}, model::S; kwargs...) where {T <: GoodnessOfFit, S <: RegressionModel}
     throw(function_arg_error(anova, "::Type{$T}, ::$S"))
 end
@@ -48,12 +53,14 @@ dof_residual(aov::AnovaResult{<: NestedModels}) = dof_residual.(aov.anovamodel.m
 
 """
     predictors(model::RegressionModel)
+    predictors(anovamodel::FullModel)
 
-Return a tuple of `Terms` which are predictors of the model. 
+Return a tuple of `Terms` which are predictors of the model or anovamodel. 
 
 By default, it returns `formula(model).rhs.terms`; if the formula has special structures, this function should be overloaded.
 """
 predictors(model::RegressionModel) = formula(model).rhs.terms
+predictors(anovamodel::FullModel) = getindex.(Ref(predictors(anovamodel.model)), anovamodel.pred_id)
 
 """
     anovatable(aov::AnovaResult{<: FullModel, Test}; rownames = prednames(aov))
@@ -62,7 +69,7 @@ predictors(model::RegressionModel) = formula(model).rhs.terms
     anovatable(aov::AnovaResult{<: NestedModels, LRT, N}; rownames = string.(1:N)) where N
 
 Return a table with coefficients and related statistics of ANOVA.
-When displaying `aov` in repl, `rownames` will be `prednames(aov)` for `FullModel` and `string.(1:N)` for `NestedModels`. 
+When displaying `aov` in repl, `rownames` will be `prednames(aov)` for `FullModel` and `"x" .* string.(1:N)` for `NestedModels`. 
 
 For nested models, there are two default methods for `FTest` and `LRT`; one can also define new methods dispatching on `::NestedModels{M}` where `M` is a model type. 
 
@@ -75,12 +82,12 @@ function anovatable(aov::AnovaResult{T}; rownames = prednames(aov)) where {T <:
     throw(function_arg_error(anovatable, AnovaResult{T}))
 end
 
-function anovatable(::AnovaResult{T, S, N}; rownames = string.(1:N)) where {T <: NestedModels, S, N}
+function anovatable(::AnovaResult{T, S, N}; rownames = "x" .* string.(1:N)) where {T <: NestedModels, S, N}
     throw(function_arg_error(anovatable, AnovaResult{T}))
 end
 
 # default anovatable api for comparing multiple models
-function anovatable(aov::AnovaResult{<: NestedModels, FTest, N}; rownames = string.(1:N)) where N
+function anovatable(aov::AnovaResult{<: NestedModels, FTest, N}; rownames = "x" .* string.(1:N)) where N
     AnovaTable([
                     dof(aov), 
                     [NaN, _diff(dof(aov))...], 
@@ -94,7 +101,7 @@ function anovatable(aov::AnovaResult{<: NestedModels, FTest, N}; rownames = stri
               rownames, 7, 6)
 end 
 
-function anovatable(aov::AnovaResult{<: NestedModels, LRT, N}; rownames = string.(1:N)) where N
+function anovatable(aov::AnovaResult{<: NestedModels, LRT, N}; rownames = "x" .* string.(1:N)) where N
     AnovaTable([
                     dof(aov), 
                     [NaN, _diff(dof(aov))...], 

diff --git a/src/io.jl b/src/io.jl
@@ -12,6 +12,27 @@ testname(::Type{LRT}) = "Likelihood-ratio test"
 #testname(M::AnovaStatsCp) = "Mallow's Cp"
 @deprecate tname testname
 
+"""
+    prednames(aov::AnovaResult)
+    prednames(anovamodel::FullModel) 
+    prednames(anovamodel::NestedModels)
+    prednames(<model>)
+
+Return the name of predictors as a vector of strings.
+When there are multiple models, return value is `nothing`.
+"""
+prednames(aov::AnovaResult) = prednames(aov.anovamodel)
+prednames(anovamodel::FullModel) = collect(prednames.(predictors(anovamodel)))
+function prednames(anovamodel::NestedModels)
+    names = collect(prednames.(anovamodel.model))
+    names[2:end] = [setdiff(b, a) for (a, b) in @views zip(names[1:end - 1], names[2:end])]
+    join.(names, "+")
+end
+prednames(model::RegressionModel) = vectorize(prednames(predictors(model)))
+
+@deprecate coefnames(aov::AnovaResult) prednames(aov::AnovaResult)
+@deprecate coefnames(x, ::Val{:anova}) prednames(x)
+
 function show(io::IO, anovamodel::FullModel)
     println(io, "FullModel for type $(anovamodel.type) test")
     println(io)

diff --git a/src/term.jl b/src/term.jl
@@ -1,24 +1,32 @@
 # Function related to terms
 """
-    prednames(aov::AnovaResult)
-    prednames(anovamodel::FullModel) 
-    prednames(anovamodel::NestedModels)
-    prednames(<model>)
+    has_intercept(<terms>)
 
-Return the name of predictors as a vector of strings.
-When there are multiple models, return value is `nothing`.
+Return true if 'InterceptTerm{true}` is in the terms.
 """
-prednames(aov::AnovaResult) = prednames(aov.anovamodel)
-prednames(anovamodel::FullModel) = collect(prednames.(getindex.(Ref(predictors(anovamodel.model)), anovamodel.pred_id)))
-function prednames(anovamodel::NestedModels)
-    names = collect(prednames.(anovamodel.model))
-    names[2:end] = [setdiff(b, a) for (a, b) in @views zip(names[1:end - 1], names[2:end])]
-    join.(names, "+")
-end
-prednames(model::RegressionModel) = vectorize(prednames(predictors(model)))
+has_intercept(f::FormulaTerm) = has_intercept(f.rhs)
+has_intercept(f::MatrixTerm) = has_intercept(f.terms)
+has_intercept(f::TupleTerm) = has_intercept(first(f))
+has_intercept(::InterceptTerm{H}) where H = H
+has_intercept(::AbstractTerm) = false
+
+"""
+    any_not_aliased_with_1(<terms>)
 
-@deprecate coefnames(aov::AnovaResult) prednames(aov::AnovaResult)
-@deprecate coefnames(x, ::Val{:anova}) prednames(x)
+Return true if there are any terms not aliased with the intercept, e.g. `ContinuousTerm` or `FunctionTerm`.
+
+Terms without schema are considered aliased with the intercept.
+"""
+any_not_aliased_with_1(f::FormulaTerm) = any_not_aliased_with_1(f.rhs)
+any_not_aliased_with_1(f::MatrixTerm) = any_not_aliased_with_1(f.terms)
+any_not_aliased_with_1(f::TupleTerm) = any(any_not_aliased_with_1, f)
+any_not_aliased_with_1(::InterceptTerm) = false
+any_not_aliased_with_1(t::ContinuousTerm) = true
+any_not_aliased_with_1(t::CategoricalTerm) = false
+any_not_aliased_with_1(t::FunctionTerm) = true
+any_not_aliased_with_1(t::InteractionTerm) = any(any_not_aliased_with_1, t.terms)
+any_not_aliased_with_1(t::Term) = false
+any_not_aliased_with_1(t::ConstantTerm) = false
 
 """
     prednames(<term>)
@@ -56,7 +64,7 @@ prednames(t::CategoricalTerm) = string(t.sym)
 prednames(t::FunctionTerm) = string(t.exorig)
 prednames(t::InteractionTerm) = join(prednames.(t.terms), " & ")
 prednames(t::Term) = string(t)
-prednames(t::ConstantTerm{H}) where H = string(t)
+prednames(t::ConstantTerm) = string(t)
 prednames(t) = coefnames(t)
 
 """