From 0b3489e3a05b50b8d55ed3a9741d0ebfb8616b64 Mon Sep 17 00:00:00 2001 From: Yanwen Wang Date: Mon, 11 Nov 2024 23:07:59 +0800 Subject: [PATCH] Update tests for identifiability warning messages --- test/runtests.jl | 68 +++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index f4bd2f8..fa59624 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,71 +9,73 @@ using Statistics @testset "LatentClassAnalysis.jl" begin Random.seed!(123) # For reproducibility - + @testset "Data Preparation" begin # Create test data with sufficient variables df = DataFrame( - x1 = repeat([1, 2], 50), # binary 1/2 - x2 = repeat([0, 1], 50), # binary 0/1 - x3 = categorical(repeat(["A", "B", "C"], 34)[1:100]), # 3 categories - x4 = repeat([1, 2], 50), # binary 1/2 - x5 = repeat([1, 2], 50) # binary 1/2 + x1=repeat([1, 2], 50), # binary 1/2 + x2=repeat([0, 1], 50), # binary 0/1 + x3=categorical(repeat(["A", "B", "C"], 34)[1:100]), # 3 categories + x4=repeat([1, 2], 50), # binary 1/2 + x5=repeat([1, 2], 50) # binary 1/2 ) - + # Test basic data preparation data, n_cats = prepare_data(df, :x1, :x2, :x3, :x4, :x5) @test size(data) == (100, 5) @test n_cats == [2, 2, 3, 2, 2] @test all(x -> x ≥ 1, data) # All values should be 1-based - + # Test with single column data_single, n_cats_single = prepare_data(df, :x1) @test size(data_single) == (100, 1) @test n_cats_single == [2] end - + @testset "Model Initialization" begin # Valid model (5 binary items, 2 classes) n_items = 5 n_classes = 2 n_categories = fill(2, n_items) model = LCAModel(n_classes, n_items, n_categories) - + # Test model structure @test model.n_classes == 2 @test model.n_items == 5 @test length(model.class_probs) == 2 @test length(model.item_probs) == 5 @test all(isapprox(sum(model.class_probs), 1.0)) - + # Test invalid inputs @test_throws ArgumentError LCAModel(1, n_items, n_categories) # < 2 classes @test_throws ArgumentError LCAModel(2, n_items, [1, 2, 2, 2, 2]) # < 2 categories - + # Test identifiability conditions - @test_throws ArgumentError LCAModel(3, 2, [2, 2]) # insufficient items for 3 classes + @test_logs (:warn, "Model may not be identifiable. With 3 classes and minimum of 2 categories, need ideally 5 items (got 2).") begin + LCAModel(3, 2, [2, 2]) + end end - + @testset "Model Fitting" begin # Generate synthetic data with known structure n_samples = 100 n_items = 5 # Sufficient for 2 classes with binary items - + # Create data matrix data = zeros(Int, n_samples, n_items) true_classes = rand(1:2, n_samples) - + for i in 1:n_samples for j in 1:n_items p = true_classes[i] == 1 ? 0.8 : 0.2 data[i, j] = rand() < p ? 1 : 2 end end - + # Fit model model = LCAModel(2, n_items, fill(2, n_items)) ll = fit!(model, data) - + # Test results @test !isnan(ll) @test !isinf(ll) @@ -84,7 +86,7 @@ using Statistics @test all(isapprox.(sum(item_prob, dims=2), 1.0, atol=1e-10)) end end - + @testset "Model Diagnostics" begin # Prepare data n_items = 5 # Sufficient for 2 classes @@ -92,14 +94,14 @@ using Statistics [Symbol("x$i") => repeat([1, 2], 50) for i in 1:n_items]... ) data, n_cats = prepare_data(df, [Symbol("x$i") for i in 1:n_items]...) - + # Fit model model = LCAModel(2, n_items, n_cats) ll = fit!(model, data) - + # Calculate diagnostics diag = diagnostics!(model, data, ll) - + # Test diagnostic values @test !isnan(diag.ll) @test !isnan(diag.aic) @@ -107,7 +109,7 @@ using Statistics @test !isnan(diag.sbic) @test diag.bic > diag.aic # BIC should be more conservative end - + @testset "Prediction" begin # Prepare data n_items = 5 # Sufficient for 2 classes @@ -116,25 +118,25 @@ using Statistics for i in 1:n_samples data[i, :] .= rand(1:2, n_items) end - + # Fit model and predict model = LCAModel(2, n_items, fill(2, n_items)) fit!(model, data) assignments, probs = predict(model, data) - + # Test predictions @test length(assignments) == n_samples @test size(probs) == (n_samples, 2) @test all(1 .<= assignments .<= 2) @test all(0 .<= probs .<= 1) @test all(isapprox.(sum(probs, dims=2), 1.0, atol=1e-10)) - + # Test consistency for i in 1:n_samples @test assignments[i] == argmax(probs[i, :]) end end - + @testset "Show Profiles" begin # Prepare data with sufficient items n_items = 5 @@ -142,21 +144,21 @@ using Statistics [Symbol("x$i") => repeat([1, 2], 50) for i in 1:n_items]... ) data, n_cats = prepare_data(df, [Symbol("x$i") for i in 1:n_items]...) - + # Fit model model = LCAModel(2, n_items, n_cats) fit!(model, data) - + # Test basic display @test_nowarn show_profiles(model, df, [Symbol("x$i") for i in 1:n_items]) - + # Test with custom names @test_nowarn show_profiles(model, df, [Symbol("x$i") for i in 1:n_items], - var_names=["Var$i" for i in 1:n_items]) - + var_names=["Var$i" for i in 1:n_items]) + # Test with custom labels custom_labels = [["No", "Yes"] for _ in 1:n_items] @test_nowarn show_profiles(model, df, [Symbol("x$i") for i in 1:n_items], - var_labels=custom_labels) + var_labels=custom_labels) end end \ No newline at end of file