
Commit 3654bc1

Reformat code; use Chairmarks for benchmarking.
1 parent a90a2b3 commit 3654bc1

7 files changed: +73 −45 lines
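The substantive change across these files is the switch from BenchmarkTools' `@benchmark`, which receives its workload through a `setup` expression, to Chairmarks' `@b`, which takes the expression directly and interpolates already-bound values with `$`. A minimal sketch of the before/after pattern (the vector `x` is a hypothetical stand-in for the fitted models benchmarked in `aGHQ.qmd`):

```julia
using Chairmarks   # replaces `using BenchmarkTools`

x = rand(1_000)    # hypothetical data; the diffs below benchmark fitted models instead

# BenchmarkTools style removed by this commit:
#   @benchmark sum(v) seconds = 1 setup = (v = x)
# Chairmarks style added by this commit:
@b sum($x)         # returns a compact timing summary
```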

LDT_accuracy.qmd (+24 −12)

@@ -27,19 +27,23 @@ and define some constants
 ```{julia}
 #| code-fold: true
 #| output: false
-@isdefined(contrasts) || const contrasts = Dict{Symbol, Any}()
+@isdefined(contrasts) || const contrasts = Dict{Symbol,Any}()
 @isdefined(progress) || const progress = false
 ```

 ## Create the dataset

-
 ```{julia}
 #| output: false
 trials = innerjoin(
-  DataFrame(dataset(:ELP_ldt_trial)),
-  select(DataFrame(dataset(:ELP_ldt_item)), :item, :isword, :wrdlen),
-  on = :item
+    DataFrame(dataset(:ELP_ldt_trial)),
+    select(
+        DataFrame(dataset(:ELP_ldt_item)),
+        :item,
+        :isword,
+        :wrdlen,
+    ),
+    on=:item,
 )
 ```

@@ -54,20 +58,28 @@ This takes about ten to fifteen minutes on a recent laptop
 ```{julia}
 contrasts[:isword] = EffectsCoding()
 contrasts[:wrdlen] = Center(8)
-@time gm1 = let f = @formula(acc ~ 1 + isword * wrdlen + (1|item) + (1|subj))
-  fit(MixedModel, f, trials, Bernoulli(); contrasts, progress, init_from_lmm=(:β, :θ))
-end
+@time gm1 =
+    let f =
+            @formula(acc ~ 1 + isword * wrdlen + (1 | item) + (1 | subj))
+        fit(
+            MixedModel,
+            f,
+            trials,
+            Bernoulli();
+            contrasts,
+            progress,
+            init_from_lmm=(:β, :θ),
+        )
+    end
 ```

-
 ```{julia}
 print(gm1)
 ```

-
 ```{julia}
 #| fig-cap: Conditional modes and 95% prediction intervals on random effects for subject in model gm1
 #| label: fig-gm1condmodesubj
 #| code-fold: true
-qqcaterpillar!(Figure(; size=(800,800)), gm1, :subj)
-```
+qqcaterpillar!(Figure(; size=(800, 800)), gm1, :subj)
+```

Project.toml (+1 −1)

@@ -6,11 +6,11 @@ version = "0.1.0"
 [deps]
 AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
+Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
 DataFrameMacros = "75880514-38bc-4a95-a458-c2aea5a3a702"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

aGHQ.qmd (+10 −9)

@@ -136,8 +136,8 @@ Load the packages to be used
 #| output: false
 #| label: packagesA03
 using AlgebraOfGraphics
-using BenchmarkTools
 using CairoMakie
+using Chairmarks # a more modern BenchmarkTools
 using DataFrames
 using EmbraceUncertainty: dataset
 using FreqTables

@@ -404,8 +404,7 @@ Each evaluation of the deviance is fast, requiring only a fraction of a millisec
 ```{julia}
 βopt = copy(com05fe.β)
-@benchmark deviance(setβ!(m, β)) seconds = 1 setup =
-  (m = com05fe; β = βopt)
+@b deviance(setβ!($com05fe, $βopt))
 ```

 but the already large number of evaluations for these six coefficients would not scale well as this dimension increases.

@@ -637,7 +636,7 @@ The IRLS algorithm has converged in 4 iterations to essentially the same devianc
 Each iteration of the IRLS algorithm takes more time than a deviance evaluation, but still only a fraction of a millisecond on a laptop computer.

 ```{julia}
-@benchmark deviance(updateβ!(m)) seconds = 1 setup = (m = com05fe)
+@b deviance(updateβ!($com05fe))
 ```

 ## GLMMs and the PIRLS algorithm {#sec-PIRLS}

@@ -868,7 +867,7 @@ pirls!(m; verbose=true);
 As with IRLS, PIRLS is a fast and stable algorithm for determining the mode of the conditional distribution $(\mcU|\mcY=\bby)$ with $\bbtheta$ and $\bbbeta$ held fixed.

 ```{julia}
-@benchmark pirls!(mm) seconds = 1 setup = (mm = m)
+@b pirls!($m)
 ```

 The time taken for the four iterations to determine the conditional mode of $\bbu$ is comparable to the time taken for a single call to `updateβ!`.

@@ -1131,23 +1130,25 @@ The weights and positions for the 9th order rule are shown in @fig-ghnine.
 #| label: fig-ghnine
 df9 = DataFrame(gausshermitenorm(9))
 ggplot(df9, aes(; x=:abscissae, y=:weights)) +
-  geom_point() + labs(; x="Positions", y="Weights")
+  geom_point() +
+  labs(; x="Positions", y="Weights")
 # draw(
 #   data(gausshermitenorm(9)) *
 #   mapping(:abscissae => "Positions", :weights);
 #   figure=(; size=(600,450)),
 # )
 ```

-Notice that the magnitudes of the weights drop quite dramatically away from zero, even on a logarithmic scale (@fig-ghninelog)
+Notice that the magnitudes of the weights drop quite dramatically as the position moves away from zero, even on a logarithmic scale (@fig-ghninelog)

 ```{julia}
 #| code-fold: true
 #| fig-cap: Weights (logarithm base 2) and positions for the 9th order normalized Gauss-Hermite quadrature rule
 #| label: fig-ghninelog
 ggplot(df9, aes(; x=:abscissae, y=:weights)) +
-  geom_point() + labs(; x="Positions", y="Weights") +
-  scale_y_log2()
+  geom_point() +
+  labs(; x="Positions", y="Weights") +
+  scale_y_log10()
 # draw(
 #   data(gausshermitenorm(9)) * mapping(
 #     :abscissae => "Positions",
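The final hunk above plots the abscissae and weights returned by `gausshermitenorm(9)`, and the surrounding text notes how quickly the weights decay away from zero. For reference only (the book defines `gausshermitenorm` itself; FastGaussQuadrature is an assumption here, not a dependency touched by this commit), a rule with the same standard-normal normalization can be obtained by rescaling the classical Gauss-Hermite rule:

```julia
using FastGaussQuadrature  # assumed package; not part of this commit

# Classical 9-point rule for ∫ f(x) exp(-x²) dx
x, w = gausshermite(9)

# Rescale to the standard-normal kernel exp(-z²/2)/√(2π):
# substitute z = √2·x, so the weights become w/√π and sum to one.
abscissae = sqrt(2) .* x
weights = w ./ sqrt(π)

sum(weights)                    # ≈ 1.0
sum(weights .* abscissae .^ 2)  # ≈ 1.0, the variance of a standard normal
```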

glmmbernoulli.qmd (+8 −6)

@@ -149,11 +149,12 @@ contrasts[:urban] = HelmertCoding()
 com01 =
   let d = contra,
     ds = Bernoulli(),
-    f = @formula(use ~
-      1 + livch + (age + abs2(age)) * urban + (1 | dist))
+    f = @formula(
+      use ~ 1 + livch + (age + abs2(age)) * urban + (1 | dist)
+    )

-    fit(MixedModel, f, d, ds; contrasts, nAGQ, progress)
-  end
+    fit(MixedModel, f, d, ds; contrasts, nAGQ, progress)
+  end
 ```

 Notice that in the formula language defined by the [StatsModels](https://github.com/JuliaStats/StatsModels.jl) package, an interaction term is written with the `&` operator.

@@ -284,8 +285,9 @@ A series of such model fits led to a model with random effects for the combinati
 ```{julia}
 com05 =
-  let f = @formula(use ~
-      1 + urban + ch * age + abs2(age) + (1 | dist & urban)),
+  let f = @formula(
+      use ~ 1 + urban + ch * age + abs2(age) + (1 | dist & urban)
+    ),
     d = contra,
     ds = Bernoulli()

intro.qmd (+3 −3)

@@ -635,10 +635,10 @@ You can check the details by clicking on the "Code" button in the HTML version o
 #| warning: false
 ggplot(
   filter(==("β") ∘ getproperty(:type), dsm01pars),
-  aes(x = :value),
-) +
+  aes(; x=:value),
+) +
   geom_density() +
-  labs(x = "Bootstrap samples of β₁")
+  labs(; x="Bootstrap samples of β₁")
 ```

 The distribution of the estimates of `β₁` is more-or-less a Gaussian (or "normal") shape, with a mean value of `{julia} repr(mean(βdf.value), context=:compact=>true)` which is close to the estimated `β₁` of `{julia} repr(only(dsm01.β), context=:compact=>true)`.

largescaledesigned.qmd (+9 −6)

@@ -156,7 +156,9 @@ A bar plot of the word length counts, @fig-ldtwrdlenhist, shows that the majorit
 #| code-fold: true
 #| fig-cap: "Bar plot of word lengths in the items used in the lexical decision task."
 #| label: fig-ldtwrdlenhist
-ggplot(ldttrial, aes(; x=:wrdlen)) + geom_bar() + labs(; x="Word length")
+ggplot(ldttrial, aes(; x=:wrdlen)) +
+  geom_bar() +
+  labs(; x="Word length")
 # #| warning: false
 # let wlen = 1:21
 #   draw(

@@ -281,7 +283,8 @@ A plot of the median response time versus proportion accurate, @fig-ldtmedianrtv
 #| fig-cap: "Median response time versus proportion accurate by subject in the LDT."
 #| label: fig-ldtmedianrtvspropacc
 ggplot(bysubj, aes(; x=:spropacc, y=:smedianrt)) +
-  geom_point() + geom_smooth(; method="smooth") +
+  geom_point() +
+  geom_smooth(; method="smooth") +
   labs(; x="Proportion accurate", y="Median response time (ms)")
 ```

@@ -338,8 +341,9 @@ A density plot of the pruned response times, @fig-elpldtrtdens, shows they are s
 #| code-fold: true
 #| fig-cap: Kernel density plot of the pruned response times (ms.) in the LDT.
 #| label: fig-elpldtrtdens
-ggplot(pruned, aes(; x=:rt)) + geom_density() +
-  labs(; x = "Response time (ms.) for correct responses")
+ggplot(pruned, aes(; x=:rt)) +
+  geom_density() +
+  labs(; x="Response time (ms.) for correct responses")
 # draw(
 #   data(pruned) *
 #   mapping(:rt => "Response time (ms.) for correct responses") *

@@ -540,8 +544,7 @@ condmeans = leftjoin!(
   on=:item,
 )
 draw(
-  data(condmeans) *
-  mapping(
+  data(condmeans) * mapping(
     :elm01 => "Conditional means of item random effects for model elm01",
     :elm02 => "Conditional means of item random effects for model elm02";
     color=:isword,

multiple.qmd (+18 −8)

@@ -113,23 +113,30 @@ of the data, then plot it
 #| fig-cap: "Diameter of inhibition zone by plate and sample. Plates are ordered by increasing mean response."
 #| label: fig-penicillindot
 let sumry = sort!(
-    combine(groupby(penicillin, :plate), :diameter => mean => :meandia),
+    combine(
+      groupby(penicillin, :plate),
+      :diameter => mean => :meandia,
+    ),
     :meandia,
   ),
   df = sort(
-    transform(penicillin, :plate => ByRow(sorter(sumry.plate)); renamecols=false),
-    :plate
+    transform(
+      penicillin,
+      :plate => ByRow(sorter(sumry.plate));
+      renamecols=false,
+    ),
+    :plate,
   )
+
   mp = mapping(
     :diameter => "Diameter of inhibition zone [mm]",
     :plate => "Plate",
     color=:sample,
   )

   draw(
-    data(df) * mp *
-    visual(ScatterLines; marker='○', markersize=12);
-    figure=(; size=(600, 450))
+    data(df) * mp * visual(ScatterLines; marker='○', markersize=12);
+    figure=(; size=(600, 450)),
   )
 end
 ```

@@ -249,7 +256,8 @@ As for model `dsm01` the bootstrap parameter estimates of the fixed-effects para
 #| label: fig-pnm01bsbeta
 #| warning: false
 ggplot(DataFrame(pnm01samp.tbl), aes(x=:β1)) +
-  geom_density() + labs(x="Bootstrap samples of β₁")
+  geom_density() +
+  labs(; x="Bootstrap samples of β₁")
 ```

 and the shortest coverage interval on this parameter is close to the Wald interval

@@ -557,7 +565,9 @@ Although the response, `y`, is on a scale of 1 to 5,
 #| fig-cap: "Histogram of instructor ratings in the *insteval* data"
 #| label: fig-instevalhist
 #| warning: false
-ggplot(DataFrame(insteval), aes(x=:y)) + geom_bar() + labs(x="Rating")
+ggplot(DataFrame(insteval), aes(x=:y)) +
+  geom_bar() +
+  labs(; x="Rating")
 ```

 it is sufficiently diffuse to warrant treating it as if it were a continuous response.
