Dev (#79)
* update dependencies
* Reorganize repo into folders
* Parameterize Model by module
* Update for JuliaStaging/GeneralizedGenerated.jl#28
* Add package upper bounds
* new `markovBlanket` combinator
* Improved symbolic simplification and codegen
cscherrer authored Dec 10, 2019
1 parent 9ee18be commit e2a35d7
Showing 17 changed files with 424 additions and 280 deletions.
9 changes: 0 additions & 9 deletions Project.toml
@@ -5,18 +5,15 @@ version = "0.8.0"

[deps]
AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GeneralizedGenerated = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
IRTools = "7869d1d1-7146-5819-86e3-90919afe41df"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
@@ -34,22 +31,18 @@ SimplePartitions = "ec83eff0-a5b5-5643-ae32-5cbf6eedec9d"
SimplePosets = "b2aef97b-4721-5af9-b440-0bad754dc5ba"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
Stheno = "8188c328-b5d6-583d-959b-9690869a5511"
SymPy = "24249f21-da20-56a4-8eb1-6a02cf4ae2e6"
TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
AdvancedHMC = "0.2"
Bijectors = "0.4"
DiffResults = "0.0.4"
Distributions = "0.21"
DynamicHMC = "2.1"
FillArrays = "0.8"
ForwardDiff = "0.10"
GeneralizedGenerated = "0.2"
Graphs = "0.10"
IRTools = "0.3"
IterTools = "1.3"
LazyArrays = "0.14"
LogDensityProblems = "0.9"
@@ -66,10 +59,8 @@ SimpleGraphs = "0.3"
SimplePartitions = "0.2"
SimplePosets = "0.0"
StatsFuns = "0.9"
Stheno = "0.3, 0.4, 0.5"
SymPy = "1.0"
TransformVariables = "0.3"
Zygote = "0.4"
julia = "^1.3"

[extras]
90 changes: 90 additions & 0 deletions docs/src/probprog2020/probprog2020.jmd
@@ -0,0 +1,90 @@

```julia
using Revise, Soss
import SymPy
```


A `Model` is built with the `@model` macro, and describes a joint distribution in terms of its arguments and a body of assignment (`=`) and sampling (`~`) statements. For example, here is a simple regression model:

```julia
m = @model x begin
    α ~ Normal()
    β ~ Normal()
    σ ~ HalfNormal()
    yhat = α .+ β .* x
    n = length(x)
    y ~ For(n) do j
        Normal(yhat[j], σ)
    end
end;
```
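
Binding the model's arguments yields a joint distribution that can be sampled directly. A minimal sketch (the covariate vector `x` is made-up data, and the keyword form `m(x = x)` for binding arguments is an assumption here rather than something shown in this commit):

```julia
x = randn(20)       # made-up covariates
jd = m(x = x)       # bind the argument `x` to get a joint distribution
truth = rand(jd)    # draw a named tuple of the model's variables
truth.y             # the simulated response
```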


## Symbolic Simplification

A common bottleneck in probabilistic programming comes from evaluation of the log-posterior density, which is often required in an inner loop. Because of this, optimizing this computation can significantly reduce the overall cost of inference.
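
As a sketch of the idea (written out by hand here, not Soss's literal output), the observation term in the model above is a sum over $n$ Normal log-densities, which symbolic simplification can collapse so that the per-observation loop reduces to a few accumulated sums:

$$
\sum_{j=1}^{n} \log \mathcal{N}(y_j \mid \hat y_j, \sigma)
  = -\frac{n}{2}\log(2\pi) - n\log\sigma - \frac{1}{2\sigma^2}\sum_{j=1}^{n}(y_j - \hat y_j)^2 .
$$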

## Code Generation

Given values for the model arguments and all stochastic variables, the log-density is a deterministic function of those values, so we can generate specialized code for it from the simplified symbolic expression. The generated source for `m` can be inspected directly:

```julia
Soss.sourceSymlogpdf()(m)
```
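
Continuing the sketch above (the three-argument `logpdf(d, x, codegen)` call mirrors its use in `src/examples/repeated-measurements.jl` in this commit):

```julia
logpdf(jd, truth)            # evaluate the log-density directly
logpdf(jd, truth, codegen)   # evaluate it via the generated, symbolically simplified code
```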


## Model Transformations

### Markov Blankets

In a DAG-based model, a _Markov blanket_ at a node $v$ is given by the following "moralizing" procedure:

*TODO reference*

1. Connect $v$ to parents of its children
2. Replace directed edges with undirected edges
3. Return the nodes connected to $v$ (including $v$); see the sketch below
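
The procedure amounts to collecting a node's parents, children, and co-parents. A standalone sketch over a DAG given as `child => parents` pairs (illustrative only; this is not Soss's implementation, and the graph `g` below is a hand-written stand-in for the model above):

```julia
# Markov blanket of `v` in a DAG described by child => parents pairs.
function markov_blanket(parents::Dict{Symbol,Vector{Symbol}}, v::Symbol)
    children  = [c for (c, ps) in parents if v in ps]
    coparents = reduce(vcat, [parents[c] for c in children]; init=Symbol[])
    union([v], get(parents, v, Symbol[]), children, coparents)
end

g = Dict(:yhat => [:α, :β, :x], :y => [:yhat, :σ])
markov_blanket(g, :α)    # [:α, :yhat, :β, :x], up to ordering
```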

Markov blankets are commonly used in Gibbs sampling, where a given node can be updated by sampling from its full conditional distribution. This is complicated by the presence of deterministic nodes, which can introduce constraints that disallow any update. We address this problem by extending the blanket to include ancestors, stopping only when a stochastic node is reached. For example, we have

```julia
markovBlanket(m,:α)
```

Note that the arguments of this model consist of the parents of `:α`, together with the parents of its stochastic children. Equivalently (and more simply), we must include any referenced but undefined variable as an argument.

For a deterministic node such as `:yhat`, we have

```julia
markovBlanket(m, :yhat)
```

# Internals

When designing a PPL, it's important to consider the form of the intermediate representation.

In Soss, a `Model` is a `struct` of the form

```julia
struct Model{A,B,M}
    args :: Vector{Symbol}
    vals :: NamedTuple
    dists :: NamedTuple
    retn :: Union{Nothing, Symbol, Expr}
end
```
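
The pieces of a `Model` can be queried with the accessors added in `src/core/utils.jl` in this commit (`arguments`, `sampled`, `assigned`, `parameters`, `variables`). For the regression model above, the results are roughly as follows (shown as comments, since exact ordering is not guaranteed):

```julia
arguments(m)    # [:x]
sampled(m)      # [:α, :β, :σ, :y]    (keys of m.dists)
assigned(m)     # [:yhat, :n]         (keys of m.vals)
parameters(m)   # union of assigned and sampled
variables(m)    # union of arguments and parameters
```

The symbolic log-density for `m`, and its form after `expandSums` and `foldConstants`, can then be inspected: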





```julia
symlogpdf(m)
```

```julia
symlogpdf(m) |> expandSums |> foldConstants
```
2 changes: 1 addition & 1 deletion examples/2019-11-07-demo.jmd
@@ -166,7 +166,7 @@ scatter!(truth.x,truth.y, legend=false, c=1)
```

```julia
pred2 = predictive(m2, setdiff(stochastic(m2), [:y])...)
pred2 = predictive(m2, setdiff(sampled(m2), [:y])...)
```

```julia
1 change: 0 additions & 1 deletion src/Soss.jl
@@ -11,7 +11,6 @@ import MacroTools: prewalk, postwalk, @q, striplines, replace, flatten, @capture
import MLStyle
@reexport using MonteCarloMeasurements

using LinearAlgebra
using LazyArrays
using FillArrays

8 changes: 4 additions & 4 deletions src/core/model.jl
@@ -13,11 +13,11 @@ struct Model{A,B,M}
retn :: Union{Nothing, Symbol, Expr}
end

argstype(::Model{A,B}) where {A,B} = A
bodytype(::Model{A,B}) where {A,B} = B
argstype(::Model{A,B,M}) where {A,B,M} = A
bodytype(::Model{A,B,M}) where {A,B,M} = B

argstype(::Type{Model{A,B}}) where {A,B} = A
bodytype(::Type{Model{A,B}}) where {A,B} = B
argstype(::Type{Model{A,B,M}}) where {A,B,M} = A
bodytype(::Type{Model{A,B,M}}) where {A,B,M} = B

getmodule(::Type{Model{A,B,M}}) where {A,B,M} = from_type(M)
getmodule(::Model{A,B,M}) where {A,B,M} = from_type(M)
47 changes: 23 additions & 24 deletions src/core/utils.jl
@@ -17,8 +17,23 @@ argtuple(m) = arguments(m) |> astuple
astuple(x) = Expr(:tuple,x...)
astuple(x::Symbol) = Expr(:tuple,x)



export arguments
arguments(m::Model) = m.args
arguments(d::JointDistribution) = d.args

export sampled
sampled(m::Model) = keys(m.dists) |> collect

export assigned
assigned(m::Model) = keys(m.vals) |> collect

export parameters
parameters(m::Model) = union(assigned(m), sampled(m))

export variables
variables(m::Model) = m.args ∪ keys(m.vals) ∪ keys(m.dists)
variables(m::Model) = union(arguments(m), parameters(m))

function variables(expr :: Expr)
leaf(x::Symbol) = begin
@@ -34,29 +49,13 @@ end
variables(s::Symbol) = [s]
variables(x) = []


export arguments
arguments(m) = m.args

export stochastic
stochastic(m::Model) = keys(m.dists)

export bound
bound(m::Model) = keys(m.vals)

export bodyVariables
bodyVariables(m::Model) = setdiff(variables(m), arguments(m))

# TODO: Fix these broken methods
# export observed
# observed(m::Model) = keys(m.data)

# export parameters
# parameters(m::Model) = setdiff(
# stochastic(m),
# observed(m)
# )

for f in [:arguments, :assigned, :sampled, :parameters, :variables]
    @eval function $f(m::Model, nt::NamedTuple)
        vs = $f(m)
        isempty(vs) && return NamedTuple()
        return select(nt, $f(m))
    end
end


export foldall
15 changes: 10 additions & 5 deletions src/examples/repeated-measurements.jl
@@ -2,8 +2,8 @@ using Revise
using Soss

m = @model begin
n = 10 # number of samples
m = 10 # reviewers for each example
n = 100 # number of samples
m = 100 # reviewers for each example

p_bad ~ Beta(1,3) |> iid(n)

@@ -20,16 +20,21 @@ using Random
truth = rand(m());
symlogpdf(m())

@time logpdf(m(),truth, codegen)
@time logpdf(m(),truth, codegen)
using BenchmarkTools
@btime logpdf(m(),truth)
@btime logpdf(m(),truth, codegen)

f1 = Soss._codegen(m, true);
f2 = Soss._codegen(m,false);

@btime f1((),truth)
@btime f2((),truth)

codegen(m(),truth)



logpdf(m(), merge(truth, (p_bad=shuffle(truth.p_bad),)))
logpdf(m(), merge(truth, (p_bad=shuffle(truth.p_bad),)), codegen)


@time result = dynamicHMC(m(), (y=truth.y,), codegen) ;
2 changes: 1 addition & 1 deletion src/examples/strata.jl
@@ -109,7 +109,7 @@ scatter!(truth.x,truth.y, legend=false, c=1)
plot!(xx, truth.α .+ truth.β .* xx, legend=false, lw=3, c=2)
savefig("fit2.png")

pred2 = predictive(m2, setdiff(stochastic(m2), [:y])...)
pred2 = predictive(m2, setdiff(sampled(m2), [:y])...)

post2pred = [rand(pred2(θ)((x=x,))) for θ ∈ post2] |> particles

2 changes: 1 addition & 1 deletion src/fromcube.jl
@@ -44,7 +44,7 @@ function sourceFromcube(m::Model)
argsExpr = Expr(:tuple,freeVariables(m)...)

stochExpr = begin
vals = map(stochastic(m)) do x Expr(:(=), x,x) end
vals = map(sampled(m)) do x Expr(:(=), x,x) end
Expr(:tuple, vals...)
end

2 changes: 1 addition & 1 deletion src/importance.jl
@@ -47,7 +47,7 @@ function sourceImportanceSample()
kwargsExpr = Expr(:tuple,kwargs...)

stochExpr = begin
vals = map(stochastic(m)) do x Expr(:(=), x,x) end
vals = map(sampled(m)) do x Expr(:(=), x,x) end
Expr(:tuple, vals...)
end
