Skip to content

Commit

Permalink
reorganise multithreaded code
Browse files Browse the repository at this point in the history
  • Loading branch information
maximerischard committed Oct 4, 2020
1 parent 93292d2 commit 827298b
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 172 deletions.
40 changes: 13 additions & 27 deletions src/GPE.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,20 +215,6 @@ function update_mll!(gp::GPE; noise::Bool=true, domean::Bool=true, kern::Bool=tr
gp
end

function _dmll_kern_row!(dmll, buf, k, ααinvcKI, X, data, j, dim, nparams)
# diagonal
dKij_dθ!(buf, k, X, X, data, j, j, dim, nparams)
@inbounds for iparam in 1:nparams
dmll[iparam] += buf[iparam] * ααinvcKI[j, j] / 2.0
end
# off-diagonal
@inbounds for i in 1:j-1
dKij_dθ!(buf, k, X, X, data, i, j, dim, nparams)
@simd for iparam in 1:nparams
dmll[iparam] += buf[iparam] * ααinvcKI[i, j]
end
end
end
"""
dmll_kern!((dmll::AbstractVector, k::Kernel, X::AbstractMatrix, data::KernelData, ααinvcKI::AbstractMatrix))
Expand All @@ -241,20 +227,20 @@ function dmll_kern!(dmll::AbstractVector, k::Kernel, X::AbstractMatrix, data::Ke
@assert nparams == length(dmll)
dK_buffer = Vector{Float64}(undef, nparams)
dmll[:] .= 0.0
# make a copy per thread for objects that are potentially not thread-safe:
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
buffercopies = [similar(dK_buffer) for _ in 1:Threads.nthreads()]
dmllcopies = [deepcopy(dmll) for _ in 1:Threads.nthreads()]

@inbounds Threads.@threads for j in 1:nobs
kthread = kcopies[Threads.threadid()]
bufthread = buffercopies[Threads.threadid()]
dmllthread = dmllcopies[Threads.threadid()]
_dmll_kern_row!(dmllthread, bufthread, kthread,
ααinvcKI, X, data, j, dim, nparams)
@inbounds for j in 1:nobs
# diagonal
dKij_dθ!(dK_buffer, k, X, X, data, j, j, dim, nparams)
for iparam in 1:nparams
dmll[iparam] += dK_buffer[iparam] * ααinvcKI[j, j] / 2.0
end
# off-diagonal
for i in j+1:nobs
dKij_dθ!(dK_buffer, k, X, X, data, i, j, dim, nparams)
@simd for iparam in 1:nparams
dmll[iparam] += dK_buffer[iparam] * ααinvcKI[i, j]
end
end
end

dmll[:] = sum(dmllcopies) # sum up the results from all threads
return dmll
end

Expand Down
1 change: 1 addition & 0 deletions src/GaussianProcesses.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const invΦ = norminvcdf
# all package code should be included here
include("means/means.jl")
include("kernels/kernels.jl")
include("covariance/covariance.jl")
include("likelihoods/likelihoods.jl")
include("common.jl")
include("utils.jl")
Expand Down
72 changes: 72 additions & 0 deletions src/covariance/covariance.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
cov(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix)
Create covariance matrix from kernel `k` and matrices of observations `X1` and `X2`, where
each column is an observation.
"""
function cov(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())
dim1, nobs1 = size(X1)
dim2, nobs2 = size(X2)
dim1==dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
cK = Array{promote_type(eltype(X1), eltype(X2))}(undef, nobs1, nobs2)
cov!(cK, k, X1, X2, data)
end

"""
cov(k::Kernel, X::AbstractMatrix[, data::KernelData = EmptyData()])
Create covariance matrix from kernel `k`, matrix of observations `X`, where each column is
an observation, and kernel data `data` constructed from input observations.
"""
cov(k::Kernel, X::AbstractMatrix, data::KernelData=EmptyData()) = cov(k, X, X, data)

function cov!(cK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())
dim, nobs1 = size(X1)
dim, nobs2 = size(X2)
cK .= cov_ij.(Ref(k), Ref(X1), Ref(X2), Ref(data), 1:nobs1, (1:nobs2)', dim)
end
cov!(cK::AbstractMatrix, k::Kernel, X::AbstractMatrix, data::KernelData=EmptyData()) = cov!(cK, k, X, X, data)

@inline cov_ij(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, i::Int, j::Int, dim::Int) = cov(k, @view(X1[:,i]), @view(X2[:,j]))
# the default is to drop the KernelData
@inline cov_ij(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, i::Int, j::Int, dim::Int) = cov_ij(k, X1, X2, i, j, dim)

############################
##### Kernel Gradients #####
############################
@inline @inbounds function dKij_dθ!(dK::AbstractVector, kern::Kernel, X1::AbstractMatrix, X2::AbstractMatrix,
data::KernelData, i::Int, j::Int, dim::Int, npars::Int)
for iparam in 1:npars
dK[iparam] = dKij_dθp(kern, X1, X2, data, i, j, iparam, dim)
end
end

# Calculates the stack [dk / dθᵢ] of kernel matrix gradients
function grad_stack!(stack::AbstractArray, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData)
@inbounds for p in 1:num_params(k)
grad_slice!(view(stack, :, :, p), k, X1, X2, data, p)
end
stack
end

grad_stack!(stack::AbstractArray, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix) =
grad_stack!(stack, k, X1, X2, KernelData(k, X1, X2))

grad_stack(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix) = grad_stack(k, X1, X2, KernelData(k, X1, X2))

function grad_stack(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData)
nobs1 = size(X1, 2)
nobs2 = size(X2, 2)
stack = Array{eltype(X)}(undef, nobs1, nobs2, num_params(k))
grad_stack!(stack, k, X1, X2, data)
end

function grad_slice!(dK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, p::Int)
dim, nobs1 = size(X1)
dim, nobs2 = size(X2)
dK .= dKij_dθp.(Ref(k), Ref(X1), Ref(X2), Ref(data), 1:nobs1, (1:nobs2)', p, dim)
end

@inline function dKij_dθp(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, i::Int, j::Int, p::Int, dim::Int)
return dKij_dθp(k, X1, X2, i, j, p, dim)
end
125 changes: 125 additions & 0 deletions src/covariance/multithreaded.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
function _cov_row!(cK, k, X::AbstractMatrix, data, j, dim)
cK[j,j] = cov_ij(k, X, X, data, j, j, dim)
@inbounds for i in 1:j-1
cK[i,j] = cov_ij(k, X, X, data, i, j, dim)
cK[j,i] = cK[i,j]
end
end
function cov!(cK::Matrix, k::Kernel, X::Matrix, data::KernelData=EmptyData())
dim, nobs = size(X)
(nobs,nobs) == size(cK) || throw(ArgumentError("cK has size $(size(cK)) and X has size $(size(X))"))
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()] # in case k is not threadsafe (e.g. ADkernel)
@inbounds Threads.@threads for j in 1:nobs
kthread = kcopies[Threads.threadid()]
_cov_row!(cK, k, X, data, j, dim)
end
return cK
end
function _cov_row!(cK, k, X1::AbstractMatrix, X2::AbstractMatrix, data, i, dim, nobs2)
@inbounds for j in 1:nobs2
cK[i,j] = cov_ij(k, X1, X2, data, i, j, dim)
end
end
"""
cov!(cK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())
Like [`cov(k, X1, X2)`](@ref), but stores the result in `cK` rather than a new matrix.
"""
function cov!(cK::Matrix, k::Kernel, X1::Matrix, X2::Matrix, data::KernelData=EmptyData())
if X1 === X2
return cov!(cK, k, X1, data)
end
dim1, nobs1 = size(X1)
dim2, nobs2 = size(X2)
dim1==dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
dim = size(X1, 1)
(nobs1,nobs2) == size(cK) || throw(ArgumentError("cK has size $(size(cK)) X1 $(size(X1)) and X2 $(size(X2))"))
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
@inbounds Threads.@threads for i in 1:nobs1
kthread = kcopies[Threads.threadid()]
_cov_row!(cK, kthread, X1, X2, data, i, dim, nobs2)
end
return cK
end

function _grad_slice_row!(dK, k, X::AbstractMatrix, data, j, p, dim)
dK[j,j] = dKij_dθp(k,X,X,data,j,j,p,dim)
@inbounds @simd for i in 1:(j-1)
dK[i,j] = dKij_dθp(k,X,X,data,i,j,p,dim)
dK[j,i] = dK[i,j]
end
end
function grad_slice!(dK::AbstractMatrix, k::Kernel, X::Matrix, data::KernelData, p::Int)
dim, nobs = size(X)
(nobs,nobs) == size(dK) || throw(ArgumentError("dK has size $(size(dK)) and X has size $(size(X))"))
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
@inbounds Threads.@threads for j in 1:nobs
kthread = kcopies[Threads.threadid()]
_grad_slice_row!(dK, kthread, X, data, j, p, dim)
end
return dK
end
function _grad_slice_row!(dK, k, X1::AbstractMatrix, X2::AbstractMatrix, data, i, p, dim, nobs2)
@inbounds @simd for j in 1:nobs2
dK[i,j] = dKij_dθp(k,X1,X2,data,i,j,p,dim)
end
end
function grad_slice!(dK::AbstractMatrix, k::Kernel, X1::Matrix, X2::Matrix, data::KernelData, p::Int)
if X1 === X2
return grad_slice!(dK, k, X1, data, p)
end
dim1, nobs1 = size(X1)
dim2, nobs2 = size(X2)
dim1==dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
(nobs1,nobs2) == size(dK) || throw(ArgumentError("dK has size $(size(dK)) X1 $(size(X1)) and X2 $(size(X2))"))
dim=dim1
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
@inbounds Threads.@threads for i in 1:nobs1
kthread = kcopies[Threads.threadid()]
_grad_slice_row!(dK, kthread, X1, X2, data, i, p, dim, nobs2)
end
return dK
end

function _dmll_kern_row!(dmll, buf, k, ααinvcKI, X, data, j, dim, nparams)
# diagonal
dKij_dθ!(buf, k, X, X, data, j, j, dim, nparams)
@inbounds for iparam in 1:nparams
dmll[iparam] += buf[iparam] * ααinvcKI[j, j] / 2.0
end
# off-diagonal
@inbounds for i in 1:j-1
dKij_dθ!(buf, k, X, X, data, i, j, dim, nparams)
@simd for iparam in 1:nparams
dmll[iparam] += buf[iparam] * ααinvcKI[i, j]
end
end
end
"""
dmll_kern!((dmll::AbstractVector, k::Kernel, X::AbstractMatrix, data::KernelData, ααinvcKI::AbstractMatrix))
Derivative of the marginal log likelihood log p(Y|θ) with respect to the kernel hyperparameters.
"""
function dmll_kern!(dmll::AbstractVector, k::Kernel, X::Matrix, data::KernelData,
ααinvcKI::Matrix{Float64}, covstrat::CovarianceStrategy)
dim, nobs = size(X)
nparams = num_params(k)
@assert nparams == length(dmll)
dK_buffer = Vector{Float64}(undef, nparams)
dmll[:] .= 0.0
# make a copy per thread for objects that are potentially not thread-safe:
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
buffercopies = [similar(dK_buffer) for _ in 1:Threads.nthreads()]
dmllcopies = [deepcopy(dmll) for _ in 1:Threads.nthreads()]

@inbounds Threads.@threads for j in 1:nobs
kthread = kcopies[Threads.threadid()]
bufthread = buffercopies[Threads.threadid()]
dmllthread = dmllcopies[Threads.threadid()]
_dmll_kern_row!(dmllthread, bufthread, kthread,
ααinvcKI, X, data, j, dim, nparams)
end

dmll[:] = sum(dmllcopies) # sum up the results from all threads
return dmll
end
Loading

0 comments on commit 827298b

Please sign in to comment.