From f2e4c415bd6909ba22a2def85e0f17566fcee2f7 Mon Sep 17 00:00:00 2001 From: Joao Aparicio Date: Wed, 26 Jun 2024 03:18:18 -0500 Subject: [PATCH] Bugfix ChainedVector constructor performance Currently the ChainedVector constructor has bad performance if the args have empty entries. ``` import Random struct MyType x::Float64 y::Float64 end x1 = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(1:6)] for _ in 1:1_000_000]; @time ChainedVector(x1); # 0.017 secs x2 = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(0:5)] for _ in 1:1_000_000]; @time ChainedVector(x2); # 24 secs ``` Note that the difference comes only from the fact that x2 might have empty entries whereas x1 doesn't. The problem is that the function cleanup! has runtime proportional to the number of elements times the number of elements which are empty vectors. ``` function cleanup_original!(arrays, inds) @assert length(arrays) == length(inds) for i = length(arrays):-1:1 if !isassigned(arrays, i) || length(arrays[i]) == 0 deleteat!(arrays, i) deleteat!(inds, i) end end return end function cleanup_new!(arrays, inds) @assert length(arrays) == length(inds) mask_it = Base.Iterators.filter(i->isassigned(arrays, i) && length(arrays[i])>0, 1:length(arrays)) n = 0 for k in mask_it n += 1 k > n || continue arrays[n] = arrays[k] inds[n] = inds[k] end resize!(arrays, n) resize!(inds, n) return end ``` We can check that the new version is faster and returns the same result ``` x = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(0:5)] for _ in 1:1_000_000]; arrays = x; inds = Vector{Int}(undef, length(arrays)); x1 = copy(x); inds1 = copy(inds); x2 = copy(x); inds2 = copy(inds); @time cleanup_original!(x1, inds1) # 20 secs @time cleanup_new!(x2, inds2) # 0.04 secs all(x1 .== x2) # true all(inds1 .== inds2) # true ``` --- Project.toml | 2 +- src/chainedvector.jl | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index 29245ec..bbe604d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SentinelArrays" uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" authors = ["Jacob Quinn "] -version = "1.4.3" +version = "1.4.4" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/chainedvector.jl b/src/chainedvector.jl index a5a1cb5..e445426 100644 --- a/src/chainedvector.jl +++ b/src/chainedvector.jl @@ -43,12 +43,16 @@ end @inline function cleanup!(arrays, inds) @assert length(arrays) == length(inds) - for i = length(arrays):-1:1 - if !isassigned(arrays, i) || length(arrays[i]) == 0 - deleteat!(arrays, i) - deleteat!(inds, i) - end - end + mask_it = Base.Iterators.filter(i->isassigned(arrays, i) && length(arrays[i])>0, 1:length(arrays)) + n = 0 + for k in mask_it + n += 1 + k > n || continue + arrays[n] = arrays[k] + inds[n] = inds[k] + end + resize!(arrays, n) + resize!(inds, n) return end