Skip to content

Commit

Permalink
Bugfix ChainedVector constructor performance
Browse files Browse the repository at this point in the history
Currently the ChainedVector constructor has bad performance if the args have empty entries.

```
import Random
struct MyType
    x::Float64
    y::Float64
end

x1 = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(1:6)] for _ in 1:1_000_000];
@time ChainedVector(x1);  # 0.017 secs

x2 = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(0:5)] for _ in 1:1_000_000];
@time ChainedVector(x2);  # 24 secs
```

Note that the difference comes only from the fact that x2 might have empty entries
whereas x1 doesn't.

The problem is that the function cleanup! has runtime proportional to the number of
elements times the number of elements which are empty vectors.

```
function cleanup_original!(arrays, inds)
    @Assert length(arrays) == length(inds)
    for i = length(arrays):-1:1
        if !isassigned(arrays, i) || length(arrays[i]) == 0
            deleteat!(arrays, i)
            deleteat!(inds, i)
        end
    end
    return
end

function cleanup_new!(arrays, inds)
    @Assert length(arrays) == length(inds)
    mask_it = Base.Iterators.filter(i->isassigned(arrays, i) && length(arrays[i])>0, 1:length(arrays))
    n = 0
    for k in mask_it
        n += 1
        k > n || continue
        arrays[n] = arrays[k]
        inds[n] = inds[k]
    end
    resize!(arrays, n)
    resize!(inds, n)
    return
end
```

We can check that the new version is faster and returns the same result

```
x = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(0:5)] for _ in 1:1_000_000];
arrays = x;
inds = Vector{Int}(undef, length(arrays));

x1 = copy(x);
inds1 = copy(inds);
x2 = copy(x);
inds2 = copy(inds);

@time cleanup_original!(x1, inds1)  # 20 secs
@time cleanup_new!(x2, inds2)  # 0.04 secs

all(x1 .== x2)  # true
all(inds1 .== inds2)  # true
```
  • Loading branch information
JoaoAparicio committed Jun 26, 2024
1 parent d13560a commit f2e4c41
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SentinelArrays"
uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
authors = ["Jacob Quinn <[email protected]>"]
version = "1.4.3"
version = "1.4.4"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
16 changes: 10 additions & 6 deletions src/chainedvector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,16 @@ end

@inline function cleanup!(arrays, inds)
@assert length(arrays) == length(inds)
for i = length(arrays):-1:1
if !isassigned(arrays, i) || length(arrays[i]) == 0
deleteat!(arrays, i)
deleteat!(inds, i)
end
end
mask_it = Base.Iterators.filter(i->isassigned(arrays, i) && length(arrays[i])>0, 1:length(arrays))
n = 0
for k in mask_it
n += 1
k > n || continue
arrays[n] = arrays[k]
inds[n] = inds[k]
end
resize!(arrays, n)
resize!(inds, n)
return
end

Expand Down

0 comments on commit f2e4c41

Please sign in to comment.