From 8bbcc7258314dc1824cad9bcdf32ac0f8e6d2e66 Mon Sep 17 00:00:00 2001 From: contradict Date: Tue, 7 Sep 2021 16:02:11 -0700 Subject: [PATCH 1/5] Add more iteration tests --- test/runtests.jl | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index b3a9b09..3a430fa 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -239,8 +239,7 @@ const encodings = [ @test YAML.load(IOBuffer(data)) == "test" end -@testset "multi_doc_bom" begin - iterable = YAML.load_all(""" +const multidoc_contents = """ \ufeff---\r test: 1 \ufeff--- @@ -248,16 +247,52 @@ test: 2 \ufeff--- test: 3 -""") +\ufeff--- +42 +""" + +@testset "multi_doc_bom" begin + iterable = YAML.load_all(multidoc_contents) (val, state) = iterate(iterable) @test isequal(val, Dict("test" => 1)) (val, state) = iterate(iterable, state) @test isequal(val, Dict("test" => 2)) (val, state) = iterate(iterable, state) @test isequal(val, Dict("test" => 3)) + (val, state) = iterate(iterable, state) + @test isequal(val, 42) @test iterate(iterable, state) === nothing end +@testset "multi_doc_file" begin + fname = tempname() # cleanup=true, file will be deleted on process exit + open(fname, "w") do f + write(f, multidoc_contents) + end + iterable = YAML.load_all_file(fname) + (val, state) = iterate(iterable) + @test isequal(val, Dict("test" => 1)) + (val, state) = iterate(iterable, state) + @test isequal(val, Dict("test" => 2)) + (val, state) = iterate(iterable, state) + @test isequal(val, Dict("test" => 3)) + (val, state) = iterate(iterable, state) + @test isequal(val, 42) + @test iterate(iterable, state) === nothing +end + +@testset "multi_doc_iteration_protocol" begin + fname = tempname() # cleanup=true, file will be deleted on process exit + open(fname, "w") do f + write(f, multidoc_contents) + end + iterable = YAML.load_all_file(fname) + @test Base.IteratorSize(YAML.YAMLDocIterator) == Base.SizeUnknown() + @test 
Base.IteratorEltype(YAML.YAMLDocIterator) == Base.HasEltype() + @test eltype(iterable) == Dict{Any, Any} + @test length(collect(iterable)) == 3 +end + # test that an OrderedDict is written in the correct order using OrderedCollections, DataStructures @test strip(YAML.yaml(OrderedDict(:c => 3, :b => 2, :a => 1))) == join(["c: 3", "b: 2", "a: 1"], "\n") From 03e4c5993038e40ab66d1268c0c51e4a8d931051 Mon Sep 17 00:00:00 2001 From: contradict Date: Mon, 6 Sep 2021 18:22:57 -0700 Subject: [PATCH 2/5] Complete iteration protocol. --- src/YAML.jl | 3 +++ test/runtests.jl | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/YAML.jl b/src/YAML.jl index d5a7067..8291fd1 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -131,6 +131,9 @@ function iterate(it::YAMLDocIterator, _ = nothing) return doc, nothing end +Base.IteratorSize(::Type{YAMLDocIterator}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{YAMLDocIterator}) = Base.EltypeUnknown() + """ load_all(x::Union{AbstractString, IO}) -> YAMLDocIterator diff --git a/test/runtests.jl b/test/runtests.jl index 3a430fa..f4b558a 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -288,9 +288,8 @@ end end iterable = YAML.load_all_file(fname) @test Base.IteratorSize(YAML.YAMLDocIterator) == Base.SizeUnknown() - @test Base.IteratorEltype(YAML.YAMLDocIterator) == Base.HasEltype() - @test eltype(iterable) == Dict{Any, Any} - @test length(collect(iterable)) == 3 + @test Base.IteratorEltype(YAML.YAMLDocIterator) == Base.EltypeUnknown() + @test length(collect(iterable)) == 4 end # test that an OrderedDict is written in the correct order From 67dd8dd354b713eff6d485308c2ab8d53e23d49c Mon Sep 17 00:00:00 2001 From: contradict Date: Tue, 7 Sep 2021 15:41:48 -0700 Subject: [PATCH 3/5] Do not close file before iterator reads data. 
--- src/YAML.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/YAML.jl b/src/YAML.jl index 8291fd1..693b4bf 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -164,8 +164,5 @@ load_file(filename::AbstractString, args...; kwargs...) = Parse the YAML file `filename`, and return corresponding YAML documents. """ load_all_file(filename::AbstractString, args...; kwargs...) = - open(filename, "r") do input - load_all(input, args...; kwargs...) - end - + load_all(open(filename, "r"), args...; kwargs...) end # module From 566c4faaa256e2a18671686dd7e65f0244af7c05 Mon Sep 17 00:00:00 2001 From: contradict Date: Thu, 8 Aug 2024 08:10:45 -0700 Subject: [PATCH 4/5] Read the whole file into memory when parsing multi-document files. Since parsing is lazy, just using `do` results in the file being closed before parsing happens. Leaving the file open and waiting for the finalizer to close it was deemed undesirable. --- src/YAML.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/YAML.jl b/src/YAML.jl index 693b4bf..a07e755 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -164,5 +164,8 @@ load_file(filename::AbstractString, args...; kwargs...) = Parse the YAML file `filename`, and return corresponding YAML documents. """ load_all_file(filename::AbstractString, args...; kwargs...) = - load_all(open(filename, "r"), args...; kwargs...) + open(filename, "r") do f + io = IOBuffer(read(f)) + load_all(io, args...; kwargs...) 
+ end end # module From b13dddefefeea2f8d7f9d1e3235f0d77cd2d87cb Mon Sep 17 00:00:00 2001 From: contradict Date: Sat, 10 Aug 2024 13:47:13 -0700 Subject: [PATCH 5/5] Combine duplicated IteratorSize implementation, add comment --- src/YAML.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/YAML.jl b/src/YAML.jl index a07e755..1e1fd3f 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -113,10 +113,6 @@ end YAMLDocIterator(input::IO, more_constructors::_constructor=nothing, multi_constructors::Dict = Dict(); dicttype::_dicttype=Dict{Any, Any}, constructorType::Function = SafeConstructor) = YAMLDocIterator(input, constructorType(_patch_constructors(more_constructors, dicttype), multi_constructors)) -# It's unknown how many documents will be found. By doing this, -# functions like `collect` do not try to query the length of the -# iterator. -Base.IteratorSize(::YAMLDocIterator) = Base.SizeUnknown() # Iteration protocol. function iterate(it::YAMLDocIterator, _ = nothing) @@ -131,7 +127,12 @@ function iterate(it::YAMLDocIterator, _ = nothing) return doc, nothing end +# It's unknown how many documents will be found. By doing this, +# functions like `collect` do not try to query the length of the +# iterator. Base.IteratorSize(::Type{YAMLDocIterator}) = Base.SizeUnknown() +# Documents can be trees of elements or just single values, so don't promise +# any particular type. Base.IteratorEltype(::Type{YAMLDocIterator}) = Base.EltypeUnknown() """