From cd5eff65d8777e9d32220a3cdf1faf0cbfb27dee Mon Sep 17 00:00:00 2001 From: Aurelio Amerio Date: Thu, 6 Oct 2022 18:12:11 +0200 Subject: [PATCH 1/6] wip --- .gitignore | 3 +- Project.toml | 9 +- src/TFRecord.jl | 1 + src/core.jl | 34 ++- src/jlout/example_pb.jl | 620 ++++++++++++++++------------------------ test.jl | 51 ++++ test.proto | 8 + test_pb.jl | 56 ++++ 8 files changed, 386 insertions(+), 396 deletions(-) create mode 100644 test.jl create mode 100644 test.proto create mode 100644 test_pb.jl diff --git a/.gitignore b/.gitignore index 55d0f3f..1376ae6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /Manifest.toml -example.tfrecord \ No newline at end of file +example.tfrecord +.vscode/settings.json diff --git a/Project.toml b/Project.toml index 7751fbb..0d6ec99 100644 --- a/Project.toml +++ b/Project.toml @@ -7,20 +7,13 @@ version = "0.4.1" BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" +EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -[compat] -BufferedStreams = "1.0" -CodecZlib = "0.7" -MacroTools = "0.5" -ProtoBuf = "0.10, 0.11" -TranscodingStreams = "0.9" -julia = "1.5" - [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/TFRecord.jl b/src/TFRecord.jl index 66d3dee..6afb639 100644 --- a/src/TFRecord.jl +++ b/src/TFRecord.jl @@ -1,6 +1,7 @@ module TFRecord include("jlout/example_pb.jl") +using .example_pb include("core.jl") end diff --git a/src/core.jl b/src/core.jl index bf96202..148d5f1 100644 --- a/src/core.jl +++ b/src/core.jl @@ -3,7 +3,7 @@ using Base.Threads using CodecZlib using BufferedStreams using MacroTools: @forward -using ProtoBuf: ProtoType +using ProtoBuf using TranscodingStreams: NoopStream # Ref: https://github.com/tensorflow/tensorflow/blob/295ad2781683835be974faba0a191528d8079768/tensorflow/core/lib/hash/crc32c.h#L50-L59 @@ -72,7 +72,7 @@ function read( open(decompressor_stream(compression), file_name, "r") do io buffered_io = BufferedInputStream(io, bufsize) while !eof(buffered_io) - instance = readproto(IOBuffer(read_record(buffered_io)), record_type()) + instance = decode(IOBuffer(read_record(buffered_io)), record_type()) put!(ch, instance) end end @@ -113,9 +113,9 @@ function write(io::IO, xs) end end -function write(io::IO, x::ProtoType) +function write(io::IO, x::BytesList) buff = IOBuffer() - writeproto(buff, x) + encode(buff, x) data_crc = mask(crc32c(seekstart(buff))) data = take!(seekstart(buff)) @@ -135,23 +135,21 @@ end # convert ##### -Base.convert(::Type{Feature}, x::Int) = Feature(;int64_list=Int64List(value=[x])) -Base.convert(::Type{Feature}, x::Bool) = Feature(;int64_list=Int64List(value=[Int(x)])) -Base.convert(::Type{Feature}, x::Float32) = Feature(;float_list=FloatList(value=[x])) -Base.convert(::Type{Feature}, x::AbstractString) = Feature(;bytes_list=BytesList(value=[unsafe_wrap(Vector{UInt8}, x)])) +Base.convert(::Type{Feature}, x::Int) = Feature(OneOf(:int64_list,Int64List([x]))) +Base.convert(::Type{Feature}, x::Bool) = Feature(OneOf(:int64_list,Int64List([Int(x)]))) +Base.convert(::Type{Feature}, x::Float32) = Feature(OneOf(:float_list,FloatList([x]))) +Base.convert(::Type{Feature}, x::AbstractString) = Feature(OneOf(:bytes_list,BytesList([unsafe_wrap(Vector{UInt8}, x)]))) -Base.convert(::Type{Feature}, x::Vector{Int}) = Feature(;int64_list=Int64List(value=x)) -Base.convert(::Type{Feature}, x::Vector{Bool}) = Feature(;int64_list=Int64List(value=convert(Vector{Int}, x))) -Base.convert(::Type{Feature}, x::Vector{Float32}) = Feature(;float_list=FloatList(value=x)) -Base.convert(::Type{Feature}, x::Vector{<:AbstractString}) = Feature(;bytes_list=BytesList(value=[unsafe_wrap(Vector{UInt8}, s) for s in x])) -Base.convert(::Type{Feature}, x::Vector{Array{UInt8,1}}) = Feature(;bytes_list=BytesList(value=x)) +Base.convert(::Type{Feature}, x::Vector{Int}) = Feature(OneOf(:int64_list,Int64List(x))) +Base.convert(::Type{Feature}, x::Vector{Bool}) = Feature(OneOf(:int64_list,Int64List(convert(Vector{Int}, x)))) +Base.convert(::Type{Feature}, x::Vector{Float32}) = Feature(OneOf(:float_list,FloatList(x))) +Base.convert(::Type{Feature}, x::Vector{<:AbstractString}) = Feature(OneOf(:bytes_list,BytesList([unsafe_wrap(Vector{UInt8}, s) for s in x]))) +Base.convert(::Type{Feature}, x::Vector{Array{UInt8,1}}) = Feature(OneOf(:bytes_list,BytesList(x))) -Base.convert(::Type{Features}, x::Dict) = Features(;feature=Dict(k=>convert(Feature, v) for (k, v) in x)) +Base.convert(::Type{Features}, x::Dict) = Features(Dict(k=>convert(Feature, v) for (k, v) in x)) function Base.convert(::Type{Example}, x::Dict) - d = Example() - d.features = convert(Features, x) - d + return Example(convert(Features, x)) end # (De)compression @@ -178,4 +176,4 @@ function decompressor_stream(compression) else throw(ArgumentError("Unsupported decompression method: $compression")) end -end +end \ No newline at end of file diff --git a/src/jlout/example_pb.jl b/src/jlout/example_pb.jl index a84eb53..67f9b5c 100644 --- a/src/jlout/example_pb.jl +++ b/src/jlout/example_pb.jl @@ -1,427 +1,309 @@ -# syntax: proto3 -using ProtoBuf -import ProtoBuf.meta +# Autogenerated using ProtoBuf.jl v1.0.7 on 2022-10-06T16:49:58.692 +# original file: D:\Aure\Documenti\Github\TFRecord.jl\src\proto\example.proto (proto3 syntax) -mutable struct BytesList <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +module example_pb - function BytesList(; kwargs...) - obj = new(meta(BytesList), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct BytesList -const __meta_BytesList = Ref{ProtoMeta}() -function meta(::Type{BytesList}) - ProtoBuf.metalock() do - if !isassigned(__meta_BytesList) - __meta_BytesList[] = target = ProtoMeta(BytesList) - allflds = Pair{Symbol,Union{Type,String}}[:value => Base.Vector{Array{UInt8,1}}] - meta(target, BytesList, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) - end - __meta_BytesList[] - end -end -function Base.getproperty(obj::BytesList, name::Symbol) - if name === :value - return (obj.__protobuf_jl_internal_values[name])::Base.Vector{Array{UInt8,1}} - else - getfield(obj, name) - end -end +import ProtoBuf as PB +using ProtoBuf: OneOf +using EnumX: @enumx -mutable struct FloatList <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +export BytesList, FloatList, Int64List, Feature, Features, FeatureList, Example +export FeatureLists, SequenceExample - function FloatList(; kwargs...) - obj = new(meta(FloatList), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct FloatList -const __meta_FloatList = Ref{ProtoMeta}() -function meta(::Type{FloatList}) - ProtoBuf.metalock() do - if !isassigned(__meta_FloatList) - __meta_FloatList[] = target = ProtoMeta(FloatList) - pack = Symbol[:value] - allflds = Pair{Symbol,Union{Type,String}}[:value => Base.Vector{Float32}] - meta(target, FloatList, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, pack, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) - end - __meta_FloatList[] - end -end -function Base.getproperty(obj::FloatList, name::Symbol) - if name === :value - return (obj.__protobuf_jl_internal_values[name])::Base.Vector{Float32} - else - getfield(obj, name) - end +struct BytesList + value::Vector{Vector{UInt8}} end +PB.default_values(::Type{BytesList}) = (;value = Vector{Vector{UInt8}}()) +PB.field_numbers(::Type{BytesList}) = (;value = 1) -mutable struct Int64List <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} - - function Int64List(; kwargs...) - obj = new(meta(Int64List), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct Int64List -const __meta_Int64List = Ref{ProtoMeta}() -function meta(::Type{Int64List}) - ProtoBuf.metalock() do - if !isassigned(__meta_Int64List) - __meta_Int64List[] = target = ProtoMeta(Int64List) - pack = Symbol[:value] - allflds = Pair{Symbol,Union{Type,String}}[:value => Base.Vector{Int64}] - meta(target, Int64List, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, pack, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:BytesList}) + value = PB.BufferedVector{Vector{UInt8}}() + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, value) + else + PB.skip(d, wire_type) end - __meta_Int64List[] end + return BytesList(value[]) end -function Base.getproperty(obj::Int64List, name::Symbol) - if name === :value - return (obj.__protobuf_jl_internal_values[name])::Base.Vector{Int64} - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::BytesList) + initpos = position(e.io) + !isempty(x.value) && PB.encode(e, 1, x.value) + return position(e.io) - initpos +end +function PB._encoded_size(x::BytesList) + encoded_size = 0 + !isempty(x.value) && (encoded_size += PB._encoded_size(x.value, 1)) + return encoded_size end -mutable struct Feature <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct FloatList + value::Vector{Float32} +end +PB.default_values(::Type{FloatList}) = (;value = Vector{Float32}()) +PB.field_numbers(::Type{FloatList}) = (;value = 1) - function Feature(; kwargs...) - obj = new(meta(Feature), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct Feature -const __meta_Feature = Ref{ProtoMeta}() -function meta(::Type{Feature}) - ProtoBuf.metalock() do - if !isassigned(__meta_Feature) - __meta_Feature[] = target = ProtoMeta(Feature) - allflds = Pair{Symbol,Union{Type,String}}[:bytes_list => BytesList, :float_list => FloatList, :int64_list => Int64List] - oneofs = Int[1,1,1] - oneof_names = Symbol[Symbol("kind")] - meta(target, Feature, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, oneofs, oneof_names) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:FloatList}) + value = PB.BufferedVector{Float32}() + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, wire_type, value) + else + PB.skip(d, wire_type) end - __meta_Feature[] end + return FloatList(value[]) end -function Base.getproperty(obj::Feature, name::Symbol) - if name === :bytes_list - return (obj.__protobuf_jl_internal_values[name])::BytesList - elseif name === :float_list - return (obj.__protobuf_jl_internal_values[name])::FloatList - elseif name === :int64_list - return (obj.__protobuf_jl_internal_values[name])::Int64List - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::FloatList) + initpos = position(e.io) + !isempty(x.value) && PB.encode(e, 1, x.value) + return position(e.io) - initpos +end +function PB._encoded_size(x::FloatList) + encoded_size = 0 + !isempty(x.value) && (encoded_size += PB._encoded_size(x.value, 1)) + return encoded_size end -mutable struct Features_FeatureEntry <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct Int64List + value::Vector{Int64} +end +PB.default_values(::Type{Int64List}) = (;value = Vector{Int64}()) +PB.field_numbers(::Type{Int64List}) = (;value = 1) - function Features_FeatureEntry(; kwargs...) - obj = new(meta(Features_FeatureEntry), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:Int64List}) + value = PB.BufferedVector{Int64}() + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, wire_type, value) + else + PB.skip(d, wire_type) end - obj - end -end # mutable struct Features_FeatureEntry (mapentry) -const __meta_Features_FeatureEntry = Ref{ProtoMeta}() -function meta(::Type{Features_FeatureEntry}) - ProtoBuf.metalock() do - if !isassigned(__meta_Features_FeatureEntry) - __meta_Features_FeatureEntry[] = target = ProtoMeta(Features_FeatureEntry) - allflds = Pair{Symbol,Union{Type,String}}[:key => AbstractString, :value => Feature] - meta(target, Features_FeatureEntry, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) - end - __meta_Features_FeatureEntry[] end + return Int64List(value[]) end -function Base.getproperty(obj::Features_FeatureEntry, name::Symbol) - if name === :key - return (obj.__protobuf_jl_internal_values[name])::AbstractString - elseif name === :value - return (obj.__protobuf_jl_internal_values[name])::Feature - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::Int64List) + initpos = position(e.io) + !isempty(x.value) && PB.encode(e, 1, x.value) + return position(e.io) - initpos +end +function PB._encoded_size(x::Int64List) + encoded_size = 0 + !isempty(x.value) && (encoded_size += PB._encoded_size(x.value, 1)) + return encoded_size end -mutable struct Features <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct Feature + kind::Union{Nothing,OneOf{<:Union{BytesList,FloatList,Int64List}}} +end +PB.oneof_field_types(::Type{Feature}) = (; + kind = (;bytes_list=BytesList, float_list=FloatList, int64_list=Int64List), +) +PB.default_values(::Type{Feature}) = (;bytes_list = nothing, float_list = nothing, int64_list = nothing) +PB.field_numbers(::Type{Feature}) = (;bytes_list = 1, float_list = 2, int64_list = 3) - function Features(; kwargs...) - obj = new(meta(Features), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct Features -const __meta_Features = Ref{ProtoMeta}() -function meta(::Type{Features}) - ProtoBuf.metalock() do - if !isassigned(__meta_Features) - __meta_Features[] = target = ProtoMeta(Features) - allflds = Pair{Symbol,Union{Type,String}}[:feature => Base.Dict{AbstractString,Feature}] - meta(target, Features, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:Feature}) + kind = nothing + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + kind = OneOf(:bytes_list, PB.decode(d, Ref{BytesList})) + elseif field_number == 2 + kind = OneOf(:float_list, PB.decode(d, Ref{FloatList})) + elseif field_number == 3 + kind = OneOf(:int64_list, PB.decode(d, Ref{Int64List})) + else + PB.skip(d, wire_type) end - __meta_Features[] end + return Feature(kind) end -function Base.getproperty(obj::Features, name::Symbol) - if name === :feature - return (obj.__protobuf_jl_internal_values[name])::Base.Dict{AbstractString,Feature} - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::Feature) + initpos = position(e.io) + if isnothing(x.kind); + elseif x.kind.name === :bytes_list + PB.encode(e, 1, x.kind[]) + elseif x.kind.name === :float_list + PB.encode(e, 2, x.kind[]) + elseif x.kind.name === :int64_list + PB.encode(e, 3, x.kind[]) + end + return position(e.io) - initpos +end +function PB._encoded_size(x::Feature) + encoded_size = 0 + if isnothing(x.kind); + elseif x.kind.name === :bytes_list + encoded_size += PB._encoded_size(x.kind[], 1) + elseif x.kind.name === :float_list + encoded_size += PB._encoded_size(x.kind[], 2) + elseif x.kind.name === :int64_list + encoded_size += PB._encoded_size(x.kind[], 3) + end + return encoded_size end -mutable struct Example <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct Features + feature::Dict{String,Feature} +end +PB.default_values(::Type{Features}) = (;feature = Dict{String,Feature}()) +PB.field_numbers(::Type{Features}) = (;feature = 1) - function Example(; kwargs...) - obj = new(meta(Example), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:Features}) + feature = Dict{String,Feature}() + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, feature) + else + PB.skip(d, wire_type) end - obj - end -end # mutable struct Example -const __meta_Example = Ref{ProtoMeta}() -function meta(::Type{Example}) - ProtoBuf.metalock() do - if !isassigned(__meta_Example) - __meta_Example[] = target = ProtoMeta(Example) - allflds = Pair{Symbol,Union{Type,String}}[:features => Features] - meta(target, Example, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) - end - __meta_Example[] end + return Features(feature) end -function Base.getproperty(obj::Example, name::Symbol) - if name === :features - return (obj.__protobuf_jl_internal_values[name])::Features - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::Features) + initpos = position(e.io) + !isempty(x.feature) && PB.encode(e, 1, x.feature) + return position(e.io) - initpos +end +function PB._encoded_size(x::Features) + encoded_size = 0 + !isempty(x.feature) && (encoded_size += PB._encoded_size(x.feature, 1)) + return encoded_size end -mutable struct FeatureList <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct FeatureList + feature::Vector{Feature} +end +PB.default_values(::Type{FeatureList}) = (;feature = Vector{Feature}()) +PB.field_numbers(::Type{FeatureList}) = (;feature = 1) - function FeatureList(; kwargs...) - obj = new(meta(FeatureList), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct FeatureList -const __meta_FeatureList = Ref{ProtoMeta}() -function meta(::Type{FeatureList}) - ProtoBuf.metalock() do - if !isassigned(__meta_FeatureList) - __meta_FeatureList[] = target = ProtoMeta(FeatureList) - allflds = Pair{Symbol,Union{Type,String}}[:feature => Base.Vector{Feature}] - meta(target, FeatureList, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:FeatureList}) + feature = PB.BufferedVector{Feature}() + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, feature) + else + PB.skip(d, wire_type) end - __meta_FeatureList[] end + return FeatureList(feature[]) end -function Base.getproperty(obj::FeatureList, name::Symbol) - if name === :feature - return (obj.__protobuf_jl_internal_values[name])::Base.Vector{Feature} - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::FeatureList) + initpos = position(e.io) + !isempty(x.feature) && PB.encode(e, 1, x.feature) + return position(e.io) - initpos +end +function PB._encoded_size(x::FeatureList) + encoded_size = 0 + !isempty(x.feature) && (encoded_size += PB._encoded_size(x.feature, 1)) + return encoded_size end -mutable struct FeatureLists_FeatureListEntry <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct Example + features::Union{Nothing,Features} +end +PB.default_values(::Type{Example}) = (;features = nothing) +PB.field_numbers(::Type{Example}) = (;features = 1) - function FeatureLists_FeatureListEntry(; kwargs...) - obj = new(meta(FeatureLists_FeatureListEntry), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct FeatureLists_FeatureListEntry (mapentry) -const __meta_FeatureLists_FeatureListEntry = Ref{ProtoMeta}() -function meta(::Type{FeatureLists_FeatureListEntry}) - ProtoBuf.metalock() do - if !isassigned(__meta_FeatureLists_FeatureListEntry) - __meta_FeatureLists_FeatureListEntry[] = target = ProtoMeta(FeatureLists_FeatureListEntry) - allflds = Pair{Symbol,Union{Type,String}}[:key => AbstractString, :value => FeatureList] - meta(target, FeatureLists_FeatureListEntry, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:Example}) + features = Ref{Union{Nothing,Features}}(nothing) + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, features) + else + PB.skip(d, wire_type) end - __meta_FeatureLists_FeatureListEntry[] end + return Example(features[]) end -function Base.getproperty(obj::FeatureLists_FeatureListEntry, name::Symbol) - if name === :key - return (obj.__protobuf_jl_internal_values[name])::AbstractString - elseif name === :value - return (obj.__protobuf_jl_internal_values[name])::FeatureList - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::Example) + initpos = position(e.io) + !isnothing(x.features) && PB.encode(e, 1, x.features) + return position(e.io) - initpos +end +function PB._encoded_size(x::Example) + encoded_size = 0 + !isnothing(x.features) && (encoded_size += PB._encoded_size(x.features, 1)) + return encoded_size end -mutable struct FeatureLists <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct FeatureLists + feature_list::Dict{String,FeatureList} +end +PB.default_values(::Type{FeatureLists}) = (;feature_list = Dict{String,FeatureList}()) +PB.field_numbers(::Type{FeatureLists}) = (;feature_list = 1) - function FeatureLists(; kwargs...) - obj = new(meta(FeatureLists), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:FeatureLists}) + feature_list = Dict{String,FeatureList}() + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, feature_list) + else + PB.skip(d, wire_type) end - obj - end -end # mutable struct FeatureLists -const __meta_FeatureLists = Ref{ProtoMeta}() -function meta(::Type{FeatureLists}) - ProtoBuf.metalock() do - if !isassigned(__meta_FeatureLists) - __meta_FeatureLists[] = target = ProtoMeta(FeatureLists) - allflds = Pair{Symbol,Union{Type,String}}[:feature_list => Base.Dict{AbstractString,FeatureList}] - meta(target, FeatureLists, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) - end - __meta_FeatureLists[] end + return FeatureLists(feature_list) end -function Base.getproperty(obj::FeatureLists, name::Symbol) - if name === :feature_list - return (obj.__protobuf_jl_internal_values[name])::Base.Dict{AbstractString,FeatureList} - else - getfield(obj, name) - end + +function PB.encode(e::PB.AbstractProtoEncoder, x::FeatureLists) + initpos = position(e.io) + !isempty(x.feature_list) && PB.encode(e, 1, x.feature_list) + return position(e.io) - initpos +end +function PB._encoded_size(x::FeatureLists) + encoded_size = 0 + !isempty(x.feature_list) && (encoded_size += PB._encoded_size(x.feature_list, 1)) + return encoded_size end -mutable struct SequenceExample <: ProtoType - __protobuf_jl_internal_meta::ProtoMeta - __protobuf_jl_internal_values::Dict{Symbol,Any} - __protobuf_jl_internal_defaultset::Set{Symbol} +struct SequenceExample + context::Union{Nothing,Features} + feature_lists::Union{Nothing,FeatureLists} +end +PB.default_values(::Type{SequenceExample}) = (;context = nothing, feature_lists = nothing) +PB.field_numbers(::Type{SequenceExample}) = (;context = 1, feature_lists = 2) - function SequenceExample(; kwargs...) - obj = new(meta(SequenceExample), Dict{Symbol,Any}(), Set{Symbol}()) - values = obj.__protobuf_jl_internal_values - symdict = obj.__protobuf_jl_internal_meta.symdict - for nv in kwargs - fldname, fldval = nv - fldtype = symdict[fldname].jtyp - (fldname in keys(symdict)) || error(string(typeof(obj), " has no field with name ", fldname)) - values[fldname] = isa(fldval, fldtype) ? fldval : convert(fldtype, fldval) - end - obj - end -end # mutable struct SequenceExample -const __meta_SequenceExample = Ref{ProtoMeta}() -function meta(::Type{SequenceExample}) - ProtoBuf.metalock() do - if !isassigned(__meta_SequenceExample) - __meta_SequenceExample[] = target = ProtoMeta(SequenceExample) - allflds = Pair{Symbol,Union{Type,String}}[:context => Features, :feature_lists => FeatureLists] - meta(target, SequenceExample, allflds, ProtoBuf.DEF_REQ, ProtoBuf.DEF_FNUM, ProtoBuf.DEF_VAL, ProtoBuf.DEF_PACK, ProtoBuf.DEF_WTYPES, ProtoBuf.DEF_ONEOFS, ProtoBuf.DEF_ONEOF_NAMES) +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:SequenceExample}) + context = Ref{Union{Nothing,Features}}(nothing) + feature_lists = Ref{Union{Nothing,FeatureLists}}(nothing) + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + PB.decode!(d, context) + elseif field_number == 2 + PB.decode!(d, feature_lists) + else + PB.skip(d, wire_type) end - __meta_SequenceExample[] - end -end -function Base.getproperty(obj::SequenceExample, name::Symbol) - if name === :context - return (obj.__protobuf_jl_internal_values[name])::Features - elseif name === :feature_lists - return (obj.__protobuf_jl_internal_values[name])::FeatureLists - else - getfield(obj, name) end + return SequenceExample(context[], feature_lists[]) end -export Example, SequenceExample, BytesList, FloatList, Int64List, Feature, Features_FeatureEntry, Features, FeatureList, FeatureLists_FeatureListEntry, FeatureLists -# mapentries: "FeatureLists_FeatureListEntry" => ("AbstractString", "FeatureList"), "Features_FeatureEntry" => ("AbstractString", "Feature") +function PB.encode(e::PB.AbstractProtoEncoder, x::SequenceExample) + initpos = position(e.io) + !isnothing(x.context) && PB.encode(e, 1, x.context) + !isnothing(x.feature_lists) && PB.encode(e, 2, x.feature_lists) + return position(e.io) - initpos +end +function PB._encoded_size(x::SequenceExample) + encoded_size = 0 + !isnothing(x.context) && (encoded_size += PB._encoded_size(x.context, 1)) + !isnothing(x.feature_lists) && (encoded_size += PB._encoded_size(x.feature_lists, 2)) + return encoded_size +end +end # module diff --git a/test.jl b/test.jl new file mode 100644 index 0000000..f4a303b --- /dev/null +++ b/test.jl @@ -0,0 +1,51 @@ +# using ProtoBuf +using ProtoBuf +# protojl("test.proto", ".", ".") +#%% +using Pkg +ENV["JULIA_REVISE_POLL"]=1 +using Revise +Pkg.activate(".") +using TFRecord +#%% +n = 10 +f1 = rand(Bool, n) +f2 = rand(1:5, n) +f3 = rand(("cat", "dog", "chicken", "horse", "goat"), n) +f4 = rand(Float32, n) + +@run TFRecord.write( + "example.tfrecord", + [ + Dict( + "feature1" => f1[i], + "feature2" => f2[i], + "feature3" => f3[i], + "feature4" => f4[i], + ) + for i in 1:n + ] +) + +ex = convert(TFRecord.Example, Dict( + "feature1" => f1[1], + "feature2" => f2[1], + "feature3" => f3[1], + "feature4" => f4[1], +)) + +ex + +# TFRecord.example_pb.Int64List <: +TFRecord.example_pb.Int64List <: ProtoBuf.OneOf{<:Union{TFRecord.example_pb.BytesList, TFRecord.example_pb.FloatList, TFRecord.example_pb.Int64List}} + +TFRecord.Feature(TFRecord.Int64List([1,2,3])) +ft = TFRecord.Feature(nothing) + +struct MyMessage + oneof_field::Union{Nothing,OneOf{<:Union{Int32,String}}} + end + +OneOf(:option1, 42).name + +MyMessage(option1= 42) \ No newline at end of file diff --git a/test.proto b/test.proto new file mode 100644 index 0000000..b013aa6 --- /dev/null +++ b/test.proto @@ -0,0 +1,8 @@ +syntax = "proto3"; + +message MyMessage { + oneof oneof_field { + int32 option1 = 1; + string option2 = 2; + } +} \ No newline at end of file diff --git a/test_pb.jl b/test_pb.jl new file mode 100644 index 0000000..cefc6b6 --- /dev/null +++ b/test_pb.jl @@ -0,0 +1,56 @@ +# Autogenerated using ProtoBuf.jl v1.0.7 on 2022-10-06T17:40:28.572 +# original file: D:\Aure\Documenti\Github\TFRecord.jl\test.proto (proto3 syntax) + +module test_pb + +import ProtoBuf as PB +using ProtoBuf: OneOf +using EnumX: @enumx + +export MyMessage + +struct MyMessage + oneof_field::Union{Nothing,OneOf{<:Union{Int32,String}}} +end +PB.oneof_field_types(::Type{MyMessage}) = (; + oneof_field = (;option1=Int32, option2=String), +) +PB.default_values(::Type{MyMessage}) = (;option1 = zero(Int32), option2 = "") +PB.field_numbers(::Type{MyMessage}) = (;option1 = 1, option2 = 2) + +function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:MyMessage}) + oneof_field = nothing + while !PB.message_done(d) + field_number, wire_type = PB.decode_tag(d) + if field_number == 1 + oneof_field = OneOf(:option1, PB.decode(d, Int32)) + elseif field_number == 2 + oneof_field = OneOf(:option2, PB.decode(d, String)) + else + PB.skip(d, wire_type) + end + end + return MyMessage(oneof_field) +end + +function PB.encode(e::PB.AbstractProtoEncoder, x::MyMessage) + initpos = position(e.io) + if isnothing(x.oneof_field); + elseif x.oneof_field.name === :option1 + PB.encode(e, 1, x.oneof_field[]) + elseif x.oneof_field.name === :option2 + PB.encode(e, 2, x.oneof_field[]) + end + return position(e.io) - initpos +end +function PB._encoded_size(x::MyMessage) + encoded_size = 0 + if isnothing(x.oneof_field); + elseif x.oneof_field.name === :option1 + encoded_size += PB._encoded_size(x.oneof_field[], 1) + elseif x.oneof_field.name === :option2 + encoded_size += PB._encoded_size(x.oneof_field[], 2) + end + return encoded_size +end +end # module From dbb70b04eb3d3fc8b7a8f8f547bcedcb3ef73102 Mon Sep 17 00:00:00 2001 From: Aurelio Amerio Date: Thu, 6 Oct 2022 20:30:47 +0200 Subject: [PATCH 2/6] updated TFRecord.jl for julia 1.7+, ProtoBuf v1+ --- .github/workflows/CI.yml | 54 +++++++++++------- .github/workflows/CompatHelper.yml | 13 +---- .github/workflows/TagBot.yml | 6 +- .github/workflows/register.yml | 16 ++++++ LICENSE | 2 +- Project.toml | 14 ++++- src/core.jl | 23 +++++--- test.jl | 51 ----------------- test.proto | 8 --- test/runtests.jl | 89 ++++++++++++++++++++++++++++-- test_pb.jl | 56 ------------------- 11 files changed, 167 insertions(+), 165 deletions(-) create mode 100644 .github/workflows/register.yml delete mode 100644 test.jl delete mode 100644 test.proto delete mode 100644 test_pb.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 7cb55eb..1cffd8f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,12 +1,15 @@ name: CI on: - pull_request: - branches: - - master push: branches: - - master - tags: '*' + - main + tags: ['*'] + pull_request: +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} @@ -15,11 +18,11 @@ jobs: fail-fast: false matrix: version: - - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. + - '1.7' + - '1.8' + - 'nightly' os: - ubuntu-latest - - macOS-latest - - windows-latest arch: - x64 steps: @@ -28,19 +31,30 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- + - uses: julia-actions/cache@v1 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v1 + - uses: codecov/codecov-action@v2 with: - file: lcov.info + files: lcov.info + docs: + name: Documentation + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - run: | + julia --project=docs -e ' + using Documenter: DocMeta, doctest + using MapGen + DocMeta.setdocmeta!(MapGen, :DocTestSetup, :(using MapGen); recursive=true) + doctest(MapGen)' diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 999ebbd..cba9134 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,23 +1,16 @@ name: CompatHelper - on: schedule: - - cron: '00 00 * * *' + - cron: 0 0 * * * workflow_dispatch: - jobs: CompatHelper: - runs-on: ${{ matrix.os }} - strategy: - matrix: - julia-version: [1.2.0] - julia-arch: [x86] - os: [ubuntu-latest] + runs-on: ubuntu-latest steps: - name: Pkg.add("CompatHelper") run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - name: CompatHelper.main() env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV}} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 33fd52d..f49313b 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,17 +1,15 @@ name: TagBot on: - issue_comment: # THIS BIT IS NEW + issue_comment: types: - created workflow_dispatch: jobs: TagBot: - # THIS 'if' LINE IS NEW if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' - # NOTHING BELOW HAS CHANGED runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} - # ssh: ${{ secrets.DOCUMENTER_KEY }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/register.yml b/.github/workflows/register.yml new file mode 100644 index 0000000..5b7cd3b --- /dev/null +++ b/.github/workflows/register.yml @@ -0,0 +1,16 @@ +name: Register Package +on: + workflow_dispatch: + inputs: + version: + description: Version to register or component to bump + required: true +jobs: + register: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: julia-actions/RegisterAction@latest + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/LICENSE b/LICENSE index 7be0f93..b3a621d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 Jun Tian and contributors +Copyright (c) 2022 Jun Tian and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Project.toml b/Project.toml index 0d6ec99..e4da4dd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TFRecord" uuid = "841416d8-1a6a-485a-b0fc-1328d0f53d5e" authors = ["Jun Tian and contributors"] -version = "0.4.1" +version = "0.4.2" [deps] BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" @@ -9,11 +9,19 @@ CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +[compat] +BufferedStreams = "1.1" +CRC32c = "1.8" +CodecZlib = "0.7" +EnumX = "1" +MacroTools = "0.5" +ProtoBuf = "1.0" +TranscodingStreams = "0.9" +julia = "^1.7" + [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/core.jl b/src/core.jl index 148d5f1..b7f4d7c 100644 --- a/src/core.jl +++ b/src/core.jl @@ -30,15 +30,16 @@ byte data[n] uint32 masked_crc32_of_data ``` """ +# TODO check function read_record(io::IO) n = Base.read(io, sizeof(UInt64)) masked_crc32_n = Base.read(io, UInt32) - crc32c(n) == unmask(masked_crc32_n) || error("record corrupted, did you set the correct compression?") + @assert crc32c(n) == unmask(masked_crc32_n) "record corrupted, did you set the correct compression?" data = Base.read(io, Int(reinterpret(UInt64, n)[])) # !!! watch https://github.com/JuliaIO/TranscodingStreams.jl/pull/104 masked_crc32_data = Base.read(io, UInt32) - crc32c(data) == unmask(masked_crc32_data) || error("record corrupted, did you set the correct compression?") - data + @assert crc32c(data) == unmask(masked_crc32_data) "record corrupted, did you set the correct compression?" + return data end """ @@ -72,14 +73,19 @@ function read( open(decompressor_stream(compression), file_name, "r") do io buffered_io = BufferedInputStream(io, bufsize) while !eof(buffered_io) - instance = decode(IOBuffer(read_record(buffered_io)), record_type()) + buff = IOBuffer(read_record(buffered_io)) + d = ProtoDecoder(buff) + instance = decode(d, record_type) put!(ch, instance) + # close(buffered_io) end end end end end + + ##### # TFRecordWriter ##### @@ -102,8 +108,9 @@ for example `100M`. """ function write(s::AbstractString, x; compression=nothing, bufsize=1024*1024) open(compressor_stream(compression), s, "w") do io - buffered_io = BufferedOutputStream(open(s, "w"), bufsize) + buffered_io = BufferedOutputStream(io, bufsize) write(buffered_io, x) + close(buffered_io) end end @@ -113,9 +120,11 @@ function write(io::IO, xs) end end -function write(io::IO, x::BytesList) + +function write(io::IO, x::Example) buff = IOBuffer() - encode(buff, x) + e = ProtoEncoder(buff) + encode(e, x) data_crc = mask(crc32c(seekstart(buff))) data = take!(seekstart(buff)) diff --git a/test.jl b/test.jl deleted file mode 100644 index f4a303b..0000000 --- a/test.jl +++ /dev/null @@ -1,51 +0,0 @@ -# using ProtoBuf -using ProtoBuf -# protojl("test.proto", ".", ".") -#%% -using Pkg -ENV["JULIA_REVISE_POLL"]=1 -using Revise -Pkg.activate(".") -using TFRecord -#%% -n = 10 -f1 = rand(Bool, n) -f2 = rand(1:5, n) -f3 = rand(("cat", "dog", "chicken", "horse", "goat"), n) -f4 = rand(Float32, n) - -@run TFRecord.write( - "example.tfrecord", - [ - Dict( - "feature1" => f1[i], - "feature2" => f2[i], - "feature3" => f3[i], - "feature4" => f4[i], - ) - for i in 1:n - ] -) - -ex = convert(TFRecord.Example, Dict( - "feature1" => f1[1], - "feature2" => f2[1], - "feature3" => f3[1], - "feature4" => f4[1], -)) - -ex - -# TFRecord.example_pb.Int64List <: -TFRecord.example_pb.Int64List <: ProtoBuf.OneOf{<:Union{TFRecord.example_pb.BytesList, TFRecord.example_pb.FloatList, TFRecord.example_pb.Int64List}} - -TFRecord.Feature(TFRecord.Int64List([1,2,3])) -ft = TFRecord.Feature(nothing) - -struct MyMessage - oneof_field::Union{Nothing,OneOf{<:Union{Int32,String}}} - end - -OneOf(:option1, 42).name - -MyMessage(option1= 42) \ No newline at end of file diff --git a/test.proto b/test.proto deleted file mode 100644 index b013aa6..0000000 --- a/test.proto +++ /dev/null @@ -1,8 +0,0 @@ -syntax = "proto3"; - -message MyMessage { - oneof oneof_field { - int32 option1 = 1; - string option2 = 2; - } -} \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 6f3dda0..dc68ba5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,13 +1,14 @@ using TFRecord using Test -@testset "TFRecord.jl" begin +@testset "no compression" begin n = 10 f1 = rand(Bool, n) f2 = rand(1:5, n) f3 = rand(("cat", "dog", "chicken", "horse", "goat"), n) f4 = rand(Float32, n) + f5 = rand(Float32, n,5) TFRecord.write( "example.tfrecord", @@ -17,6 +18,7 @@ using Test "feature2" => f2[i], "feature3" => f3[i], "feature4" => f4[i], + "feature5" => f5[i,:] ) for i in 1:n ) @@ -25,9 +27,86 @@ using Test reader = TFRecord.read("example.tfrecord") for (i, example) in enumerate(reader) - @test example.features.feature["feature1"].int64_list.value[] == Int(f1[i]) - @test example.features.feature["feature2"].int64_list.value[] == f2[i] - @test String(example.features.feature["feature3"].bytes_list.value[]) == f3[i] - @test example.features.feature["feature4"].float_list.value[] == f4[i] + @assert example.features.feature["feature1"].kind.value.value[1] == Int(f1[i]) + @assert example.features.feature["feature2"].kind.value.value[1] == f2[i] + @assert String(example.features.feature["feature3"].kind.value.value[1]) == f3[i] + @assert example.features.feature["feature4"].kind.value.value[1] == f4[i] + @assert all(example.features.feature["feature5"].kind.value.value .== f5[i,:]) end + sleep(1) + rm("example.tfrecord") +end + +@testset "gzip" begin + + n = 10 + f1 = rand(Bool, n) + f2 = rand(1:5, n) + f3 = rand(("cat", "dog", "chicken", "horse", "goat"), n) + f4 = rand(Float32, n) + f5 = rand(Float32, n,5) + + TFRecord.write( + "example.tfrecord", + ( + Dict( + "feature1" => f1[i], + "feature2" => f2[i], + "feature3" => f3[i], + "feature4" => f4[i], + "feature5" => f5[i,:] + ) + for i in 1:n + ), + compression=:gzip + ) + + reader = TFRecord.read("example.tfrecord", compression=:gzip) + + for (i, example) in enumerate(reader) + @assert example.features.feature["feature1"].kind.value.value[1] == Int(f1[i]) + @assert example.features.feature["feature2"].kind.value.value[1] == f2[i] + @assert String(example.features.feature["feature3"].kind.value.value[1]) == f3[i] + @assert example.features.feature["feature4"].kind.value.value[1] == f4[i] + @assert all(example.features.feature["feature5"].kind.value.value .== f5[i,:]) + end + sleep(1) + rm("example.tfrecord") +end + +@testset "zlib" begin + + n = 10 + f1 = rand(Bool, n) + f2 = rand(1:5, n) + f3 = rand(("cat", "dog", "chicken", "horse", "goat"), n) + f4 = rand(Float32, n) + f5 = rand(Float32, n,5) + + TFRecord.write( + "example.tfrecord", + ( + Dict( + "feature1" => f1[i], + "feature2" => f2[i], + "feature3" => f3[i], + "feature4" => f4[i], + "feature5" => f5[i,:] + ) + for i in 1:n + ), + compression=:zlib + ) + + reader = TFRecord.read("example.tfrecord", compression=:zlib) + + for (i, example) in enumerate(reader) + @assert example.features.feature["feature1"].kind.value.value[1] == Int(f1[i]) + @assert example.features.feature["feature2"].kind.value.value[1] == f2[i] + @assert String(example.features.feature["feature3"].kind.value.value[1]) == f3[i] + @assert example.features.feature["feature4"].kind.value.value[1] == f4[i] + @assert all(example.features.feature["feature5"].kind.value.value .== f5[i,:]) + end + sleep(1) + rm("example.tfrecord") end diff --git a/test_pb.jl b/test_pb.jl deleted file mode 100644 index cefc6b6..0000000 --- a/test_pb.jl +++ /dev/null @@ -1,56 +0,0 @@ -# Autogenerated using ProtoBuf.jl v1.0.7 on 2022-10-06T17:40:28.572 -# original file: D:\Aure\Documenti\Github\TFRecord.jl\test.proto (proto3 syntax) - -module test_pb - -import ProtoBuf as PB -using ProtoBuf: OneOf -using EnumX: @enumx - -export MyMessage - -struct MyMessage - oneof_field::Union{Nothing,OneOf{<:Union{Int32,String}}} -end -PB.oneof_field_types(::Type{MyMessage}) = (; - oneof_field = (;option1=Int32, option2=String), -) -PB.default_values(::Type{MyMessage}) = (;option1 = zero(Int32), option2 = "") -PB.field_numbers(::Type{MyMessage}) = (;option1 = 1, option2 = 2) - -function PB.decode(d::PB.AbstractProtoDecoder, ::Type{<:MyMessage}) - oneof_field = nothing - while !PB.message_done(d) - field_number, wire_type = PB.decode_tag(d) - if field_number == 1 - oneof_field = OneOf(:option1, PB.decode(d, Int32)) - elseif field_number == 2 - oneof_field = OneOf(:option2, PB.decode(d, String)) - else - PB.skip(d, wire_type) - end - end - return MyMessage(oneof_field) -end - -function PB.encode(e::PB.AbstractProtoEncoder, x::MyMessage) - initpos = position(e.io) - if isnothing(x.oneof_field); - elseif x.oneof_field.name === :option1 - PB.encode(e, 1, x.oneof_field[]) - elseif x.oneof_field.name === :option2 - PB.encode(e, 2, x.oneof_field[]) - end - return position(e.io) - initpos -end -function PB._encoded_size(x::MyMessage) - encoded_size = 0 - if isnothing(x.oneof_field); - elseif x.oneof_field.name === :option1 - encoded_size += PB._encoded_size(x.oneof_field[], 1) - elseif x.oneof_field.name === :option2 - encoded_size += PB._encoded_size(x.oneof_field[], 2) - end - return encoded_size -end -end # module From 08e35379d2d2ebb12cd9d816a998cb0b54e8e359 Mon Sep 17 00:00:00 2001 From: Jun Tian Date: Fri, 7 Oct 2022 10:40:27 +0800 Subject: [PATCH 3/6] Update Project.toml --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index e4da4dd..ebb5770 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" [compat] BufferedStreams = "1.1" -CRC32c = "1.8" CodecZlib = "0.7" EnumX = "1" MacroTools = "0.5" From 866c7191e7eca425adfa63b92d7218fffedf66ae Mon Sep 17 00:00:00 2001 From: Jun Tian Date: Fri, 7 Oct 2022 10:43:54 +0800 Subject: [PATCH 4/6] Update CI.yml --- .github/workflows/CI.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1cffd8f..1b545ec 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,9 +18,10 @@ jobs: fail-fast: false matrix: version: + - '1.6' - '1.7' - '1.8' - - 'nightly' + - '1' os: - ubuntu-latest arch: From fcdae764080391d162931d21d5b54d8ef6032cf8 Mon Sep 17 00:00:00 2001 From: Jun Tian Date: Fri, 7 Oct 2022 10:44:33 +0800 Subject: [PATCH 5/6] Update Project.toml Avoid breaking change --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ebb5770..03cfe38 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,7 @@ EnumX = "1" MacroTools = "0.5" ProtoBuf = "1.0" TranscodingStreams = "0.9" -julia = "^1.7" +julia = "1.6" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 25e18217fa8debac65bc7f25e9dc3d0d395945a4 Mon Sep 17 00:00:00 2001 From: Jun Tian Date: Fri, 7 Oct 2022 10:47:53 +0800 Subject: [PATCH 6/6] Remove docs in CI --- .github/workflows/CI.yml | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1b545ec..34e7ed5 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -39,23 +39,3 @@ jobs: - uses: codecov/codecov-action@v2 with: files: lcov.info - docs: - name: Documentation - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-docdeploy@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - run: | - julia --project=docs -e ' - using Documenter: DocMeta, doctest - using MapGen - DocMeta.setdocmeta!(MapGen, :DocTestSetup, :(using MapGen); recursive=true) - doctest(MapGen)'