Skip to content

Commit

Permalink
Merge pull request #26 from aurelio-amerio/master
Browse files Browse the repository at this point in the history
Update TFRecord.jl to work with julia 1.7+ and ProtoBuf v1.0+
  • Loading branch information
findmyway authored Oct 7, 2022
2 parents d043a43 + 25e1821 commit 01ad066
Show file tree
Hide file tree
Showing 11 changed files with 410 additions and 438 deletions.
35 changes: 15 additions & 20 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
name: CI
on:
pull_request:
branches:
- master
push:
branches:
- master
tags: '*'
- main
tags: ['*']
pull_request:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
Expand All @@ -15,11 +18,12 @@ jobs:
fail-fast: false
matrix:
version:
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
- '1.6'
- '1.7'
- '1.8'
- '1'
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
steps:
Expand All @@ -28,19 +32,10 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v2
with:
file: lcov.info
files: lcov.info
13 changes: 3 additions & 10 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
name: CompatHelper

on:
schedule:
- cron: '00 00 * * *'
- cron: 0 0 * * *
workflow_dispatch:

jobs:
CompatHelper:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: [1.2.0]
julia-arch: [x86]
os: [ubuntu-latest]
runs-on: ubuntu-latest
steps:
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV}}
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
run: julia -e 'using CompatHelper; CompatHelper.main()'
6 changes: 2 additions & 4 deletions .github/workflows/TagBot.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
name: TagBot
on:
issue_comment: # THIS BIT IS NEW
issue_comment:
types:
- created
workflow_dispatch:
jobs:
TagBot:
# THIS 'if' LINE IS NEW
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
# NOTHING BELOW HAS CHANGED
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
# ssh: ${{ secrets.DOCUMENTER_KEY }}
ssh: ${{ secrets.DOCUMENTER_KEY }}
16 changes: 16 additions & 0 deletions .github/workflows/register.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Register Package
on:
workflow_dispatch:
inputs:
version:
description: Version to register or component to bump
required: true
jobs:
register:
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: julia-actions/RegisterAction@latest
with:
token: ${{ secrets.GITHUB_TOKEN }}
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/Manifest.toml

example.tfrecord
example.tfrecord
.vscode/settings.json
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2020 Jun Tian <[email protected]> and contributors
Copyright (c) 2022 Jun Tian <[email protected]> and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
12 changes: 6 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name = "TFRecord"
uuid = "841416d8-1a6a-485a-b0fc-1328d0f53d5e"
authors = ["Jun Tian <[email protected]> and contributors"]
version = "0.4.1"
version = "0.4.2"

[deps]
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"

[compat]
BufferedStreams = "1.0"
BufferedStreams = "1.1"
CodecZlib = "0.7"
EnumX = "1"
MacroTools = "0.5"
ProtoBuf = "0.10, 0.11"
ProtoBuf = "1.0"
TranscodingStreams = "0.9"
julia = "1.5"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
1 change: 1 addition & 0 deletions src/TFRecord.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module TFRecord

include("jlout/example_pb.jl")
using .example_pb
include("core.jl")

end
51 changes: 29 additions & 22 deletions src/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using Base.Threads
using CodecZlib
using BufferedStreams
using MacroTools: @forward
using ProtoBuf: ProtoType
using ProtoBuf
using TranscodingStreams: NoopStream

# Ref: https://github.com/tensorflow/tensorflow/blob/295ad2781683835be974faba0a191528d8079768/tensorflow/core/lib/hash/crc32c.h#L50-L59
Expand All @@ -30,15 +30,16 @@ byte data[n]
uint32 masked_crc32_of_data
```
"""
# TODO check
function read_record(io::IO)
n = Base.read(io, sizeof(UInt64))
masked_crc32_n = Base.read(io, UInt32)
crc32c(n) == unmask(masked_crc32_n) || error("record corrupted, did you set the correct compression?")
@assert crc32c(n) == unmask(masked_crc32_n) "record corrupted, did you set the correct compression?"

data = Base.read(io, Int(reinterpret(UInt64, n)[])) # !!! watch https://github.com/JuliaIO/TranscodingStreams.jl/pull/104
masked_crc32_data = Base.read(io, UInt32)
crc32c(data) == unmask(masked_crc32_data) || error("record corrupted, did you set the correct compression?")
data
@assert crc32c(data) == unmask(masked_crc32_data) "record corrupted, did you set the correct compression?"
return data
end

"""
Expand Down Expand Up @@ -72,14 +73,19 @@ function read(
open(decompressor_stream(compression), file_name, "r") do io
buffered_io = BufferedInputStream(io, bufsize)
while !eof(buffered_io)
instance = readproto(IOBuffer(read_record(buffered_io)), record_type())
buff = IOBuffer(read_record(buffered_io))
d = ProtoDecoder(buff)
instance = decode(d, record_type)
put!(ch, instance)
# close(buffered_io)
end
end
end
end
end



#####
# TFRecordWriter
#####
Expand All @@ -102,8 +108,9 @@ for example `100M`.
"""
function write(s::AbstractString, x; compression=nothing, bufsize=1024*1024)
open(compressor_stream(compression), s, "w") do io
buffered_io = BufferedOutputStream(open(s, "w"), bufsize)
buffered_io = BufferedOutputStream(io, bufsize)
write(buffered_io, x)
close(buffered_io)
end
end

Expand All @@ -113,9 +120,11 @@ function write(io::IO, xs)
end
end

function write(io::IO, x::ProtoType)

function write(io::IO, x::Example)
buff = IOBuffer()
writeproto(buff, x)
e = ProtoEncoder(buff)
encode(e, x)

data_crc = mask(crc32c(seekstart(buff)))
data = take!(seekstart(buff))
Expand All @@ -135,23 +144,21 @@ end
# convert
#####

Base.convert(::Type{Feature}, x::Int) = Feature(;int64_list=Int64List(value=[x]))
Base.convert(::Type{Feature}, x::Bool) = Feature(;int64_list=Int64List(value=[Int(x)]))
Base.convert(::Type{Feature}, x::Float32) = Feature(;float_list=FloatList(value=[x]))
Base.convert(::Type{Feature}, x::AbstractString) = Feature(;bytes_list=BytesList(value=[unsafe_wrap(Vector{UInt8}, x)]))
Base.convert(::Type{Feature}, x::Int) = Feature(OneOf(:int64_list,Int64List([x])))
Base.convert(::Type{Feature}, x::Bool) = Feature(OneOf(:int64_list,Int64List([Int(x)])))
Base.convert(::Type{Feature}, x::Float32) = Feature(OneOf(:float_list,FloatList([x])))
Base.convert(::Type{Feature}, x::AbstractString) = Feature(OneOf(:bytes_list,BytesList([unsafe_wrap(Vector{UInt8}, x)])))

Base.convert(::Type{Feature}, x::Vector{Int}) = Feature(;int64_list=Int64List(value=x))
Base.convert(::Type{Feature}, x::Vector{Bool}) = Feature(;int64_list=Int64List(value=convert(Vector{Int}, x)))
Base.convert(::Type{Feature}, x::Vector{Float32}) = Feature(;float_list=FloatList(value=x))
Base.convert(::Type{Feature}, x::Vector{<:AbstractString}) = Feature(;bytes_list=BytesList(value=[unsafe_wrap(Vector{UInt8}, s) for s in x]))
Base.convert(::Type{Feature}, x::Vector{Array{UInt8,1}}) = Feature(;bytes_list=BytesList(value=x))
Base.convert(::Type{Feature}, x::Vector{Int}) = Feature(OneOf(:int64_list,Int64List(x)))
Base.convert(::Type{Feature}, x::Vector{Bool}) = Feature(OneOf(:int64_list,Int64List(convert(Vector{Int}, x))))
Base.convert(::Type{Feature}, x::Vector{Float32}) = Feature(OneOf(:float_list,FloatList(x)))
Base.convert(::Type{Feature}, x::Vector{<:AbstractString}) = Feature(OneOf(:bytes_list,BytesList([unsafe_wrap(Vector{UInt8}, s) for s in x])))
Base.convert(::Type{Feature}, x::Vector{Array{UInt8,1}}) = Feature(OneOf(:bytes_list,BytesList(x)))

Base.convert(::Type{Features}, x::Dict) = Features(;feature=Dict(k=>convert(Feature, v) for (k, v) in x))
Base.convert(::Type{Features}, x::Dict) = Features(Dict(k=>convert(Feature, v) for (k, v) in x))

function Base.convert(::Type{Example}, x::Dict)
d = Example()
d.features = convert(Features, x)
d
return Example(convert(Features, x))
end

# (De)compression
Expand All @@ -178,4 +185,4 @@ function decompressor_stream(compression)
else
throw(ArgumentError("Unsupported decompression method: $compression"))
end
end
end
Loading

2 comments on commit 01ad066

@findmyway
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/69686

After the above pull request is merged, it is recommended that a tag be created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.4.2 -m "<description of version>" 01ad066298091a83ddd1353cf75066add7346906
git push origin v0.4.2

Please sign in to comment.