Skip to content

Commit

Permalink
Fixes and cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
jaakkor2 committed Jan 22, 2024
1 parent f2ff299 commit c787bb6
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 34 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "JMPReader"
uuid = "d9f7e686-cf87-4d12-8d7a-0e9b8c9fba29"
authors = ["Jaakko Ruohio <[email protected]>"]
version = "0.1.4"
version = "0.1.5"

[deps]
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Expand Down
83 changes: 50 additions & 33 deletions src/column.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function column_data(data, info, i::Int)
offset = [0]
columnname = _read_string!(raw, offset, 2)
lenname = length(columnname)
dt1,dt2,dt3,dt4,dt5 = _read_reals!(raw, offset, UInt8, 5)
dt1, dt2, dt3, dt4, dt5 = _read_reals!(raw, offset, UInt8, 5)

# compressed
if dt1 in [0x09, 0x0a]
Expand All @@ -28,44 +28,53 @@ function column_data(data, info, i::Int)
end

# one of Float64, Date, Time, Duration
# dt3 = format width
if dt1 in [0x01, 0x0a]
out = reinterpret(Float64, a[end-8*info.nrows+1:end])
# Float64
if [dt3, dt4] in [
[0x0c, 0x63], [0x0c, 0x43], [0x0d, 0x63], [0x0c, 0x03], [0x0c, 0x59],
[0x0c, 0x60], [0x0c, 0x42], [0x0d, 0x42], [0x01, 0x00], [0x06, 0x42],
[0x09, 0x63]
]
if (dt4 == dt5 && dt4 in [
0x00, 0x03, 0x42, 0x43, 0x59, 0x60, 0x63,
]) ||
dt5 in [0x5e] # fixed dec, dt3=width, dt4=dec

out = replace(out, NaN => missing)
return out
end
# then it is a date, time or duration
out = to_datetime(out)
# Date
if [dt3, dt4] in [
[0x0c, 0x65], [0x0c, 0x6e], [0x0c, 0x6f], [0x0c, 0x70], [0x0c, 0x71],
[0x0c, 0x72], [0x0c, 0x75], [0x0c, 0x76], [0x0c, 0x7a], [0x0c, 0x7f],
[0x0c, 0x88], [0x0c, 0x8b], [0x0a, 0x70], [0x0a, 0x75], [0x14, 0x67],
[0x23, 0x66],
]
if (dt4 == dt5 && dt4 in [
0x65, 0x66, 0x67, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x75, 0x76, 0x7a,
0x7f, 0x88, 0x8b,
]) ||
[dt4, dt5] in [[0x72, 0x65]]

return [ismissing(x) ? missing : Date(x) for x in out]
end
# Time
if [dt3, dt4] in [
[0x16, 0x7e], [0x16, 0x74], [0x13, 0x7d], [0x13, 0x69], [0x17, 0x6a],
[0x16, 0x73], [0x13, 0x77], [0x16, 0x78], [0x13, 0x86], [0x13, 0x87],
[0x13, 0x7b], [0x16, 0x7c], [0x13, 0x6c], [0x13, 0x6d], [0x13, 0x79],
[0x13, 0x82], [0x13, 0x80], [0x13, 0x81], [0x13, 0x89], [0x17, 0x8a],
if dt5 in [0x69, 0x6a, 0x73, 0x74, 0x78, 0x7e, 0x81] && dt4 in [
0x69, 0x6a, 0x6c, 0x6d, 0x73, 0x74, 0x77, 0x78, 0x79, 0x7b, 0x7c,
0x7d, 0x7e, 0x80, 0x81, 0x82, 0x86, 0x87, 0x89, 0x8a,
]
return [ismissing(x) ? missing : DateTime(x) for x in out]

end
# Duration
if [dt3, dt4] in [
[0x0c, 0x85], [0x0e, 0x6c], [0x11, 0x6d], [0x0c, 0x85], [0x0d, 0x84],
[0x0c, 0x83]
if dt4 == dt5 && dt4 in [
0x0c, 0x6b, 0x6c, 0x6d, 0x83, 0x84, 0x85
]
return [ismissing(x) ? missing : DateTime(x) - JMP_STARTDATE for x in out]
end
# Currency
if dt4 == dt5 && dt4 in [0x5f]
# 1,0,13,95,95
@warn("currency not implemented")
end
end
# 1-byte integer
if dt1 == 0xff # custom format?
# 255,0,4,99,1
@warn("one-byte integer not implemented")
end

# character
Expand Down Expand Up @@ -95,21 +104,29 @@ function column_data(data, info, i::Int)
throw(ErrorException("Unknown `widthbytes=$widthbytes`, some offset is wrong somewhere, column i=$i"))
end
else # uncompressed
hasunits = raw[lenname + 13] # used
unknown1 = raw[lenname + 15] # not used, a bit similar to offset3
unknown2 = raw[lenname + 23] # not used
offset1 = raw[lenname + 25] # used
lenunits = raw[lenname + 33]
ofs = lenname + offset1 # offset to width data
if hasunits == 1
offset2 = raw[lenname + lenunits + 43]
ofs += offset2 + 10
# continue after dt1,...,dt5 were read
_read_reals!(raw, offset, UInt8, 5)
hasunits = _read_real!(raw, offset, UInt8)
_read_reals!(raw, offset, UInt8)
n1 = _read_real!(raw, offset, Int64)
if hasunits == 1 && n1 > 0
_read_real!(raw, offset, Int16) # ??
_read_real!(raw, offset, Int64) # some length
label = _read_string!(raw, offset, 4)
_read_real!(raw, offset, UInt32)
end
widthbytes = raw[ofs + 37]
_read_real!(raw, offset, UInt16) # n2 as bytes
n2 = _read_real!(raw, offset, UInt32)
_read_reals!(raw, offset, UInt8, n2)
_read_real!(raw, offset, UInt64) # 8 bytes
widthbytes = _read_real!(raw, offset, UInt8)
maxwidth = _read_real!(raw, offset, UInt32)
if widthbytes == 0x01 # Int8
widths = reinterpret(Int8, raw[ofs + 41 .+ (1:info.nrows)])
widths = _read_reals!(raw, offset, Int8, info.nrows)
elseif widthbytes == 0x02 # Int16
widths = reinterpret(Int16, raw[ofs + 41 .+ (1:2*info.nrows)])
widths = _read_reals!(raw, offset, Int16, info.nrows)
elseif widthbytes == 0x04 # Int32
widths = _read_reals!(raw, offset, Int32, info.nrows)
else
throw(ErrorException("Unknown `widthbytes=$widthbytes`, some offset is wrong somewhere, column i=$i"))
end
Expand All @@ -122,7 +139,7 @@ function column_data(data, info, i::Int)
end

@error("Data type combination `(dt1,dt2,dt3,dt4,dt5)=$dt1,$dt2,$dt3,$dt4,$dt5` not implemented, found in column `$(info.column.names[i])` (i=$i), returning a vector of NaN's")
return fill(NaN, info.ncols)
return fill(NaN, info.nrows)
end

function column_data(data, info, name::Union{String,Regex})
Expand Down
3 changes: 3 additions & 0 deletions src/metadata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ function metadata(a)
savetime = to_datetime([_read_real!(a, offset, Float64)])[1]
foo3 = _read_real!(a, offset, UInt16) ## 18
buildstring = _read_string!(a, offset, 4)
m = match(r"Version (?<version>.*)$", buildstring)
isnothing(m) && throw(ErrorException("Could not determine JMP version"))
VersionNumber(m["version"]) >= v"15" || throw(ErrorException("The file is saved with too old JMP version ($(m["version"])). Consider saving it with a more recent version of JMP."))

# brute-force find the offset to column data index
offset = find_column_data_offset(a, ncols)
Expand Down

0 comments on commit c787bb6

Please sign in to comment.