From 0846c6c596809c8abfeeed12d307c272e1b8c0cd Mon Sep 17 00:00:00 2001 From: ScottPJones Date: Fri, 24 Jan 2020 07:58:48 -0500 Subject: [PATCH] Fix issue on 32-bit systems --- .travis.yml | 2 +- Project.toml | 2 +- appveyor.yml | 2 +- src/MurmurHash3.jl | 23 +++++++++++++++++++++-- test/runtests.jl | 30 +++++++++++++++++------------- 5 files changed, 41 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index 171083c..c7f358b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ os: - osx julia: - 1.0 - - 1.1 + - 1.3 - nightly notifications: email: false diff --git a/Project.toml b/Project.toml index ac3467a..9df9e4c 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,7 @@ keywords = ["Strings", "Hashing"] license = "MIT" name = "MurmurHash3" uuid = "b10b62ed-fbae-5ea5-b934-abaf0477b71d" -version = "1.0.1" +version = "1.0.2" [deps] diff --git a/appveyor.yml b/appveyor.yml index 022d9ed..fbbd6d3 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,7 +1,7 @@ environment: matrix: - julia_version: 1.0 - - julia_version: 1 + - julia_version: 1.3 - julia_version: latest platform: diff --git a/src/MurmurHash3.jl b/src/MurmurHash3.jl index 1de8e62..5ba1cf7 100644 --- a/src/MurmurHash3.jl +++ b/src/MurmurHash3.jl @@ -257,7 +257,7 @@ const d2 = 0x1b873593 @inline fmix(h::UInt32) = xor16(xor13(xor16(h) * 0x85ebca6b) * 0xc2b2ae35) -@inline mhblock(h1, k1) = rotl13(xor(h1, rotl15(k1 * d1) * d2))*5 + 0xe6546b64 +@inline mhblock(h1, k1) = u32(rotl13(xor(h1, rotl15(k1 * d1) * d2))*0x00005) + 0xe6546b64 @inline function mhbody(nblocks, pnt, h1) for i = 1:nblocks @@ -269,7 +269,11 @@ end function mmhash32(len, pnt, seed::UInt32) pnt, h1 = mhbody(len >>> 2, reinterpret(Ptr{UInt32}, pnt), seed) - (len & 3) == 0 || (h1 = xor(h1, rotl15(unsafe_load(pnt)) * d1) * d2) + res = len & 3 + if res != 0 + v = unsafe_load(pnt) & ifelse(res==1, 0x000ff, ifelse(res==2, 0x0ffff, 0xffffff)) + h1 = xor(h1, rotl15(v) * d1) * d2 + end fmix(xor(h1, u32(len))) end @@ -319,6 +323,12 @@ end pnt, h1, h2, h3, h4 end +# degenerate case, hash for 0 length strings, based entirely on seed +function mmhash128_4(seed::UInt32) + h = fmix(5*seed)*5 + up32(h) | fmix(4*seed)*4, up32(h) | h +end + function mmhash128_4(len, pnt, seed::UInt32) pnt, h1, h2, h3, h4 = mhbody(len >>> 4, pnt, seed, seed, seed, seed) if (left = len & 15) != 0 @@ -334,8 +344,17 @@ function mmhash128_4(len, pnt, seed::UInt32) mhfin(len, h1, h2, h3, h4) end +import Base.GC: @preserve + +# AbstractString MurmurHash3, converts to UTF-8 on the fly (not optimized yet!) +function mmhash128_4(s::AbstractString, seed::UInt32) + str = string(s) + @preserve str mmhash128_4(sizeof(str), pointer(str), seed) +end + @inline shift_n_32(v, n) = u32(v) << (((n & 7)%UInt)<<3) + @inline function get_utf8(cnt, ch) if ch <= 0x7f cnt + 1, u32(ch) diff --git a/test/runtests.jl b/test/runtests.jl index 82999bb..85f33e1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,25 +3,29 @@ using MurmurHash3 -@static VERSION < v"0.7.0-DEV" ? (using Base.Test) : (using Test) - -_memhash(siz, ptr, seed) = - ccall(Base.memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), ptr, siz, seed % UInt32) +using Test p1 = SubString("--hello--",3,7) p2 = "hello" +_memhash(siz, ptr) = ccall(Base.memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), ptr, siz, 0%UInt32) +mh(str::String) = _memhash(sizeof(str), pointer(str)) +mh(str::AbstractString) = mh(string(str)) + mmhash(str::String) = mmhash128_a(sizeof(str), pointer(str), 0%UInt32) -@static if sizeof(Int) == 8 - mmhashc(str::AbstractString) = mmhash128_c(str, 0%UInt32) -else - mmhashc(str::AbstractString) = (s = string(str); mmhash128_c(sizeof(s), pointer(s), 0%UInt32)) -end -memhash(str) = _memhash(sizeof(str), pointer(str), 0%UInt32) +mmhashc(str::AbstractString) = mmhash128_c(str, 0%UInt32) + +mh32(str) = mmhash32(sizeof(str), pointer(str), 0%UInt32) @testset "MurmurHash3" begin @test mmhashc(p1) == mmhash(p2) - @test last(mmhashc(p1)) == memhash(p1) - @test last(mmhashc(p2)) == memhash(p2) - @test last(mmhash(p2)) == memhash(p1) + @static if sizeof(Int) == 8 + @test last(mmhashc(p1)) == mh(p1) + @test last(mmhashc(p2)) == mh(p2) + @test last(mmhash(p2)) == mh(p1) + else + @test mh32(p2) == mh(p2) + end end + +