From 2429fa1d996dbd5676df1afc8573864231c0b922 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 00:30:14 +0100 Subject: [PATCH 01/11] some optimizations --- alphabet.go | 36 +++++++---- encoder.go | 148 +++++++++++++++++++++++++--------------------- shortuuid.go | 20 ++++++- shortuuid_test.go | 8 +++ 4 files changed, 130 insertions(+), 82 deletions(-) diff --git a/alphabet.go b/alphabet.go index 4ee3ef4..0874443 100644 --- a/alphabet.go +++ b/alphabet.go @@ -4,19 +4,19 @@ import ( "fmt" "math" "slices" + "unicode/utf8" ) // DefaultAlphabet is the default alphabet used. const ( DefaultAlphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" - rune1Max = 1<<7 - 1 ) type alphabet struct { - chars []rune - len int64 - encLen int64 - singleBytes bool + chars []rune + len int64 + encLen int64 + maxBytes int64 } // Remove duplicates and sort it to ensure reproducibility. @@ -30,15 +30,27 @@ func newAlphabet(s string) alphabet { } a := alphabet{ - chars: abc, - len: int64(len(abc)), - encLen: int64(math.Ceil(128 / math.Log2(float64(len(abc))))), - singleBytes: true, + chars: abc, + len: int64(len(abc)), + encLen: int64(math.Ceil(128 / math.Log2(float64(len(abc))))), + maxBytes: 1, } for _, c := range a.chars { - if c > rune1Max { - a.singleBytes = false - break + var b int64 + switch i := uint32(c); { + case i <= rune1Max: + b = 1 + case i <= rune2Max: + b = 2 + case i < surrogateMin, surrogateMax < i && i <= rune3Max: + b = 3 + case i > rune3Max && i <= utf8.MaxRune: + b = 4 + default: + b = 3 + } + if b > a.maxBytes { + a.maxBytes = b } } diff --git a/encoder.go b/encoder.go index afc7121..f333bb9 100644 --- a/encoder.go +++ b/encoder.go @@ -3,11 +3,11 @@ package shortuuid import ( "encoding/binary" "fmt" + "github.com/google/uuid" "math" "math/bits" - "strings" - - "github.com/google/uuid" + "unicode/utf8" + "unsafe" ) type encoder struct { @@ -20,6 +20,24 @@ const ( defaultEncLen = 22 defaultNDigits = 10 defaultDivisor = 362033331456891249 // 57^10 + + tx = 0b10000000 + t2 = 0b11000000 + t3 = 0b11100000 + t4 = 0b11110000 + + maskx = 0b00111111 + + rune1Max = 1<<7 - 1 + rune2Max = 1<<11 - 1 + rune3Max = 1<<16 - 1 + + surrogateMin = 0xD800 + surrogateMax = 0xDFFF + + runeErrorByte0 = t3 | (utf8.RuneError >> 12) + runeErrorByte1 = tx | (utf8.RuneError>>6)&maskx + runeErrorByte2 = tx | utf8.RuneError&maskx ) func maxPow(b uint64) (d uint64, n int) { @@ -34,86 +52,82 @@ func maxPow(b uint64) (d uint64, n int) { // Encode encodes uuid.UUID into a string using the most significant bits (MSB) // first according to the alphabet. func (e encoder) Encode(u uuid.UUID) string { - if e.alphabet.singleBytes { - return e.encodeSingleBytes(u) - } - return e.encode(u) -} - -func (e encoder) encodeSingleBytes(u uuid.UUID) string { num := uint128{ binary.BigEndian.Uint64(u[8:]), binary.BigEndian.Uint64(u[:8]), } - var r uint64 + if e.alphabet.len == defaultBase && e.alphabet.maxBytes == 1 { + return e.defaultEncode(num) + } + return e.encode(num) +} + +func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot of divisions by constant var i int - var buf []byte - if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base - buf = make([]byte, defaultEncLen) - for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(defaultDivisor) - for j := 0; j < defaultNDigits && i >= 0; j++ { - buf[i] = byte(e.alphabet.chars[r%defaultBase]) - r /= defaultBase - i-- - } - } - } else { - buf = make([]byte, e.alphabet.encLen) - l := uint64(e.alphabet.len) - d, n := maxPow(l) - for i = int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(d) - for j := 0; j < n && i >= 0; j++ { - buf[i] = byte(e.alphabet.chars[r%l]) - r /= l - i-- - } + var r uint64 + buf := make([]byte, defaultEncLen) + for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { + num, r = num.quoRem64(defaultDivisor) + for j := 0; j < defaultNDigits && i >= 0; j++ { + buf[i] = byte(e.alphabet.chars[r%defaultBase]) + r /= defaultBase + i-- } } for ; i >= 0; i-- { buf[i] = byte(e.alphabet.chars[0]) } - return string(buf[:]) + return string(buf) } -func (e encoder) encode(u uuid.UUID) string { - num := uint128{ - binary.BigEndian.Uint64(u[8:]), - binary.BigEndian.Uint64(u[:8]), - } - var r uint64 - var outIndexes []uint64 - if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base - outIndexes = make([]uint64, defaultEncLen) // avoids escaping to heap for base57 when used with constant - for i := defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(defaultDivisor) - for j := 0; j < defaultNDigits && i >= 0; j++ { - outIndexes[i] = r % defaultBase - r /= defaultBase - i-- - } - } - } else { - outIndexes = make([]uint64, e.alphabet.encLen) - l := uint64(e.alphabet.len) - d, n := maxPow(l) - for i := int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(d) - for j := 0; j < n && i >= 0; j++ { - outIndexes[i] = r % l - r /= l - i-- - } +func (e encoder) encode(num uint128) string { + var r, ind uint64 + i := e.alphabet.encLen - 1 + buf := make([]byte, e.alphabet.encLen*e.alphabet.maxBytes) + curByteInd := len(buf) - 1 + l := uint64(e.alphabet.len) + d, n := maxPow(l) + + for num.Hi > 0 || num.Lo > 0 { + num, r = num.quoRem64(d) + for j := 0; j < n && i >= 0; j++ { + r, ind = r/l, r%l + curByteInd -= placeRuneEndingAt(buf, e.alphabet.chars[ind], curByteInd) + i-- } } + for ; i >= 0; i-- { + curByteInd -= placeRuneEndingAt(buf, e.alphabet.chars[0], curByteInd) + } + return unsafe.String(&buf[curByteInd+1], len(buf)-curByteInd-1) +} - var sb strings.Builder - sb.Grow(int(e.alphabet.encLen)) - for i := 0; i < int(e.alphabet.encLen); i++ { - sb.WriteRune(e.alphabet.chars[outIndexes[i]]) +func placeRuneEndingAt(p []byte, r rune, ind int) int { + switch i := uint32(r); { + case i <= rune1Max: + p[ind] = byte(r) + return 1 + case i <= rune2Max: + p[ind] = tx | byte(r)&maskx + p[ind-1] = t2 | byte(r>>6) + return 2 + case i < surrogateMin, surrogateMax < i && i <= rune3Max: + p[ind] = tx | byte(r)&maskx + p[ind-1] = tx | byte(r>>6)&maskx + p[ind-2] = t3 | byte(r>>12) + return 3 + case i > rune3Max && i <= utf8.MaxRune: + p[ind] = tx | byte(r)&maskx + p[ind-1] = tx | byte(r>>6)&maskx + p[ind-2] = tx | byte(r>>12)&maskx + p[ind-3] = t4 | byte(r>>18) + return 4 + default: + p[ind] = runeErrorByte2 + p[ind-1] = runeErrorByte1 + p[ind-2] = runeErrorByte0 + return 3 } - return sb.String() } // Decode decodes a string according to the alphabet into a uuid.UUID. If s is diff --git a/shortuuid.go b/shortuuid.go index 0c51b9a..87fdb57 100644 --- a/shortuuid.go +++ b/shortuuid.go @@ -1,7 +1,9 @@ package shortuuid import ( + "crypto/sha1" "strings" + "unsafe" "github.com/google/uuid" ) @@ -34,11 +36,11 @@ func NewWithNamespace(name string) string { case name == "": u = uuid.New() case hasPrefixCaseInsensitive(name, "https://"): - u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name)) + u = hashedUuid(uuid.NameSpaceURL, name) case hasPrefixCaseInsensitive(name, "http://"): - u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name)) + u = hashedUuid(uuid.NameSpaceURL, name) default: - u = uuid.NewSHA1(uuid.NameSpaceDNS, []byte(name)) + u = hashedUuid(uuid.NameSpaceDNS, name) } return DefaultEncoder.Encode(u) @@ -54,3 +56,15 @@ func NewWithAlphabet(abc string) string { func hasPrefixCaseInsensitive(s, prefix string) bool { return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix) } + +func hashedUuid(space uuid.UUID, data string) (u uuid.UUID) { + h := sha1.New() + h.Write(space[:]) //nolint:errcheck + h.Write(unsafe.Slice(unsafe.StringData(data), len(data))) //nolint:errcheck + buf := make([]byte, 0, sha1.Size) + s := h.Sum(buf) + copy(u[:], s) + u[6] = (u[6] & 0x0f) | uint8((5&0xf)<<4) + u[8] = (u[8] & 0x3f) | 0x80 // RFC 4122 variant + return u +} diff --git a/shortuuid_test.go b/shortuuid_test.go index 105b466..610d33d 100644 --- a/shortuuid_test.go +++ b/shortuuid_test.go @@ -6,6 +6,10 @@ import ( "github.com/google/uuid" ) +func init() { + uuid.EnableRandPool() +} + var testVector = []struct { uuid string shortuuid string @@ -252,6 +256,10 @@ func TestAlphabet_MB(t *testing.T) { } } +func init() { + uuid.EnableRandPool() +} + func BenchmarkUUID(b *testing.B) { for i := 0; i < b.N; i++ { New() From eca5cff1ede07f1778d39ea05e9a7a2a82a1d308 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 02:25:23 +0100 Subject: [PATCH 02/11] remove randpool from tests --- shortuuid_test.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/shortuuid_test.go b/shortuuid_test.go index 610d33d..105b466 100644 --- a/shortuuid_test.go +++ b/shortuuid_test.go @@ -6,10 +6,6 @@ import ( "github.com/google/uuid" ) -func init() { - uuid.EnableRandPool() -} - var testVector = []struct { uuid string shortuuid string @@ -256,10 +252,6 @@ func TestAlphabet_MB(t *testing.T) { } } -func init() { - uuid.EnableRandPool() -} - func BenchmarkUUID(b *testing.B) { for i := 0; i < b.N; i++ { New() From 616238fc8ddac0530f95631aab3ded27d38c4625 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 02:29:48 +0100 Subject: [PATCH 03/11] fix unsafe pointer --- encoder.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/encoder.go b/encoder.go index f333bb9..dcdfbb5 100644 --- a/encoder.go +++ b/encoder.go @@ -99,7 +99,8 @@ func (e encoder) encode(num uint128) string { for ; i >= 0; i-- { curByteInd -= placeRuneEndingAt(buf, e.alphabet.chars[0], curByteInd) } - return unsafe.String(&buf[curByteInd+1], len(buf)-curByteInd-1) + buf = buf[curByteInd+1:] + return unsafe.String(unsafe.SliceData(buf), len(buf)) // same as in strings.Builder } func placeRuneEndingAt(p []byte, r rune, ind int) int { From b6e5b1a6f6c1f65984a688acc944a8e2893f0462 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 04:19:28 +0100 Subject: [PATCH 04/11] change alphabet.maxBytes to uint8 --- alphabet.go | 4 ++-- encoder.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/alphabet.go b/alphabet.go index 0874443..98b59d8 100644 --- a/alphabet.go +++ b/alphabet.go @@ -16,7 +16,7 @@ type alphabet struct { chars []rune len int64 encLen int64 - maxBytes int64 + maxBytes uint8 } // Remove duplicates and sort it to ensure reproducibility. @@ -36,7 +36,7 @@ func newAlphabet(s string) alphabet { maxBytes: 1, } for _, c := range a.chars { - var b int64 + var b uint8 switch i := uint32(c); { case i <= rune1Max: b = 1 diff --git a/encoder.go b/encoder.go index dcdfbb5..aceb332 100644 --- a/encoder.go +++ b/encoder.go @@ -83,7 +83,7 @@ func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot func (e encoder) encode(num uint128) string { var r, ind uint64 i := e.alphabet.encLen - 1 - buf := make([]byte, e.alphabet.encLen*e.alphabet.maxBytes) + buf := make([]byte, e.alphabet.encLen*int64(e.alphabet.maxBytes)) curByteInd := len(buf) - 1 l := uint64(e.alphabet.len) d, n := maxPow(l) From cdc336fcfec347b9b8871868e15748a4d76711ef Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 04:58:49 +0100 Subject: [PATCH 05/11] add strict NewWithNamespace tests --- shortuuid_test.go | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/shortuuid_test.go b/shortuuid_test.go index 105b466..8ef55ae 100644 --- a/shortuuid_test.go +++ b/shortuuid_test.go @@ -128,20 +128,29 @@ var testVector = []struct { {"f9ee01c3-2015-4716-930e-4d5449810833", "nUfojcH2M5j9j3Tk5A8mf7"}, } -func TestGeneration(t *testing.T) { - tests := []string{ - "", - "http://www.example.com/", - "HTTP://www.example.com/", - "example.com/", +func TestNewWithNamespace(t *testing.T) { + var tests = []struct { + name string + uuid string + }{ + {"http://www.example.com/", "nzUQAfy7CW4Dd4kzLguPSV"}, + {"HTTP://www.example.com/", "N9ZezvXJcoXvKzwiNmGYmH"}, + {"Https://www.example.com/", "jSz34Z6QzADzy93ywucXMv"}, + {"example.com/", "kueUMiGUbGccYhpZK8Czat"}, + {"うえおなにぬねのウエオナニヌネノうえおなにぬねのウエオナニヌネノ", "Mp2Q7GQSRYnoDZyCtGttDg"}, + {"う", "dTbaUbVKrhNkkZKEwZxLqa"}, } - for _, test := range tests { - u := NewWithNamespace(test) - if len(u) < 20 || len(u) > 24 { - t.Errorf("expected %q to be in range [20, 24], got %d", u, len(u)) + u := NewWithNamespace(test.name) + + if u != test.uuid { + t.Errorf("expected %q, got %q", test.uuid, u) } } + + if NewWithNamespace("") == NewWithNamespace("") { + t.Errorf("NewWithNamespace should generate random uuid with empty namespace") + } } func TestEncoding(t *testing.T) { From 6298a23f27f345d9d291c3c36efe9322b09fcc92 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 05:02:00 +0100 Subject: [PATCH 06/11] fix lint --- shortuuid_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/shortuuid_test.go b/shortuuid_test.go index 8ef55ae..18516c0 100644 --- a/shortuuid_test.go +++ b/shortuuid_test.go @@ -148,7 +148,9 @@ func TestNewWithNamespace(t *testing.T) { } } - if NewWithNamespace("") == NewWithNamespace("") { + u1 := NewWithNamespace("") + u2 := NewWithNamespace("") + if u1 == u2 { t.Errorf("NewWithNamespace should generate random uuid with empty namespace") } } From 3786020eba6c69df7cba6fde00f022ade67c9dad Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 06:25:06 +0100 Subject: [PATCH 07/11] simplify working with runes --- alphabet.go | 21 +++--------------- encoder.go | 63 +++++++++------------------------------------------- shortuuid.go | 8 +++---- 3 files changed, 17 insertions(+), 75 deletions(-) diff --git a/alphabet.go b/alphabet.go index 98b59d8..d788b8a 100644 --- a/alphabet.go +++ b/alphabet.go @@ -15,7 +15,7 @@ const ( type alphabet struct { chars []rune len int64 - encLen int64 + encLen uint8 maxBytes uint8 } @@ -32,26 +32,11 @@ func newAlphabet(s string) alphabet { a := alphabet{ chars: abc, len: int64(len(abc)), - encLen: int64(math.Ceil(128 / math.Log2(float64(len(abc))))), + encLen: uint8(math.Ceil(128 / math.Log2(float64(len(abc))))), maxBytes: 1, } for _, c := range a.chars { - var b uint8 - switch i := uint32(c); { - case i <= rune1Max: - b = 1 - case i <= rune2Max: - b = 2 - case i < surrogateMin, surrogateMax < i && i <= rune3Max: - b = 3 - case i > rune3Max && i <= utf8.MaxRune: - b = 4 - default: - b = 3 - } - if b > a.maxBytes { - a.maxBytes = b - } + a.maxBytes = max(a.maxBytes, uint8(utf8.RuneLen(c))) } return a diff --git a/encoder.go b/encoder.go index aceb332..e3b9199 100644 --- a/encoder.go +++ b/encoder.go @@ -3,11 +3,12 @@ package shortuuid import ( "encoding/binary" "fmt" - "github.com/google/uuid" "math" "math/bits" "unicode/utf8" "unsafe" + + "github.com/google/uuid" ) type encoder struct { @@ -20,24 +21,6 @@ const ( defaultEncLen = 22 defaultNDigits = 10 defaultDivisor = 362033331456891249 // 57^10 - - tx = 0b10000000 - t2 = 0b11000000 - t3 = 0b11100000 - t4 = 0b11110000 - - maskx = 0b00111111 - - rune1Max = 1<<7 - 1 - rune2Max = 1<<11 - 1 - rune3Max = 1<<16 - 1 - - surrogateMin = 0xD800 - surrogateMax = 0xDFFF - - runeErrorByte0 = t3 | (utf8.RuneError >> 12) - runeErrorByte1 = tx | (utf8.RuneError>>6)&maskx - runeErrorByte2 = tx | utf8.RuneError&maskx ) func maxPow(b uint64) (d uint64, n int) { @@ -82,9 +65,9 @@ func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot func (e encoder) encode(num uint128) string { var r, ind uint64 - i := e.alphabet.encLen - 1 - buf := make([]byte, e.alphabet.encLen*int64(e.alphabet.maxBytes)) - curByteInd := len(buf) - 1 + i := int(e.alphabet.encLen - 1) + buf := make([]byte, int64(e.alphabet.encLen*e.alphabet.maxBytes)) + lastPlaced := len(buf) l := uint64(e.alphabet.len) d, n := maxPow(l) @@ -92,45 +75,19 @@ func (e encoder) encode(num uint128) string { num, r = num.quoRem64(d) for j := 0; j < n && i >= 0; j++ { r, ind = r/l, r%l - curByteInd -= placeRuneEndingAt(buf, e.alphabet.chars[ind], curByteInd) + c := e.alphabet.chars[ind] + lastPlaced -= utf8.EncodeRune(buf[lastPlaced-utf8.RuneLen(c):], c) i-- } } + firstRuneLen := utf8.RuneLen(e.alphabet.chars[0]) for ; i >= 0; i-- { - curByteInd -= placeRuneEndingAt(buf, e.alphabet.chars[0], curByteInd) + lastPlaced -= utf8.EncodeRune(buf[lastPlaced-firstRuneLen:], e.alphabet.chars[0]) } - buf = buf[curByteInd+1:] + buf = buf[lastPlaced:] return unsafe.String(unsafe.SliceData(buf), len(buf)) // same as in strings.Builder } -func placeRuneEndingAt(p []byte, r rune, ind int) int { - switch i := uint32(r); { - case i <= rune1Max: - p[ind] = byte(r) - return 1 - case i <= rune2Max: - p[ind] = tx | byte(r)&maskx - p[ind-1] = t2 | byte(r>>6) - return 2 - case i < surrogateMin, surrogateMax < i && i <= rune3Max: - p[ind] = tx | byte(r)&maskx - p[ind-1] = tx | byte(r>>6)&maskx - p[ind-2] = t3 | byte(r>>12) - return 3 - case i > rune3Max && i <= utf8.MaxRune: - p[ind] = tx | byte(r)&maskx - p[ind-1] = tx | byte(r>>6)&maskx - p[ind-2] = tx | byte(r>>12)&maskx - p[ind-3] = t4 | byte(r>>18) - return 4 - default: - p[ind] = runeErrorByte2 - p[ind-1] = runeErrorByte1 - p[ind-2] = runeErrorByte0 - return 3 - } -} - // Decode decodes a string according to the alphabet into a uuid.UUID. If s is // too short, its most significant bits (MSB) will be padded with 0 (zero). func (e encoder) Decode(s string) (u uuid.UUID, err error) { diff --git a/shortuuid.go b/shortuuid.go index 87fdb57..ee7bfcd 100644 --- a/shortuuid.go +++ b/shortuuid.go @@ -36,11 +36,11 @@ func NewWithNamespace(name string) string { case name == "": u = uuid.New() case hasPrefixCaseInsensitive(name, "https://"): - u = hashedUuid(uuid.NameSpaceURL, name) + u = hashedUUID(uuid.NameSpaceURL, name) case hasPrefixCaseInsensitive(name, "http://"): - u = hashedUuid(uuid.NameSpaceURL, name) + u = hashedUUID(uuid.NameSpaceURL, name) default: - u = hashedUuid(uuid.NameSpaceDNS, name) + u = hashedUUID(uuid.NameSpaceDNS, name) } return DefaultEncoder.Encode(u) @@ -57,7 +57,7 @@ func hasPrefixCaseInsensitive(s, prefix string) bool { return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix) } -func hashedUuid(space uuid.UUID, data string) (u uuid.UUID) { +func hashedUUID(space uuid.UUID, data string) (u uuid.UUID) { h := sha1.New() h.Write(space[:]) //nolint:errcheck h.Write(unsafe.Slice(unsafe.StringData(data), len(data))) //nolint:errcheck From 74867f459a67475a12e81dbfd9f03d6b729123ce Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 06:30:41 +0100 Subject: [PATCH 08/11] fix byte len calculation --- encoder.go | 2 +- shortuuid.go | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/encoder.go b/encoder.go index e3b9199..42734d4 100644 --- a/encoder.go +++ b/encoder.go @@ -66,7 +66,7 @@ func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot func (e encoder) encode(num uint128) string { var r, ind uint64 i := int(e.alphabet.encLen - 1) - buf := make([]byte, int64(e.alphabet.encLen*e.alphabet.maxBytes)) + buf := make([]byte, int64(e.alphabet.encLen)*int64(e.alphabet.maxBytes)) lastPlaced := len(buf) l := uint64(e.alphabet.len) d, n := maxPow(l) diff --git a/shortuuid.go b/shortuuid.go index ee7bfcd..daad857 100644 --- a/shortuuid.go +++ b/shortuuid.go @@ -61,8 +61,7 @@ func hashedUUID(space uuid.UUID, data string) (u uuid.UUID) { h := sha1.New() h.Write(space[:]) //nolint:errcheck h.Write(unsafe.Slice(unsafe.StringData(data), len(data))) //nolint:errcheck - buf := make([]byte, 0, sha1.Size) - s := h.Sum(buf) + s := h.Sum(make([]byte, 0, sha1.Size)) copy(u[:], s) u[6] = (u[6] & 0x0f) | uint8((5&0xf)<<4) u[8] = (u[8] & 0x3f) | 0x80 // RFC 4122 variant From c958a3cd0d415addb0e7ea92c2e97d88ae6ff0d8 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 06:38:01 +0100 Subject: [PATCH 09/11] add test for short alphabet (covers previous buf len fix) --- shortuuid_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/shortuuid_test.go b/shortuuid_test.go index 18516c0..75afa6f 100644 --- a/shortuuid_test.go +++ b/shortuuid_test.go @@ -223,6 +223,26 @@ func TestNewWithAlphabet_MultipleBytes(t *testing.T) { } } +func TestNewWithAlphabet_Short(t *testing.T) { + abc := "うえ" + enc := encoder{newAlphabet(abc)} + u1 := uuid.MustParse("bcee4c4f-cee8-4413-8f10-0f68d75c797b") + exp := "えうええええううえええうえええううえううええうううえううええええええううえええうえええうえううううえうううえうううううえううえええうううええええうううえううううううううええええうええうえうううええうえうえええうえうえええうううええええううえうええええうええ" + u2 := enc.Encode(u1) + if u2 != exp { + t.Errorf("expected uuid to be %q, got %q", exp, u2) + return + } + u3, err := enc.Decode(u2) + if err != nil { + t.Error(err) + return + } + if u1 != u3 { + t.Errorf("expected %q, got %q", u1, u3) + } +} + func TestAlphabetCustomLen(t *testing.T) { abc := "21345687654123456" enc := encoder{newAlphabet(abc)} From 43645faca246f70251b6d709b9c41e1dd979e55d Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Mon, 6 Jan 2025 07:14:26 +0100 Subject: [PATCH 10/11] optimize maxBytes calculation --- alphabet.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/alphabet.go b/alphabet.go index d788b8a..e4ae503 100644 --- a/alphabet.go +++ b/alphabet.go @@ -33,10 +33,7 @@ func newAlphabet(s string) alphabet { chars: abc, len: int64(len(abc)), encLen: uint8(math.Ceil(128 / math.Log2(float64(len(abc))))), - maxBytes: 1, - } - for _, c := range a.chars { - a.maxBytes = max(a.maxBytes, uint8(utf8.RuneLen(c))) + maxBytes: uint8(utf8.RuneLen(abc[len(abc)-1])), } return a From f85a57052141a785b3bd28da0ca77a7a531334a3 Mon Sep 17 00:00:00 2001 From: Anatoly Kussul Date: Thu, 9 Jan 2025 09:08:38 +0100 Subject: [PATCH 11/11] use array in default encode --- encoder.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/encoder.go b/encoder.go index 42734d4..924c1b1 100644 --- a/encoder.go +++ b/encoder.go @@ -48,7 +48,7 @@ func (e encoder) Encode(u uuid.UUID) string { func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot of divisions by constant var i int var r uint64 - buf := make([]byte, defaultEncLen) + var buf [defaultEncLen]byte for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { num, r = num.quoRem64(defaultDivisor) for j := 0; j < defaultNDigits && i >= 0; j++ { @@ -60,7 +60,7 @@ func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot for ; i >= 0; i-- { buf[i] = byte(e.alphabet.chars[0]) } - return string(buf) + return string(buf[:]) } func (e encoder) encode(num uint128) string {