diff --git a/alphabet.go b/alphabet.go index 4ee3ef4..e4ae503 100644 --- a/alphabet.go +++ b/alphabet.go @@ -4,19 +4,19 @@ import ( "fmt" "math" "slices" + "unicode/utf8" ) // DefaultAlphabet is the default alphabet used. const ( DefaultAlphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" - rune1Max = 1<<7 - 1 ) type alphabet struct { - chars []rune - len int64 - encLen int64 - singleBytes bool + chars []rune + len int64 + encLen uint8 + maxBytes uint8 } // Remove duplicates and sort it to ensure reproducibility. @@ -30,16 +30,10 @@ func newAlphabet(s string) alphabet { } a := alphabet{ - chars: abc, - len: int64(len(abc)), - encLen: int64(math.Ceil(128 / math.Log2(float64(len(abc))))), - singleBytes: true, - } - for _, c := range a.chars { - if c > rune1Max { - a.singleBytes = false - break - } + chars: abc, + len: int64(len(abc)), + encLen: uint8(math.Ceil(128 / math.Log2(float64(len(abc))))), + maxBytes: uint8(utf8.RuneLen(abc[len(abc)-1])), } return a diff --git a/encoder.go b/encoder.go index afc7121..924c1b1 100644 --- a/encoder.go +++ b/encoder.go @@ -5,7 +5,8 @@ import ( "fmt" "math" "math/bits" - "strings" + "unicode/utf8" + "unsafe" "github.com/google/uuid" ) @@ -34,41 +35,26 @@ func maxPow(b uint64) (d uint64, n int) { // Encode encodes uuid.UUID into a string using the most significant bits (MSB) // first according to the alphabet. func (e encoder) Encode(u uuid.UUID) string { - if e.alphabet.singleBytes { - return e.encodeSingleBytes(u) - } - return e.encode(u) -} - -func (e encoder) encodeSingleBytes(u uuid.UUID) string { num := uint128{ binary.BigEndian.Uint64(u[8:]), binary.BigEndian.Uint64(u[:8]), } - var r uint64 + if e.alphabet.len == defaultBase && e.alphabet.maxBytes == 1 { + return e.defaultEncode(num) + } + return e.encode(num) +} + +func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot of divisions by constant var i int - var buf []byte - if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base - buf = make([]byte, defaultEncLen) - for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(defaultDivisor) - for j := 0; j < defaultNDigits && i >= 0; j++ { - buf[i] = byte(e.alphabet.chars[r%defaultBase]) - r /= defaultBase - i-- - } - } - } else { - buf = make([]byte, e.alphabet.encLen) - l := uint64(e.alphabet.len) - d, n := maxPow(l) - for i = int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(d) - for j := 0; j < n && i >= 0; j++ { - buf[i] = byte(e.alphabet.chars[r%l]) - r /= l - i-- - } + var r uint64 + var buf [defaultEncLen]byte + for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { + num, r = num.quoRem64(defaultDivisor) + for j := 0; j < defaultNDigits && i >= 0; j++ { + buf[i] = byte(e.alphabet.chars[r%defaultBase]) + r /= defaultBase + i-- } } for ; i >= 0; i-- { @@ -77,43 +63,29 @@ func (e encoder) encodeSingleBytes(u uuid.UUID) string { return string(buf[:]) } -func (e encoder) encode(u uuid.UUID) string { - num := uint128{ - binary.BigEndian.Uint64(u[8:]), - binary.BigEndian.Uint64(u[:8]), - } - var r uint64 - var outIndexes []uint64 - if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base - outIndexes = make([]uint64, defaultEncLen) // avoids escaping to heap for base57 when used with constant - for i := defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(defaultDivisor) - for j := 0; j < defaultNDigits && i >= 0; j++ { - outIndexes[i] = r % defaultBase - r /= defaultBase - i-- - } - } - } else { - outIndexes = make([]uint64, e.alphabet.encLen) - l := uint64(e.alphabet.len) - d, n := maxPow(l) - for i := int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; { - num, r = num.quoRem64(d) - for j := 0; j < n && i >= 0; j++ { - outIndexes[i] = r % l - r /= l - i-- - } +func (e encoder) encode(num uint128) string { + var r, ind uint64 + i := int(e.alphabet.encLen - 1) + buf := make([]byte, int64(e.alphabet.encLen)*int64(e.alphabet.maxBytes)) + lastPlaced := len(buf) + l := uint64(e.alphabet.len) + d, n := maxPow(l) + + for num.Hi > 0 || num.Lo > 0 { + num, r = num.quoRem64(d) + for j := 0; j < n && i >= 0; j++ { + r, ind = r/l, r%l + c := e.alphabet.chars[ind] + lastPlaced -= utf8.EncodeRune(buf[lastPlaced-utf8.RuneLen(c):], c) + i-- } } - - var sb strings.Builder - sb.Grow(int(e.alphabet.encLen)) - for i := 0; i < int(e.alphabet.encLen); i++ { - sb.WriteRune(e.alphabet.chars[outIndexes[i]]) + firstRuneLen := utf8.RuneLen(e.alphabet.chars[0]) + for ; i >= 0; i-- { + lastPlaced -= utf8.EncodeRune(buf[lastPlaced-firstRuneLen:], e.alphabet.chars[0]) } - return sb.String() + buf = buf[lastPlaced:] + return unsafe.String(unsafe.SliceData(buf), len(buf)) // same as in strings.Builder } // Decode decodes a string according to the alphabet into a uuid.UUID. If s is diff --git a/shortuuid.go b/shortuuid.go index 0c51b9a..daad857 100644 --- a/shortuuid.go +++ b/shortuuid.go @@ -1,7 +1,9 @@ package shortuuid import ( + "crypto/sha1" "strings" + "unsafe" "github.com/google/uuid" ) @@ -34,11 +36,11 @@ func NewWithNamespace(name string) string { case name == "": u = uuid.New() case hasPrefixCaseInsensitive(name, "https://"): - u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name)) + u = hashedUUID(uuid.NameSpaceURL, name) case hasPrefixCaseInsensitive(name, "http://"): - u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name)) + u = hashedUUID(uuid.NameSpaceURL, name) default: - u = uuid.NewSHA1(uuid.NameSpaceDNS, []byte(name)) + u = hashedUUID(uuid.NameSpaceDNS, name) } return DefaultEncoder.Encode(u) @@ -54,3 +56,14 @@ func NewWithAlphabet(abc string) string { func hasPrefixCaseInsensitive(s, prefix string) bool { return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix) } + +func hashedUUID(space uuid.UUID, data string) (u uuid.UUID) { + h := sha1.New() + h.Write(space[:]) //nolint:errcheck + h.Write(unsafe.Slice(unsafe.StringData(data), len(data))) //nolint:errcheck + s := h.Sum(make([]byte, 0, sha1.Size)) + copy(u[:], s) + u[6] = (u[6] & 0x0f) | uint8((5&0xf)<<4) + u[8] = (u[8] & 0x3f) | 0x80 // RFC 4122 variant + return u +} diff --git a/shortuuid_test.go b/shortuuid_test.go index 105b466..75afa6f 100644 --- a/shortuuid_test.go +++ b/shortuuid_test.go @@ -128,20 +128,31 @@ var testVector = []struct { {"f9ee01c3-2015-4716-930e-4d5449810833", "nUfojcH2M5j9j3Tk5A8mf7"}, } -func TestGeneration(t *testing.T) { - tests := []string{ - "", - "http://www.example.com/", - "HTTP://www.example.com/", - "example.com/", +func TestNewWithNamespace(t *testing.T) { + var tests = []struct { + name string + uuid string + }{ + {"http://www.example.com/", "nzUQAfy7CW4Dd4kzLguPSV"}, + {"HTTP://www.example.com/", "N9ZezvXJcoXvKzwiNmGYmH"}, + {"Https://www.example.com/", "jSz34Z6QzADzy93ywucXMv"}, + {"example.com/", "kueUMiGUbGccYhpZK8Czat"}, + {"うえおなにぬねのウエオナニヌネノうえおなにぬねのウエオナニヌネノ", "Mp2Q7GQSRYnoDZyCtGttDg"}, + {"う", "dTbaUbVKrhNkkZKEwZxLqa"}, } - for _, test := range tests { - u := NewWithNamespace(test) - if len(u) < 20 || len(u) > 24 { - t.Errorf("expected %q to be in range [20, 24], got %d", u, len(u)) + u := NewWithNamespace(test.name) + + if u != test.uuid { + t.Errorf("expected %q, got %q", test.uuid, u) } } + + u1 := NewWithNamespace("") + u2 := NewWithNamespace("") + if u1 == u2 { + t.Errorf("NewWithNamespace should generate random uuid with empty namespace") + } } func TestEncoding(t *testing.T) { @@ -212,6 +223,26 @@ func TestNewWithAlphabet_MultipleBytes(t *testing.T) { } } +func TestNewWithAlphabet_Short(t *testing.T) { + abc := "うえ" + enc := encoder{newAlphabet(abc)} + u1 := uuid.MustParse("bcee4c4f-cee8-4413-8f10-0f68d75c797b") + exp := "えうええええううえええうえええううえううええうううえううええええええううえええうえええうえううううえうううえうううううえううえええうううええええうううえううううううううええええうええうえうううええうえうえええうえうえええうううええええううえうええええうええ" + u2 := enc.Encode(u1) + if u2 != exp { + t.Errorf("expected uuid to be %q, got %q", exp, u2) + return + } + u3, err := enc.Decode(u2) + if err != nil { + t.Error(err) + return + } + if u1 != u3 { + t.Errorf("expected %q, got %q", u1, u3) + } +} + func TestAlphabetCustomLen(t *testing.T) { abc := "21345687654123456" enc := encoder{newAlphabet(abc)}