Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimizations #63

Merged
merged 11 commits into from
Jan 10, 2025
24 changes: 9 additions & 15 deletions alphabet.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ import (
"fmt"
"math"
"slices"
"unicode/utf8"
)

// DefaultAlphabet is the default alphabet used.
const (
DefaultAlphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
rune1Max = 1<<7 - 1
)

type alphabet struct {
chars []rune
len int64
encLen int64
singleBytes bool
chars []rune
len int64
encLen uint8
maxBytes uint8
}

// Remove duplicates and sort it to ensure reproducibility.
Expand All @@ -30,16 +30,10 @@ func newAlphabet(s string) alphabet {
}

a := alphabet{
chars: abc,
len: int64(len(abc)),
encLen: int64(math.Ceil(128 / math.Log2(float64(len(abc))))),
singleBytes: true,
}
for _, c := range a.chars {
if c > rune1Max {
a.singleBytes = false
break
}
chars: abc,
len: int64(len(abc)),
encLen: uint8(math.Ceil(128 / math.Log2(float64(len(abc))))),
maxBytes: uint8(utf8.RuneLen(abc[len(abc)-1])),
}

return a
Expand Down
102 changes: 37 additions & 65 deletions encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import (
"fmt"
"math"
"math/bits"
"strings"
"unicode/utf8"
"unsafe"

"github.com/google/uuid"
)
Expand Down Expand Up @@ -34,41 +35,26 @@ func maxPow(b uint64) (d uint64, n int) {
// Encode encodes uuid.UUID into a string using the most significant bits (MSB)
// first according to the alphabet.
func (e encoder) Encode(u uuid.UUID) string {
if e.alphabet.singleBytes {
return e.encodeSingleBytes(u)
}
return e.encode(u)
}

func (e encoder) encodeSingleBytes(u uuid.UUID) string {
num := uint128{
binary.BigEndian.Uint64(u[8:]),
binary.BigEndian.Uint64(u[:8]),
}
var r uint64
if e.alphabet.len == defaultBase && e.alphabet.maxBytes == 1 {
return e.defaultEncode(num)
}
return e.encode(num)
}

func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot of divisions by constant
var i int
var buf []byte
if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base
buf = make([]byte, defaultEncLen)
for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(defaultDivisor)
for j := 0; j < defaultNDigits && i >= 0; j++ {
buf[i] = byte(e.alphabet.chars[r%defaultBase])
r /= defaultBase
i--
}
}
} else {
buf = make([]byte, e.alphabet.encLen)
l := uint64(e.alphabet.len)
d, n := maxPow(l)
for i = int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(d)
for j := 0; j < n && i >= 0; j++ {
buf[i] = byte(e.alphabet.chars[r%l])
r /= l
i--
}
var r uint64
var buf [defaultEncLen]byte
for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(defaultDivisor)
for j := 0; j < defaultNDigits && i >= 0; j++ {
buf[i] = byte(e.alphabet.chars[r%defaultBase])
r /= defaultBase
i--
}
}
for ; i >= 0; i-- {
Expand All @@ -77,43 +63,29 @@ func (e encoder) encodeSingleBytes(u uuid.UUID) string {
return string(buf[:])
}

func (e encoder) encode(u uuid.UUID) string {
num := uint128{
binary.BigEndian.Uint64(u[8:]),
binary.BigEndian.Uint64(u[:8]),
}
var r uint64
var outIndexes []uint64
if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base
outIndexes = make([]uint64, defaultEncLen) // avoids escaping to heap for base57 when used with constant
for i := defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(defaultDivisor)
for j := 0; j < defaultNDigits && i >= 0; j++ {
outIndexes[i] = r % defaultBase
r /= defaultBase
i--
}
}
} else {
outIndexes = make([]uint64, e.alphabet.encLen)
l := uint64(e.alphabet.len)
d, n := maxPow(l)
for i := int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(d)
for j := 0; j < n && i >= 0; j++ {
outIndexes[i] = r % l
r /= l
i--
}
func (e encoder) encode(num uint128) string {
var r, ind uint64
i := int(e.alphabet.encLen - 1)
buf := make([]byte, int64(e.alphabet.encLen)*int64(e.alphabet.maxBytes))
lastPlaced := len(buf)
l := uint64(e.alphabet.len)
d, n := maxPow(l)

for num.Hi > 0 || num.Lo > 0 {
num, r = num.quoRem64(d)
for j := 0; j < n && i >= 0; j++ {
r, ind = r/l, r%l
c := e.alphabet.chars[ind]
lastPlaced -= utf8.EncodeRune(buf[lastPlaced-utf8.RuneLen(c):], c)
i--
}
}

var sb strings.Builder
sb.Grow(int(e.alphabet.encLen))
for i := 0; i < int(e.alphabet.encLen); i++ {
sb.WriteRune(e.alphabet.chars[outIndexes[i]])
firstRuneLen := utf8.RuneLen(e.alphabet.chars[0])
for ; i >= 0; i-- {
lastPlaced -= utf8.EncodeRune(buf[lastPlaced-firstRuneLen:], e.alphabet.chars[0])
}
return sb.String()
buf = buf[lastPlaced:]
return unsafe.String(unsafe.SliceData(buf), len(buf)) // same as in strings.Builder
}

// Decode decodes a string according to the alphabet into a uuid.UUID. If s is
Expand Down
19 changes: 16 additions & 3 deletions shortuuid.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package shortuuid

import (
"crypto/sha1"
"strings"
"unsafe"

"github.com/google/uuid"
)
Expand Down Expand Up @@ -34,11 +36,11 @@ func NewWithNamespace(name string) string {
case name == "":
u = uuid.New()
case hasPrefixCaseInsensitive(name, "https://"):
u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name))
u = hashedUUID(uuid.NameSpaceURL, name)
case hasPrefixCaseInsensitive(name, "http://"):
u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name))
u = hashedUUID(uuid.NameSpaceURL, name)
default:
u = uuid.NewSHA1(uuid.NameSpaceDNS, []byte(name))
u = hashedUUID(uuid.NameSpaceDNS, name)
}

return DefaultEncoder.Encode(u)
Expand All @@ -54,3 +56,14 @@ func NewWithAlphabet(abc string) string {
// hasPrefixCaseInsensitive reports whether s starts with prefix when the two
// are compared under Unicode case folding. Only the first len(prefix) bytes of
// s take part in the comparison; a shorter s never matches.
func hasPrefixCaseInsensitive(s, prefix string) bool {
	n := len(prefix)
	if len(s) < n {
		return false
	}
	head := s[:n]
	return strings.EqualFold(head, prefix)
}

func hashedUUID(space uuid.UUID, data string) (u uuid.UUID) {
h := sha1.New()
h.Write(space[:]) //nolint:errcheck
h.Write(unsafe.Slice(unsafe.StringData(data), len(data))) //nolint:errcheck
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that in google/uuid#181, you're not using unsafe?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in that PR the signature accepts data []byte.
Here, unsafe is used to read the input string as a byte slice without allocating additional objects.
So if that PR is merged and released, it could then be integrated here as:
u = uuid.NewSHA1(uuid.NameSpaceDNS, unsafe.Slice(unsafe.StringData(data), len(data)))
instead of
u = uuid.NewSHA1(uuid.NameSpaceDNS, []byte(data))
as this []byte conversion sometimes adds an additional allocation.

s := h.Sum(make([]byte, 0, sha1.Size))
copy(u[:], s)
u[6] = (u[6] & 0x0f) | uint8((5&0xf)<<4)
u[8] = (u[8] & 0x3f) | 0x80 // RFC 4122 variant
return u
}
51 changes: 41 additions & 10 deletions shortuuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,20 +128,31 @@ var testVector = []struct {
{"f9ee01c3-2015-4716-930e-4d5449810833", "nUfojcH2M5j9j3Tk5A8mf7"},
}

func TestGeneration(t *testing.T) {
tests := []string{
"",
"http://www.example.com/",
"HTTP://www.example.com/",
"example.com/",
func TestNewWithNamespace(t *testing.T) {
var tests = []struct {
name string
uuid string
}{
{"http://www.example.com/", "nzUQAfy7CW4Dd4kzLguPSV"},
{"HTTP://www.example.com/", "N9ZezvXJcoXvKzwiNmGYmH"},
{"Https://www.example.com/", "jSz34Z6QzADzy93ywucXMv"},
{"example.com/", "kueUMiGUbGccYhpZK8Czat"},
{"うえおなにぬねのウエオナニヌネノうえおなにぬねのウエオナニヌネノ", "Mp2Q7GQSRYnoDZyCtGttDg"},
{"う", "dTbaUbVKrhNkkZKEwZxLqa"},
}

for _, test := range tests {
u := NewWithNamespace(test)
if len(u) < 20 || len(u) > 24 {
t.Errorf("expected %q to be in range [20, 24], got %d", u, len(u))
u := NewWithNamespace(test.name)

if u != test.uuid {
t.Errorf("expected %q, got %q", test.uuid, u)
}
}

u1 := NewWithNamespace("")
u2 := NewWithNamespace("")
if u1 == u2 {
t.Errorf("NewWithNamespace should generate random uuid with empty namespace")
}
}

func TestEncoding(t *testing.T) {
Expand Down Expand Up @@ -212,6 +223,26 @@ func TestNewWithAlphabet_MultipleBytes(t *testing.T) {
}
}

// TestNewWithAlphabet_Short round-trips a fixed UUID through an encoder built
// on a two-rune, multi-byte alphabet. It checks the encoded string against a
// known value and then verifies that decoding restores the original UUID.
func TestNewWithAlphabet_Short(t *testing.T) {
	const (
		abc  = "うえ"
		want = "えうええええううえええうえええううえううええうううえううええええええううえええうえええうえううううえうううえうううううえううえええうううええええうううえううううううううええええうええうえうううええうえうえええうえうえええうううええええううえうええええうええ"
	)
	enc := encoder{newAlphabet(abc)}
	original := uuid.MustParse("bcee4c4f-cee8-4413-8f10-0f68d75c797b")

	encoded := enc.Encode(original)
	if encoded != want {
		t.Errorf("expected uuid to be %q, got %q", want, encoded)
		return
	}

	// Decoding the encoded form must yield the UUID we started from.
	decoded, err := enc.Decode(encoded)
	switch {
	case err != nil:
		t.Error(err)
	case original != decoded:
		t.Errorf("expected %q, got %q", original, decoded)
	}
}

func TestAlphabetCustomLen(t *testing.T) {
abc := "21345687654123456"
enc := encoder{newAlphabet(abc)}
Expand Down
Loading