Skip to content

Commit

Permalink
Merge pull request #63 from anatoly-kussul/optimizations
Browse files Browse the repository at this point in the history
optimizations
  • Loading branch information
lithammer authored Jan 10, 2025
2 parents a58fac9 + f85a570 commit 9e9e14d
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 93 deletions.
24 changes: 9 additions & 15 deletions alphabet.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ import (
"fmt"
"math"
"slices"
"unicode/utf8"
)

// DefaultAlphabet is the default alphabet used.
const (
DefaultAlphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
rune1Max = 1<<7 - 1
)

type alphabet struct {
chars []rune
len int64
encLen int64
singleBytes bool
chars []rune
len int64
encLen uint8
maxBytes uint8
}

// Remove duplicates and sort it to ensure reproducibility.
Expand All @@ -30,16 +30,10 @@ func newAlphabet(s string) alphabet {
}

a := alphabet{
chars: abc,
len: int64(len(abc)),
encLen: int64(math.Ceil(128 / math.Log2(float64(len(abc))))),
singleBytes: true,
}
for _, c := range a.chars {
if c > rune1Max {
a.singleBytes = false
break
}
chars: abc,
len: int64(len(abc)),
encLen: uint8(math.Ceil(128 / math.Log2(float64(len(abc))))),
maxBytes: uint8(utf8.RuneLen(abc[len(abc)-1])),
}

return a
Expand Down
102 changes: 37 additions & 65 deletions encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import (
"fmt"
"math"
"math/bits"
"strings"
"unicode/utf8"
"unsafe"

"github.com/google/uuid"
)
Expand Down Expand Up @@ -34,41 +35,26 @@ func maxPow(b uint64) (d uint64, n int) {
// Encode encodes uuid.UUID into a string using the most significant bits (MSB)
// first according to the alphabet.
func (e encoder) Encode(u uuid.UUID) string {
if e.alphabet.singleBytes {
return e.encodeSingleBytes(u)
}
return e.encode(u)
}

func (e encoder) encodeSingleBytes(u uuid.UUID) string {
num := uint128{
binary.BigEndian.Uint64(u[8:]),
binary.BigEndian.Uint64(u[:8]),
}
var r uint64
if e.alphabet.len == defaultBase && e.alphabet.maxBytes == 1 {
return e.defaultEncode(num)
}
return e.encode(num)
}

func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot of divisions by constant
var i int
var buf []byte
if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base
buf = make([]byte, defaultEncLen)
for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(defaultDivisor)
for j := 0; j < defaultNDigits && i >= 0; j++ {
buf[i] = byte(e.alphabet.chars[r%defaultBase])
r /= defaultBase
i--
}
}
} else {
buf = make([]byte, e.alphabet.encLen)
l := uint64(e.alphabet.len)
d, n := maxPow(l)
for i = int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(d)
for j := 0; j < n && i >= 0; j++ {
buf[i] = byte(e.alphabet.chars[r%l])
r /= l
i--
}
var r uint64
var buf [defaultEncLen]byte
for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(defaultDivisor)
for j := 0; j < defaultNDigits && i >= 0; j++ {
buf[i] = byte(e.alphabet.chars[r%defaultBase])
r /= defaultBase
i--
}
}
for ; i >= 0; i-- {
Expand All @@ -77,43 +63,29 @@ func (e encoder) encodeSingleBytes(u uuid.UUID) string {
return string(buf[:])
}

func (e encoder) encode(u uuid.UUID) string {
num := uint128{
binary.BigEndian.Uint64(u[8:]),
binary.BigEndian.Uint64(u[:8]),
}
var r uint64
var outIndexes []uint64
if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base
outIndexes = make([]uint64, defaultEncLen) // avoids escaping to heap for base57 when used with constant
for i := defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(defaultDivisor)
for j := 0; j < defaultNDigits && i >= 0; j++ {
outIndexes[i] = r % defaultBase
r /= defaultBase
i--
}
}
} else {
outIndexes = make([]uint64, e.alphabet.encLen)
l := uint64(e.alphabet.len)
d, n := maxPow(l)
for i := int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; {
num, r = num.quoRem64(d)
for j := 0; j < n && i >= 0; j++ {
outIndexes[i] = r % l
r /= l
i--
}
func (e encoder) encode(num uint128) string {
var r, ind uint64
i := int(e.alphabet.encLen - 1)
buf := make([]byte, int64(e.alphabet.encLen)*int64(e.alphabet.maxBytes))
lastPlaced := len(buf)
l := uint64(e.alphabet.len)
d, n := maxPow(l)

for num.Hi > 0 || num.Lo > 0 {
num, r = num.quoRem64(d)
for j := 0; j < n && i >= 0; j++ {
r, ind = r/l, r%l
c := e.alphabet.chars[ind]
lastPlaced -= utf8.EncodeRune(buf[lastPlaced-utf8.RuneLen(c):], c)
i--
}
}

var sb strings.Builder
sb.Grow(int(e.alphabet.encLen))
for i := 0; i < int(e.alphabet.encLen); i++ {
sb.WriteRune(e.alphabet.chars[outIndexes[i]])
firstRuneLen := utf8.RuneLen(e.alphabet.chars[0])
for ; i >= 0; i-- {
lastPlaced -= utf8.EncodeRune(buf[lastPlaced-firstRuneLen:], e.alphabet.chars[0])
}
return sb.String()
buf = buf[lastPlaced:]
return unsafe.String(unsafe.SliceData(buf), len(buf)) // same as in strings.Builder
}

// Decode decodes a string according to the alphabet into a uuid.UUID. If s is
Expand Down
19 changes: 16 additions & 3 deletions shortuuid.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package shortuuid

import (
"crypto/sha1"
"strings"
"unsafe"

"github.com/google/uuid"
)
Expand Down Expand Up @@ -34,11 +36,11 @@ func NewWithNamespace(name string) string {
case name == "":
u = uuid.New()
case hasPrefixCaseInsensitive(name, "https://"):
u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name))
u = hashedUUID(uuid.NameSpaceURL, name)
case hasPrefixCaseInsensitive(name, "http://"):
u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name))
u = hashedUUID(uuid.NameSpaceURL, name)
default:
u = uuid.NewSHA1(uuid.NameSpaceDNS, []byte(name))
u = hashedUUID(uuid.NameSpaceDNS, name)
}

return DefaultEncoder.Encode(u)
Expand All @@ -54,3 +56,14 @@ func NewWithAlphabet(abc string) string {
// hasPrefixCaseInsensitive reports whether s starts with prefix when the two
// are compared with strings.EqualFold. Only the first len(prefix) bytes of s
// are considered; a string shorter than prefix never matches.
func hasPrefixCaseInsensitive(s, prefix string) bool {
	n := len(prefix)
	if len(s) < n {
		return false
	}
	head := s[:n]
	return strings.EqualFold(head, prefix)
}

// hashedUUID derives a UUID from the SHA-1 digest of the namespace bytes
// followed by the bytes of data. The leading bytes of the digest are copied
// into the result, after which the high nibble of byte 6 is set to 5 and the
// top two bits of byte 8 are set to the RFC 4122 variant.
//
// The bytes of data are passed to the hash through an unsafe view of the
// string rather than a []byte conversion, so the string is not copied; the
// view is only read and is not retained by this function.
func hashedUUID(space uuid.UUID, data string) (u uuid.UUID) {
	const version = 5

	digest := sha1.New()
	digest.Write(space[:]) //nolint:errcheck

	raw := unsafe.Slice(unsafe.StringData(data), len(data))
	digest.Write(raw) //nolint:errcheck

	// Sum appends to the supplied slice, so pre-sizing the capacity lets the
	// digest land in this buffer without growing it.
	sum := digest.Sum(make([]byte, 0, sha1.Size))
	copy(u[:], sum)

	u[6] = u[6]&0x0f | version<<4 // version nibble
	u[8] = u[8]&0x3f | 0x80       // RFC 4122 variant
	return u
}
51 changes: 41 additions & 10 deletions shortuuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,20 +128,31 @@ var testVector = []struct {
{"f9ee01c3-2015-4716-930e-4d5449810833", "nUfojcH2M5j9j3Tk5A8mf7"},
}

func TestGeneration(t *testing.T) {
tests := []string{
"",
"http://www.example.com/",
"HTTP://www.example.com/",
"example.com/",
func TestNewWithNamespace(t *testing.T) {
var tests = []struct {
name string
uuid string
}{
{"http://www.example.com/", "nzUQAfy7CW4Dd4kzLguPSV"},
{"HTTP://www.example.com/", "N9ZezvXJcoXvKzwiNmGYmH"},
{"Https://www.example.com/", "jSz34Z6QzADzy93ywucXMv"},
{"example.com/", "kueUMiGUbGccYhpZK8Czat"},
{"うえおなにぬねのウエオナニヌネノうえおなにぬねのウエオナニヌネノ", "Mp2Q7GQSRYnoDZyCtGttDg"},
{"う", "dTbaUbVKrhNkkZKEwZxLqa"},
}

for _, test := range tests {
u := NewWithNamespace(test)
if len(u) < 20 || len(u) > 24 {
t.Errorf("expected %q to be in range [20, 24], got %d", u, len(u))
u := NewWithNamespace(test.name)

if u != test.uuid {
t.Errorf("expected %q, got %q", test.uuid, u)
}
}

u1 := NewWithNamespace("")
u2 := NewWithNamespace("")
if u1 == u2 {
t.Errorf("NewWithNamespace should generate random uuid with empty namespace")
}
}

func TestEncoding(t *testing.T) {
Expand Down Expand Up @@ -212,6 +223,26 @@ func TestNewWithAlphabet_MultipleBytes(t *testing.T) {
}
}

// TestNewWithAlphabet_Short round-trips a fixed UUID through an encoder built
// from a two-character, multi-byte alphabet: the encoded form must equal the
// expected string, and decoding it must give back the original UUID.
func TestNewWithAlphabet_Short(t *testing.T) {
	enc := encoder{newAlphabet("うえ")}
	original := uuid.MustParse("bcee4c4f-cee8-4413-8f10-0f68d75c797b")
	const exp = "えうええええううえええうえええううえううええうううえううええええええううえええうえええうえううううえうううえうううううえううえええうううええええうううえううううううううええええうええうえうううええうえうえええうえうえええうううええええううえうええええうええ"

	encoded := enc.Encode(original)
	if encoded != exp {
		t.Fatalf("expected uuid to be %q, got %q", exp, encoded)
	}

	decoded, err := enc.Decode(encoded)
	if err != nil {
		t.Fatal(err)
	}

	if original != decoded {
		t.Errorf("expected %q, got %q", original, decoded)
	}
}

func TestAlphabetCustomLen(t *testing.T) {
abc := "21345687654123456"
enc := encoder{newAlphabet(abc)}
Expand Down

0 comments on commit 9e9e14d

Please sign in to comment.