lithammer · lithammer · Jan 10, 2025 · Jan 5, 2025 · Jan 6, 2025 · Jan 6, 2025
diff --git a/alphabet.go b/alphabet.go
@@ -4,19 +4,19 @@ import (
 	"fmt"
 	"math"
 	"slices"
+	"unicode/utf8"
 )
 
 // DefaultAlphabet is the default alphabet used.
 const (
 	DefaultAlphabet = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
-	rune1Max        = 1<<7 - 1
 )
 
 type alphabet struct {
-	chars       []rune
-	len         int64
-	encLen      int64
-	singleBytes bool
+	chars    []rune // alphabet symbols, set from abc in newAlphabet
+	len      int64  // number of symbols in chars
+	encLen   uint8  // symbols per encoded UUID: ceil(128 / log2(len))
+	maxBytes uint8  // UTF-8 byte length of the last symbol in chars; encode sizes its buffer as encLen*maxBytes
 }
 
 // Remove duplicates and sort it to ensure reproducibility.
@@ -30,16 +30,10 @@ func newAlphabet(s string) alphabet {
 	}
 
 	a := alphabet{
-		chars:       abc,
-		len:         int64(len(abc)),
-		encLen:      int64(math.Ceil(128 / math.Log2(float64(len(abc))))),
-		singleBytes: true,
-	}
-	for _, c := range a.chars {
-		if c > rune1Max {
-			a.singleBytes = false
-			break
-		}
+		chars:    abc,
+		len:      int64(len(abc)),
+		encLen:   uint8(math.Ceil(128 / math.Log2(float64(len(abc))))),
+		maxBytes: uint8(utf8.RuneLen(abc[len(abc)-1])),
 	}
 
 	return a

diff --git a/encoder.go b/encoder.go
@@ -5,7 +5,8 @@ import (
 	"fmt"
 	"math"
 	"math/bits"
-	"strings"
+	"unicode/utf8"
+	"unsafe"
 
 	"github.com/google/uuid"
 )
@@ -34,41 +35,26 @@ func maxPow(b uint64) (d uint64, n int) {
 // Encode encodes uuid.UUID into a string using the most significant bits (MSB)
 // first according to the alphabet.
 func (e encoder) Encode(u uuid.UUID) string {
-	if e.alphabet.singleBytes {
-		return e.encodeSingleBytes(u)
-	}
-	return e.encode(u)
-}
-
-func (e encoder) encodeSingleBytes(u uuid.UUID) string {
 	num := uint128{
 		binary.BigEndian.Uint64(u[8:]),
 		binary.BigEndian.Uint64(u[:8]),
 	}
-	var r uint64
+	if e.alphabet.len == defaultBase && e.alphabet.maxBytes == 1 { // defaultBase symbols, each one byte wide: use the encoder built on the default* constants
+		return e.defaultEncode(num)
+	}
+	return e.encode(num) // any other alphabet size or symbol width
+}
+
+func (e encoder) defaultEncode(num uint128) string { // compiler optimizes a lot of divisions by constant
 	var i int
-	var buf []byte
-	if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base
-		buf = make([]byte, defaultEncLen)
-		for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
-			num, r = num.quoRem64(defaultDivisor)
-			for j := 0; j < defaultNDigits && i >= 0; j++ {
-				buf[i] = byte(e.alphabet.chars[r%defaultBase])
-				r /= defaultBase
-				i--
-			}
-		}
-	} else {
-		buf = make([]byte, e.alphabet.encLen)
-		l := uint64(e.alphabet.len)
-		d, n := maxPow(l)
-		for i = int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; {
-			num, r = num.quoRem64(d)
-			for j := 0; j < n && i >= 0; j++ {
-				buf[i] = byte(e.alphabet.chars[r%l])
-				r /= l
-				i--
-			}
+	var r uint64
+	var buf [defaultEncLen]byte
+	for i = defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
+		num, r = num.quoRem64(defaultDivisor)
+		for j := 0; j < defaultNDigits && i >= 0; j++ {
+			buf[i] = byte(e.alphabet.chars[r%defaultBase])
+			r /= defaultBase
+			i--
 		}
 	}
 	for ; i >= 0; i-- {
@@ -77,43 +63,29 @@ func (e encoder) encodeSingleBytes(u uuid.UUID) string {
 	return string(buf[:])
 }
 
-func (e encoder) encode(u uuid.UUID) string {
-	num := uint128{
-		binary.BigEndian.Uint64(u[8:]),
-		binary.BigEndian.Uint64(u[:8]),
-	}
-	var r uint64
-	var outIndexes []uint64
-	if e.alphabet.len == defaultBase { // compiler optimizations using constants for default base
-		outIndexes = make([]uint64, defaultEncLen) // avoids escaping to heap for base57 when used with constant
-		for i := defaultEncLen - 1; num.Hi > 0 || num.Lo > 0; {
-			num, r = num.quoRem64(defaultDivisor)
-			for j := 0; j < defaultNDigits && i >= 0; j++ {
-				outIndexes[i] = r % defaultBase
-				r /= defaultBase
-				i--
-			}
-		}
-	} else {
-		outIndexes = make([]uint64, e.alphabet.encLen)
-		l := uint64(e.alphabet.len)
-		d, n := maxPow(l)
-		for i := int(e.alphabet.encLen - 1); num.Hi > 0 || num.Lo > 0; {
-			num, r = num.quoRem64(d)
-			for j := 0; j < n && i >= 0; j++ {
-				outIndexes[i] = r % l
-				r /= l
-				i--
-			}
+func (e encoder) encode(num uint128) string { // generic encoder: fills buf from the end, one UTF-8 encoded symbol at a time
+	var r, ind uint64
+	i := int(e.alphabet.encLen - 1) // number of symbols still to write, minus one
+	buf := make([]byte, int64(e.alphabet.encLen)*int64(e.alphabet.maxBytes))
+	lastPlaced := len(buf) // start offset of the most recently written symbol; len(buf) while nothing is written
+	l := uint64(e.alphabet.len)
+	d, n := maxPow(l) // NOTE(review): presumably d == l^n, the largest power of l that fits in uint64 — confirm in maxPow
+
+	for num.Hi > 0 || num.Lo > 0 {
+		num, r = num.quoRem64(d)
+		for j := 0; j < n && i >= 0; j++ {
+			r, ind = r/l, r%l // peel the lowest base-l digit off r
+			c := e.alphabet.chars[ind]
+			lastPlaced -= utf8.EncodeRune(buf[lastPlaced-utf8.RuneLen(c):], c) // write c so that it ends exactly where the previous symbol begins
+			i--
 		}
 	}
-
-	var sb strings.Builder
-	sb.Grow(int(e.alphabet.encLen))
-	for i := 0; i < int(e.alphabet.encLen); i++ {
-		sb.WriteRune(e.alphabet.chars[outIndexes[i]])
+	firstRuneLen := utf8.RuneLen(e.alphabet.chars[0])
+	for ; i >= 0; i-- {
+		lastPlaced -= utf8.EncodeRune(buf[lastPlaced-firstRuneLen:], e.alphabet.chars[0]) // pad on the left with chars[0] until encLen symbols are written
 	}
-	return sb.String()
+	buf = buf[lastPlaced:]
+	return unsafe.String(unsafe.SliceData(buf), len(buf)) // zero-copy conversion, same as in strings.Builder; buf is not written to afterwards
 }
 
 // Decode decodes a string according to the alphabet into a uuid.UUID. If s is

diff --git a/shortuuid.go b/shortuuid.go
@@ -1,7 +1,9 @@
 package shortuuid
 
 import (
+	"crypto/sha1"
 	"strings"
+	"unsafe"
 
 	"github.com/google/uuid"
 )
@@ -34,11 +36,11 @@ func NewWithNamespace(name string) string {
 	case name == "":
 		u = uuid.New()
 	case hasPrefixCaseInsensitive(name, "https://"):
-		u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name))
+		u = hashedUUID(uuid.NameSpaceURL, name)
 	case hasPrefixCaseInsensitive(name, "http://"):
-		u = uuid.NewSHA1(uuid.NameSpaceURL, []byte(name))
+		u = hashedUUID(uuid.NameSpaceURL, name)
 	default:
-		u = uuid.NewSHA1(uuid.NameSpaceDNS, []byte(name))
+		u = hashedUUID(uuid.NameSpaceDNS, name)
 	}
 
 	return DefaultEncoder.Encode(u)
@@ -54,3 +56,14 @@ func NewWithAlphabet(abc string) string {
 func hasPrefixCaseInsensitive(s, prefix string) bool {
 	return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix)
 }
+
+func hashedUUID(space uuid.UUID, data string) (u uuid.UUID) { // name-based UUID: SHA-1 over space followed by data, stamped as version 5
+	h := sha1.New()
+	h.Write(space[:])                                         //nolint:errcheck
+	h.Write(unsafe.Slice(unsafe.StringData(data), len(data))) //nolint:errcheck // hashes data's bytes in place, without a []byte copy
+	s := h.Sum(make([]byte, 0, sha1.Size))
+	copy(u[:], s) // u is shorter than the digest; copy keeps only the leading bytes
+	u[6] = (u[6] & 0x0f) | uint8((5&0xf)<<4)
+	u[8] = (u[8] & 0x3f) | 0x80 // RFC 4122 variant
+	return u
+}
diff --git a/shortuuid_test.go b/shortuuid_test.go
@@ -128,20 +128,31 @@ var testVector = []struct {
 	{"f9ee01c3-2015-4716-930e-4d5449810833", "nUfojcH2M5j9j3Tk5A8mf7"},
 }
 
-func TestGeneration(t *testing.T) {
-	tests := []string{
-		"",
-		"http://www.example.com/",
-		"HTTP://www.example.com/",
-		"example.com/",
+func TestNewWithNamespace(t *testing.T) { // pins the exact output for fixed names; an empty name must give a different id on each call
+	var tests = []struct {
+		name string // argument passed to NewWithNamespace
+		uuid string // expected encoded result
+	}{
+		{"http://www.example.com/", "nzUQAfy7CW4Dd4kzLguPSV"},
+		{"HTTP://www.example.com/", "N9ZezvXJcoXvKzwiNmGYmH"},
+		{"Https://www.example.com/", "jSz34Z6QzADzy93ywucXMv"},
+		{"example.com/", "kueUMiGUbGccYhpZK8Czat"},
+		{"うえおなにぬねのウエオナニヌネノうえおなにぬねのウエオナニヌネノ", "Mp2Q7GQSRYnoDZyCtGttDg"},
+		{"う", "dTbaUbVKrhNkkZKEwZxLqa"},
 	}
-
 	for _, test := range tests {
-		u := NewWithNamespace(test)
-		if len(u) < 20 || len(u) > 24 {
-			t.Errorf("expected %q to be in range [20, 24], got %d", u, len(u))
+		u := NewWithNamespace(test.name)
+
+		if u != test.uuid {
+			t.Errorf("expected %q, got %q", test.uuid, u)
 		}
 	}
+
+	u1 := NewWithNamespace("") // an empty name selects the uuid.New branch, so it cannot be pinned in the table above
+	u2 := NewWithNamespace("")
+	if u1 == u2 {
+		t.Errorf("NewWithNamespace should generate random uuid with empty namespace")
+	}
 }
 
 func TestEncoding(t *testing.T) {
@@ -212,6 +223,26 @@ func TestNewWithAlphabet_MultipleBytes(t *testing.T) {
 	}
 }
 
+func TestNewWithAlphabet_Short(t *testing.T) { // round-trips one UUID through a two-symbol alphabet of multi-byte runes
+	abc := "うえ"
+	enc := encoder{newAlphabet(abc)} // two symbols give encLen = ceil(128 / log2(2)) = 128
+	u1 := uuid.MustParse("bcee4c4f-cee8-4413-8f10-0f68d75c797b")
+	exp := "えうええええううえええうえええううえううええうううえううええええええううえええうえええうえううううえうううえうううううえううえええうううええええうううえううううううううええええうええうえうううええうえうえええうえうえええうううええええううえうええええうええ"
+	u2 := enc.Encode(u1)
+	if u2 != exp {
+		t.Errorf("expected uuid to be %q, got %q", exp, u2)
+		return
+	}
+	u3, err := enc.Decode(u2) // decoding the encoded form must give back u1
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	if u1 != u3 {
+		t.Errorf("expected %q, got %q", u1, u3)
+	}
+}
+
 func TestAlphabetCustomLen(t *testing.T) {
 	abc := "21345687654123456"
 	enc := encoder{newAlphabet(abc)}