Skip to content

Commit

Permalink
Merge pull request #2 from sensiblecodeio/duncan/improve-build-memory…
Browse files Browse the repository at this point in the history
…-usage

Improve memory usage during map build
  • Loading branch information
phynes-sensiblecode authored Jul 27, 2021
2 parents a4c6370 + a9b6916 commit 99e4718
Showing 1 changed file with 64 additions and 14 deletions.
78 changes: 64 additions & 14 deletions uint32_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,49 @@ type (
// Get should return the value for the supplied key
Get(string) uint32
}

// uint32Builder is used only during construction. It accumulates
// byteValues in a list of blocks which uint32Build later copies
// into a single slice.
uint32Builder struct {
// all holds the blocks allocated so far; the last one is the current block
all [][]byteValue
// src is queried for the value stored against each key
src Uint32Source
// len is the total number of byteValues handed out across all blocks,
// i.e. the length of the eventual built slice
len int
}
)

// NewUint32Store creates from the data supplied in srcMap
func NewUint32Store(srcMap Uint32Source) Uint32Store {
m := Uint32Store{store: make([]byteValue, 1)}
if keys := srcMap.AppendKeys([]string(nil)); len(keys) > 0 {
// NewUint32Store creates a Uint32Store from the data supplied in src.
// The keys obtained from src are sorted and handed to uint32Build.
// When src supplies no keys the store holds a single zero-valued byteValue.
func NewUint32Store(src Uint32Source) Uint32Store {
if keys := src.AppendKeys([]string(nil)); len(keys) > 0 {
sort.Strings(keys)
m.makeByteValue(&m.store[0], keys, 0, srcMap)
return Uint32Store{store: uint32Build(keys, src)}
}
return Uint32Store{store: []byteValue{{}}}
}

// uint32Build constructs the map by allocating memory in blocks
// and then copying into the eventual slice at the end. This is
// more efficient than continually using append.
//
// keys is passed straight to makeByteValue, which reads keys[0] and
// expects the strings to be sorted, so keys must be sorted and non-empty.
// src supplies the value for each key. The result is one contiguous
// slice of b.len byteValues whose first element is the root.
func uint32Build(keys []string, src Uint32Source) []byteValue {
// start with one block holding only the root byteValue
b := uint32Builder{
all: [][]byteValue{make([]byteValue, 1, firstBufSize(len(keys)))},
src: src,
len: 1,
}
b.makeByteValue(&b.all[0][0], keys, 0)
// copy all blocks to one slice, in allocation order so that the
// nextLo indexes recorded during the build line up with it
s := make([]byteValue, 0, b.len)
for _, a := range b.all {
s = append(s, a...)
}
// NOTE(review): m is not declared in this function; the "return m" below
// looks like a removed line from the diff view - the built slice is s.
return m
return s
}

// makeByteValue will initialise the supplied byteValue for
// the sorted strings in slice a considering bytes at byteIndex in the strings
func (m *Uint32Store) makeByteValue(bv *byteValue, a []string, byteIndex int, srcMap Uint32Source) {
func (b *uint32Builder) makeByteValue(bv *byteValue, a []string, byteIndex int) {
// if there is a string with no more bytes then it is always first because they are sorted
if len(a[0]) == byteIndex {
bv.valid = true
bv.value = srcMap.Get(a[0])
bv.value = b.src.Get(a[0])
a = a[1:]
}
if len(a) == 0 {
Expand All @@ -56,23 +80,49 @@ func (m *Uint32Store) makeByteValue(bv *byteValue, a []string, byteIndex int, sr
bv.nextOffset = a[0][byteIndex] // lowest value for next byte
bv.nextLen = a[len(a)-1][byteIndex] - // highest value for next byte
bv.nextOffset + 1 // minus lowest value +1 = number of possible next bytes
bv.nextLo = uint32(len(m.store)) // first byteValue struct to use

// allocate enough byteValue structs - they default to "not valid"
m.store = append(m.store, make([]byteValue, bv.nextLen)...)
bv.nextLo = uint32(b.len) // first byteValue struct in eventual built slice
next := b.alloc(bv.nextLen) // new byteValues default to "not valid"

for i, n := 0, len(a); i < n; {
// find range of strings starting with the same byte
iSameByteHi := i + 1
for iSameByteHi < n && a[iSameByteHi][byteIndex] == a[i][byteIndex] {
iSameByteHi++
}
nextStoreIndex := bv.nextLo + uint32(a[i][byteIndex]-bv.nextOffset)
m.makeByteValue(&m.store[nextStoreIndex], a[i:iSameByteHi], byteIndex+1, srcMap)
b.makeByteValue(&next[(a[i][byteIndex]-bv.nextOffset)], a[i:iSameByteHi], byteIndex+1)
i = iSameByteHi
}
}

// maxBuildBufSize is the capacity (in byteValues) at which build
// blocks stop doubling.
const maxBuildBufSize = 1 << 20

// firstBufSize picks the capacity of the first build block: starting
// at 16 it doubles until it reaches mapSize or maxBuildBufSize,
// whichever comes first.
func firstBufSize(mapSize int) int {
	const minBufSize = 16
	bufSize := minBufSize
	for {
		if bufSize >= mapSize || bufSize >= maxBuildBufSize {
			return bufSize
		}
		bufSize *= 2
	}
}

// alloc reserves nByteValues consecutive, zero-valued byteValues and
// returns them as a slice. It grabs space in the current (last) block
// if available or starts a new block if not; any spare capacity left
// in the previous block is then simply not used.
//
// Existing blocks are only ever re-sliced within their capacity, never
// reallocated, so pointers into previously returned slices stay valid.
// b.len is advanced by nByteValues in both cases.
func (b *uint32Builder) alloc(nByteValues byte) []byteValue {
	n := int(nByteValues)
	b.len += n
	cur := &b.all[len(b.all)-1] // current block
	curCap, curLen := cap(*cur), len(*cur)
	if curCap-curLen >= n { // enough space in current
		*cur = (*cur)[: curLen+n : curCap]
		return (*cur)[curLen:]
	}
	// new block: double the previous capacity until maxBuildBufSize is reached
	newCap := curCap
	if newCap < maxBuildBufSize {
		newCap *= 2
	}
	// A request can be up to 255 entries, which is more than a small
	// doubled block holds; make panics when len exceeds cap, so grow
	// the block to fit the request.
	if newCap < n {
		newCap = n
	}
	a := make([]byteValue, n, newCap)
	b.all = append(b.all, a)
	return a
}

// LookupString looks up the supplied string in the map
func (m *Uint32Store) LookupString(s string) (uint32, bool) {
bv := &m.store[0]
Expand Down

0 comments on commit 99e4718

Please sign in to comment.