Skip to content

Commit 168995b

Browse files
authored
Merge pull request #3 from holiman/less_alloc
all: less memory use on write to disk
2 parents 3e24bf2 + 298f676 commit 168995b

File tree

5 files changed

+146
-45
lines changed

5 files changed

+146
-45
lines changed

binarymarshaler.go

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,24 @@ import (
1414
"bytes"
1515
"crypto/sha512"
1616
"encoding/binary"
17+
"io"
1718
)
1819

20+
// counter is a utility to count bytes written
21+
type counter struct {
22+
bytes int
23+
}
24+
25+
func (c *counter) Write(p []byte) (n int, err error) {
26+
count := len(p)
27+
c.bytes += count
28+
return count, nil
29+
}
30+
1931
// conforms to encoding.BinaryMarshaler
2032

21-
// marshalled binary layout (Little Endian):
33+
// MarshallToWriter marshals the filter into the given io.Writer
34+
// Binary layout (Little Endian):
2235
//
2336
// k 1 uint64
2437
// n 1 uint64
@@ -29,59 +42,68 @@ import (
2942
//
3043
// size = (3 + k + (m+63)/64) * 8 bytes
3144
//
32-
33-
func (f *Filter) marshal() (buf *bytes.Buffer,
34-
hash [sha512.Size384]byte,
35-
err error,
36-
) {
45+
func (f *Filter) MarshallToWriter(out io.Writer) (int, [sha512.Size384]byte, error) {
46+
var (
47+
c = &counter{0}
48+
hasher = sha512.New384()
49+
mw = io.MultiWriter(out, hasher, c)
50+
hash [sha512.Size384]byte
51+
)
3752
f.lock.RLock()
3853
defer f.lock.RUnlock()
39-
4054
debug("write bf k=%d n=%d m=%d\n", f.K(), f.n, f.m)
4155

42-
buf = new(bytes.Buffer)
43-
44-
err = binary.Write(buf, binary.LittleEndian, f.K())
45-
if err != nil {
46-
return nil, hash, err
56+
if err := binary.Write(mw, binary.LittleEndian, f.K()); err != nil {
57+
return c.bytes, hash, err
4758
}
48-
49-
err = binary.Write(buf, binary.LittleEndian, f.n)
50-
if err != nil {
51-
return nil, hash, err
59+
if err := binary.Write(mw, binary.LittleEndian, f.n); err != nil {
60+
return c.bytes, hash, err
5261
}
53-
54-
err = binary.Write(buf, binary.LittleEndian, f.m)
55-
if err != nil {
56-
return nil, hash, err
62+
if err := binary.Write(mw, binary.LittleEndian, f.m); err != nil {
63+
return c.bytes, hash, err
5764
}
58-
59-
err = binary.Write(buf, binary.LittleEndian, f.keys)
60-
if err != nil {
61-
return nil, hash, err
65+
if err := binary.Write(mw, binary.LittleEndian, f.keys); err != nil {
66+
return c.bytes, hash, err
6267
}
63-
64-
err = binary.Write(buf, binary.LittleEndian, f.bits)
68+
// Write the bits in chunks of 5% of their length (but at least 4 KiB). Passing f.bits to
69+
// binary.Write directly would make it allocate a same-size byte slice, doubling memory usage
70+
var chunkSize = len(f.bits) / 20
71+
if chunkSize < 512 {
72+
chunkSize = 512 // Min 4K bytes (512 uint64s)
73+
}
74+
bs := make([]byte, chunkSize*8)
75+
for start := 0; start < len(f.bits); {
76+
end := start + chunkSize
77+
if end > len(f.bits) {
78+
end = len(f.bits)
79+
}
80+
for i, x := range f.bits[start:end] {
81+
binary.LittleEndian.PutUint64(bs[8*i:], x)
82+
}
83+
if _, err := mw.Write(bs[0 : (end-start)*8]); err != nil {
84+
return c.bytes, hash, err
85+
}
86+
start = end
87+
}
88+
// Now we stop using the multiwriter, pick out the hash of what we've
89+
// written so far, and then write the hash to the output
90+
hashbytes := hasher.Sum(nil)
91+
copy(hash[:], hashbytes[:sha512.Size384])
92+
err := binary.Write(out, binary.LittleEndian, hashbytes)
6593
if err != nil {
66-
return nil, hash, err
94+
debug("bloomfilter.MarshalBinary: Successfully wrote %d byte(s), sha384 %v",
95+
c.bytes, hash)
6796
}
68-
69-
hash = sha512.Sum384(buf.Bytes())
70-
err = binary.Write(buf, binary.LittleEndian, hash)
71-
return buf, hash, err
97+
return c.bytes, hash, err
7298
}
7399

74100
// MarshalBinary converts a Filter into []bytes
75101
func (f *Filter) MarshalBinary() (data []byte, err error) {
76-
buf, hash, err := f.marshal()
102+
buf := new(bytes.Buffer)
103+
_, _, err = f.MarshallToWriter(buf)
77104
if err != nil {
78105
return nil, err
79106
}
80-
81-
debug(
82-
"bloomfilter.MarshalBinary: Successfully wrote %d byte(s), sha384 %v",
83-
buf.Len(), hash,
84-
)
85107
data = buf.Bytes()
86108
return data, nil
87109
}

fileio.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,7 @@ func (f *Filter) WriteTo(w io.Writer) (n int64, err error) {
8080
err = rawW.Close()
8181
}()
8282

83-
content, err := f.MarshalBinary()
84-
if err != nil {
85-
return -1, err
86-
}
87-
88-
intN, err := rawW.Write(content)
83+
intN, _, err := f.MarshallToWriter(rawW)
8984
n = int64(intN)
9085
return n, err
9186
}

fileio_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ package bloomfilter
1212

1313
import (
1414
"bytes"
15+
"crypto/sha512"
16+
"fmt"
17+
"math/rand"
18+
"runtime"
1519
"testing"
1620
)
1721

@@ -38,3 +42,83 @@ func TestWriteRead(t *testing.T) {
3842
t.Error("Filters not equal")
3943
}
4044
}
45+
46+
func bToMb(b uint64) uint64 {
47+
return b / 1024 / 1024
48+
}
49+
func PrintMemUsage() {
50+
var m runtime.MemStats
51+
runtime.ReadMemStats(&m)
52+
// For info on each, see: https://golang.org/pkg/runtime/#MemStats
53+
fmt.Printf("Alloc = %v MiB", bToMb(m.Alloc))
54+
fmt.Printf("\tTotalAlloc = %v MiB", bToMb(m.TotalAlloc))
55+
fmt.Printf("\tSys = %v MiB", bToMb(m.Sys))
56+
fmt.Printf("\tNumGC = %v\n", m.NumGC)
57+
}
58+
59+
func totAllocMb() uint64 {
60+
var m runtime.MemStats
61+
runtime.ReadMemStats(&m)
62+
return bToMb(m.TotalAlloc)
63+
}
64+
65+
type devnull struct{}
66+
func (d devnull) Write(p []byte) (n int, err error) {
67+
return len(p), nil
68+
}
69+
70+
func TestWrite(t *testing.T) {
71+
// 4 MiB (4*8*1024*1024 bits)
72+
f, _ := New(4*8*1024*1024, 1)
73+
fmt.Printf("Allocated 1mb filter\n")
74+
PrintMemUsage()
75+
_, _ = f.WriteTo(devnull{})
76+
fmt.Printf("Wrote filter to devnull\n")
77+
PrintMemUsage()
78+
}
79+
80+
// fillRandom fills the filter with N random values, where N is four times
81+
// the number of uint64's in the filter
82+
func fillRandom(f *Filter) {
83+
num := len(f.bits) * 4
84+
for i := 0; i < num; i++ {
85+
f.AddHash(uint64(rand.Int63()))
86+
}
87+
}
88+
89+
// TestMarshaller tests that it writes outputs correctly.
90+
func TestMarshaller(t *testing.T) {
91+
92+
h1 := sha512.New384()
93+
h2 := sha512.New384()
94+
95+
f, _ := New(1*8*1024*1024, 1)
96+
fillRandom(f)
97+
// Marshall using writer
98+
f.MarshallToWriter(h1)
99+
// Marshall as a blob
100+
data, _ := f.MarshalBinary()
101+
h2.Write(data)
102+
103+
if have, want := h1.Sum(nil), h2.Sum(nil); !bytes.Equal(have, want) {
104+
t.Errorf("Marshalling error, have %x want %x", have, want)
105+
}
106+
}
107+
108+
func BenchmarkWrite1Mb(b *testing.B) {
109+
110+
// 1Mb
111+
f, _ := New(1*8*1024*1024, 1)
112+
f.Add(hashableUint64(0))
113+
f.Add(hashableUint64(1))
114+
f.Add(hashableUint64(1 << 3))
115+
f.Add(hashableUint64(1 << 40))
116+
f.Add(hashableUint64(1 << 23))
117+
f.Add(hashableUint64(1 << 16))
118+
f.Add(hashableUint64(1 << 28))
119+
120+
b.ReportAllocs()
121+
for i := 0; i < b.N; i++ {
122+
_, _ = f.WriteTo(devnull{})
123+
}
124+
}

textmarshaler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func (f *Filter) MarshalText() (text []byte, err error) {
3434
s += fmt.Sprintf(bitsFormat, w) + nl()
3535
}
3636

37-
_, hash, err := f.marshal()
37+
_, hash, err := f.MarshallToWriter(devnull{})
3838
if err != nil {
3939
return nil, err
4040
}

textunmarshaler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ func unmarshalAndCheckTextHash(r io.Reader, f *Filter) (err error) {
7878
}
7979
}
8080

81-
_, expectedHash, err := f.marshal()
81+
_, expectedHash, err := f.MarshallToWriter(devnull{})
8282
if err != nil {
8383
return err
8484
}

0 commit comments

Comments
 (0)