Commit ab17948

perf: further improve PartSet (backport #2439) (#2469)
Various performance optimizations to `PartSet`, especially related to the `Encode` and `Decode` functions. Resolves: #2403

You can check out the first commit to get the benchmark baseline.

```
go-perftuner benchstat main.txt latest.txt

name                                                             old time/op    new time/op    delta
PartSetEncodeDecode/dataSize=130857600,partSize=65536/encode-16   633ms ± 3%     294ms ± 8%   -53.64%  (p=0.000 n=10+10)
PartSetEncodeDecode/dataSize=130857600,partSize=65536/decode-16  78.1ms ± 0%    78.3ms ± 2%      ~     (p=0.489 n=9+9)
PartSetEncodeDecode/dataSize=523430400,partSize=65536/encode-16   2.68s ± 2%     1.26s ± 5%   -52.81%  (p=0.000 n=10+10)
PartSetEncodeDecode/dataSize=523430400,partSize=65536/decode-16   309ms ± 0%     305ms ± 0%      ~     (p=0.000 n=9+8)

name                                                             old alloc/op   new alloc/op   delta
PartSetEncodeDecode/dataSize=130857600,partSize=65536/encode-16   681MB ± 0%     532MB ± 0%   -21.89%  (p=0.000 n=9+8)
PartSetEncodeDecode/dataSize=130857600,partSize=65536/decode-16  4.88MB ± 0%    3.06MB ± 0%   -37.32%  (p=0.000 n=10+10)
PartSetEncodeDecode/dataSize=523430400,partSize=65536/encode-16  2.72GB ± 0%    2.13GB ± 0%   -21.90%  (p=0.000 n=10+9)
PartSetEncodeDecode/dataSize=523430400,partSize=65536/decode-16  19.6MB ± 0%    12.8MB ± 0%   -34.57%  (p=0.000 n=10+10)

name                                                             old allocs/op  new allocs/op  delta
PartSetEncodeDecode/dataSize=130857600,partSize=65536/encode-16   62.0k ± 0%     24.1k ± 0%   -61.18%  (p=0.000 n=10+10)
PartSetEncodeDecode/dataSize=130857600,partSize=65536/decode-16   64.1k ± 0%     48.1k ± 0%   -24.93%  (p=0.000 n=10+8)
PartSetEncodeDecode/dataSize=523430400,partSize=65536/encode-16    264k ± 0%       96k ± 0%   -63.60%  (p=0.000 n=10+9)
PartSetEncodeDecode/dataSize=523430400,partSize=65536/decode-16    256k ± 0%      192k ± 0%   -24.98%  (p=0.000 n=10+10)
```

---

This is an automatic backport of pull request #2439 done by [Mergify](https://mergify.com).

Co-authored-by: Tomasz Zdybał <[email protected]>
1 parent 718cb34 commit ab17948

3 files changed: +107 −40 lines changed
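The table above is `benchstat` output comparing ten runs per side; regenerating it amounts to running something like `go test -run='^$' -bench=PartSetEncodeDecode -count=10 ./types > main.txt` on the baseline commit, repeating on this commit into `latest.txt`, and comparing the two files as shown in the commit message.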

crypto/merkle/proof.go

Lines changed: 8 additions & 1 deletion
```diff
@@ -215,7 +215,14 @@ type ProofNode struct {
 // starting from a leaf ProofNode.
 func (spn *ProofNode) FlattenAunts() [][]byte {
 	// Nonrecursive impl.
-	innerHashes := [][]byte{}
+
+	// count the length of the path from the root to the leaf.
+	cnt := 0
+	for p := spn; p != nil; p = p.Parent {
+		cnt++
+	}
+
+	innerHashes := make([][]byte, 0, cnt-1)
 	for spn != nil {
 		switch {
 		case spn.Left != nil:
```
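The `FlattenAunts` change applies the standard two-pass pattern for parent-linked structures: walk the chain once to count its length, then allocate the result slice with exact capacity so later `append` calls never reallocate. A minimal, self-contained sketch of the pattern; the `Node` type and `pathHashes` helper are illustrative, not the actual `ProofNode` API:

```go
package main

import "fmt"

// Node is a stand-in for a parent-linked node such as ProofNode.
type Node struct {
	Parent *Node
	Hash   []byte
}

// pathHashes collects hashes from n up to the root, preallocating
// the result slice so the append loop performs zero reallocations.
func pathHashes(n *Node) [][]byte {
	// First pass: count the length of the path to the root.
	cnt := 0
	for p := n; p != nil; p = p.Parent {
		cnt++
	}
	// Second pass: append into a slice with exact capacity.
	out := make([][]byte, 0, cnt)
	for p := n; p != nil; p = p.Parent {
		out = append(out, p.Hash)
	}
	return out
}

func main() {
	root := &Node{Hash: []byte{0x01}}
	leaf := &Node{Parent: root, Hash: []byte{0x02}}
	fmt.Println(len(pathHashes(leaf))) // 2
}
```

The actual change sizes the slice at `cnt-1`, one fewer than the path length, since the last node on the path contributes no inner hash.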

types/part_set.go

Lines changed: 65 additions & 39 deletions
```diff
@@ -255,42 +255,83 @@ func NewPartSetFromData(data []byte, partSize uint32) (ops *PartSet, err error)
 	ops.proofs = make([]merkle.Proof, total)
 	for i := uint32(0); i < total; i++ {
 		ops.proofs[i] = *proofs[i]
-		ops.partsBitArray.SetIndex(int(i), true)
 	}
+	ops.partsBitArray.Fill()
 	ops.count = total
 	ops.byteSize = int64(len(data))
 
 	return ops, nil
 }
 
+// newPartSetFromChunks creates a new PartSet from given data chunks, and other data.
+func newPartSetFromChunks(chunks [][]byte, root cmtbytes.HexBytes, proofs []*merkle.Proof, partSize int) (*PartSet, error) {
+	total := len(chunks)
+	if total != len(proofs) {
+		return nil, fmt.Errorf("chunks and proofs have different lengths: %d != %d", len(chunks), len(proofs))
+	}
+	if root == nil {
+		return nil, fmt.Errorf("root is nil")
+	}
+
+	// create a new partset using the new parity parts.
+	ps := NewPartSetFromHeader(PartSetHeader{
+		Total: uint32(total),
+		Hash:  root,
+	}, uint32(partSize))
+
+	// access ps directly, without mutex, because we know it is not used elsewhere
+	for i := 0; i < total; i++ {
+		start := i * partSize
+		end := start + len(chunks[i])
+
+		// Ensure we don't exceed buffer bounds
+		if end > len(ps.buffer) {
+			return nil, fmt.Errorf("part data exceeds buffer bounds")
+		}
+
+		copy(ps.buffer[start:end], chunks[i])
+		ps.proofs[i] = *proofs[i]
+	}
+	ps.partsBitArray.Fill()
+	ps.count = uint32(total)
+	ps.lastPartSize = len(chunks[total-1])
+	ps.byteSize = int64(len(ps.buffer))
+	return ps, nil
+}
+
 // Encode Extend erasure encodes the block parts. Only the original parts should be
 // provided. The parity data is formed into its own PartSet and returned
 // alongside the length of the last part. The length of the last part is
 // necessary because the last part may be padded with zeros after decoding. These zeros must be removed before computing the hash.
 func Encode(ops *PartSet, partSize uint32) (*PartSet, int, error) {
-	chunks := make([][]byte, 2*ops.Total())
-	for i := range chunks {
-		if i < int(ops.Total()) {
-			chunks[i] = ops.GetPart(i).Bytes.Bytes()
-			continue
-		}
-		chunks[i] = make([]byte, partSize)
+	total := int(ops.Total())
+	chunks := make([][]byte, 2*total)
+	ops.mtx.Lock()
+	for i := range total {
+		chunks[i] = ops.getPartBytes(i)
+	}
+	ops.mtx.Unlock()
+
+	ps := int(partSize)
+	parityBuffer := make([]byte, total*ps) // allocate once, only slice later
+	for i := 0; i < total; i++ {
+		chunks[total+i] = parityBuffer[i*ps : (i+1)*ps]
 	}
 
 	// pad ONLY the last chunk and not the part with zeros if necessary AFTER the root has been generated
-	lastLen := len(ops.GetPart(int(ops.Total() - 1)).Bytes.Bytes())
+	lastLen := len(ops.GetPartBytes(total - 1))
 	if lastLen < int(partSize) {
 		padded := make([]byte, partSize)
-		count := copy(padded, chunks[ops.Total()-1])
-		if count < len(chunks[ops.Total()-1]) {
+		count := copy(padded, chunks[total-1])
+		if count < len(chunks[total-1]) {
 			return nil, 0, fmt.Errorf("copy failed of unpadded part with index %d: %d < %d", ops.Total()-1, count, len(chunks[ops.Total()-1]))
 		}
-		chunks[ops.Total()-1] = padded
+		chunks[total-1] = padded
 	}
 
 	// init an encoder if it is not already initialized using the original
 	// number of parts.
-	enc, err := reedsolomon.New(int(ops.Total()), int(ops.Total()))
+	enc, err := reedsolomon.New(total, total)
 	if err != nil {
 		return nil, 0, err
 	}
```
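Most of `Encode`'s allocation savings come from the parity chunks: instead of `total` separate `make([]byte, partSize)` calls, one backing buffer is allocated and sliced into fixed-size windows. A minimal sketch of the two approaches, with illustrative names, independent of the PartSet types:

```go
package main

import "fmt"

// chunkBuffersNaive returns n byte slices of size size,
// paying one heap allocation per chunk.
func chunkBuffersNaive(n, size int) [][]byte {
	chunks := make([][]byte, n)
	for i := range chunks {
		chunks[i] = make([]byte, size)
	}
	return chunks
}

// chunkBuffersPooled makes a single backing allocation and
// slices it into n windows; writes through any chunk land
// in the shared buffer.
func chunkBuffersPooled(n, size int) [][]byte {
	buf := make([]byte, n*size) // allocate once, only slice later
	chunks := make([][]byte, n)
	for i := range chunks {
		chunks[i] = buf[i*size : (i+1)*size]
	}
	return chunks
}

func main() {
	a := chunkBuffersNaive(4, 8)
	b := chunkBuffersPooled(4, 8)
	fmt.Println(len(a), len(b)) // 4 4
}
```

One consequence worth noting: the pooled chunks alias a single allocation, so the whole buffer stays live as long as any one chunk is referenced; that is harmless here because the parity PartSet retains all of them anyway.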
```diff
@@ -302,26 +343,12 @@ func Encode(ops *PartSet, partSize uint32) (*PartSet, int, error) {
 	}
 
 	// only the parity data is needed for the new partset.
-	chunks = chunks[ops.Total():]
+	chunks = chunks[total:]
 	eroot, eproofs := merkle.ParallelProofsFromByteSlices(chunks)
 
-	// create a new partset using the new parity parts.
-	eps := NewPartSetFromHeader(PartSetHeader{
-		Total: ops.Total(),
-		Hash:  eroot,
-	}, partSize)
-	for i := uint32(0); i < ops.Total(); i++ {
-		added, err := eps.AddPart(&Part{
-			Index: i,
-			Bytes: chunks[i],
-			Proof: *eproofs[i],
-		})
-		if err != nil {
-			return nil, 0, err
-		}
-		if !added {
-			return nil, 0, fmt.Errorf("couldn't add parity part %d", i)
-		}
+	eps, err := newPartSetFromChunks(chunks, eroot, eproofs, ps)
+	if err != nil {
+		return nil, 0, err
 	}
 	return eps, lastLen, nil
 }
```
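Compared with the deleted loop, `newPartSetFromChunks` skips one `AddPart` call per parity part, each of which re-checked bounds, took the mutex, and set one bit in the parts bit array; the helper writes the shared buffer and proofs slice directly and marks every part present with a single `Fill()`.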
```diff
@@ -541,9 +568,6 @@ func (ps *PartSet) addPart(part *Part) (bool, error) {
 		return false, errors.New("nil part")
 	}
 
-	ps.mtx.Lock()
-	defer ps.mtx.Unlock()
-
 	// Invalid part index
 	if part.Index >= ps.total {
 		return false, ErrPartSetUnexpectedIndex
```
```diff
@@ -563,18 +587,22 @@ func (ps *PartSet) addPart(part *Part) (bool, error) {
 		return false, fmt.Errorf("part data exceeds buffer bounds")
 	}
 
-	copy(ps.buffer[start:end], part.Bytes)
+	ps.mtx.Lock()
 
+	copy(ps.buffer[start:end], part.Bytes)
 	ps.proofs[part.Index] = part.Proof
 
 	// Track last part size if this is the last part
 	if part.Index == ps.total-1 {
 		ps.lastPartSize = len(part.Bytes)
 	}
-
-	ps.partsBitArray.SetIndex(int(part.Index), true)
 	ps.count++
 	ps.byteSize += int64(len(part.Bytes))
+
+	ps.mtx.Unlock()
+
+	ps.partsBitArray.SetIndex(int(part.Index), true)
+
 	return true, nil
 }
 
```
```diff
@@ -632,8 +660,6 @@ func (ps *PartSet) GetPart(index int) *Part {
 }
 
 func (ps *PartSet) HasPart(index int) bool {
-	ps.mtx.Lock()
-	defer ps.mtx.Unlock()
 	return ps.partsBitArray.GetIndex(index)
 }
 
```
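Both locking changes rely on the parts bit array being safe for concurrent use on its own (CometBFT's `libs/bits.BitArray` guards every method with an internal mutex): `addPart` can shrink its critical section to the buffer and counter writes and flip the bit after unlocking, and `HasPart` no longer needs the `PartSet` mutex at all. A minimal sketch of a bit array with that property, including the `Fill` used above to mark every part present under one lock acquisition; illustrative, not the upstream type:

```go
package main

import (
	"fmt"
	"sync"
)

// BitArray is safe for concurrent use: every method takes the
// receiver's own lock, so callers need no external mutex.
type BitArray struct {
	mtx  sync.Mutex
	bits []bool
}

func NewBitArray(n int) *BitArray { return &BitArray{bits: make([]bool, n)} }

func (ba *BitArray) SetIndex(i int, v bool) {
	ba.mtx.Lock()
	defer ba.mtx.Unlock()
	if i >= 0 && i < len(ba.bits) {
		ba.bits[i] = v
	}
}

func (ba *BitArray) GetIndex(i int) bool {
	ba.mtx.Lock()
	defer ba.mtx.Unlock()
	return i >= 0 && i < len(ba.bits) && ba.bits[i]
}

// Fill sets every bit in one locked pass, replacing one
// SetIndex call (and one lock round-trip) per index.
func (ba *BitArray) Fill() {
	ba.mtx.Lock()
	defer ba.mtx.Unlock()
	for i := range ba.bits {
		ba.bits[i] = true
	}
}

func main() {
	ba := NewBitArray(4)
	ba.Fill()
	fmt.Println(ba.GetIndex(3)) // true
}
```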

types/part_set_test.go

Lines changed: 34 additions & 0 deletions
```diff
@@ -441,3 +441,37 @@ func benchPartSetFromData(b *testing.B, data []byte, partSize uint32) (ops *Part
 
 	return ops, nil
 }
+
+func BenchmarkPartSetEncodeDecode(b *testing.B) {
+	cases := []struct {
+		dataSize int
+		partSize uint32
+	}{
+		{MaxBlockSizeBytes, BlockPartSizeBytes},
+		{4 * MaxBlockSizeBytes, BlockPartSizeBytes},
+	}
+
+	for c := range cases {
+		b.Run(fmt.Sprintf("dataSize=%d,partSize=%d", cases[c].dataSize, cases[c].partSize), func(b *testing.B) {
+			data := cmtrand.Bytes(cases[c].dataSize)
+			ops, err := NewPartSetFromData(data, cases[c].partSize)
+			require.NoError(b, err)
+			var eps *PartSet
+			lastPartLen := 0
+			b.Run("encode", func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					b.ReportAllocs()
+					eps, lastPartLen, err = Encode(ops, cases[c].partSize)
+					require.NoError(b, err)
+				}
+			})
+			b.Run("decode", func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					b.ReportAllocs()
+					_, _, err := Decode(ops, eps, lastPartLen)
+					require.NoError(b, err)
+				}
+			})
+		})
+	}
+}
```
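The sub-benchmark structure is deliberate: `eps` and `lastPartLen` escape the `encode` closure so `decode` can consume the parity set produced by the last encode iteration, which is what yields the `/encode-16` and `/decode-16` rows in the commit message. Running something like `go test -run='^$' -bench=PartSetEncodeDecode -count=10 ./types` reproduces one side of the comparison.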
