From 5584146709737503c779300ff6d96e88157d93cb Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 15 Aug 2024 13:38:07 +0300 Subject: [PATCH 01/31] Added new cli flags to access and observer nodes --- storage/pebble/registers_pruner.go | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 storage/pebble/registers_pruner.go diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go new file mode 100644 index 00000000000..b97811bf8d3 --- /dev/null +++ b/storage/pebble/registers_pruner.go @@ -0,0 +1,11 @@ +package pebble + +import ( + "time" +) + +const ( + DefaultPruningInterval = uint64(2_000_000) + DefaultThreshold = uint64(100_000) + DefaultPruningThrottleDelay = 10 * time.Minute +) From fd5fa65189e653f9944560b236ab0ecc31460b21 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 15 Aug 2024 15:25:43 +0300 Subject: [PATCH 02/31] Updated last commit --- .../node_builder/access_node_builder.go | 40 ++++++++++++++----- cmd/observer/node_builder/observer_builder.go | 35 +++++++++++----- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index b31ea749060..00a536280a7 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -71,7 +71,7 @@ import ( "github.com/onflow/flow-go/module/execution" "github.com/onflow/flow-go/module/executiondatasync/execution_data" execdatacache "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" - "github.com/onflow/flow-go/module/executiondatasync/pruner" + edpruner "github.com/onflow/flow-go/module/executiondatasync/pruner" edstorage "github.com/onflow/flow-go/module/executiondatasync/storage" "github.com/onflow/flow-go/module/executiondatasync/tracker" finalizer "github.com/onflow/flow-go/module/finalizer/consensus" @@ -173,6 +173,9 @@ type AccessNodeConfig struct { programCacheSize uint checkPayerBalance bool versionControlEnabled bool + registerDBPruningEnabled bool + registerDBPruneInterval uint64 + registerDBPruneThrottleDelay time.Duration } type PublicNetworkConfig struct { @@ -264,8 +267,8 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { executionDataIndexingEnabled: false, executionDataDBMode: execution_data.ExecutionDataDBModeBadger.String(), executionDataPrunerHeightRangeTarget: 0, - executionDataPrunerThreshold: pruner.DefaultThreshold, - executionDataPruningInterval: pruner.DefaultPruningInterval, + executionDataPrunerThreshold: edpruner.DefaultThreshold, + executionDataPruningInterval: edpruner.DefaultPruningInterval, registersDBPath: filepath.Join(homedir, ".flow", "execution_state"), checkpointFile: cmd.NotSet, scriptExecutorConfig: query.NewDefaultConfig(), @@ -276,6 +279,9 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { programCacheSize: 0, checkPayerBalance: false, versionControlEnabled: true, + registerDBPruningEnabled: false, + registerDBPruneInterval: pstorage.DefaultPruneInterval, + registerDBPruneThrottleDelay: pstorage.DefaultPruneThrottleDelay, } } @@ -320,7 +326,7 @@ type FlowAccessNodeBuilder struct { TxResultsIndex *index.TransactionResultsIndex IndexerDependencies *cmd.DependencyList collectionExecutedMetric module.CollectionExecutedMetric - ExecutionDataPruner *pruner.Pruner + ExecutionDataPruner *edpruner.Pruner ExecutionDatastoreManager edstorage.DatastoreManager ExecutionDataTracker tracker.Storage versionControl *version.VersionControl @@ -771,16 +777,16 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess } var err error - builder.ExecutionDataPruner, err = pruner.NewPruner( + builder.ExecutionDataPruner, err = edpruner.NewPruner( node.Logger, prunerMetrics, builder.ExecutionDataTracker, - pruner.WithPruneCallback(func(ctx context.Context) error { + edpruner.WithPruneCallback(func(ctx context.Context) error { return builder.ExecutionDatastoreManager.CollectGarbage(ctx) }), - pruner.WithHeightRangeTarget(builder.executionDataPrunerHeightRangeTarget), - pruner.WithThreshold(builder.executionDataPrunerThreshold), - pruner.WithPruningInterval(builder.executionDataPruningInterval), + edpruner.WithHeightRangeTarget(builder.executionDataPrunerHeightRangeTarget), + edpruner.WithThreshold(builder.executionDataPrunerThreshold), + edpruner.WithPruningInterval(builder.executionDataPruningInterval), ) if err != nil { return nil, fmt.Errorf("failed to create execution data pruner: %w", err) @@ -1297,6 +1303,8 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { "execution-data-db", defaultConfig.executionDataDBMode, "[experimental] the DB type for execution datastore. One of [badger, pebble]") + + // Execution data pruner flags.Uint64Var(&builder.executionDataPrunerHeightRangeTarget, "execution-data-height-range-target", defaultConfig.executionDataPrunerHeightRangeTarget, @@ -1310,6 +1318,20 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { defaultConfig.executionDataPruningInterval, "duration after which the pruner tries to prune execution data. The default value is 10 minutes") + // RegisterDB pruning + flags.BoolVar(&builder.registerDBPruningEnabled, + "registerdb-pruning-enabled", + defaultConfig.registerDBPruningEnabled, + "whether to enable the pruning for register db") + flags.Uint64Var(&builder.registerDBPruneInterval, + "registerdb-prune-interval", + defaultConfig.registerDBPruneInterval, + "a number of pruned blocks in the db, above which pruning should be triggered") + flags.DurationVar(&builder.registerDBPruneThrottleDelay, + "registerdb-prune-throttle-delay", + defaultConfig.executionDataPruningInterval, + "duration after which the pruner tries to prune data. The default value is 10 minutes") + // Execution State Streaming API flags.Uint32Var(&builder.stateStreamConf.ExecutionDataCacheSize, "execution-data-cache-size", defaultConfig.stateStreamConf.ExecutionDataCacheSize, "block execution data cache size") flags.Uint32Var(&builder.stateStreamConf.MaxGlobalStreams, "state-stream-global-max-streams", defaultConfig.stateStreamConf.MaxGlobalStreams, "global maximum number of concurrent streams") diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index 80c702f2967..26843609ad0 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -70,7 +70,7 @@ import ( "github.com/onflow/flow-go/module/execution" "github.com/onflow/flow-go/module/executiondatasync/execution_data" execdatacache "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" - "github.com/onflow/flow-go/module/executiondatasync/pruner" + edpruner "github.com/onflow/flow-go/module/executiondatasync/pruner" edstorage "github.com/onflow/flow-go/module/executiondatasync/storage" "github.com/onflow/flow-go/module/executiondatasync/tracker" finalizer "github.com/onflow/flow-go/module/finalizer/consensus" @@ -172,6 +172,9 @@ type ObserverServiceConfig struct { registerCacheType string registerCacheSize uint programCacheSize uint + registerDBPruningEnabled bool + registerDBPruneInterval uint64 + registerDBPruneThrottleDelay time.Duration } // DefaultObserverServiceConfig defines all the default values for the ObserverServiceConfig @@ -235,8 +238,8 @@ func DefaultObserverServiceConfig() *ObserverServiceConfig { executionDataIndexingEnabled: false, executionDataDBMode: execution_data.ExecutionDataDBModeBadger.String(), executionDataPrunerHeightRangeTarget: 0, - executionDataPrunerThreshold: pruner.DefaultThreshold, - executionDataPruningInterval: pruner.DefaultPruningInterval, + executionDataPrunerThreshold: edpruner.DefaultThreshold, + executionDataPruningInterval: edpruner.DefaultPruningInterval, localServiceAPIEnabled: false, versionControlEnabled: true, executionDataDir: filepath.Join(homedir, ".flow", "execution_data"), @@ -285,7 +288,7 @@ type ObserverServiceBuilder struct { ExecutionDataRequester state_synchronization.ExecutionDataRequester ExecutionDataStore execution_data.ExecutionDataStore ExecutionDataBlobstore blobs.Blobstore - ExecutionDataPruner *pruner.Pruner + ExecutionDataPruner *edpruner.Pruner ExecutionDatastoreManager edstorage.DatastoreManager ExecutionDataTracker tracker.Storage @@ -703,6 +706,20 @@ func (builder *ObserverServiceBuilder) extraFlags() { defaultConfig.executionDataPruningInterval, "duration after which the pruner tries to prune execution data. The default value is 10 minutes") + // RegisterDB pruning + flags.BoolVar(&builder.registerDBPruningEnabled, + "registerdb-pruning-enabled", + defaultConfig.registerDBPruningEnabled, + "whether to enable the pruning for register db") + flags.Uint64Var(&builder.registerDBPruneInterval, + "registerdb-prune-interval", + defaultConfig.registerDBPruneInterval, + "a number of pruned blocks in the db, above which pruning should be triggered") + flags.DurationVar(&builder.registerDBPruneThrottleDelay, + "registerdb-prune-throttle-delay", + defaultConfig.executionDataPruningInterval, + "duration after which the pruner tries to prune data. The default value is 10 minutes") + // ExecutionDataRequester config flags.BoolVar(&builder.executionDataSyncEnabled, "execution-data-sync-enabled", @@ -1341,16 +1358,16 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS } var err error - builder.ExecutionDataPruner, err = pruner.NewPruner( + builder.ExecutionDataPruner, err = edpruner.NewPruner( node.Logger, prunerMetrics, builder.ExecutionDataTracker, - pruner.WithPruneCallback(func(ctx context.Context) error { + edpruner.WithPruneCallback(func(ctx context.Context) error { return builder.ExecutionDatastoreManager.CollectGarbage(ctx) }), - pruner.WithHeightRangeTarget(builder.executionDataPrunerHeightRangeTarget), - pruner.WithThreshold(builder.executionDataPrunerThreshold), - pruner.WithPruningInterval(builder.executionDataPruningInterval), + edpruner.WithHeightRangeTarget(builder.executionDataPrunerHeightRangeTarget), + edpruner.WithThreshold(builder.executionDataPrunerThreshold), + edpruner.WithPruningInterval(builder.executionDataPruningInterval), ) if err != nil { return nil, fmt.Errorf("failed to create execution data pruner: %w", err) From 8aab5d9df7842304bd4ab11fe586dec6b5fdd217 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Fri, 16 Aug 2024 17:36:13 +0300 Subject: [PATCH 03/31] Added skeleton of register db pruner module --- .../node_builder/access_node_builder.go | 14 +- cmd/observer/node_builder/observer_builder.go | 25 ++-- storage/pebble/registers_pruner.go | 140 +++++++++++++++++- 3 files changed, 158 insertions(+), 21 deletions(-) diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index 00a536280a7..a5f39347643 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -174,7 +174,7 @@ type AccessNodeConfig struct { checkPayerBalance bool versionControlEnabled bool registerDBPruningEnabled bool - registerDBPruneInterval uint64 + registerDBPruneTickerInterval time.Duration registerDBPruneThrottleDelay time.Duration } @@ -280,7 +280,7 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { checkPayerBalance: false, versionControlEnabled: true, registerDBPruningEnabled: false, - registerDBPruneInterval: pstorage.DefaultPruneInterval, + registerDBPruneTickerInterval: pstorage.DefaultPruneTickerInterval, registerDBPruneThrottleDelay: pstorage.DefaultPruneThrottleDelay, } } @@ -1323,13 +1323,13 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { "registerdb-pruning-enabled", defaultConfig.registerDBPruningEnabled, "whether to enable the pruning for register db") - flags.Uint64Var(&builder.registerDBPruneInterval, - "registerdb-prune-interval", - defaultConfig.registerDBPruneInterval, - "a number of pruned blocks in the db, above which pruning should be triggered") flags.DurationVar(&builder.registerDBPruneThrottleDelay, "registerdb-prune-throttle-delay", - defaultConfig.executionDataPruningInterval, + defaultConfig.registerDBPruneThrottleDelay, + "delay for controlling a pause between batches of registers inspected and pruned") + flags.DurationVar(&builder.registerDBPruneTickerInterval, + "registerdb-prune-ticker-interval", + defaultConfig.registerDBPruneTickerInterval, "duration after which the pruner tries to prune data. The default value is 10 minutes") // Execution State Streaming API diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index 26843609ad0..d97ce0a5278 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -173,7 +173,7 @@ type ObserverServiceConfig struct { registerCacheSize uint programCacheSize uint registerDBPruningEnabled bool - registerDBPruneInterval uint64 + registerDBPruneTickerInterval time.Duration registerDBPruneThrottleDelay time.Duration } @@ -252,11 +252,14 @@ func DefaultObserverServiceConfig() *ObserverServiceConfig { RetryDelay: edrequester.DefaultRetryDelay, MaxRetryDelay: edrequester.DefaultMaxRetryDelay, }, - scriptExecMinBlock: 0, - scriptExecMaxBlock: math.MaxUint64, - registerCacheType: pstorage.CacheTypeTwoQueue.String(), - registerCacheSize: 0, - programCacheSize: 0, + scriptExecMinBlock: 0, + scriptExecMaxBlock: math.MaxUint64, + registerCacheType: pstorage.CacheTypeTwoQueue.String(), + registerCacheSize: 0, + programCacheSize: 0, + registerDBPruningEnabled: false, + registerDBPruneTickerInterval: pstorage.DefaultPruneTickerInterval, + registerDBPruneThrottleDelay: pstorage.DefaultPruneThrottleDelay, } } @@ -711,13 +714,13 @@ func (builder *ObserverServiceBuilder) extraFlags() { "registerdb-pruning-enabled", defaultConfig.registerDBPruningEnabled, "whether to enable the pruning for register db") - flags.Uint64Var(&builder.registerDBPruneInterval, - "registerdb-prune-interval", - defaultConfig.registerDBPruneInterval, - "a number of pruned blocks in the db, above which pruning should be triggered") flags.DurationVar(&builder.registerDBPruneThrottleDelay, "registerdb-prune-throttle-delay", - defaultConfig.executionDataPruningInterval, + defaultConfig.registerDBPruneThrottleDelay, + "delay for controlling a pause between batches of registers inspected and pruned") + flags.DurationVar(&builder.registerDBPruneTickerInterval, + "registerdb-prune-ticker-interval", + defaultConfig.registerDBPruneTickerInterval, "duration after which the pruner tries to prune data. The default value is 10 minutes") // ExecutionDataRequester config diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index b97811bf8d3..173a7935249 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -1,11 +1,145 @@ package pebble import ( + "fmt" "time" + + "github.com/cockroachdb/pebble" + "github.com/rs/zerolog" + + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/storage" ) const ( - DefaultPruningInterval = uint64(2_000_000) - DefaultThreshold = uint64(100_000) - DefaultPruningThrottleDelay = 10 * time.Minute + DefaultPruneThreshold = uint64(100_000) + DefaultPruneThrottleDelay = 10 * time.Millisecond + DefaultPruneTickerInterval = 10 * time.Minute ) + +// pruneIntervalRatio represents an additional percentage of pruneThreshold which is used to calculate pruneInterval +const pruneIntervalRatio = 0.1 + +// pruneInterval is a helper function which calculates interval for pruner +func pruneInterval(threshold uint64) uint64 { + return threshold + uint64(float64(threshold)*pruneIntervalRatio) +} + +type RegisterPruner struct { + component.Component + componentManager *component.ComponentManager + + logger zerolog.Logger + db *pebble.DB + register storage.RegisterIndex + + // pruningInterval is a number of pruned blocks in the db, above which pruning should be triggered + pruneInterval uint64 + // threshold defines the number of blocks below the latestHeight to keep + pruneThreshold uint64 + // pruneThrottleDelay controls a small pause between batches of registers inspected and pruned + pruneThrottleDelay time.Duration + // pruneTickerInterval defines how frequently pruning can be performed + pruneTickerInterval time.Duration +} + +type PrunerOption func(*RegisterPruner) + +// WithPruneThreshold is used to configure the pruner with a custom threshold. +func WithPruneThreshold(threshold uint64) PrunerOption { + return func(p *RegisterPruner) { + p.pruneThreshold = threshold + p.pruneInterval = pruneInterval(threshold) + } +} + +// WithPruneThrottleDelay is used to configure the pruner with a custom +// throttle delay. +func WithPruneThrottleDelay(throttleDelay time.Duration) PrunerOption { + return func(p *RegisterPruner) { + p.pruneThrottleDelay = throttleDelay + } +} + +// WithPruneTickerInterval is used to configure the pruner with a custom +// ticker interval. +func WithPruneTickerInterval(interval time.Duration) PrunerOption { + return func(p *RegisterPruner) { + p.pruneTickerInterval = interval + } +} + +func NewRegisterPruner( + logger zerolog.Logger, + db *pebble.DB, + register storage.RegisterIndex, + opts ...PrunerOption, +) (*RegisterPruner, error) { + pruner := &RegisterPruner{ + logger: logger.With().Str("component", "register_pruner").Logger(), + db: db, + register: register, + pruneInterval: pruneInterval(DefaultPruneThreshold), + pruneThreshold: DefaultPruneThreshold, + pruneThrottleDelay: DefaultPruneThrottleDelay, + pruneTickerInterval: DefaultPruneTickerInterval, + } + + pruner.componentManager = component.NewComponentManagerBuilder(). + AddWorker(pruner.loop). + Build() + pruner.Component = pruner.componentManager + + for _, opt := range opts { + opt(pruner) + } + + return pruner, nil +} + +// loop is the main worker for the Pruner, responsible for triggering +// pruning operations at regular intervals. It monitors the heights +// of registered height recorders and checks if pruning is necessary. +func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + ready() + ticker := time.NewTicker(p.pruneTickerInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + p.checkPrune(ctx) + } + } +} + +// checkPrune checks if pruning should be performed based on the height range +// and triggers the pruning operation if necessary. +// +// Parameters: +// - ctx: The context for managing the pruning operation. +func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { + // TODO: get firstHeight and latestHeight from db + var firstHeight uint64 + var latestHeight uint64 + + pruneHeight := latestHeight - p.pruneThreshold + + if pruneHeight-firstHeight > p.pruneInterval { + p.logger.Info().Uint64("prune_height", pruneHeight).Msg("pruning storage") + + err := p.pruneUpToHeight(pruneHeight) + if err != nil { + ctx.Throw(fmt.Errorf("failed to prune: %w", err)) + } + + // TODO: update first height in db + } +} + +func (p *RegisterPruner) pruneUpToHeight(height uint64) error { + return nil +} From 08274d1a80e7d9a25c9590155950dde9c37835dc Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Tue, 20 Aug 2024 13:39:45 +0300 Subject: [PATCH 04/31] Added missed logic for checkPrune implementation for register db pruning --- storage/pebble/operation/common.go | 1 + storage/pebble/operation/registers.go | 17 +++++++++++++++++ storage/pebble/registers.go | 23 +++++------------------ storage/pebble/registers_pruner.go | 18 ++++++++++++++---- 4 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 storage/pebble/operation/registers.go diff --git a/storage/pebble/operation/common.go b/storage/pebble/operation/common.go index ad9e96c2c8b..7dfea6c3276 100644 --- a/storage/pebble/operation/common.go +++ b/storage/pebble/operation/common.go @@ -42,6 +42,7 @@ func retrieve(key []byte, sc interface{}) func(r pebble.Reader) error { } } +// convertNotFoundError converts pebble NotFound error to storage NotFound error func convertNotFoundError(err error) error { if errors.Is(err, pebble.ErrNotFound) { return storage.ErrNotFound diff --git a/storage/pebble/operation/registers.go b/storage/pebble/operation/registers.go new file mode 100644 index 00000000000..6a220460cc4 --- /dev/null +++ b/storage/pebble/operation/registers.go @@ -0,0 +1,17 @@ +package operation + +import ( + "encoding/binary" + + "github.com/cockroachdb/pebble" +) + +func RetrieveHeight(db *pebble.DB, key []byte) (uint64, error) { + res, closer, err := db.Get(key) + if err != nil { + return 0, convertNotFoundError(err) + } + defer closer.Close() + + return binary.BigEndian.Uint64(res), nil +} diff --git a/storage/pebble/registers.go b/storage/pebble/registers.go index 2d889f8602f..44f65fb7445 100644 --- a/storage/pebble/registers.go +++ b/storage/pebble/registers.go @@ -1,7 +1,6 @@ package pebble import ( - "encoding/binary" "fmt" "github.com/cockroachdb/pebble" @@ -10,6 +9,7 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/storage/pebble/operation" ) // Registers library that implements pebble storage for registers @@ -148,26 +148,13 @@ func (s *Registers) FirstHeight() uint64 { } func firstStoredHeight(db *pebble.DB) (uint64, error) { - return heightLookup(db, firstHeightKey) + return operation.RetrieveHeight(db, firstHeightKey) } func latestStoredHeight(db *pebble.DB) (uint64, error) { - return heightLookup(db, latestHeightKey) + return operation.RetrieveHeight(db, latestHeightKey) } -func heightLookup(db *pebble.DB, key []byte) (uint64, error) { - res, closer, err := db.Get(key) - if err != nil { - return 0, convertNotFoundError(err) - } - defer closer.Close() - return binary.BigEndian.Uint64(res), nil -} - -// convert pebble NotFound error to storage NotFound error -func convertNotFoundError(err error) error { - if errors.Is(err, pebble.ErrNotFound) { - return storage.ErrNotFound - } - return err +func updateFirstStoredHeight(db *pebble.DB, height uint64) error { + return db.Set(firstHeightKey, encodedUint64(height), nil) } diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 173a7935249..f39fe4080ab 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -122,9 +122,15 @@ func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component // Parameters: // - ctx: The context for managing the pruning operation. func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { - // TODO: get firstHeight and latestHeight from db - var firstHeight uint64 - var latestHeight uint64 + firstHeight, err := firstStoredHeight(p.db) + if err != nil { + ctx.Throw(fmt.Errorf("failed to get first height from register storage: %w", err)) + } + + latestHeight, err := latestStoredHeight(p.db) + if err != nil { + ctx.Throw(fmt.Errorf("failed to get latest height from register storage: %w", err)) + } pruneHeight := latestHeight - p.pruneThreshold @@ -136,7 +142,11 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { ctx.Throw(fmt.Errorf("failed to prune: %w", err)) } - // TODO: update first height in db + // update first indexed height + err = updateFirstStoredHeight(p.db, pruneHeight) + if err != nil { + ctx.Throw(fmt.Errorf("failed to update first height for register storage: %w", err)) + } } } From 61d3be2eb5bad5c7e8970d677b7808548049d66a Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Tue, 20 Aug 2024 18:45:03 +0300 Subject: [PATCH 05/31] Added pruner integration to AN and ON --- .../node_builder/access_node_builder.go | 125 +++++++++++------- cmd/observer/node_builder/observer_builder.go | 52 ++++++-- storage/pebble/registers_pruner.go | 8 +- 3 files changed, 120 insertions(+), 65 deletions(-) diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index a5f39347643..e91a7f3d9a0 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -293,43 +293,45 @@ type FlowAccessNodeBuilder struct { *AccessNodeConfig // components - FollowerState protocol.FollowerState - SyncCore *chainsync.Core - RpcEng *rpc.Engine - FollowerDistributor *consensuspubsub.FollowerDistributor - CollectionRPC access.AccessAPIClient - TransactionTimings *stdmap.TransactionTimings - CollectionsToMarkFinalized *stdmap.Times - CollectionsToMarkExecuted *stdmap.Times - BlocksToMarkExecuted *stdmap.Times - TransactionMetrics *metrics.TransactionCollector - RestMetrics *metrics.RestCollector - AccessMetrics module.AccessMetrics - PingMetrics module.PingMetrics - Committee hotstuff.DynamicCommittee - Finalized *flow.Header // latest finalized block that the node knows of at startup time - Pending []*flow.Header - FollowerCore module.HotStuffFollower - Validator hotstuff.Validator - ExecutionDataDownloader execution_data.Downloader - PublicBlobService network.BlobService - ExecutionDataRequester state_synchronization.ExecutionDataRequester - ExecutionDataStore execution_data.ExecutionDataStore - ExecutionDataBlobstore blobs.Blobstore - ExecutionDataCache *execdatacache.ExecutionDataCache - ExecutionIndexer *indexer.Indexer - ExecutionIndexerCore *indexer.IndexerCore - ScriptExecutor *backend.ScriptExecutor - RegistersAsyncStore *execution.RegistersAsyncStore - Reporter *index.Reporter - EventsIndex *index.EventsIndex - TxResultsIndex *index.TransactionResultsIndex - IndexerDependencies *cmd.DependencyList - collectionExecutedMetric module.CollectionExecutedMetric - ExecutionDataPruner *edpruner.Pruner - ExecutionDatastoreManager edstorage.DatastoreManager - ExecutionDataTracker tracker.Storage - versionControl *version.VersionControl + FollowerState protocol.FollowerState + SyncCore *chainsync.Core + RpcEng *rpc.Engine + FollowerDistributor *consensuspubsub.FollowerDistributor + CollectionRPC access.AccessAPIClient + TransactionTimings *stdmap.TransactionTimings + CollectionsToMarkFinalized *stdmap.Times + CollectionsToMarkExecuted *stdmap.Times + BlocksToMarkExecuted *stdmap.Times + TransactionMetrics *metrics.TransactionCollector + RestMetrics *metrics.RestCollector + AccessMetrics module.AccessMetrics + PingMetrics module.PingMetrics + Committee hotstuff.DynamicCommittee + Finalized *flow.Header // latest finalized block that the node knows of at startup time + Pending []*flow.Header + FollowerCore module.HotStuffFollower + Validator hotstuff.Validator + ExecutionDataDownloader execution_data.Downloader + PublicBlobService network.BlobService + ExecutionDataRequester state_synchronization.ExecutionDataRequester + ExecutionDataStore execution_data.ExecutionDataStore + ExecutionDataBlobstore blobs.Blobstore + ExecutionDataCache *execdatacache.ExecutionDataCache + ExecutionIndexer *indexer.Indexer + ExecutionIndexerCore *indexer.IndexerCore + ScriptExecutor *backend.ScriptExecutor + RegistersAsyncStore *execution.RegistersAsyncStore + Reporter *index.Reporter + EventsIndex *index.EventsIndex + TxResultsIndex *index.TransactionResultsIndex + IndexerDependencies *cmd.DependencyList + collectionExecutedMetric module.CollectionExecutedMetric + ExecutionDataPruner *edpruner.Pruner + ExecutionDatastoreManager edstorage.DatastoreManager + ExecutionDataTracker tracker.Storage + versionControl *version.VersionControl + RegisterDB *pebble.DB + RegisterDBPrunerDependencies *cmd.DependencyList // The sync engine participants provider is the libp2p peer store for the access node // which is not available until after the network has started. @@ -548,6 +550,10 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess requesterDependable := module.NewProxiedReadyDoneAware() builder.IndexerDependencies.Add(requesterDependable) + // setup dependency chain to ensure register db pruner starts after the indexer + indexerDependable := module.NewProxiedReadyDoneAware() + builder.RegisterDBPrunerDependencies.Add(indexerDependable) + executionDataPrunerEnabled := builder.executionDataPrunerHeightRangeTarget != 0 builder. @@ -851,16 +857,16 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess // Note: using a DependableComponent here to ensure that the indexer does not block // other components from starting while bootstrapping the register db since it may // take hours to complete. - - pdb, err := pstorage.OpenRegisterPebbleDB(builder.registersDBPath) + var err error + builder.RegisterDB, err = pstorage.OpenRegisterPebbleDB(builder.registersDBPath) if err != nil { return nil, fmt.Errorf("could not open registers db: %w", err) } builder.ShutdownFunc(func() error { - return pdb.Close() + return builder.RegisterDB.Close() }) - bootstrapped, err := pstorage.IsBootstrapped(pdb) + bootstrapped, err := pstorage.IsBootstrapped(builder.RegisterDB) if err != nil { return nil, fmt.Errorf("could not check if registers db is bootstrapped: %w", err) } @@ -892,7 +898,7 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess } rootHash := ledger.RootHash(builder.RootSeal.FinalState) - bootstrap, err := pstorage.NewRegisterBootstrap(pdb, checkpointFile, checkpointHeight, rootHash, builder.Logger) + bootstrap, err := pstorage.NewRegisterBootstrap(builder.RegisterDB, checkpointFile, checkpointHeight, rootHash, builder.Logger) if err != nil { return nil, fmt.Errorf("could not create registers bootstrap: %w", err) } @@ -905,7 +911,7 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess } } - registers, err := pstorage.NewRegisters(pdb) + registers, err := pstorage.NewRegisters(builder.RegisterDB) if err != nil { return nil, fmt.Errorf("could not create registers storage: %w", err) } @@ -999,8 +1005,30 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess return nil, err } + // add indexer into ReadyDoneAware dependency passed to pruner. This allows the register db pruner + // to wait for the indexer to be ready before starting. + indexerDependable.Init(builder.ExecutionIndexer) + return builder.ExecutionIndexer, nil - }, builder.IndexerDependencies) + }, builder.IndexerDependencies). + DependableComponent("register db pruner", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { + if !builder.registerDBPruningEnabled { + return &module.NoopReadyDoneAware{}, nil + } + + registerDBPruner, err := pstorage.NewRegisterPruner( + node.Logger, + builder.RegisterDB, + //pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), + pstorage.WithPruneThrottleDelay(builder.registerDBPruneThrottleDelay), + pstorage.WithPruneTickerInterval(builder.registerDBPruneTickerInterval), + ) + if err != nil { + return nil, fmt.Errorf("failed to create register db pruner: %w", err) + } + + return registerDBPruner, nil + }, builder.RegisterDBPrunerDependencies) } if builder.stateStreamConf.ListenAddr != "" { @@ -1127,10 +1155,11 @@ func FlowAccessNode(nodeBuilder *cmd.FlowNodeBuilder) *FlowAccessNodeBuilder { dist := consensuspubsub.NewFollowerDistributor() dist.AddProposalViolationConsumer(notifications.NewSlashingViolationsConsumer(nodeBuilder.Logger)) return &FlowAccessNodeBuilder{ - AccessNodeConfig: DefaultAccessNodeConfig(), - FlowNodeBuilder: nodeBuilder, - FollowerDistributor: dist, - IndexerDependencies: cmd.NewDependencyList(), + AccessNodeConfig: DefaultAccessNodeConfig(), + FlowNodeBuilder: nodeBuilder, + FollowerDistributor: dist, + IndexerDependencies: cmd.NewDependencyList(), + RegisterDBPrunerDependencies: cmd.NewDependencyList(), } } diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index d97ce0a5278..3c573a7c455 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -295,6 +295,9 @@ type ObserverServiceBuilder struct { ExecutionDatastoreManager edstorage.DatastoreManager ExecutionDataTracker tracker.Storage + RegisterDB *pebble.DB + RegisterDBPrunerDependencies *cmd.DependencyList + RegistersAsyncStore *execution.RegistersAsyncStore Reporter *index.Reporter EventsIndex *index.EventsIndex @@ -583,10 +586,11 @@ func NewFlowObserverServiceBuilder(opts ...Option) *ObserverServiceBuilder { opt(config) } anb := &ObserverServiceBuilder{ - ObserverServiceConfig: config, - FlowNodeBuilder: cmd.FlowNode("observer"), - FollowerDistributor: pubsub.NewFollowerDistributor(), - IndexerDependencies: cmd.NewDependencyList(), + ObserverServiceConfig: config, + FlowNodeBuilder: cmd.FlowNode("observer"), + FollowerDistributor: pubsub.NewFollowerDistributor(), + IndexerDependencies: cmd.NewDependencyList(), + RegisterDBPrunerDependencies: cmd.NewDependencyList(), } anb.FollowerDistributor.AddProposalViolationConsumer(notifications.NewSlashingViolationsConsumer(anb.Logger)) // the observer gets a version of the root snapshot file that does not contain any node addresses @@ -1139,6 +1143,10 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS requesterDependable := module.NewProxiedReadyDoneAware() builder.IndexerDependencies.Add(requesterDependable) + // setup dependency chain to ensure register db pruner starts after the indexer + indexerDependable := module.NewProxiedReadyDoneAware() + builder.RegisterDBPrunerDependencies.Add(indexerDependable) + executionDataPrunerEnabled := builder.executionDataPrunerHeightRangeTarget != 0 builder. @@ -1394,16 +1402,16 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS // Note: using a DependableComponent here to ensure that the indexer does not block // other components from starting while bootstrapping the register db since it may // take hours to complete. - - pdb, err := pstorage.OpenRegisterPebbleDB(builder.registersDBPath) + var err error + builder.RegisterDB, err = pstorage.OpenRegisterPebbleDB(builder.registersDBPath) if err != nil { return nil, fmt.Errorf("could not open registers db: %w", err) } builder.ShutdownFunc(func() error { - return pdb.Close() + return builder.RegisterDB.Close() }) - bootstrapped, err := pstorage.IsBootstrapped(pdb) + bootstrapped, err := pstorage.IsBootstrapped(builder.RegisterDB) if err != nil { return nil, fmt.Errorf("could not check if registers db is bootstrapped: %w", err) } @@ -1435,7 +1443,7 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS } rootHash := ledger.RootHash(builder.RootSeal.FinalState) - bootstrap, err := pstorage.NewRegisterBootstrap(pdb, checkpointFile, checkpointHeight, rootHash, builder.Logger) + bootstrap, err := pstorage.NewRegisterBootstrap(builder.RegisterDB, checkpointFile, checkpointHeight, rootHash, builder.Logger) if err != nil { return nil, fmt.Errorf("could not create registers bootstrap: %w", err) } @@ -1448,7 +1456,7 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS } } - registers, err := pstorage.NewRegisters(pdb) + registers, err := pstorage.NewRegisters(builder.RegisterDB) if err != nil { return nil, fmt.Errorf("could not create registers storage: %w", err) } @@ -1543,8 +1551,30 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS return nil, err } + // add indexer into ReadyDoneAware dependency passed to pruner. This allows the register db pruner + // to wait for the indexer to be ready before starting. + indexerDependable.Init(builder.ExecutionIndexer) + return builder.ExecutionIndexer, nil - }, builder.IndexerDependencies) + }, builder.IndexerDependencies). + DependableComponent("register db pruner", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { + if !builder.registerDBPruningEnabled { + return &module.NoopReadyDoneAware{}, nil + } + + registerDBPruner, err := pstorage.NewRegisterPruner( + node.Logger, + builder.RegisterDB, + //pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), + pstorage.WithPruneThrottleDelay(builder.registerDBPruneThrottleDelay), + pstorage.WithPruneTickerInterval(builder.registerDBPruneTickerInterval), + ) + if err != nil { + return nil, fmt.Errorf("failed to create register db pruner: %w", err) + } + + return registerDBPruner, nil + }, builder.RegisterDBPrunerDependencies) } if builder.stateStreamConf.ListenAddr != "" { diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index f39fe4080ab..d1bb947e9e9 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -9,7 +9,6 @@ import ( "github.com/onflow/flow-go/module/component" "github.com/onflow/flow-go/module/irrecoverable" - "github.com/onflow/flow-go/storage" ) const ( @@ -30,9 +29,8 @@ type RegisterPruner struct { component.Component componentManager *component.ComponentManager - logger zerolog.Logger - db *pebble.DB - register storage.RegisterIndex + logger zerolog.Logger + db *pebble.DB // pruningInterval is a number of pruned blocks in the db, above which pruning should be triggered pruneInterval uint64 @@ -73,13 +71,11 @@ func WithPruneTickerInterval(interval time.Duration) PrunerOption { func NewRegisterPruner( logger zerolog.Logger, db *pebble.DB, - register storage.RegisterIndex, opts ...PrunerOption, ) (*RegisterPruner, error) { pruner := &RegisterPruner{ logger: logger.With().Str("component", "register_pruner").Logger(), db: db, - register: register, pruneInterval: pruneInterval(DefaultPruneThreshold), pruneThreshold: DefaultPruneThreshold, pruneThrottleDelay: DefaultPruneThrottleDelay, From 89100831a45879824c00defb517a8e77ea59d82a Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Wed, 21 Aug 2024 16:52:38 +0300 Subject: [PATCH 06/31] Added prune up to height implementation --- storage/pebble/registers_pruner.go | 110 ++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index d1bb947e9e9..c5e253e5b65 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -7,6 +7,7 @@ import ( "github.com/cockroachdb/pebble" "github.com/rs/zerolog" + "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/component" "github.com/onflow/flow-go/module/irrecoverable" ) @@ -18,7 +19,10 @@ const ( ) // pruneIntervalRatio represents an additional percentage of pruneThreshold which is used to calculate pruneInterval -const pruneIntervalRatio = 0.1 +const ( + pruneIntervalRatio = 0.1 + deleteItemsPerBatch = 256 +) // pruneInterval is a helper function which calculates interval for pruner func pruneInterval(threshold uint64) uint64 { @@ -146,6 +150,108 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { } } -func (p *RegisterPruner) pruneUpToHeight(height uint64) error { +// pruneUpToHeight prunes all entries in the database with heights less than or equal +// to the specified pruneHeight. For each register prefix, it keeps the earliest entry +// that has a height less than or equal to pruneHeight, and deletes all other entries +// with lower heights. +// +// This function iterates over the database keys, identifies keys to delete in batches, +// and uses the batchDelete function to remove them efficiently. +// +// Parameters: +// - pruneHeight: The maximum height of entries to prune. +// +// No errors are expected during normal operations. +func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { + prefix := []byte{codeRegister} + itemsPerBatch := deleteItemsPerBatch + var batchKeysToRemove [][]byte + + err := func(r pebble.Reader) error { + options := pebble.IterOptions{ + LowerBound: prefix, + } + + it, err := r.NewIter(&options) + if err != nil { + return fmt.Errorf("can not create iterator: %w", err) + } + defer it.Close() + + var lastRegisterID flow.RegisterID + var keepKeyFound bool + + for it.SeekGE(prefix); it.Valid(); it.Next() { + key := it.Key() + + keyHeight, registerID, err := lookupKeyToRegisterID(key) + if err != nil { + return fmt.Errorf("malformed lookup key %v: %w", key, err) + } + + // New register prefix, reset the state + if !keepKeyFound || lastRegisterID != registerID { + keepKeyFound = false + lastRegisterID = registerID + } + + if keyHeight <= pruneHeight { + if !keepKeyFound { + // Keep the first entry found for this registerID that is <= pruneHeight + keepKeyFound = true + continue + } + + // Otherwise, mark this key for removal + batchKeysToRemove = append(batchKeysToRemove, key) + + if len(batchKeysToRemove) == itemsPerBatch { + // Perform batch delete + if err := p.batchDelete(batchKeysToRemove); err != nil { + return err + } + batchKeysToRemove = nil + } + } + } + + if len(batchKeysToRemove) > 0 { + // Perform the final batch delete if there are any remaining keys + if err := p.batchDelete(batchKeysToRemove); err != nil { + return err + } + } + + return nil + }(p.db) + if err != nil { + return err + } + + return nil +} + +// batchDelete deletes the provided keys from the database in a single batch operation. +// It creates a new batch, deletes each key from the batch, and commits the batch to ensure +// that the deletions are applied atomically. +// +// Parameters: +// - lookupKeys: A slice of keys to be deleted from the database. +// +// No errors are expected during normal operations. +func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { + batch := p.db.NewBatch() + defer batch.Close() + + for _, key := range lookupKeys { + if err := batch.Delete(key, nil); err != nil { + return fmt.Errorf("failed to delete lookupKey: %w", err) + } + } + + if err := batch.Commit(pebble.Sync); err != nil { + return fmt.Errorf("failed to commit batch: %w", err) + } + return nil } From b863b614db69329794ab407778fe81d78c1f3918 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 22 Aug 2024 16:52:00 +0300 Subject: [PATCH 07/31] Updated iteration over the register keys, moved update first height to pruner --- storage/pebble/operation/registers.go | 2 +- storage/pebble/registers.go | 8 ++------ storage/pebble/registers_pruner.go | 14 +++++++++++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/storage/pebble/operation/registers.go b/storage/pebble/operation/registers.go index 6a220460cc4..e50e2f03e80 100644 --- a/storage/pebble/operation/registers.go +++ b/storage/pebble/operation/registers.go @@ -6,7 +6,7 @@ import ( "github.com/cockroachdb/pebble" ) -func RetrieveHeight(db *pebble.DB, key []byte) (uint64, error) { +func RetrieveRegisterHeight(db *pebble.DB, key []byte) (uint64, error) { res, closer, err := db.Get(key) if err != nil { return 0, convertNotFoundError(err) diff --git a/storage/pebble/registers.go b/storage/pebble/registers.go index 44f65fb7445..4eb618aae3c 100644 --- a/storage/pebble/registers.go +++ b/storage/pebble/registers.go @@ -148,13 +148,9 @@ func (s *Registers) FirstHeight() uint64 { } func firstStoredHeight(db *pebble.DB) (uint64, error) { - return operation.RetrieveHeight(db, firstHeightKey) + return operation.RetrieveRegisterHeight(db, firstHeightKey) } func latestStoredHeight(db *pebble.DB) (uint64, error) { - return operation.RetrieveHeight(db, latestHeightKey) -} - -func updateFirstStoredHeight(db *pebble.DB, height uint64) error { - return db.Set(firstHeightKey, encodedUint64(height), nil) + return operation.RetrieveRegisterHeight(db, latestHeightKey) } diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index c5e253e5b65..2575a9bc0d3 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -143,7 +143,7 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { } // update first indexed height - err = updateFirstStoredHeight(p.db, pruneHeight) + err = p.updateFirstStoredHeight(pruneHeight) if err != nil { ctx.Throw(fmt.Errorf("failed to update first height for register storage: %w", err)) } @@ -170,6 +170,7 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { err := func(r pebble.Reader) error { options := pebble.IterOptions{ LowerBound: prefix, + UpperBound: []byte{codeFirstBlockHeight}, } it, err := r.NewIter(&options) @@ -255,3 +256,14 @@ func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { return nil } + +// updateFirstStoredHeight updates the first stored height in the database to the specified height. +// The height is stored using the `firstHeightKey` key. +// +// Parameters: +// - height: The height value to store as the first stored height. +// +// No errors are expected during normal operations. +func (p *RegisterPruner) updateFirstStoredHeight(height uint64) error { + return p.db.Set(firstHeightKey, encodedUint64(height), nil) +} From fc3c44ced5c81fd090defd591f634b002ce819de Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 22 Aug 2024 20:32:17 +0300 Subject: [PATCH 08/31] Updated collecting keys to remove --- storage/pebble/registers_pruner.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 2575a9bc0d3..abd759a1a4e 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -204,7 +204,10 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { } // Otherwise, mark this key for removal - batchKeysToRemove = append(batchKeysToRemove, key) + // Create a copy of the key to avoid memory issues + keyCopy := make([]byte, len(key)) + copy(keyCopy, key) + batchKeysToRemove = append(batchKeysToRemove, keyCopy) if len(batchKeysToRemove) == itemsPerBatch { // Perform batch delete @@ -245,8 +248,9 @@ func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { defer batch.Close() for _, key := range lookupKeys { + keyHeight, registerID, _ := lookupKeyToRegisterID(key) if err := batch.Delete(key, nil); err != nil { - return fmt.Errorf("failed to delete lookupKey: %w", err) + return fmt.Errorf("failed to delete lookupKey: %w %d %v", err, keyHeight, registerID) } } From 64e9c9b1c5127a22f7c799832169ac12c1ff566d Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Fri, 23 Aug 2024 13:41:18 +0300 Subject: [PATCH 09/31] Added tests for register pruner --- storage/pebble/register_pruner_test.go | 316 +++++++++++++++++++++++++ storage/pebble/testutil.go | 29 +++ 2 files changed, 345 insertions(+) create mode 100644 storage/pebble/register_pruner_test.go diff --git a/storage/pebble/register_pruner_test.go b/storage/pebble/register_pruner_test.go new file mode 100644 index 00000000000..8cb1805271f --- /dev/null +++ b/storage/pebble/register_pruner_test.go @@ -0,0 +1,316 @@ +package pebble + +import ( + "context" + "testing" + "time" + + "github.com/cockroachdb/pebble" + "github.com/rs/zerolog" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" +) + +// testCase defines the structure for a single test case, including initial data setup, +// expected data after pruning, and the data that should be pruned from the database. +type testCase struct { + name string + // The initial register data for the test. + initialData map[uint64]flow.RegisterEntries + // The expected first register height in the database after pruning. + expectedFirstHeight uint64 + // The data that is expected to be present in the database after pruning. + expectedData map[uint64]flow.RegisterEntries + // The data that should be pruned (i.e., removed) from the database after pruning. + prunedData map[uint64]flow.RegisterEntries +} + +func TestPrune(t *testing.T) { + // Set up the test case with initial data, expected outcomes, and pruned data. + straightPruneData := straightPruneTestCase() + testCaseWithDiffData := testCaseWithDiffHeights() + + tests := []testCase{ + { + name: "straight pruning to a pruned height", + initialData: straightPruneData.initialData, + expectedFirstHeight: straightPruneData.expectedFirstHeight, + expectedData: straightPruneData.expectedData, + prunedData: straightPruneData.prunedData, + }, + { + name: "pruning with different entries to keep", + initialData: testCaseWithDiffData.initialData, + expectedFirstHeight: testCaseWithDiffData.expectedFirstHeight, + expectedData: testCaseWithDiffData.expectedData, + prunedData: testCaseWithDiffData.prunedData, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Run the test with the provided initial data and Register storage + RunWithRegistersStorageWithInitialData(t, tt.initialData, func(db *pebble.DB) { + pruner, err := NewRegisterPruner( + zerolog.Nop(), + db, + WithPruneThreshold(5), + WithPruneTickerInterval(10*time.Millisecond), + ) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + signalerCtx, errChan := irrecoverable.WithSignaler(ctx) + + // Start the pruning process + pruner.Start(signalerCtx) + + // Ensure pruning happens and the first height after pruning is as expected. + requirePruning(t, db, tt.expectedFirstHeight) + + // Clean up pruner and check for any errors. + cleanupPruner(t, pruner, cancel, errChan) + + // Verify that the data in the database matches the expected and pruned data. + verifyData(t, db, tt) + }) + }) + } +} + +// requirePruning checks if the first stored height in the database matches the expected height after pruning. +func requirePruning(t *testing.T, db *pebble.DB, expectedFirstHeightAfterPruning uint64) { + require.Eventually(t, func() bool { + actualFirstHeight, err := firstStoredHeight(db) + require.NoError(t, err) + return expectedFirstHeightAfterPruning == actualFirstHeight + }, 2*time.Second, 15*time.Millisecond) +} + +// cleanupPruner stops the pruner and verifies there are no errors in the error channel. +func cleanupPruner(t *testing.T, pruner *RegisterPruner, cancel context.CancelFunc, errChan <-chan error) { + cancel() + <-pruner.Done() + + select { + case err := <-errChan: + require.NoError(t, err) + default: + } +} + +// straightPruneTestCase initializes and returns a testCase with predefined data for straight pruning. +func straightPruneTestCase() testCase { + initialData := emptyRegistersData(12) + + key1 := flow.RegisterID{Owner: "owner1", Key: "key1"} + key2 := flow.RegisterID{Owner: "owner2", Key: "key2"} + key3 := flow.RegisterID{Owner: "owner3", Key: "key3"} + + value1 := []byte("value1") + value2 := []byte("value2") + value3 := []byte("value3") + + // Set up initial register entries for different heights. + // Set up initial register entries for different heights. + initialData[3] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + } + initialData[4] = flow.RegisterEntries{ + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + } + initialData[7] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + } + initialData[8] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key3, Value: value3}, + } + initialData[9] = flow.RegisterEntries{ + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + } + initialData[10] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + } + initialData[12] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key3, Value: value3}, + } + + // Define the expected data after pruning. + expectedData := map[uint64]flow.RegisterEntries{ + 7: { + {Key: key1, Value: value1}, // keep, first row <= 7 + {Key: key2, Value: value2}, // keep, first row <= 7 + {Key: key3, Value: value3}, // keep, first row <= 7 + }, + 8: { + {Key: key1, Value: value1}, // keep, height > 7 + {Key: key3, Value: value3}, // keep, height > 7 + }, + 9: { + {Key: key2, Value: value2}, // keep, height > 7 + {Key: key3, Value: value3}, // keep, height > 7 + }, + 10: { + {Key: key1, Value: value1}, // keep, height > 7 + {Key: key2, Value: value2}, // keep, height > 7 + }, + 12: { + {Key: key1, Value: value1}, // keep, height > 7 + {Key: key3, Value: value3}, // keep, height > 7 + }, + } + + // Define the data that should be pruned (i.e., removed) from the database after pruning. + prunedData := map[uint64]flow.RegisterEntries{ + 3: { + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + }, + 4: { + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + }, + } + + return testCase{ + initialData: initialData, + expectedFirstHeight: 7, + expectedData: expectedData, + prunedData: prunedData, + } +} + +// testCaseWithDiffHeights initializes and returns a testCase with predefined data for different entries to keep +func testCaseWithDiffHeights() testCase { + initialData := emptyRegistersData(12) + + key1 := flow.RegisterID{Owner: "owner1", Key: "key1"} + key2 := flow.RegisterID{Owner: "owner2", Key: "key2"} + key3 := flow.RegisterID{Owner: "owner3", Key: "key3"} + + value1 := []byte("value1") + value2 := []byte("value2") + value3 := []byte("value3") + + // Set up initial register entries for different heights. + initialData[1] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + } + initialData[2] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + } + initialData[5] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key3, Value: value3}, + } + initialData[6] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + } + initialData[10] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + } + initialData[11] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key3, Value: value3}, + } + + initialData[12] = flow.RegisterEntries{ + {Key: key1, Value: value1}, + {Key: key3, Value: value3}, + } + + // Define the expected data after pruning. + expectedData := map[uint64]flow.RegisterEntries{ + 5: { + {Key: key3, Value: value3}, // keep, first row <= 7 + }, + 6: { + {Key: key1, Value: value1}, // keep, first row <= 7 + {Key: key2, Value: value2}, // keep, first row <= 7 + }, + 10: { + {Key: key1, Value: value1}, // keep, height > 7 + {Key: key2, Value: value2}, // keep, height > 7 + {Key: key3, Value: value3}, // keep, height > 7 + }, + 11: { + {Key: key1, Value: value1}, // keep, height > 7 + {Key: key3, Value: value3}, // keep, height > 7 + }, + 12: { + {Key: key1, Value: value1}, // keep, height > 7 + {Key: key3, Value: value3}, // keep, height > 7 + }, + } + + // Define the data that should be pruned (i.e., removed) from the database after pruning. + prunedData := map[uint64]flow.RegisterEntries{ + 1: { + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + {Key: key3, Value: value3}, + }, + 2: { + {Key: key1, Value: value1}, + {Key: key2, Value: value2}, + }, + 5: { + {Key: key1, Value: value1}, + }, + } + + return testCase{ + initialData: initialData, + expectedFirstHeight: 7, + expectedData: expectedData, + prunedData: prunedData, + } +} + +// emptyRegistersData initializes an empty map for storing register entries. +func emptyRegistersData(count int) map[uint64]flow.RegisterEntries { + data := make(map[uint64]flow.RegisterEntries, count) + for i := 1; i <= count; i++ { + data[uint64(i)] = flow.RegisterEntries{} + } + + return data +} + +// verifyData verifies that the data in the database matches the expected and pruned data after pruning. +func verifyData(t *testing.T, + db *pebble.DB, + data testCase, +) { + for height, entries := range data.expectedData { + for _, entry := range entries { + val, closer, err := db.Get(newLookupKey(height, entry.Key).Bytes()) + require.NoError(t, err) + require.Equal(t, entry.Value, val) + closer.Close() + } + } + + for height, entries := range data.prunedData { + for _, entry := range entries { + _, _, err := db.Get(newLookupKey(height, entry.Key).Bytes()) + require.Error(t, err) + } + } +} diff --git a/storage/pebble/testutil.go b/storage/pebble/testutil.go index 64bd5a71a21..0d80271f52d 100644 --- a/storage/pebble/testutil.go +++ b/storage/pebble/testutil.go @@ -1,11 +1,13 @@ package pebble import ( + "sort" "testing" "github.com/cockroachdb/pebble" "github.com/stretchr/testify/require" + "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/utils/unittest" ) @@ -30,3 +32,30 @@ func NewBootstrappedRegistersWithPathForTest(tb testing.TB, dir string, first, l require.NoError(tb, db.Set(latestHeightKey, encodedUint64(latest), nil)) return db } + +func RunWithRegistersStorageWithInitialData( + tb testing.TB, + data map[uint64]flow.RegisterEntries, + f func(db *pebble.DB)) { + unittest.RunWithTempDir(tb, func(dir string) { + db := NewBootstrappedRegistersWithPathForTest(tb, dir, uint64(0), uint64(0)) + r, err := NewRegisters(db) + require.NoError(tb, err) + + keys := make([]uint64, 0, len(data)) + for k := range data { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] }) + + // Iterate over the map in ascending order by key + for _, height := range keys { + err = r.Store(data[height], height) + require.NoError(tb, err) + } + + f(db) + + require.NoError(tb, db.Close()) + }) +} From 177651ced207ff164a61d76b02fb39108c49c4f8 Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Fri, 23 Aug 2024 16:24:59 +0300 Subject: [PATCH 10/31] Added basic metrics for pruner --- .../node_builder/access_node_builder.go | 7 +++ module/metrics.go | 6 ++ module/metrics/access.go | 14 +++++ module/metrics/namespaces.go | 1 + module/metrics/noop.go | 1 + module/metrics/registers_db_pruner.go | 57 +++++++++++++++++++ module/mock/access_metrics.go | 5 ++ module/mock/register_db_pruner_metrics.go | 33 +++++++++++ storage/pebble/registers_pruner.go | 19 +++++++ 9 files changed, 143 insertions(+) create mode 100644 module/metrics/registers_db_pruner.go create mode 100644 module/mock/register_db_pruner_metrics.go diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index e91a7f3d9a0..435ed4f2245 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -304,6 +304,7 @@ type FlowAccessNodeBuilder struct { BlocksToMarkExecuted *stdmap.Times TransactionMetrics *metrics.TransactionCollector RestMetrics *metrics.RestCollector + RegisterDBPrunerMetrics *metrics.RegisterDBPrunerCollector AccessMetrics module.AccessMetrics PingMetrics module.PingMetrics Committee hotstuff.DynamicCommittee @@ -1019,6 +1020,7 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess registerDBPruner, err := pstorage.NewRegisterPruner( node.Logger, builder.RegisterDB, + pstorage.WithPrunerMetrics(builder.RegisterDBPrunerMetrics), //pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), pstorage.WithPruneThrottleDelay(builder.registerDBPruneThrottleDelay), pstorage.WithPruneTickerInterval(builder.registerDBPruneTickerInterval), @@ -1724,11 +1726,16 @@ func (builder *FlowAccessNodeBuilder) Build() (cmd.Node, error) { builder.RestMetrics = m return nil }). + Module("register db metrics", func(node *cmd.NodeConfig) error { + builder.RegisterDBPrunerMetrics = metrics.NewRegisterDBPrunerCollector() + return nil + }). Module("access metrics", func(node *cmd.NodeConfig) error { builder.AccessMetrics = metrics.NewAccessCollector( metrics.WithTransactionMetrics(builder.TransactionMetrics), metrics.WithBackendScriptsMetrics(builder.TransactionMetrics), metrics.WithRestMetrics(builder.RestMetrics), + metrics.WithRegisterDBPrunerMetrics(builder.RegisterDBPrunerMetrics), ) return nil }). diff --git a/module/metrics.go b/module/metrics.go index cfad9bb11a0..5ae942cec71 100644 --- a/module/metrics.go +++ b/module/metrics.go @@ -908,6 +908,7 @@ type AccessMetrics interface { GRPCConnectionPoolMetrics TransactionMetrics BackendScriptsMetrics + RegisterDBPrunerMetrics // UpdateExecutionReceiptMaxHeight is called whenever we store an execution receipt from a block from a newer height UpdateExecutionReceiptMaxHeight(height uint64) @@ -916,6 +917,11 @@ type AccessMetrics interface { UpdateLastFullBlockHeight(height uint64) } +type RegisterDBPrunerMetrics interface { + // Pruned tracks the last pruned height and the pruning operation duration + Pruned(height uint64, duration time.Duration) +} + type ExecutionResultStats struct { ComputationUsed uint64 MemoryUsed uint64 diff --git a/module/metrics/access.go b/module/metrics/access.go index 1116f87f433..951f7ec5410 100644 --- a/module/metrics/access.go +++ b/module/metrics/access.go @@ -1,6 +1,8 @@ package metrics import ( + "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -28,10 +30,17 @@ func WithRestMetrics(m module.RestMetrics) AccessCollectorOpts { } } +func WithRegisterDBPrunerMetrics(m module.RegisterDBPrunerMetrics) AccessCollectorOpts { + return func(ac *AccessCollector) { + ac.RegisterDBPrunerMetrics = m + } +} + type AccessCollector struct { module.RestMetrics module.TransactionMetrics module.BackendScriptsMetrics + module.RegisterDBPrunerMetrics connectionReused prometheus.Counter connectionsInPool *prometheus.GaugeVec @@ -153,3 +162,8 @@ func (ac *AccessCollector) UpdateExecutionReceiptMaxHeight(height uint64) { ac.maxReceiptHeight.Set(float64(height)) } } + +// Pruned records the duration of a pruning operation and updates the latest pruned height. +func (ac *AccessCollector) Pruned(height uint64, duration time.Duration) { + ac.RegisterDBPrunerMetrics.Pruned(height, duration) +} diff --git a/module/metrics/namespaces.go b/module/metrics/namespaces.go index a35885db7dc..9a173d3f24e 100644 --- a/module/metrics/namespaces.go +++ b/module/metrics/namespaces.go @@ -46,6 +46,7 @@ const ( subsystemTransactionSubmission = "transaction_submission" subsystemConnectionPool = "connection_pool" subsystemHTTP = "http" + subsystemRegisterDBPruner = "register_db_pruner" ) // Observer subsystem diff --git a/module/metrics/noop.go b/module/metrics/noop.go index fb5ceeeed81..91733e5cdc0 100644 --- a/module/metrics/noop.go +++ b/module/metrics/noop.go @@ -41,6 +41,7 @@ var _ module.HotstuffMetrics = (*NoopCollector)(nil) var _ module.EngineMetrics = (*NoopCollector)(nil) var _ module.HeroCacheMetrics = (*NoopCollector)(nil) var _ module.NetworkMetrics = (*NoopCollector)(nil) +var _ module.RegisterDBPrunerMetrics = (*NoopCollector)(nil) func (nc *NoopCollector) Peers(prefix string, n int) {} func (nc *NoopCollector) Wantlist(prefix string, n int) {} diff --git a/module/metrics/registers_db_pruner.go b/module/metrics/registers_db_pruner.go new file mode 100644 index 00000000000..43f2a1bc618 --- /dev/null +++ b/module/metrics/registers_db_pruner.go @@ -0,0 +1,57 @@ +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/onflow/flow-go/module" +) + +// RegisterDBPrunerCollector collects metrics for the database pruning process, +// including the durations of pruning operations and the latest height that has been pruned. +type RegisterDBPrunerCollector struct { + pruneDurations prometheus.Summary // Summary metric for tracking pruning operation durations in milliseconds. + latestHeightPruned prometheus.Gauge // Gauge metric for tracking the latest pruned block height. +} + +var _ module.RegisterDBPrunerMetrics = (*AccessCollector)(nil) + +// NewRegisterDBPrunerCollector creates and returns a new RegisterDBPrunerCollector instance +// with pre-configured Prometheus metrics for monitoring the pruning process. +func NewRegisterDBPrunerCollector() *RegisterDBPrunerCollector { + return &RegisterDBPrunerCollector{ + pruneDurations: promauto.NewSummary(prometheus.SummaryOpts{ + Name: "prune_durations_ms", + Namespace: namespaceAccess, + Subsystem: subsystemRegisterDBPruner, + Help: "The durations of pruning operations in milliseconds.", + Objectives: map[float64]float64{ + 0.01: 0.001, + 0.1: 0.01, + 0.25: 0.025, + 0.5: 0.05, + 0.75: 0.025, + 0.9: 0.01, + 0.99: 0.001, + }, + }), + latestHeightPruned: promauto.NewGauge(prometheus.GaugeOpts{ + Name: "latest_height_pruned", + Namespace: namespaceAccess, + Subsystem: subsystemRegisterDBPruner, + Help: "The latest block height that has been pruned.", + }), + } +} + +// Pruned records the duration of a pruning operation and updates the latest pruned height. +// +// Parameters: +// - height: The height of the block that was pruned. +// - duration: The time taken to complete the pruning operation. +func (c *RegisterDBPrunerCollector) Pruned(height uint64, duration time.Duration) { + c.pruneDurations.Observe(float64(duration.Milliseconds())) + c.latestHeightPruned.Set(float64(height)) +} diff --git a/module/mock/access_metrics.go b/module/mock/access_metrics.go index 9866ad90e02..df5a0a5508e 100644 --- a/module/mock/access_metrics.go +++ b/module/mock/access_metrics.go @@ -68,6 +68,11 @@ func (_m *AccessMetrics) ObserveHTTPResponseSize(ctx context.Context, props metr _m.Called(ctx, props, sizeBytes) } +// Pruned provides a mock function with given fields: height, duration +func (_m *AccessMetrics) Pruned(height uint64, duration time.Duration) { + _m.Called(height, duration) +} + // ScriptExecuted provides a mock function with given fields: dur, size func (_m *AccessMetrics) ScriptExecuted(dur time.Duration, size int) { _m.Called(dur, size) diff --git a/module/mock/register_db_pruner_metrics.go b/module/mock/register_db_pruner_metrics.go new file mode 100644 index 00000000000..a6d5bf5c1fa --- /dev/null +++ b/module/mock/register_db_pruner_metrics.go @@ -0,0 +1,33 @@ +// Code generated by mockery v2.43.2. DO NOT EDIT. + +package mock + +import ( + mock "github.com/stretchr/testify/mock" + + time "time" +) + +// RegisterDBPrunerMetrics is an autogenerated mock type for the RegisterDBPrunerMetrics type +type RegisterDBPrunerMetrics struct { + mock.Mock +} + +// Pruned provides a mock function with given fields: height, duration +func (_m *RegisterDBPrunerMetrics) Pruned(height uint64, duration time.Duration) { + _m.Called(height, duration) +} + +// NewRegisterDBPrunerMetrics creates a new instance of RegisterDBPrunerMetrics. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewRegisterDBPrunerMetrics(t interface { + mock.TestingT + Cleanup(func()) +}) *RegisterDBPrunerMetrics { + mock := &RegisterDBPrunerMetrics{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index c5e253e5b65..818de73a3e5 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -8,6 +8,7 @@ import ( "github.com/rs/zerolog" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" "github.com/onflow/flow-go/module/component" "github.com/onflow/flow-go/module/irrecoverable" ) @@ -36,6 +37,8 @@ type RegisterPruner struct { logger zerolog.Logger db *pebble.DB + metrics module.RegisterDBPrunerMetrics + // pruningInterval is a number of pruned blocks in the db, above which pruning should be triggered pruneInterval uint64 // threshold defines the number of blocks below the latestHeight to keep @@ -72,6 +75,13 @@ func WithPruneTickerInterval(interval time.Duration) PrunerOption { } } +// WithPrunerMetrics is used to sets the metrics for a RegisterPruner instance. +func WithPrunerMetrics(metrics module.RegisterDBPrunerMetrics) PrunerOption { + return func(p *RegisterPruner) { + p.metrics = metrics + } +} + func NewRegisterPruner( logger zerolog.Logger, db *pebble.DB, @@ -167,6 +177,8 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { itemsPerBatch := deleteItemsPerBatch var batchKeysToRemove [][]byte + start := time.Now() + err := func(r pebble.Reader) error { options := pebble.IterOptions{ LowerBound: prefix, @@ -224,10 +236,17 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { return nil }(p.db) + + duration := time.Since(start) + if err != nil { return err } + if p.metrics != nil { + p.metrics.Pruned(pruneHeight, duration) + } + return nil } From 2d847baa1c60c1dab66cd6f70b8435d7ff5101f7 Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Mon, 26 Aug 2024 14:23:54 +0300 Subject: [PATCH 11/31] Added aditional metrics --- module/metrics.go | 9 ++++ module/metrics/access.go | 7 --- module/metrics/noop.go | 3 ++ module/metrics/registers_db_pruner.go | 54 +++++++++++++++++++++-- module/mock/access_metrics.go | 15 +++++++ module/mock/register_db_pruner_metrics.go | 15 +++++++ storage/pebble/registers_pruner.go | 12 ++++- 7 files changed, 102 insertions(+), 13 deletions(-) diff --git a/module/metrics.go b/module/metrics.go index 5ae942cec71..a94a75aed17 100644 --- a/module/metrics.go +++ b/module/metrics.go @@ -920,6 +920,15 @@ type AccessMetrics interface { type RegisterDBPrunerMetrics interface { // Pruned tracks the last pruned height and the pruning operation duration Pruned(height uint64, duration time.Duration) + + // NumberOfBlocksPruned tracks the number of blocks that were pruned during the operation. + NumberOfBlocksPruned(blocks uint64) + + // NumberOfRowsPruned tracks the number of rows that were pruned during the operation. + NumberOfRowsPruned(rows uint64) + + // NumberOfElementsVisited tracks the number of elements that were visited during the pruning operation. + NumberOfElementsVisited(elements uint64) } type ExecutionResultStats struct { diff --git a/module/metrics/access.go b/module/metrics/access.go index 951f7ec5410..891b0fad7d8 100644 --- a/module/metrics/access.go +++ b/module/metrics/access.go @@ -1,8 +1,6 @@ package metrics import ( - "time" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -162,8 +160,3 @@ func (ac *AccessCollector) UpdateExecutionReceiptMaxHeight(height uint64) { ac.maxReceiptHeight.Set(float64(height)) } } - -// Pruned records the duration of a pruning operation and updates the latest pruned height. -func (ac *AccessCollector) Pruned(height uint64, duration time.Duration) { - ac.RegisterDBPrunerMetrics.Pruned(height, duration) -} diff --git a/module/metrics/noop.go b/module/metrics/noop.go index 91733e5cdc0..b7efac734f9 100644 --- a/module/metrics/noop.go +++ b/module/metrics/noop.go @@ -235,6 +235,9 @@ func (nc *NoopCollector) RequestFailed(duration time.Duration, retryable bool) func (nc *NoopCollector) RequestCanceled() {} func (nc *NoopCollector) ResponseDropped() {} func (nc *NoopCollector) Pruned(height uint64, duration time.Duration) {} +func (nc *NoopCollector) NumberOfRowsPruned(rows uint64) {} +func (nc *NoopCollector) NumberOfElementsVisited(elements uint64) {} +func (nc *NoopCollector) NumberOfBlocksPruned(blocks uint64) {} func (nc *NoopCollector) UpdateCollectionMaxHeight(height uint64) {} func (nc *NoopCollector) BucketAvailableSlots(uint64, uint64) {} func (nc *NoopCollector) OnKeyPutSuccess(uint32) {} diff --git a/module/metrics/registers_db_pruner.go b/module/metrics/registers_db_pruner.go index 43f2a1bc618..140bd63fa42 100644 --- a/module/metrics/registers_db_pruner.go +++ b/module/metrics/registers_db_pruner.go @@ -10,13 +10,17 @@ import ( ) // RegisterDBPrunerCollector collects metrics for the database pruning process, -// including the durations of pruning operations and the latest height that has been pruned. +// including the durations of pruning operations, the latest height that has been pruned, +// the number of blocks pruned, the number of rows pruned, and the number of elements visited. type RegisterDBPrunerCollector struct { - pruneDurations prometheus.Summary // Summary metric for tracking pruning operation durations in milliseconds. - latestHeightPruned prometheus.Gauge // Gauge metric for tracking the latest pruned block height. + pruneDurations prometheus.Summary // The pruning operation durations in milliseconds. + latestHeightPruned prometheus.Gauge // The latest pruned block height. + numberOfBlocksPruned prometheus.Gauge // The number of blocks pruned. + numberOfRowsPruned prometheus.Gauge // The number of rows pruned. + numberOfElementsVisited prometheus.Gauge // The number of elements visited during pruning. } -var _ module.RegisterDBPrunerMetrics = (*AccessCollector)(nil) +var _ module.RegisterDBPrunerMetrics = (*RegisterDBPrunerCollector)(nil) // NewRegisterDBPrunerCollector creates and returns a new RegisterDBPrunerCollector instance // with pre-configured Prometheus metrics for monitoring the pruning process. @@ -43,6 +47,24 @@ func NewRegisterDBPrunerCollector() *RegisterDBPrunerCollector { Subsystem: subsystemRegisterDBPruner, Help: "The latest block height that has been pruned.", }), + numberOfBlocksPruned: promauto.NewGauge(prometheus.GaugeOpts{ + Name: "number_of_blocks_pruned", + Namespace: namespaceAccess, + Subsystem: subsystemRegisterDBPruner, + Help: "The number of blocks that have been pruned.", + }), + numberOfRowsPruned: promauto.NewGauge(prometheus.GaugeOpts{ + Name: "number_of_rows_pruned", + Namespace: namespaceAccess, + Subsystem: subsystemRegisterDBPruner, + Help: "The number of rows that have been pruned.", + }), + numberOfElementsVisited: promauto.NewGauge(prometheus.GaugeOpts{ + Name: "number_of_elements_visited", + Namespace: namespaceAccess, + Subsystem: subsystemRegisterDBPruner, + Help: "The number of elements that have been visited.", + }), } } @@ -55,3 +77,27 @@ func (c *RegisterDBPrunerCollector) Pruned(height uint64, duration time.Duration c.pruneDurations.Observe(float64(duration.Milliseconds())) c.latestHeightPruned.Set(float64(height)) } + +// NumberOfRowsPruned records the number of rows that were pruned during the pruning operation. +// +// Parameters: +// - rows: The number of rows that were pruned. +func (c *RegisterDBPrunerCollector) NumberOfRowsPruned(rows uint64) { + c.numberOfRowsPruned.Set(float64(rows)) +} + +// NumberOfElementsVisited records the number of elements that were visited during the pruning operation. +// +// Parameters: +// - elements: The number of elements that were visited. +func (c *RegisterDBPrunerCollector) NumberOfElementsVisited(elements uint64) { + c.numberOfElementsVisited.Set(float64(elements)) +} + +// NumberOfBlocksPruned tracks the number of blocks that were pruned during the operation. +// +// Parameters: +// - blocks: The number of blocks that were pruned. +func (c *RegisterDBPrunerCollector) NumberOfBlocksPruned(blocks uint64) { + c.numberOfBlocksPruned.Set(float64(blocks)) +} diff --git a/module/mock/access_metrics.go b/module/mock/access_metrics.go index df5a0a5508e..f06991a7257 100644 --- a/module/mock/access_metrics.go +++ b/module/mock/access_metrics.go @@ -58,6 +58,21 @@ func (_m *AccessMetrics) NewConnectionEstablished() { _m.Called() } +// NumberOfBlocksPruned provides a mock function with given fields: blocks +func (_m *AccessMetrics) NumberOfBlocksPruned(blocks uint64) { + _m.Called(blocks) +} + +// NumberOfElementsVisited provides a mock function with given fields: elements +func (_m *AccessMetrics) NumberOfElementsVisited(elements uint64) { + _m.Called(elements) +} + +// NumberOfRowsPruned provides a mock function with given fields: rows +func (_m *AccessMetrics) NumberOfRowsPruned(rows uint64) { + _m.Called(rows) +} + // ObserveHTTPRequestDuration provides a mock function with given fields: ctx, props, duration func (_m *AccessMetrics) ObserveHTTPRequestDuration(ctx context.Context, props metrics.HTTPReqProperties, duration time.Duration) { _m.Called(ctx, props, duration) diff --git a/module/mock/register_db_pruner_metrics.go b/module/mock/register_db_pruner_metrics.go index a6d5bf5c1fa..5e08e17e32c 100644 --- a/module/mock/register_db_pruner_metrics.go +++ b/module/mock/register_db_pruner_metrics.go @@ -13,6 +13,21 @@ type RegisterDBPrunerMetrics struct { mock.Mock } +// NumberOfBlocksPruned provides a mock function with given fields: blocks +func (_m *RegisterDBPrunerMetrics) NumberOfBlocksPruned(blocks uint64) { + _m.Called(blocks) +} + +// NumberOfElementsVisited provides a mock function with given fields: elements +func (_m *RegisterDBPrunerMetrics) NumberOfElementsVisited(elements uint64) { + _m.Called(elements) +} + +// NumberOfRowsPruned provides a mock function with given fields: rows +func (_m *RegisterDBPrunerMetrics) NumberOfRowsPruned(rows uint64) { + _m.Called(rows) +} + // Pruned provides a mock function with given fields: height, duration func (_m *RegisterDBPrunerMetrics) Pruned(height uint64, duration time.Duration) { _m.Called(height, duration) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index c81f066ccab..c5b48ec5bc1 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -152,6 +152,10 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { ctx.Throw(fmt.Errorf("failed to prune: %w", err)) } + if p.metrics != nil { + p.metrics.NumberOfBlocksPruned(pruneHeight - firstHeight) + } + // update first indexed height err = p.updateFirstStoredHeight(pruneHeight) if err != nil { @@ -241,12 +245,12 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { return nil }(p.db) - duration := time.Since(start) - if err != nil { return err } + duration := time.Since(start) + if p.metrics != nil { p.metrics.Pruned(pruneHeight, duration) } @@ -277,6 +281,10 @@ func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { return fmt.Errorf("failed to commit batch: %w", err) } + if p.metrics != nil { + p.metrics.NumberOfRowsPruned(uint64(len(lookupKeys))) + } + return nil } From 1018ba27d8541e195e2d843ab6904848fc98d55f Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Mon, 26 Aug 2024 16:56:13 +0300 Subject: [PATCH 12/31] Added missing metric --- module/metrics.go | 4 ++-- module/metrics/noop.go | 2 +- module/metrics/registers_db_pruner.go | 19 ++++++++----------- module/mock/access_metrics.go | 10 +++++----- module/mock/register_db_pruner_metrics.go | 10 +++++----- storage/pebble/registers_pruner.go | 4 ++++ 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/module/metrics.go b/module/metrics.go index a94a75aed17..809db16f5a6 100644 --- a/module/metrics.go +++ b/module/metrics.go @@ -927,8 +927,8 @@ type RegisterDBPrunerMetrics interface { // NumberOfRowsPruned tracks the number of rows that were pruned during the operation. NumberOfRowsPruned(rows uint64) - // NumberOfElementsVisited tracks the number of elements that were visited during the pruning operation. - NumberOfElementsVisited(elements uint64) + // ElementVisited records the element that were visited during the pruning operation. + ElementVisited() } type ExecutionResultStats struct { diff --git a/module/metrics/noop.go b/module/metrics/noop.go index b7efac734f9..0d9b05a4379 100644 --- a/module/metrics/noop.go +++ b/module/metrics/noop.go @@ -236,7 +236,7 @@ func (nc *NoopCollector) RequestCanceled() func (nc *NoopCollector) ResponseDropped() {} func (nc *NoopCollector) Pruned(height uint64, duration time.Duration) {} func (nc *NoopCollector) NumberOfRowsPruned(rows uint64) {} -func (nc *NoopCollector) NumberOfElementsVisited(elements uint64) {} +func (nc *NoopCollector) ElementVisited() {} func (nc *NoopCollector) NumberOfBlocksPruned(blocks uint64) {} func (nc *NoopCollector) UpdateCollectionMaxHeight(height uint64) {} func (nc *NoopCollector) BucketAvailableSlots(uint64, uint64) {} diff --git a/module/metrics/registers_db_pruner.go b/module/metrics/registers_db_pruner.go index 140bd63fa42..1e44364ad28 100644 --- a/module/metrics/registers_db_pruner.go +++ b/module/metrics/registers_db_pruner.go @@ -48,19 +48,19 @@ func NewRegisterDBPrunerCollector() *RegisterDBPrunerCollector { Help: "The latest block height that has been pruned.", }), numberOfBlocksPruned: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "number_of_blocks_pruned", + Name: "number_of_blocks_pruned_total", Namespace: namespaceAccess, Subsystem: subsystemRegisterDBPruner, Help: "The number of blocks that have been pruned.", }), numberOfRowsPruned: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "number_of_rows_pruned", + Name: "number_of_rows_pruned_total", Namespace: namespaceAccess, Subsystem: subsystemRegisterDBPruner, Help: "The number of rows that have been pruned.", }), numberOfElementsVisited: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "number_of_elements_visited", + Name: "number_of_elements_visited_total", Namespace: namespaceAccess, Subsystem: subsystemRegisterDBPruner, Help: "The number of elements that have been visited.", @@ -83,15 +83,12 @@ func (c *RegisterDBPrunerCollector) Pruned(height uint64, duration time.Duration // Parameters: // - rows: The number of rows that were pruned. func (c *RegisterDBPrunerCollector) NumberOfRowsPruned(rows uint64) { - c.numberOfRowsPruned.Set(float64(rows)) + c.numberOfRowsPruned.Add(float64(rows)) } -// NumberOfElementsVisited records the number of elements that were visited during the pruning operation. -// -// Parameters: -// - elements: The number of elements that were visited. -func (c *RegisterDBPrunerCollector) NumberOfElementsVisited(elements uint64) { - c.numberOfElementsVisited.Set(float64(elements)) +// ElementVisited records the element that were visited during the pruning operation. +func (c *RegisterDBPrunerCollector) ElementVisited() { + c.numberOfElementsVisited.Inc() } // NumberOfBlocksPruned tracks the number of blocks that were pruned during the operation. @@ -99,5 +96,5 @@ func (c *RegisterDBPrunerCollector) NumberOfElementsVisited(elements uint64) { // Parameters: // - blocks: The number of blocks that were pruned. func (c *RegisterDBPrunerCollector) NumberOfBlocksPruned(blocks uint64) { - c.numberOfBlocksPruned.Set(float64(blocks)) + c.numberOfBlocksPruned.Add(float64(blocks)) } diff --git a/module/mock/access_metrics.go b/module/mock/access_metrics.go index f06991a7257..0614544a74b 100644 --- a/module/mock/access_metrics.go +++ b/module/mock/access_metrics.go @@ -53,6 +53,11 @@ func (_m *AccessMetrics) ConnectionFromPoolUpdated() { _m.Called() } +// ElementVisited provides a mock function with given fields: +func (_m *AccessMetrics) ElementVisited() { + _m.Called() +} + // NewConnectionEstablished provides a mock function with given fields: func (_m *AccessMetrics) NewConnectionEstablished() { _m.Called() @@ -63,11 +68,6 @@ func (_m *AccessMetrics) NumberOfBlocksPruned(blocks uint64) { _m.Called(blocks) } -// NumberOfElementsVisited provides a mock function with given fields: elements -func (_m *AccessMetrics) NumberOfElementsVisited(elements uint64) { - _m.Called(elements) -} - // NumberOfRowsPruned provides a mock function with given fields: rows func (_m *AccessMetrics) NumberOfRowsPruned(rows uint64) { _m.Called(rows) diff --git a/module/mock/register_db_pruner_metrics.go b/module/mock/register_db_pruner_metrics.go index 5e08e17e32c..a34eebdc053 100644 --- a/module/mock/register_db_pruner_metrics.go +++ b/module/mock/register_db_pruner_metrics.go @@ -13,16 +13,16 @@ type RegisterDBPrunerMetrics struct { mock.Mock } +// ElementVisited provides a mock function with given fields: +func (_m *RegisterDBPrunerMetrics) ElementVisited() { + _m.Called() +} + // NumberOfBlocksPruned provides a mock function with given fields: blocks func (_m *RegisterDBPrunerMetrics) NumberOfBlocksPruned(blocks uint64) { _m.Called(blocks) } -// NumberOfElementsVisited provides a mock function with given fields: elements -func (_m *RegisterDBPrunerMetrics) NumberOfElementsVisited(elements uint64) { - _m.Called(elements) -} - // NumberOfRowsPruned provides a mock function with given fields: rows func (_m *RegisterDBPrunerMetrics) NumberOfRowsPruned(rows uint64) { _m.Called(rows) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index c5b48ec5bc1..6a3cb34cf5d 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -206,6 +206,10 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { return fmt.Errorf("malformed lookup key %v: %w", key, err) } + if p.metrics != nil { + p.metrics.ElementVisited() + } + // New register prefix, reset the state if !keepKeyFound || lastRegisterID != registerID { keepKeyFound = false From 0e8cc4c1a00fc7f3cf2c9e1dc28906de2d6d69ce Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Mon, 26 Aug 2024 17:00:03 +0300 Subject: [PATCH 13/31] Added throttle delay to prevent excessive system load during pruning --- storage/pebble/registers_pruner.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index c5b48ec5bc1..8d69ffef23d 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -47,6 +47,9 @@ type RegisterPruner struct { pruneThrottleDelay time.Duration // pruneTickerInterval defines how frequently pruning can be performed pruneTickerInterval time.Duration + + // throttleTicker controls the delay between pruning batches to manage system load + throttleTicker *time.Ticker } type PrunerOption func(*RegisterPruner) @@ -177,6 +180,9 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { // // No errors are expected during normal operations. func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { + p.setupThrottleDelay() + defer p.throttleTicker.Stop() + prefix := []byte{codeRegister} itemsPerBatch := deleteItemsPerBatch var batchKeysToRemove [][]byte @@ -258,6 +264,17 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { return nil } +// setupThrottleDelay sets up or reset a ticker for the throttle delay to manage system load +// during pruning operations. This ensures that there is a small pause between pruning +// each batch of registers. +func (p *RegisterPruner) setupThrottleDelay() { + if p.throttleTicker == nil { + p.throttleTicker = time.NewTicker(p.pruneThrottleDelay) + } else { + p.throttleTicker.Reset(p.pruneThrottleDelay) + } +} + // batchDelete deletes the provided keys from the database in a single batch operation. // It creates a new batch, deletes each key from the batch, and commits the batch to ensure // that the deletions are applied atomically. @@ -285,6 +302,9 @@ func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { p.metrics.NumberOfRowsPruned(uint64(len(lookupKeys))) } + // Throttle to prevent excessive system load + <-p.throttleTicker.C + return nil } From b9247b660e968f114c326fe7db7b610b67af3ebc Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Mon, 26 Aug 2024 17:34:39 +0300 Subject: [PATCH 14/31] Updated checkPrune implementation --- storage/pebble/registers_pruner.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 1d59bc77789..b541b586b40 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -135,6 +135,8 @@ func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component // Parameters: // - ctx: The context for managing the pruning operation. func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { + // TODO: Clarify whether batching can be used here for getting the first and latest heights, + // and for updating the first height. firstHeight, err := firstStoredHeight(p.db) if err != nil { ctx.Throw(fmt.Errorf("failed to get first height from register storage: %w", err)) @@ -150,6 +152,12 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { if pruneHeight-firstHeight > p.pruneInterval { p.logger.Info().Uint64("prune_height", pruneHeight).Msg("pruning storage") + // update first indexed height + err = p.updateFirstStoredHeight(pruneHeight) + if err != nil { + ctx.Throw(fmt.Errorf("failed to update first height for register storage: %w", err)) + } + err := p.pruneUpToHeight(pruneHeight) if err != nil { ctx.Throw(fmt.Errorf("failed to prune: %w", err)) @@ -158,12 +166,6 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { if p.metrics != nil { p.metrics.NumberOfBlocksPruned(pruneHeight - firstHeight) } - - // update first indexed height - err = p.updateFirstStoredHeight(pruneHeight) - if err != nil { - ctx.Throw(fmt.Errorf("failed to update first height for register storage: %w", err)) - } } } From b57bc4eb05b3cfddf47d6a41f71e87989e69303d Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 11:49:16 +0300 Subject: [PATCH 15/31] Added first batch of suggested changes according to comments --- storage/pebble/registers_pruner.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index b541b586b40..64d8c41243e 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -91,7 +91,7 @@ func NewRegisterPruner( opts ...PrunerOption, ) (*RegisterPruner, error) { pruner := &RegisterPruner{ - logger: logger.With().Str("component", "register_pruner").Logger(), + logger: logger.With().Str("component", "registerdb_pruner").Logger(), db: db, pruneInterval: pruneInterval(DefaultPruneThreshold), pruneThreshold: DefaultPruneThreshold, @@ -135,8 +135,6 @@ func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component // Parameters: // - ctx: The context for managing the pruning operation. func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { - // TODO: Clarify whether batching can be used here for getting the first and latest heights, - // and for updating the first height. firstHeight, err := firstStoredHeight(p.db) if err != nil { ctx.Throw(fmt.Errorf("failed to get first height from register storage: %w", err)) @@ -152,7 +150,9 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { if pruneHeight-firstHeight > p.pruneInterval { p.logger.Info().Uint64("prune_height", pruneHeight).Msg("pruning storage") - // update first indexed height + // first, update firstHeight in the db + // this ensures that if the node crashes during pruning, there will still be a consistent + // view when the node starts up. Subsequent prunes will remove any leftover data. err = p.updateFirstStoredHeight(pruneHeight) if err != nil { ctx.Throw(fmt.Errorf("failed to update first height for register storage: %w", err)) @@ -186,7 +186,6 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { defer p.throttleTicker.Stop() prefix := []byte{codeRegister} - itemsPerBatch := deleteItemsPerBatch var batchKeysToRemove [][]byte start := time.Now() @@ -237,7 +236,7 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { copy(keyCopy, key) batchKeysToRemove = append(batchKeysToRemove, keyCopy) - if len(batchKeysToRemove) == itemsPerBatch { + if len(batchKeysToRemove) == deleteItemsPerBatch { // Perform batch delete if err := p.batchDelete(batchKeysToRemove); err != nil { return err From 3ca788372b30910d6f1ea81fcf5e4566397a9634 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 12:00:19 +0300 Subject: [PATCH 16/31] Removed ComponentManager --- storage/pebble/registers_pruner.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 64d8c41243e..1a44d024ce2 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -32,7 +32,6 @@ func pruneInterval(threshold uint64) uint64 { type RegisterPruner struct { component.Component - componentManager *component.ComponentManager logger zerolog.Logger db *pebble.DB @@ -99,10 +98,9 @@ func NewRegisterPruner( pruneTickerInterval: DefaultPruneTickerInterval, } - pruner.componentManager = component.NewComponentManagerBuilder(). + pruner.Component = component.NewComponentManagerBuilder(). AddWorker(pruner.loop). Build() - pruner.Component = pruner.componentManager for _, opt := range opts { opt(pruner) From 1ff10bcc30ef9697bae0c939b4f6f347548bdc76 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 12:24:50 +0300 Subject: [PATCH 17/31] Simplified prune throttle delay implementation and updated error handling according to comments --- storage/pebble/registers_pruner.go | 54 +++++++++++++----------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 1a44d024ce2..cfc3b2046fe 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -1,6 +1,7 @@ package pebble import ( + "context" "fmt" "time" @@ -46,9 +47,6 @@ type RegisterPruner struct { pruneThrottleDelay time.Duration // pruneTickerInterval defines how frequently pruning can be performed pruneTickerInterval time.Duration - - // throttleTicker controls the delay between pruning batches to manage system load - throttleTicker *time.Ticker } type PrunerOption func(*RegisterPruner) @@ -122,7 +120,10 @@ func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component case <-ctx.Done(): return case <-ticker.C: - p.checkPrune(ctx) + err := p.checkPrune(ctx) + if err != nil { + ctx.Throw(err) + } } } } @@ -131,16 +132,16 @@ func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component // and triggers the pruning operation if necessary. // // Parameters: -// - ctx: The context for managing the pruning operation. -func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { +// - ctx: The context for managing the pruning throttle delay operation. +func (p *RegisterPruner) checkPrune(ctx context.Context) error { firstHeight, err := firstStoredHeight(p.db) if err != nil { - ctx.Throw(fmt.Errorf("failed to get first height from register storage: %w", err)) + return fmt.Errorf("failed to get first height from register storage: %w", err) } latestHeight, err := latestStoredHeight(p.db) if err != nil { - ctx.Throw(fmt.Errorf("failed to get latest height from register storage: %w", err)) + return fmt.Errorf("failed to get latest height from register storage: %w", err) } pruneHeight := latestHeight - p.pruneThreshold @@ -153,18 +154,20 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { // view when the node starts up. Subsequent prunes will remove any leftover data. err = p.updateFirstStoredHeight(pruneHeight) if err != nil { - ctx.Throw(fmt.Errorf("failed to update first height for register storage: %w", err)) + return fmt.Errorf("failed to update first height for register storage: %w", err) } - err := p.pruneUpToHeight(pruneHeight) + err := p.pruneUpToHeight(ctx, pruneHeight) if err != nil { - ctx.Throw(fmt.Errorf("failed to prune: %w", err)) + return fmt.Errorf("failed to prune: %w", err) } if p.metrics != nil { p.metrics.NumberOfBlocksPruned(pruneHeight - firstHeight) } } + + return nil } // pruneUpToHeight prunes all entries in the database with heights less than or equal @@ -176,13 +179,11 @@ func (p *RegisterPruner) checkPrune(ctx irrecoverable.SignalerContext) { // and uses the batchDelete function to remove them efficiently. // // Parameters: +// - ctx: The context for managing the pruning throttle delay operation. // - pruneHeight: The maximum height of entries to prune. // // No errors are expected during normal operations. -func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { - p.setupThrottleDelay() - defer p.throttleTicker.Stop() - +func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, pruneHeight uint64) error { prefix := []byte{codeRegister} var batchKeysToRemove [][]byte @@ -236,7 +237,7 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { if len(batchKeysToRemove) == deleteItemsPerBatch { // Perform batch delete - if err := p.batchDelete(batchKeysToRemove); err != nil { + if err := p.batchDelete(ctx, batchKeysToRemove); err != nil { return err } batchKeysToRemove = nil @@ -246,7 +247,7 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { if len(batchKeysToRemove) > 0 { // Perform the final batch delete if there are any remaining keys - if err := p.batchDelete(batchKeysToRemove); err != nil { + if err := p.batchDelete(ctx, batchKeysToRemove); err != nil { return err } } @@ -267,26 +268,16 @@ func (p *RegisterPruner) pruneUpToHeight(pruneHeight uint64) error { return nil } -// setupThrottleDelay sets up or reset a ticker for the throttle delay to manage system load -// during pruning operations. This ensures that there is a small pause between pruning -// each batch of registers. -func (p *RegisterPruner) setupThrottleDelay() { - if p.throttleTicker == nil { - p.throttleTicker = time.NewTicker(p.pruneThrottleDelay) - } else { - p.throttleTicker.Reset(p.pruneThrottleDelay) - } -} - // batchDelete deletes the provided keys from the database in a single batch operation. // It creates a new batch, deletes each key from the batch, and commits the batch to ensure // that the deletions are applied atomically. // // Parameters: +// - ctx: The context for managing the pruning throttle delay operation. // - lookupKeys: A slice of keys to be deleted from the database. // // No errors are expected during normal operations. -func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { +func (p *RegisterPruner) batchDelete(ctx context.Context, lookupKeys [][]byte) error { batch := p.db.NewBatch() defer batch.Close() @@ -306,7 +297,10 @@ func (p *RegisterPruner) batchDelete(lookupKeys [][]byte) error { } // Throttle to prevent excessive system load - <-p.throttleTicker.C + select { + case <-ctx.Done(): + case <-time.After(p.pruneThrottleDelay): + } return nil } From 25e1356776fae69a82abceed975f470cc300798d Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 12:35:26 +0300 Subject: [PATCH 18/31] Removed unnecessary checks for metrics and updated tests --- storage/pebble/register_pruner_test.go | 2 ++ storage/pebble/registers_pruner.go | 10 ++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/storage/pebble/register_pruner_test.go b/storage/pebble/register_pruner_test.go index 8cb1805271f..b1aae9c9ea3 100644 --- a/storage/pebble/register_pruner_test.go +++ b/storage/pebble/register_pruner_test.go @@ -11,6 +11,7 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/metrics" ) // testCase defines the structure for a single test case, including initial data setup, @@ -58,6 +59,7 @@ func TestPrune(t *testing.T) { db, WithPruneThreshold(5), WithPruneTickerInterval(10*time.Millisecond), + WithPrunerMetrics(metrics.NewNoopCollector()), ) require.NoError(t, err) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index cfc3b2046fe..823230bb267 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -162,9 +162,7 @@ func (p *RegisterPruner) checkPrune(ctx context.Context) error { return fmt.Errorf("failed to prune: %w", err) } - if p.metrics != nil { - p.metrics.NumberOfBlocksPruned(pruneHeight - firstHeight) - } + p.metrics.NumberOfBlocksPruned(pruneHeight - firstHeight) } return nil @@ -259,11 +257,7 @@ func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, pruneHeight uint64 return err } - duration := time.Since(start) - - if p.metrics != nil { - p.metrics.Pruned(pruneHeight, duration) - } + p.metrics.Pruned(pruneHeight, time.Since(start)) return nil } From 66e810b1321dab3a01c2f0119551b80f6f9d3fbf Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 12:45:11 +0300 Subject: [PATCH 19/31] Removed extra scope according to comments --- storage/pebble/registers_pruner.go | 40 ++++++++++++++++-------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 823230bb267..a6691ec4b4c 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -220,26 +220,28 @@ func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, pruneHeight uint64 lastRegisterID = registerID } - if keyHeight <= pruneHeight { - if !keepKeyFound { - // Keep the first entry found for this registerID that is <= pruneHeight - keepKeyFound = true - continue - } + if keyHeight > pruneHeight { + continue + } + + if !keepKeyFound { + // Keep the first entry found for this registerID that is <= pruneHeight + keepKeyFound = true + continue + } + + // Otherwise, mark this key for removal + // Create a copy of the key to avoid memory issues + keyCopy := make([]byte, len(key)) + copy(keyCopy, key) + batchKeysToRemove = append(batchKeysToRemove, keyCopy) - // Otherwise, mark this key for removal - // Create a copy of the key to avoid memory issues - keyCopy := make([]byte, len(key)) - copy(keyCopy, key) - batchKeysToRemove = append(batchKeysToRemove, keyCopy) - - if len(batchKeysToRemove) == deleteItemsPerBatch { - // Perform batch delete - if err := p.batchDelete(ctx, batchKeysToRemove); err != nil { - return err - } - batchKeysToRemove = nil + if len(batchKeysToRemove) == deleteItemsPerBatch { + // Perform batch delete + if err := p.batchDelete(ctx, batchKeysToRemove); err != nil { + return err } + batchKeysToRemove = nil } } @@ -276,8 +278,8 @@ func (p *RegisterPruner) batchDelete(ctx context.Context, lookupKeys [][]byte) e defer batch.Close() for _, key := range lookupKeys { - keyHeight, registerID, _ := lookupKeyToRegisterID(key) if err := batch.Delete(key, nil); err != nil { + keyHeight, registerID, _ := lookupKeyToRegisterID(key) return fmt.Errorf("failed to delete lookupKey: %w %d %v", err, keyHeight, registerID) } } From 0aada715d2b235ac0110cf3bd20b120f341e1798 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 13:27:03 +0300 Subject: [PATCH 20/31] Fixed check for pruning --- storage/pebble/registers_pruner.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index a6691ec4b4c..387248c4db8 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -144,9 +144,8 @@ func (p *RegisterPruner) checkPrune(ctx context.Context) error { return fmt.Errorf("failed to get latest height from register storage: %w", err) } - pruneHeight := latestHeight - p.pruneThreshold - - if pruneHeight-firstHeight > p.pruneInterval { + if latestHeight-firstHeight > p.pruneInterval+p.pruneThreshold { + pruneHeight := latestHeight - p.pruneThreshold p.logger.Info().Uint64("prune_height", pruneHeight).Msg("pruning storage") // first, update firstHeight in the db From f039d15538a775e14cf38f92dc0f561654737a66 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 13:52:34 +0300 Subject: [PATCH 21/31] Updated godoc --- storage/pebble/registers_pruner.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 387248c4db8..8d031ebf2e3 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -20,9 +20,11 @@ const ( DefaultPruneTickerInterval = 10 * time.Minute ) -// pruneIntervalRatio represents an additional percentage of pruneThreshold which is used to calculate pruneInterval const ( - pruneIntervalRatio = 0.1 + // pruneIntervalRatio represents an additional percentage of pruneThreshold which is used to calculate pruneInterval + pruneIntervalRatio = 0.1 + // deleteItemsPerBatch defines the number of database keys to delete in each batch operation. + // This value is used to control the size of deletion operations during pruning. deleteItemsPerBatch = 256 ) @@ -133,6 +135,8 @@ func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component // // Parameters: // - ctx: The context for managing the pruning throttle delay operation. +// +// No errors are expected during normal operations. func (p *RegisterPruner) checkPrune(ctx context.Context) error { firstHeight, err := firstStoredHeight(p.db) if err != nil { From b6c8040cb5031462b0a38916faeb5b490cf02b54 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 14:59:39 +0300 Subject: [PATCH 22/31] Added more comments --- storage/pebble/registers_pruner.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 8d031ebf2e3..19491bfd198 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -20,9 +20,10 @@ const ( DefaultPruneTickerInterval = 10 * time.Minute ) +// TODO: This configuration should be changed after testing it with real network data for better performance. const ( // pruneIntervalRatio represents an additional percentage of pruneThreshold which is used to calculate pruneInterval - pruneIntervalRatio = 0.1 + pruneIntervalRatio = 0.1 // 10% // deleteItemsPerBatch defines the number of database keys to delete in each batch operation. // This value is used to control the size of deletion operations during pruning. deleteItemsPerBatch = 256 From db82c623f3b8a8edec834997979264b6487a5e03 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Thu, 29 Aug 2024 16:25:42 +0300 Subject: [PATCH 23/31] Updated comments for register priner tests --- storage/pebble/register_pruner_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/storage/pebble/register_pruner_test.go b/storage/pebble/register_pruner_test.go index b1aae9c9ea3..518ad344b52 100644 --- a/storage/pebble/register_pruner_test.go +++ b/storage/pebble/register_pruner_test.go @@ -115,7 +115,6 @@ func straightPruneTestCase() testCase { value2 := []byte("value2") value3 := []byte("value3") - // Set up initial register entries for different heights. // Set up initial register entries for different heights. initialData[3] = flow.RegisterEntries{ {Key: key1, Value: value1}, From dc984cf10919e7a891ab6fbbf217f67ba5b600a0 Mon Sep 17 00:00:00 2001 From: UlyanaAndrukhiv Date: Fri, 30 Aug 2024 15:43:53 +0300 Subject: [PATCH 24/31] Added functional test for error handling for register pruner, added more godoc for tests --- storage/pebble/register_pruner_test.go | 80 ++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/storage/pebble/register_pruner_test.go b/storage/pebble/register_pruner_test.go index 518ad344b52..3d591bbfd15 100644 --- a/storage/pebble/register_pruner_test.go +++ b/storage/pebble/register_pruner_test.go @@ -2,6 +2,7 @@ package pebble import ( "context" + "fmt" "testing" "time" @@ -12,6 +13,7 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/utils/unittest" ) // testCase defines the structure for a single test case, including initial data setup, @@ -28,6 +30,18 @@ type testCase struct { prunedData map[uint64]flow.RegisterEntries } +// TestPrune validates the pruning functionality of the RegisterPruner. +// +// It runs multiple test cases, each of which initializes the database with specific +// register entries and then verifies that the pruning operation behaves as expected. +// The test cases check that: +// - Register entries below a certain height are pruned (i.e., removed) from the database. +// - The remaining data in the database matches the expected state after pruning. +// - The first height of the register entries in the database is correct after pruning. +// +// The test cases include: +// - Straight pruning, where register entries are pruned up to a specific height. +// - Pruning with different entries at varying heights, ensuring only the correct entries are kept. func TestPrune(t *testing.T) { // Set up the test case with initial data, expected outcomes, and pruned data. straightPruneData := straightPruneTestCase() @@ -82,6 +96,72 @@ func TestPrune(t *testing.T) { } } +// TestPruneErrors checks the error handling behavior of the RegisterPruner when certain +// conditions cause failures during the pruning process. +// +// This test covers scenarios where: +// - The first stored height in the database cannot be retrieved, simulating a failure to locate it. +// - The latest height in the database cannot be retrieved, simulating a missing entry. +// +// The tests ensure that the RegisterPruner handles these error conditions correctly by +// triggering the appropriate irrecoverable errors and shutting down gracefully. +func TestPruneErrors(t *testing.T) { + t.Run("not found first height", func(t *testing.T) { + unittest.RunWithTempDir(t, func(dir string) { + // Run the test with the Register storage + db, err := OpenRegisterPebbleDB(dir) + require.NoError(t, err) + + pruner, err := NewRegisterPruner( + zerolog.Nop(), + db, + WithPruneThreshold(5), + WithPruneTickerInterval(10*time.Millisecond), + WithPrunerMetrics(metrics.NewNoopCollector()), + ) + require.NoError(t, err) + + err = fmt.Errorf("key not found") + signCtxErr := fmt.Errorf("failed to get first height from register storage: %w", err) + ctx := irrecoverable.NewMockSignalerContextExpectError(t, context.Background(), signCtxErr) + + // Start the pruning process + pruner.Start(ctx) + + unittest.AssertClosesBefore(t, pruner.Done(), 2*time.Second) + }) + }) + + t.Run("not found latest height", func(t *testing.T) { + unittest.RunWithTempDir(t, func(dir string) { + // Run the test with the Register storage + db, err := OpenRegisterPebbleDB(dir) + require.NoError(t, err) + + // insert initial first height to pebble + require.NoError(t, db.Set(firstHeightKey, encodedUint64(1), nil)) + + pruner, err := NewRegisterPruner( + zerolog.Nop(), + db, + WithPruneThreshold(5), + WithPruneTickerInterval(10*time.Millisecond), + WithPrunerMetrics(metrics.NewNoopCollector()), + ) + require.NoError(t, err) + + err = fmt.Errorf("key not found") + signCtxErr := fmt.Errorf("failed to get latest height from register storage: %w", err) + ctx := irrecoverable.NewMockSignalerContextExpectError(t, context.Background(), signCtxErr) + + // Start the pruning process + pruner.Start(ctx) + + unittest.AssertClosesBefore(t, pruner.Done(), 2*time.Second) + }) + }) +} + // requirePruning checks if the first stored height in the database matches the expected height after pruning. func requirePruning(t *testing.T, db *pebble.DB, expectedFirstHeightAfterPruning uint64) { require.Eventually(t, func() bool { From 9ed6ca8b6f0c4ddf62d9d56491051fd6430726f2 Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Tue, 10 Sep 2024 11:24:50 +0300 Subject: [PATCH 25/31] Added basic integration test functionality --- .../node_builder/access_node_builder.go | 1 + cmd/observer/node_builder/observer_builder.go | 1 + .../cohort3/register_db_pruning_test.go | 226 ++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 integration/tests/access/cohort3/register_db_pruning_test.go diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index 68dc6d9cb29..7e3b92f0ba6 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -861,6 +861,7 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess // take hours to complete. var err error builder.RegisterDB, err = pstorage.OpenRegisterPebbleDB(builder.registersDBPath) + builder.Logger.Warn().Msg(fmt.Sprintf("!!!!!!!!!! builder.registersDBPath: %s", builder.registersDBPath)) if err != nil { return nil, fmt.Errorf("could not open registers db: %w", err) } diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index 9d04b2ba839..ab2f6c51778 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -1404,6 +1404,7 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS // take hours to complete. var err error builder.RegisterDB, err = pstorage.OpenRegisterPebbleDB(builder.registersDBPath) + builder.Logger.Warn().Msg(fmt.Sprintf("!!!!!!!!!! builder.registersDBPath: %s", builder.registersDBPath)) if err != nil { return nil, fmt.Errorf("could not open registers db: %w", err) } diff --git a/integration/tests/access/cohort3/register_db_pruning_test.go b/integration/tests/access/cohort3/register_db_pruning_test.go new file mode 100644 index 00000000000..a3b3eccd43f --- /dev/null +++ b/integration/tests/access/cohort3/register_db_pruning_test.go @@ -0,0 +1,226 @@ +package cohort3 + +import ( + "context" + "fmt" + "path/filepath" + "testing" + "time" + + "github.com/cockroachdb/pebble" + sdk "github.com/onflow/flow-go-sdk" + accessproto "github.com/onflow/flow/protobuf/go/flow/access" + "github.com/onflow/flow/protobuf/go/flow/entities" + "github.com/onflow/flow/protobuf/go/flow/executiondata" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + "github.com/onflow/flow-go/integration/testnet" + "github.com/onflow/flow-go/model/flow" + pstorage "github.com/onflow/flow-go/storage/pebble" + "github.com/onflow/flow-go/utils/unittest" +) + +type RegisterDBPruningSuite struct { + suite.Suite + + log zerolog.Logger + + accessNodeName string + observerNodeName string + + pebbleDb *pebble.DB + + // root context for the current test + ctx context.Context + cancel context.CancelFunc + + net *testnet.FlowNetwork +} + +func TestRegisterDBPruning(t *testing.T) { + suite.Run(t, new(RegisterDBPruningSuite)) +} + +func (s *RegisterDBPruningSuite) TearDownTest() { + s.log.Info().Msg("================> Start TearDownTest") + s.net.Remove() + s.cancel() + s.log.Info().Msg("================> Finish TearDownTest") +} + +func (s *RegisterDBPruningSuite) SetupTest() { + s.log = unittest.LoggerForTest(s.Suite.T(), zerolog.InfoLevel) + s.log.Info().Msg("================> SetupTest") + defer func() { + s.log.Info().Msg("================> Finish SetupTest") + }() + + // access node + s.accessNodeName = testnet.PrimaryAN + accessNodeConfig := testnet.NewNodeConfig( + flow.RoleAccess, + testnet.WithLogLevel(zerolog.FatalLevel), + testnet.WithAdditionalFlag("--supports-observer=true"), + testnet.WithAdditionalFlag("--execution-data-sync-enabled=true"), + testnet.WithAdditionalFlagf("--execution-data-dir=%s", testnet.DefaultExecutionDataServiceDir), + testnet.WithAdditionalFlag("--execution-data-retry-delay=1s"), + testnet.WithAdditionalFlag("--execution-data-indexing-enabled=true"), + testnet.WithAdditionalFlagf("--execution-state-dir=%s", s.getPebbleDBPath(s.accessNodeName)), + testnet.WithAdditionalFlagf("--public-network-execution-data-sync-enabled=true"), + testnet.WithAdditionalFlagf("--event-query-mode=local-only"), + testnet.WithAdditionalFlag("--registerdb-pruning-enabled=true"), + testnet.WithAdditionalFlag("--registerdb-prune-throttle-delay=1s"), + testnet.WithAdditionalFlag("--registerdb-prune-ticker-interval=10s"), + ) + + consensusConfigs := []func(config *testnet.NodeConfig){ + testnet.WithAdditionalFlag("--cruise-ctl-fallback-proposal-duration=400ms"), + testnet.WithAdditionalFlag(fmt.Sprintf("--required-verification-seal-approvals=%d", 1)), + testnet.WithAdditionalFlag(fmt.Sprintf("--required-construction-seal-approvals=%d", 1)), + testnet.WithLogLevel(zerolog.FatalLevel), + } + + nodeConfigs := []testnet.NodeConfig{ + testnet.NewNodeConfig(flow.RoleCollection, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleCollection, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleExecution, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleExecution, testnet.WithLogLevel(zerolog.FatalLevel)), + testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), + testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), + testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), + testnet.NewNodeConfig(flow.RoleVerification, testnet.WithLogLevel(zerolog.FatalLevel)), + accessNodeConfig, // access_1 + } + + // add the observer node config + s.observerNodeName = testnet.PrimaryON + + observers := []testnet.ObserverConfig{{ + ContainerName: s.observerNodeName, + LogLevel: zerolog.InfoLevel, + AdditionalFlags: []string{ + fmt.Sprintf("--execution-data-dir=%s", testnet.DefaultExecutionDataServiceDir), + fmt.Sprintf("--execution-state-dir=%s", s.getPebbleDBPath(s.observerNodeName)), + "--execution-data-sync-enabled=true", + "--execution-data-indexing-enabled=true", + "--execution-data-retry-delay=1s", + "--event-query-mode=local-only", + "--local-service-api-enabled=true", + "--registerdb-pruning-enabled=true", + "--registerdb-prune-throttle-delay=1s", + "--registerdb-prune-ticker-interval=10s", + }, + }} + + conf := testnet.NewNetworkConfig("register_db_pruning", nodeConfigs, testnet.WithObservers(observers...)) + s.net = testnet.PrepareFlowNetwork(s.T(), conf, flow.Localnet) + + // start the network + s.T().Logf("starting flow network with docker containers") + s.ctx, s.cancel = context.WithCancel(context.Background()) + + s.net.Start(s.ctx) +} + +func (s *RegisterDBPruningSuite) TestHappyPath() { + //accessNode := s.net.ContainerByName(s.accessNodeName) + observerNode := s.net.ContainerByName(s.observerNodeName) + + waitingBlockHeight := uint64(200) + s.waitUntilExecutionDataForBlockIndexed(observerNode, waitingBlockHeight) + s.net.StopContainers() + + pebbleAN := s.getPebbleDB(s.accessNodeName) + registerAN := s.nodeRegisterStorage(pebbleAN) + + pebbleON := s.getPebbleDB(s.observerNodeName) + registerON := s.nodeRegisterStorage(pebbleON) + + assert.Equal(s.T(), registerAN.LatestHeight(), registerON.LatestHeight()) +} + +func (s *RegisterDBPruningSuite) getPebbleDBPath(containerName string) string { + return filepath.Join(testnet.DefaultExecutionStateDir, containerName) +} + +func (s *RegisterDBPruningSuite) getPebbleDB(containerName string) *pebble.DB { + if s.pebbleDb == nil { + var err error + s.pebbleDb, err = pstorage.OpenRegisterPebbleDB(s.getPebbleDBPath(containerName)) + require.NoError(s.T(), err, "could not open db") + } + return s.pebbleDb +} + +func (s *RegisterDBPruningSuite) nodeRegisterStorage(db *pebble.DB) *pstorage.Registers { + registers, err := pstorage.NewRegisters(db) + s.Require().NoError(err) + return registers +} + +// getGRPCClient is the helper func to create an access api client +func (s *RegisterDBPruningSuite) getGRPCClient(address string) (accessproto.AccessAPIClient, error) { + conn, err := grpc.Dial(address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + + client := accessproto.NewAccessAPIClient(conn) + return client, nil +} + +// waitUntilExecutionDataForBlockIndexed waits until the execution data for the specified block height is indexed. +// It subscribes to events from the start height and waits until the execution data for the specified block height is indexed. +func (s *RegisterDBPruningSuite) waitUntilExecutionDataForBlockIndexed(observerNode *testnet.Container, waitingBlockHeight uint64) { + grpcClient, err := s.getGRPCClient(observerNode.Addr(testnet.GRPCPort)) + s.Require().NoError(err) + + // creating execution data api client + client, err := getClient(fmt.Sprintf("localhost:%s", observerNode.Port(testnet.ExecutionStatePort))) + s.Require().NoError(err) + + // pause until the observer node start indexing blocks, + // getting events from 1-nd block to make sure that 1-st block already indexed, and we can start subscribing + s.Require().Eventually(func() bool { + _, err := grpcClient.GetEventsForHeightRange(s.ctx, &accessproto.GetEventsForHeightRangeRequest{ + Type: sdk.EventAccountCreated, + StartHeight: 1, + EndHeight: 1, + EventEncodingVersion: entities.EventEncodingVersion_CCF_V0, + }) + + return err == nil + }, 2*time.Minute, 10*time.Second) + + // subscribe on events till waitingBlockHeight to make sure that execution data for block indexed till waitingBlockHeight and pruner + // pruned execution data at least once + // SubscribeEventsFromStartHeight used as subscription here because we need to make sure that execution data are already indexed + stream, err := client.SubscribeEventsFromStartHeight(s.ctx, &executiondata.SubscribeEventsFromStartHeightRequest{ + EventEncodingVersion: entities.EventEncodingVersion_CCF_V0, + Filter: &executiondata.EventFilter{}, + HeartbeatInterval: 1, + StartBlockHeight: 0, + }) + s.Require().NoError(err) + eventsChan, errChan, err := SubscribeHandler(s.ctx, stream.Recv, eventsResponseHandler) + s.Require().NoError(err) + + duration := 3 * time.Minute + for { + select { + case err := <-errChan: + s.Require().NoErrorf(err, "unexpected %s error", s.observerNodeName) + case event := <-eventsChan: + if event.Height >= waitingBlockHeight { + return + } + case <-time.After(duration): + s.T().Fatalf("failed to index to %d block within %s", waitingBlockHeight, duration.String()) + } + } +} From 7ffc1a4b36b30035780fee25445786c8001c180e Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Mon, 23 Sep 2024 10:49:17 +0300 Subject: [PATCH 26/31] db path fix --- integration/testnet/container.go | 4 ++++ .../cohort3/register_db_pruning_test.go | 19 +++++++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/integration/testnet/container.go b/integration/testnet/container.go index e466363b3e4..453551f004b 100644 --- a/integration/testnet/container.go +++ b/integration/testnet/container.go @@ -257,6 +257,10 @@ func (c *Container) DBPath() string { return filepath.Join(c.datadir, DefaultFlowDBDir) } +func (c *Container) PebbleDBPath() string { + return filepath.Join(c.datadir, DefaultExecutionStateDir) +} + func (c *Container) ExecutionDataDBPath() string { return filepath.Join(c.datadir, DefaultExecutionDataServiceDir) } diff --git a/integration/tests/access/cohort3/register_db_pruning_test.go b/integration/tests/access/cohort3/register_db_pruning_test.go index a3b3eccd43f..a53899a759c 100644 --- a/integration/tests/access/cohort3/register_db_pruning_test.go +++ b/integration/tests/access/cohort3/register_db_pruning_test.go @@ -3,7 +3,6 @@ package cohort3 import ( "context" "fmt" - "path/filepath" "testing" "time" @@ -70,7 +69,7 @@ func (s *RegisterDBPruningSuite) SetupTest() { testnet.WithAdditionalFlagf("--execution-data-dir=%s", testnet.DefaultExecutionDataServiceDir), testnet.WithAdditionalFlag("--execution-data-retry-delay=1s"), testnet.WithAdditionalFlag("--execution-data-indexing-enabled=true"), - testnet.WithAdditionalFlagf("--execution-state-dir=%s", s.getPebbleDBPath(s.accessNodeName)), + testnet.WithAdditionalFlagf("--execution-state-dir=%s", testnet.DefaultExecutionStateDir), testnet.WithAdditionalFlagf("--public-network-execution-data-sync-enabled=true"), testnet.WithAdditionalFlagf("--event-query-mode=local-only"), testnet.WithAdditionalFlag("--registerdb-pruning-enabled=true"), @@ -105,7 +104,7 @@ func (s *RegisterDBPruningSuite) SetupTest() { LogLevel: zerolog.InfoLevel, AdditionalFlags: []string{ fmt.Sprintf("--execution-data-dir=%s", testnet.DefaultExecutionDataServiceDir), - fmt.Sprintf("--execution-state-dir=%s", s.getPebbleDBPath(s.observerNodeName)), + fmt.Sprintf("--execution-state-dir=%s", testnet.DefaultExecutionStateDir), "--execution-data-sync-enabled=true", "--execution-data-indexing-enabled=true", "--execution-data-retry-delay=1s", @@ -128,30 +127,26 @@ func (s *RegisterDBPruningSuite) SetupTest() { } func (s *RegisterDBPruningSuite) TestHappyPath() { - //accessNode := s.net.ContainerByName(s.accessNodeName) + accessNode := s.net.ContainerByName(s.accessNodeName) observerNode := s.net.ContainerByName(s.observerNodeName) waitingBlockHeight := uint64(200) s.waitUntilExecutionDataForBlockIndexed(observerNode, waitingBlockHeight) s.net.StopContainers() - pebbleAN := s.getPebbleDB(s.accessNodeName) + pebbleAN := s.getPebbleDB(accessNode.PebbleDBPath()) registerAN := s.nodeRegisterStorage(pebbleAN) - pebbleON := s.getPebbleDB(s.observerNodeName) + pebbleON := s.getPebbleDB(observerNode.PebbleDBPath()) registerON := s.nodeRegisterStorage(pebbleON) assert.Equal(s.T(), registerAN.LatestHeight(), registerON.LatestHeight()) } -func (s *RegisterDBPruningSuite) getPebbleDBPath(containerName string) string { - return filepath.Join(testnet.DefaultExecutionStateDir, containerName) -} - -func (s *RegisterDBPruningSuite) getPebbleDB(containerName string) *pebble.DB { +func (s *RegisterDBPruningSuite) getPebbleDB(path string) *pebble.DB { if s.pebbleDb == nil { var err error - s.pebbleDb, err = pstorage.OpenRegisterPebbleDB(s.getPebbleDBPath(containerName)) + s.pebbleDb, err = pstorage.OpenRegisterPebbleDB(path) require.NoError(s.T(), err, "could not open db") } return s.pebbleDb From 10cb2f200fc73855f4985e0a3b47ae0ab942fc55 Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Thu, 26 Sep 2024 16:06:53 +0300 Subject: [PATCH 27/31] Fixed crashes in tests --- cmd/observer/node_builder/observer_builder.go | 13 ++++++-- .../cohort3/register_db_pruning_test.go | 32 ++++++++++++------- storage/pebble/registers_pruner.go | 7 ++-- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index d6575305598..12ec8689dcc 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -319,9 +319,10 @@ type ObserverServiceBuilder struct { // Public network peerID peer.ID - TransactionMetrics *metrics.TransactionCollector - RestMetrics *metrics.RestCollector - AccessMetrics module.AccessMetrics + TransactionMetrics *metrics.TransactionCollector + RestMetrics *metrics.RestCollector + AccessMetrics module.AccessMetrics + RegisterDBPrunerMetrics *metrics.RegisterDBPrunerCollector // grpc servers secureGrpcServer *grpcserver.GrpcServer @@ -1578,6 +1579,7 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS registerDBPruner, err := pstorage.NewRegisterPruner( node.Logger, builder.RegisterDB, + pstorage.WithPrunerMetrics(builder.RegisterDBPrunerMetrics), //pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), pstorage.WithPruneThrottleDelay(builder.registerDBPruneThrottleDelay), pstorage.WithPruneTickerInterval(builder.registerDBPruneTickerInterval), @@ -1813,11 +1815,16 @@ func (builder *ObserverServiceBuilder) enqueueRPCServer() { builder.RestMetrics = m return nil }) + builder.Module("register db metrics", func(node *cmd.NodeConfig) error { + builder.RegisterDBPrunerMetrics = metrics.NewRegisterDBPrunerCollector() + return nil + }) builder.Module("access metrics", func(node *cmd.NodeConfig) error { builder.AccessMetrics = metrics.NewAccessCollector( metrics.WithTransactionMetrics(builder.TransactionMetrics), metrics.WithBackendScriptsMetrics(builder.TransactionMetrics), metrics.WithRestMetrics(builder.RestMetrics), + metrics.WithRegisterDBPrunerMetrics(builder.RegisterDBPrunerMetrics), ) return nil }) diff --git a/integration/tests/access/cohort3/register_db_pruning_test.go b/integration/tests/access/cohort3/register_db_pruning_test.go index a53899a759c..6d789b81760 100644 --- a/integration/tests/access/cohort3/register_db_pruning_test.go +++ b/integration/tests/access/cohort3/register_db_pruning_test.go @@ -111,8 +111,7 @@ func (s *RegisterDBPruningSuite) SetupTest() { "--event-query-mode=local-only", "--local-service-api-enabled=true", "--registerdb-pruning-enabled=true", - "--registerdb-prune-throttle-delay=1s", - "--registerdb-prune-ticker-interval=10s", + "--registerdb-prune-ticker-interval=5s", }, }} @@ -134,22 +133,33 @@ func (s *RegisterDBPruningSuite) TestHappyPath() { s.waitUntilExecutionDataForBlockIndexed(observerNode, waitingBlockHeight) s.net.StopContainers() + //1. Get AN register height boundaries pebbleAN := s.getPebbleDB(accessNode.PebbleDBPath()) - registerAN := s.nodeRegisterStorage(pebbleAN) + firstHeightAN, latestHeightAN, err := pstorage.ReadHeightsFromBootstrappedDB(pebbleAN) + require.NoError(s.T(), err, "could not read from AN db") + //2. Get AN register height boundaries pebbleON := s.getPebbleDB(observerNode.PebbleDBPath()) - registerON := s.nodeRegisterStorage(pebbleON) + firstHeightON, latestHeightON, err := pstorage.ReadHeightsFromBootstrappedDB(pebbleON) + require.NoError(s.T(), err, "could not read from ON db") - assert.Equal(s.T(), registerAN.LatestHeight(), registerON.LatestHeight()) + //4. Check heights for equality on both AN and ON nodes + assert.Equal(s.T(), firstHeightAN, firstHeightON) + assert.Equal(s.T(), latestHeightAN, latestHeightON) + + //5. Check if first height is less or equal predicted first height + predictedPruneThreshold := uint64(5) + registerAN := s.nodeRegisterStorage(pebbleAN) + assert.LessOrEqual(s.T(), registerAN.LatestHeight()-predictedPruneThreshold, firstHeightAN) + registerON := s.nodeRegisterStorage(pebbleON) + assert.LessOrEqual(s.T(), registerON.LatestHeight()-predictedPruneThreshold, firstHeightON) } func (s *RegisterDBPruningSuite) getPebbleDB(path string) *pebble.DB { - if s.pebbleDb == nil { - var err error - s.pebbleDb, err = pstorage.OpenRegisterPebbleDB(path) - require.NoError(s.T(), err, "could not open db") - } - return s.pebbleDb + pebbleDb, err := pstorage.OpenRegisterPebbleDB(path) + require.NoError(s.T(), err, "could not open db") + + return pebbleDb } func (s *RegisterDBPruningSuite) nodeRegisterStorage(db *pebble.DB) *pstorage.Registers { diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 19491bfd198..c10c2145483 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -15,7 +15,8 @@ import ( ) const ( - DefaultPruneThreshold = uint64(100_000) + // DefaultPruneThreshold TODO: The value is temporary due to PruneThreshold PR not merged yet + DefaultPruneThreshold = uint64(5) DefaultPruneThrottleDelay = 10 * time.Millisecond DefaultPruneTickerInterval = 10 * time.Minute ) @@ -263,7 +264,9 @@ func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, pruneHeight uint64 return err } - p.metrics.Pruned(pruneHeight, time.Since(start)) + if p.metrics != nil { + p.metrics.Pruned(pruneHeight, time.Since(start)) + } return nil } From c238bac0c611aa6b38e956d03eaccd0b01be7d10 Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Fri, 1 Nov 2024 22:06:06 +0200 Subject: [PATCH 28/31] Fixed remarks --- .../node_builder/access_node_builder.go | 2 +- cmd/observer/node_builder/observer_builder.go | 2 +- module/metrics.go | 13 +- module/metrics/noop.go | 4 +- module/metrics/registers_db_pruner.go | 83 +------- module/mock/access_metrics.go | 21 +- module/mock/register_db_pruner_metrics.go | 27 +-- storage/pebble/db_pruner.go | 96 +++++++++ storage/pebble/interval_worker.go | 35 ++++ storage/pebble/registers_pruner.go | 191 ++++++------------ ...runer_test.go => registers_pruner_test.go} | 0 11 files changed, 211 insertions(+), 263 deletions(-) create mode 100644 storage/pebble/db_pruner.go create mode 100644 storage/pebble/interval_worker.go rename storage/pebble/{register_pruner_test.go => registers_pruner_test.go} (100%) diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index 8d99863f466..dfcd29cc24d 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -1042,7 +1042,7 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess node.Logger, builder.RegisterDB, pstorage.WithPrunerMetrics(builder.RegisterDBPrunerMetrics), - //pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), + pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), pstorage.WithPruneThrottleDelay(builder.registerDBPruneThrottleDelay), pstorage.WithPruneTickerInterval(builder.registerDBPruneTickerInterval), ) diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index a800d501939..7b9abfefcc3 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -1503,7 +1503,7 @@ func (builder *ObserverServiceBuilder) BuildExecutionSyncComponents() *ObserverS node.Logger, builder.RegisterDB, pstorage.WithPrunerMetrics(builder.RegisterDBPrunerMetrics), - //pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), + pstorage.WithPruneThreshold(builder.registerDBPruneThreshold), pstorage.WithPruneThrottleDelay(builder.registerDBPruneThrottleDelay), pstorage.WithPruneTickerInterval(builder.registerDBPruneTickerInterval), ) diff --git a/module/metrics.go b/module/metrics.go index 379a6ca3440..f4bb9c1bb6f 100644 --- a/module/metrics.go +++ b/module/metrics.go @@ -920,17 +920,8 @@ type AccessMetrics interface { } type RegisterDBPrunerMetrics interface { - // Pruned tracks the last pruned height and the pruning operation duration - Pruned(height uint64, duration time.Duration) - - // NumberOfBlocksPruned tracks the number of blocks that were pruned during the operation. - NumberOfBlocksPruned(blocks uint64) - - // NumberOfRowsPruned tracks the number of rows that were pruned during the operation. - NumberOfRowsPruned(rows uint64) - - // ElementVisited records the element that were visited during the pruning operation. - ElementVisited() + // LatestPrunedHeightWithProgressPercentage tracks the percentage progress of pruning. + LatestPrunedHeightWithProgressPercentage(lastPrunedHeight uint64) } type ExecutionResultStats struct { diff --git a/module/metrics/noop.go b/module/metrics/noop.go index babc96226ee..72e13d768e8 100644 --- a/module/metrics/noop.go +++ b/module/metrics/noop.go @@ -239,9 +239,7 @@ func (nc *NoopCollector) RequestFailed(duration time.Duration, retryable bool) func (nc *NoopCollector) RequestCanceled() {} func (nc *NoopCollector) ResponseDropped() {} func (nc *NoopCollector) Pruned(height uint64, duration time.Duration) {} -func (nc *NoopCollector) NumberOfRowsPruned(rows uint64) {} -func (nc *NoopCollector) ElementVisited() {} -func (nc *NoopCollector) NumberOfBlocksPruned(blocks uint64) {} +func (nc *NoopCollector) LatestPrunedHeightWithProgressPercentage(lastPrunedHeight uint64) {} func (nc *NoopCollector) UpdateCollectionMaxHeight(height uint64) {} func (nc *NoopCollector) BucketAvailableSlots(uint64, uint64) {} func (nc *NoopCollector) OnKeyPutSuccess(uint32) {} diff --git a/module/metrics/registers_db_pruner.go b/module/metrics/registers_db_pruner.go index 1e44364ad28..638fb28b35e 100644 --- a/module/metrics/registers_db_pruner.go +++ b/module/metrics/registers_db_pruner.go @@ -1,23 +1,16 @@ package metrics import ( - "time" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/onflow/flow-go/module" ) -// RegisterDBPrunerCollector collects metrics for the database pruning process, -// including the durations of pruning operations, the latest height that has been pruned, -// the number of blocks pruned, the number of rows pruned, and the number of elements visited. +// RegisterDBPrunerCollector collects metrics for the database pruning process, the latest height that has been pruned, +// and the pruning progress. type RegisterDBPrunerCollector struct { - pruneDurations prometheus.Summary // The pruning operation durations in milliseconds. - latestHeightPruned prometheus.Gauge // The latest pruned block height. - numberOfBlocksPruned prometheus.Gauge // The number of blocks pruned. - numberOfRowsPruned prometheus.Gauge // The number of rows pruned. - numberOfElementsVisited prometheus.Gauge // The number of elements visited during pruning. + lastPrunedHeight prometheus.Gauge // The last pruned block height. } var _ module.RegisterDBPrunerMetrics = (*RegisterDBPrunerCollector)(nil) @@ -26,75 +19,15 @@ var _ module.RegisterDBPrunerMetrics = (*RegisterDBPrunerCollector)(nil) // with pre-configured Prometheus metrics for monitoring the pruning process. func NewRegisterDBPrunerCollector() *RegisterDBPrunerCollector { return &RegisterDBPrunerCollector{ - pruneDurations: promauto.NewSummary(prometheus.SummaryOpts{ - Name: "prune_durations_ms", - Namespace: namespaceAccess, - Subsystem: subsystemRegisterDBPruner, - Help: "The durations of pruning operations in milliseconds.", - Objectives: map[float64]float64{ - 0.01: 0.001, - 0.1: 0.01, - 0.25: 0.025, - 0.5: 0.05, - 0.75: 0.025, - 0.9: 0.01, - 0.99: 0.001, - }, - }), - latestHeightPruned: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "latest_height_pruned", - Namespace: namespaceAccess, - Subsystem: subsystemRegisterDBPruner, - Help: "The latest block height that has been pruned.", - }), - numberOfBlocksPruned: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "number_of_blocks_pruned_total", - Namespace: namespaceAccess, - Subsystem: subsystemRegisterDBPruner, - Help: "The number of blocks that have been pruned.", - }), - numberOfRowsPruned: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "number_of_rows_pruned_total", + lastPrunedHeight: promauto.NewGauge(prometheus.GaugeOpts{ + Name: "last_pruned_height", Namespace: namespaceAccess, Subsystem: subsystemRegisterDBPruner, - Help: "The number of rows that have been pruned.", - }), - numberOfElementsVisited: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "number_of_elements_visited_total", - Namespace: namespaceAccess, - Subsystem: subsystemRegisterDBPruner, - Help: "The number of elements that have been visited.", + Help: "The last block height that has been pruned.", }), } } -// Pruned records the duration of a pruning operation and updates the latest pruned height. -// -// Parameters: -// - height: The height of the block that was pruned. -// - duration: The time taken to complete the pruning operation. -func (c *RegisterDBPrunerCollector) Pruned(height uint64, duration time.Duration) { - c.pruneDurations.Observe(float64(duration.Milliseconds())) - c.latestHeightPruned.Set(float64(height)) -} - -// NumberOfRowsPruned records the number of rows that were pruned during the pruning operation. -// -// Parameters: -// - rows: The number of rows that were pruned. -func (c *RegisterDBPrunerCollector) NumberOfRowsPruned(rows uint64) { - c.numberOfRowsPruned.Add(float64(rows)) -} - -// ElementVisited records the element that were visited during the pruning operation. -func (c *RegisterDBPrunerCollector) ElementVisited() { - c.numberOfElementsVisited.Inc() -} - -// NumberOfBlocksPruned tracks the number of blocks that were pruned during the operation. -// -// Parameters: -// - blocks: The number of blocks that were pruned. -func (c *RegisterDBPrunerCollector) NumberOfBlocksPruned(blocks uint64) { - c.numberOfBlocksPruned.Add(float64(blocks)) +func (c *RegisterDBPrunerCollector) LatestPrunedHeightWithProgressPercentage(lastPrunedHeight uint64) { + c.lastPrunedHeight.Set(float64(lastPrunedHeight)) } diff --git a/module/mock/access_metrics.go b/module/mock/access_metrics.go index fc2677ec5f3..82ceb09fc96 100644 --- a/module/mock/access_metrics.go +++ b/module/mock/access_metrics.go @@ -53,9 +53,9 @@ func (_m *AccessMetrics) ConnectionFromPoolUpdated() { _m.Called() } -// ElementVisited provides a mock function with given fields: -func (_m *AccessMetrics) ElementVisited() { - _m.Called() +// LatestPrunedHeightWithProgressPercentage provides a mock function with given fields: lastPrunedHeight +func (_m *AccessMetrics) LatestPrunedHeightWithProgressPercentage(lastPrunedHeight uint64) { + _m.Called(lastPrunedHeight) } // NewConnectionEstablished provides a mock function with given fields: @@ -63,16 +63,6 @@ func (_m *AccessMetrics) NewConnectionEstablished() { _m.Called() } -// NumberOfBlocksPruned provides a mock function with given fields: blocks -func (_m *AccessMetrics) NumberOfBlocksPruned(blocks uint64) { - _m.Called(blocks) -} - -// NumberOfRowsPruned provides a mock function with given fields: rows -func (_m *AccessMetrics) NumberOfRowsPruned(rows uint64) { - _m.Called(rows) -} - // ObserveHTTPRequestDuration provides a mock function with given fields: ctx, props, duration func (_m *AccessMetrics) ObserveHTTPRequestDuration(ctx context.Context, props metrics.HTTPReqProperties, duration time.Duration) { _m.Called(ctx, props, duration) @@ -83,11 +73,6 @@ func (_m *AccessMetrics) ObserveHTTPResponseSize(ctx context.Context, props metr _m.Called(ctx, props, sizeBytes) } -// Pruned provides a mock function with given fields: height, duration -func (_m *AccessMetrics) Pruned(height uint64, duration time.Duration) { - _m.Called(height, duration) -} - // ScriptExecuted provides a mock function with given fields: dur, size func (_m *AccessMetrics) ScriptExecuted(dur time.Duration, size int) { _m.Called(dur, size) diff --git a/module/mock/register_db_pruner_metrics.go b/module/mock/register_db_pruner_metrics.go index a34eebdc053..ac5ae6e5217 100644 --- a/module/mock/register_db_pruner_metrics.go +++ b/module/mock/register_db_pruner_metrics.go @@ -2,35 +2,16 @@ package mock -import ( - mock "github.com/stretchr/testify/mock" - - time "time" -) +import mock "github.com/stretchr/testify/mock" // RegisterDBPrunerMetrics is an autogenerated mock type for the RegisterDBPrunerMetrics type type RegisterDBPrunerMetrics struct { mock.Mock } -// ElementVisited provides a mock function with given fields: -func (_m *RegisterDBPrunerMetrics) ElementVisited() { - _m.Called() -} - -// NumberOfBlocksPruned provides a mock function with given fields: blocks -func (_m *RegisterDBPrunerMetrics) NumberOfBlocksPruned(blocks uint64) { - _m.Called(blocks) -} - -// NumberOfRowsPruned provides a mock function with given fields: rows -func (_m *RegisterDBPrunerMetrics) NumberOfRowsPruned(rows uint64) { - _m.Called(rows) -} - -// Pruned provides a mock function with given fields: height, duration -func (_m *RegisterDBPrunerMetrics) Pruned(height uint64, duration time.Duration) { - _m.Called(height, duration) +// LatestPrunedHeightWithProgressPercentage provides a mock function with given fields: lastPrunedHeight +func (_m *RegisterDBPrunerMetrics) LatestPrunedHeightWithProgressPercentage(lastPrunedHeight uint64) { + _m.Called(lastPrunedHeight) } // NewRegisterDBPrunerMetrics creates a new instance of RegisterDBPrunerMetrics. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. diff --git a/storage/pebble/db_pruner.go b/storage/pebble/db_pruner.go new file mode 100644 index 00000000000..1ba31056131 --- /dev/null +++ b/storage/pebble/db_pruner.go @@ -0,0 +1,96 @@ +package pebble + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/cockroachdb/pebble" + "github.com/rs/zerolog" + + "github.com/onflow/flow-go/model/flow" +) + +type DBPruner struct { + logger zerolog.Logger + // pruneThrottleDelay controls a small pause between batches of registers inspected and pruned + pruneThrottleDelay time.Duration + pruneHeight uint64 + + dbBatch *pebble.Batch + lastRegisterID flow.RegisterID + keepKeyFound bool +} + +var _ io.Closer = (*DBPruner)(nil) + +func NewDBPruner(db *pebble.DB, logger zerolog.Logger, pruneThrottleDelay time.Duration, pruneHeight uint64) *DBPruner { + return &DBPruner{ + logger: logger, + pruneThrottleDelay: pruneThrottleDelay, + pruneHeight: pruneHeight, + dbBatch: db.NewBatch(), + } +} + +// BatchDelete deletes the provided keys from the database in a single batch operation. +// It creates a new batch, deletes each key from the batch, and commits the batch to ensure +// that the deletions are applied atomically. +// +// Parameters: +// - ctx: The context for managing the pruning throttle delay operation. +// - lookupKeys: A slice of keys to be deleted from the database. +// +// No errors are expected during normal operations. +func (p *DBPruner) BatchDelete(ctx context.Context, lookupKeys [][]byte) error { + defer p.dbBatch.Reset() + + for _, key := range lookupKeys { + if err := p.dbBatch.Delete(key, nil); err != nil { + keyHeight, registerID, _ := lookupKeyToRegisterID(key) + return fmt.Errorf("failed to delete lookupKey: %w %d %v", err, keyHeight, registerID) + } + } + + if err := p.dbBatch.Commit(pebble.Sync); err != nil { + return fmt.Errorf("failed to commit batch: %w", err) + } + + // Throttle to prevent excessive system load + select { + case <-ctx.Done(): + case <-time.After(p.pruneThrottleDelay): + } + + return nil +} + +func (p *DBPruner) CanPruneKey(key []byte) (bool, error) { + keyHeight, registerID, err := lookupKeyToRegisterID(key) + if err != nil { + return false, fmt.Errorf("malformed lookup key %v: %w", key, err) + } + + // New register prefix, reset the state + if !p.keepKeyFound || p.lastRegisterID != registerID { + p.keepKeyFound = false + p.lastRegisterID = registerID + } + + if keyHeight > p.pruneHeight { + return false, nil + } + + if !p.keepKeyFound { + // Keep the first entry found for this registerID that is <= pruneHeight + p.keepKeyFound = true + return false, nil + } + + return true, nil +} + +func (p *DBPruner) Close() error { + return p.dbBatch.Close() +} diff --git a/storage/pebble/interval_worker.go b/storage/pebble/interval_worker.go new file mode 100644 index 00000000000..6ecf5e37d4c --- /dev/null +++ b/storage/pebble/interval_worker.go @@ -0,0 +1,35 @@ +package pebble + +import ( + "context" + "time" +) + +// IntervalWorker runs a periodic task with support for context cancellation. +type IntervalWorker struct { + interval time.Duration + timer *time.Timer +} + +// NewIntervalWorker initializes a new IntervalWorker. +func NewIntervalWorker(interval time.Duration) *IntervalWorker { + return &IntervalWorker{ + interval: interval, + timer: time.NewTimer(interval), + } +} + +// Run starts the worker and calls the provided function periodically. +// It stops and returns if the context is canceled. +func (iw *IntervalWorker) Run(ctx context.Context, f func()) { + defer iw.timer.Stop() + for { + select { + case <-ctx.Done(): + // Stop if the context is canceled + return + case <-iw.timer.C: + f() + } + } +} diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index 7a969166c45..ad78674a373 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -1,6 +1,7 @@ package pebble import ( + "bytes" "context" "fmt" "time" @@ -8,7 +9,6 @@ import ( "github.com/cockroachdb/pebble" "github.com/rs/zerolog" - "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" "github.com/onflow/flow-go/module/component" "github.com/onflow/flow-go/module/irrecoverable" @@ -115,20 +115,14 @@ func NewRegisterPruner( // of registered height recorders and checks if pruning is necessary. func (p *RegisterPruner) loop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { ready() - ticker := time.NewTicker(p.pruneTickerInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - err := p.checkPrune(ctx) - if err != nil { - ctx.Throw(err) - } + + worker := NewIntervalWorker(p.pruneTickerInterval) + + worker.Run(ctx, func() { + if err := p.checkPrune(ctx); err != nil { + ctx.Throw(err) } - } + }) } // checkPrune checks if pruning should be performed based on the height range @@ -149,24 +143,15 @@ func (p *RegisterPruner) checkPrune(ctx context.Context) error { return fmt.Errorf("failed to get latest height from register storage: %w", err) } - if latestHeight-firstHeight > p.pruneInterval+p.pruneThreshold { - pruneHeight := latestHeight - p.pruneThreshold - p.logger.Info().Uint64("prune_height", pruneHeight).Msg("pruning storage") - - // first, update firstHeight in the db - // this ensures that if the node crashes during pruning, there will still be a consistent - // view when the node starts up. Subsequent prunes will remove any leftover data. - err = p.updateFirstStoredHeight(pruneHeight) - if err != nil { - return fmt.Errorf("failed to update first height for register storage: %w", err) - } + if latestHeight-firstHeight <= p.pruneInterval+p.pruneThreshold { + return nil + } - err := p.pruneUpToHeight(ctx, pruneHeight) - if err != nil { - return fmt.Errorf("failed to prune: %w", err) - } + pruneHeight := latestHeight - p.pruneThreshold + p.logger.Info().Uint64("prune_height", pruneHeight).Msg("pruning storage") - p.metrics.NumberOfBlocksPruned(pruneHeight - firstHeight) + if err = p.pruneUpToHeight(ctx, p.db, pruneHeight); err != nil { + return fmt.Errorf("failed to prune: %w", err) } return nil @@ -185,125 +170,69 @@ func (p *RegisterPruner) checkPrune(ctx context.Context) error { // - pruneHeight: The maximum height of entries to prune. // // No errors are expected during normal operations. -func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, pruneHeight uint64) error { - prefix := []byte{codeRegister} - var batchKeysToRemove [][]byte +func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, r pebble.Reader, pruneHeight uint64) error { + // first, update firstHeight in the db + // this ensures that if the node crashes during pruning, there will still be a consistent + // view when the node starts up. Subsequent prunes will remove any leftover data. + if err := p.updateFirstStoredHeight(pruneHeight); err != nil { + return fmt.Errorf("failed to update first height for register storage: %w", err) + } - start := time.Now() + dbPruner := NewDBPruner(p.db, p.logger, p.pruneThrottleDelay, pruneHeight) - err := func(r pebble.Reader) error { - options := pebble.IterOptions{ - LowerBound: prefix, - UpperBound: []byte{codeFirstBlockHeight}, - } + prefix := []byte{codeRegister} + it, err := r.NewIter(&pebble.IterOptions{ + LowerBound: prefix, + UpperBound: []byte{codeFirstBlockHeight}, + }) + if err != nil { + return fmt.Errorf("cannot create iterator: %w", err) + } - it, err := r.NewIter(&options) - if err != nil { - return fmt.Errorf("can not create iterator: %w", err) + defer func() { + if cerr := it.Close(); cerr != nil { + p.logger.Err(cerr).Msg("error while closing the iterator") } - defer it.Close() - - var lastRegisterID flow.RegisterID - var keepKeyFound bool - - for it.SeekGE(prefix); it.Valid(); it.Next() { - key := it.Key() - keyHeight, registerID, err := lookupKeyToRegisterID(key) - if err != nil { - return fmt.Errorf("malformed lookup key %v: %w", key, err) - } + if cerr := dbPruner.Close(); cerr != nil { + p.logger.Err(cerr).Msg("error while closing the db pruner") + } + }() - if p.metrics != nil { - p.metrics.ElementVisited() - } + var batchKeysToRemove [][]byte - // New register prefix, reset the state - if !keepKeyFound || lastRegisterID != registerID { - keepKeyFound = false - lastRegisterID = registerID - } + for it.SeekGE(prefix); it.Valid(); it.Next() { + key := it.Key() - if keyHeight > pruneHeight { - continue - } - - if !keepKeyFound { - // Keep the first entry found for this registerID that is <= pruneHeight - keepKeyFound = true - continue - } + canPruneKey, err := dbPruner.CanPruneKey(key) + if err != nil { + return fmt.Errorf("cannot check key %v for pruning, reason: %w", key, err) + } - // Otherwise, mark this key for removal - // Create a copy of the key to avoid memory issues - keyCopy := make([]byte, len(key)) - copy(keyCopy, key) - batchKeysToRemove = append(batchKeysToRemove, keyCopy) - - if len(batchKeysToRemove) == deleteItemsPerBatch { - // Perform batch delete - if err := p.batchDelete(ctx, batchKeysToRemove); err != nil { - return err - } - batchKeysToRemove = nil - } + if !canPruneKey { + continue } - if len(batchKeysToRemove) > 0 { - // Perform the final batch delete if there are any remaining keys - if err := p.batchDelete(ctx, batchKeysToRemove); err != nil { + // Create a copy of the key to avoid memory issues + batchKeysToRemove = append(batchKeysToRemove, bytes.Clone(key)) + + if len(batchKeysToRemove) == deleteItemsPerBatch { + // Perform batch delete + if err := dbPruner.BatchDelete(ctx, batchKeysToRemove); err != nil { return err } + // Reset batchKeysToRemove to empty slice while retaining capacity + batchKeysToRemove = batchKeysToRemove[:0] } - - return nil - }(p.db) - - if err != nil { - return err } - if p.metrics != nil { - p.metrics.Pruned(pruneHeight, time.Since(start)) - } - - return nil -} - -// batchDelete deletes the provided keys from the database in a single batch operation. -// It creates a new batch, deletes each key from the batch, and commits the batch to ensure -// that the deletions are applied atomically. -// -// Parameters: -// - ctx: The context for managing the pruning throttle delay operation. -// - lookupKeys: A slice of keys to be deleted from the database. -// -// No errors are expected during normal operations. -func (p *RegisterPruner) batchDelete(ctx context.Context, lookupKeys [][]byte) error { - batch := p.db.NewBatch() - defer batch.Close() - - for _, key := range lookupKeys { - if err := batch.Delete(key, nil); err != nil { - keyHeight, registerID, _ := lookupKeyToRegisterID(key) - return fmt.Errorf("failed to delete lookupKey: %w %d %v", err, keyHeight, registerID) + if len(batchKeysToRemove) > 0 { + // Perform the final batch delete if there are any remaining keys + if err := dbPruner.BatchDelete(ctx, batchKeysToRemove); err != nil { + return err } } - if err := batch.Commit(pebble.Sync); err != nil { - return fmt.Errorf("failed to commit batch: %w", err) - } - - if p.metrics != nil { - p.metrics.NumberOfRowsPruned(uint64(len(lookupKeys))) - } - - // Throttle to prevent excessive system load - select { - case <-ctx.Done(): - case <-time.After(p.pruneThrottleDelay): - } - return nil } @@ -315,5 +244,5 @@ func (p *RegisterPruner) batchDelete(ctx context.Context, lookupKeys [][]byte) e // // No errors are expected during normal operations. func (p *RegisterPruner) updateFirstStoredHeight(height uint64) error { - return p.db.Set(firstHeightKey, encodedUint64(height), nil) + return p.db.Set(firstHeightKey, encodedUint64(height), pebble.Sync) } diff --git a/storage/pebble/register_pruner_test.go b/storage/pebble/registers_pruner_test.go similarity index 100% rename from storage/pebble/register_pruner_test.go rename to storage/pebble/registers_pruner_test.go From c4bf9f30700c703c3445c7abf70fc6c763b63a57 Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Mon, 4 Nov 2024 14:37:14 +0200 Subject: [PATCH 29/31] changed naming of test data --- storage/pebble/testutil.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/storage/pebble/testutil.go b/storage/pebble/testutil.go index e58c4bc7317..3be4231e6d2 100644 --- a/storage/pebble/testutil.go +++ b/storage/pebble/testutil.go @@ -42,14 +42,15 @@ func RunWithRegistersStorageWithInitialData( r, err := NewRegisters(db, 5) require.NoError(tb, err) - keys := make([]uint64, 0, len(data)) - for k := range data { - keys = append(keys, k) + heights := make([]uint64, 0, len(data)) + for h := range data { + heights = append(heights, h) } - sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] }) + // Should sort heights before store them through register, as they are not stored in order in test data + sort.Slice(heights, func(i, j int) bool { return heights[i] < heights[j] }) - // Iterate over the map in ascending order by key - for _, height := range keys { + // Iterate over the heights in ascending order + for _, height := range heights { err = r.Store(data[height], height) require.NoError(tb, err) } From d086c1b007fc2e70304325c970911705580912cd Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Mon, 4 Nov 2024 16:35:10 +0200 Subject: [PATCH 30/31] Added log for total keys pruned and duration --- storage/pebble/db_pruner.go | 9 +++++++++ storage/pebble/registers_pruner.go | 8 ++++++++ storage/pebble/testutil.go | 8 ++++---- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/storage/pebble/db_pruner.go b/storage/pebble/db_pruner.go index 1ba31056131..91f45e52375 100644 --- a/storage/pebble/db_pruner.go +++ b/storage/pebble/db_pruner.go @@ -21,6 +21,8 @@ type DBPruner struct { dbBatch *pebble.Batch lastRegisterID flow.RegisterID keepKeyFound bool + + totalKeysPruned int } var _ io.Closer = (*DBPruner)(nil) @@ -31,6 +33,7 @@ func NewDBPruner(db *pebble.DB, logger zerolog.Logger, pruneThrottleDelay time.D pruneThrottleDelay: pruneThrottleDelay, pruneHeight: pruneHeight, dbBatch: db.NewBatch(), + totalKeysPruned: 0, } } @@ -57,6 +60,8 @@ func (p *DBPruner) BatchDelete(ctx context.Context, lookupKeys [][]byte) error { return fmt.Errorf("failed to commit batch: %w", err) } + p.totalKeysPruned += len(lookupKeys) + // Throttle to prevent excessive system load select { case <-ctx.Done(): @@ -91,6 +96,10 @@ func (p *DBPruner) CanPruneKey(key []byte) (bool, error) { return true, nil } +func (p *DBPruner) TotalKeysPruned() int { + return p.totalKeysPruned +} + func (p *DBPruner) Close() error { return p.dbBatch.Close() } diff --git a/storage/pebble/registers_pruner.go b/storage/pebble/registers_pruner.go index ad78674a373..a9257ea4ecf 100644 --- a/storage/pebble/registers_pruner.go +++ b/storage/pebble/registers_pruner.go @@ -171,6 +171,8 @@ func (p *RegisterPruner) checkPrune(ctx context.Context) error { // // No errors are expected during normal operations. func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, r pebble.Reader, pruneHeight uint64) error { + start := time.Now() + // first, update firstHeight in the db // this ensures that if the node crashes during pruning, there will still be a consistent // view when the node starts up. Subsequent prunes will remove any leftover data. @@ -233,6 +235,12 @@ func (p *RegisterPruner) pruneUpToHeight(ctx context.Context, r pebble.Reader, p } } + p.logger.Info().Msgf( + "Pruned %d keys in %s", + dbPruner.TotalKeysPruned(), + time.Since(start).String(), + ) + return nil } diff --git a/storage/pebble/testutil.go b/storage/pebble/testutil.go index 3be4231e6d2..1cedb2cf183 100644 --- a/storage/pebble/testutil.go +++ b/storage/pebble/testutil.go @@ -39,19 +39,19 @@ func RunWithRegistersStorageWithInitialData( f func(db *pebble.DB)) { unittest.RunWithTempDir(tb, func(dir string) { db := NewBootstrappedRegistersWithPathForTest(tb, dir, uint64(0), uint64(0)) - r, err := NewRegisters(db, 5) + registers, err := NewRegisters(db, 5) require.NoError(tb, err) heights := make([]uint64, 0, len(data)) for h := range data { heights = append(heights, h) } - // Should sort heights before store them through register, as they are not stored in order in test data + // Should sort heights before store them through register, as they are not stored in the test data map sort.Slice(heights, func(i, j int) bool { return heights[i] < heights[j] }) - // Iterate over the heights in ascending order + // Iterate over the heights in ascending order and store keys in DB through registers for _, height := range heights { - err = r.Store(data[height], height) + err = registers.Store(data[height], height) require.NoError(tb, err) } From 0f3b5aa3224148b091f80335a1cbdfbc7b7519cf Mon Sep 17 00:00:00 2001 From: Andrii Slisarchuk Date: Mon, 4 Nov 2024 18:47:50 +0200 Subject: [PATCH 31/31] Added comments and clarify check for pruning keys --- storage/pebble/db_pruner.go | 50 +++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/storage/pebble/db_pruner.go b/storage/pebble/db_pruner.go index 91f45e52375..c29c060b78b 100644 --- a/storage/pebble/db_pruner.go +++ b/storage/pebble/db_pruner.go @@ -12,15 +12,17 @@ import ( "github.com/onflow/flow-go/model/flow" ) +// DBPruner is responsible for pruning outdated register entries in a Pebble database. +// It batches deletions to improve performance and reduce the load on the system. type DBPruner struct { logger zerolog.Logger // pruneThrottleDelay controls a small pause between batches of registers inspected and pruned pruneThrottleDelay time.Duration pruneHeight uint64 - dbBatch *pebble.Batch - lastRegisterID flow.RegisterID - keepKeyFound bool + dbBatch *pebble.Batch + lastRegisterID flow.RegisterID + keepFirstRelevantKey bool totalKeysPruned int } @@ -38,7 +40,7 @@ func NewDBPruner(db *pebble.DB, logger zerolog.Logger, pruneThrottleDelay time.D } // BatchDelete deletes the provided keys from the database in a single batch operation. -// It creates a new batch, deletes each key from the batch, and commits the batch to ensure +// It resets the batch for reuse, deletes each key from the batch, and commits the batch to ensure // that the deletions are applied atomically. // // Parameters: @@ -71,35 +73,57 @@ func (p *DBPruner) BatchDelete(ctx context.Context, lookupKeys [][]byte) error { return nil } +// CanPruneKey checks if a key can be pruned based on its height and the last processed register ID. +// It ensures that only the first relevant key (the earliest entry) for each register ID is kept. +// +// Parameters: +// - key: The key to check for pruning eligibility. +// +// No errors are expected during normal operations. func (p *DBPruner) CanPruneKey(key []byte) (bool, error) { keyHeight, registerID, err := lookupKeyToRegisterID(key) if err != nil { return false, fmt.Errorf("malformed lookup key %v: %w", key, err) } - // New register prefix, reset the state - if !p.keepKeyFound || p.lastRegisterID != registerID { - p.keepKeyFound = false - p.lastRegisterID = registerID - } - + // If height is greater than prune height, the key cannot be pruned. if keyHeight > p.pruneHeight { return false, nil } - if !p.keepKeyFound { - // Keep the first entry found for this registerID that is <= pruneHeight - p.keepKeyFound = true + // In case of a new register ID, reset the state. + if p.lastRegisterID != registerID { + p.keepFirstRelevantKey = false + p.lastRegisterID = registerID + } + + // For each register ID, find the first key whose height is less than or equal to the prune height. + // This is the earliest entry to keep. For example, if pruneHeight is 99989: + // [0x01/key/owner1/99990] [keep, > 99989] + // [0x01/key/owner1/99988] [first key to keep < 99989] + // [0x01/key/owner1/85000] [remove] + // ... + // [0x01/key/owner2/99989] [first key to keep == 99989] + // [0x01/key/owner2/99988] [remove] + // ... + // [0x01/key/owner3/99988] [first key to keep < 99989] + // [0x01/key/owner3/98001] [remove] + // ... + // [0x02/key/owner0/99900] [first key to keep < 99989] + if !p.keepFirstRelevantKey { + p.keepFirstRelevantKey = true return false, nil } return true, nil } +// TotalKeysPruned returns the total number of keys that have been pruned by the DBPruner. func (p *DBPruner) TotalKeysPruned() int { return p.totalKeysPruned } +// Close closes the batch associated with the DBPruner. func (p *DBPruner) Close() error { return p.dbBatch.Close() }