75 changes: 75 additions & 0 deletions pkg/dataobj/consumer/downscale.go
@@ -0,0 +1,75 @@
package consumer

import (
	"context"
	"fmt"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"

	"github.com/grafana/loki/v3/pkg/kafka/partition"
)

type downscalePermittedFunc func(context.Context) (bool, error)

// newChainedDownscalePermittedFunc returns a downscalePermittedFunc that
// permits a downscale only if all of the passed funcs return true.
func newChainedDownscalePermittedFunc(funcs ...downscalePermittedFunc) downscalePermittedFunc {
	return func(ctx context.Context) (bool, error) {
		for _, f := range funcs {
			if ok, err := f(ctx); err != nil || !ok {
				return false, err
			}
		}
		return true, nil
	}
}

// newOffsetCommittedDownscaleFunc returns a downscalePermittedFunc that checks
// whether the consumer has committed all records up to the end offset.
func newOffsetCommittedDownscaleFunc(offsetManager *partition.KafkaOffsetManager, partitionID int32, logger log.Logger) downscalePermittedFunc {
	return func(ctx context.Context) (bool, error) {
		endOffset, err := offsetManager.PartitionOffset(ctx, partitionID, partition.KafkaEndOffset)
		if err != nil {
			return false, fmt.Errorf("failed to get end offset: %w", err)
		}
		// If the end offset is zero then no records have been produced for
		// this partition, which means we can downscale.
		if endOffset == 0 {
			level.Debug(logger).Log("msg", "no records produced for partition")
			return true, nil
		}
		// If some records have been produced for this partition, we need to
		// make sure the consumer has processed and committed all of them;
		// otherwise we risk data loss.
		lastCommittedOffset, err := offsetManager.LastCommittedOffset(ctx, partitionID)
		if err != nil {
			return false, fmt.Errorf("failed to get last committed offset: %w", err)
		}
		// The end offset is the offset of the next record, so we need to
		// subtract one to get the offset of the last record.
		isDownscalePermitted := lastCommittedOffset == endOffset-1
		if isDownscalePermitted {
			level.Debug(logger).Log(
				"msg", "all offsets have been committed",
				"last_committed_offset", lastCommittedOffset,
				"end_offset", endOffset,
			)
		} else {
			level.Debug(logger).Log(
				"msg", "there are uncommitted offsets",
				"last_committed_offset", lastCommittedOffset,
				"end_offset", endOffset,
				"delta", endOffset-lastCommittedOffset-1,
			)
		}
		return isDownscalePermitted, nil
	}
}
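
The endOffset-1 comparison is the subtle part of this check. Below is a standalone sketch of the predicate; caughtUp is a hypothetical helper name introduced only for illustration, the diff above inlines this logic.

// caughtUp reproduces the predicate used by newOffsetCommittedDownscaleFunc.
func caughtUp(lastCommittedOffset, endOffset int64) bool {
	// An end offset of zero means no records were ever produced for
	// this partition, so there is nothing left to commit.
	if endOffset == 0 {
		return true
	}
	// endOffset is the offset of the next record to be produced, so the
	// last record sits at endOffset-1. For example, with records at
	// offsets 0..4, endOffset == 5 and the consumer is caught up once
	// it has committed offset 4.
	return lastCommittedOffset == endOffset-1
}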
12 changes: 11 additions & 1 deletion pkg/dataobj/consumer/http.go
@@ -13,7 +13,17 @@ import (
 // PrepareDownscaleHandler is a special handler called by the rollout operator
 // immediately before the pod is downscaled. It can stop a downscale by
 // responding with a non-2xx status code.
-func (s *Service) PrepareDownscaleHandler(_ http.ResponseWriter, _ *http.Request) {
+func (s *Service) PrepareDownscaleHandler(w http.ResponseWriter, r *http.Request) {
+	isDownscalePermitted, err := s.downscalePermitted(r.Context())
+	if err != nil {
+		level.Error(s.logger).Log("msg", "failed to check if downscale is permitted", "err", err)
+		w.WriteHeader(http.StatusInternalServerError)
+		return
+	}
+	if !isDownscalePermitted {
+		w.WriteHeader(http.StatusBadRequest)
+		return
+	}
 	s.partitionInstanceLifecycler.SetRemoveOwnerOnShutdown(true)
 }
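
To make the handler's contract concrete, here is a minimal sketch exercising it with net/http/httptest. The request path and the fully constructed *Service value s are assumptions for illustration, not part of this diff.

// Sketch only: s is an assumed, already-constructed *Service.
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/prepare-downscale", nil)
s.PrepareDownscaleHandler(rec, req)
// rec.Code is 200 (the recorder's default, since the handler writes no
// status on success) when the downscale may proceed, 400 when offsets
// are still uncommitted, and 500 when the check itself failed.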
9 changes: 9 additions & 0 deletions pkg/dataobj/consumer/service.go
@@ -35,6 +35,7 @@ type Service struct {
 	lifecycler                  *ring.Lifecycler
 	partitionInstanceLifecycler *ring.PartitionInstanceLifecycler
 	partitionReader             *partition.ReaderService
+	downscalePermitted          downscalePermittedFunc
 	watcher                     *services.FailureWatcher
 	logger                      log.Logger
 	reg                         prometheus.Registerer
@@ -134,6 +135,14 @@ func New(kafkaCfg kafka.Config, cfg Config, mCfg metastore.Config, bucket objsto
 	}
 	s.partitionReader = partitionReader
 
+	// TODO: We have to pass prometheus.NewRegistry() to avoid duplicate
+	// metric registration with partition.NewReaderService.
+	offsetManager, err := partition.NewKafkaOffsetManager(kafkaCfg, cfg.LifecyclerConfig.ID, logger, prometheus.NewRegistry())
+	if err != nil {
+		return nil, err
+	}
+	s.downscalePermitted = newOffsetCommittedDownscaleFunc(offsetManager, partitionID, logger)
+
 	watcher := services.NewFailureWatcher()
 	watcher.WatchService(lifecycler)
 	watcher.WatchService(partitionInstanceLifecycler)
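
Note that only the offset check is wired up here. If further preconditions are added later, newChainedDownscalePermittedFunc from downscale.go is presumably how they would compose; a sketch, where the second func is a hypothetical placeholder:

s.downscalePermitted = newChainedDownscalePermittedFunc(
	newOffsetCommittedDownscaleFunc(offsetManager, partitionID, logger),
	func(ctx context.Context) (bool, error) {
		// Hypothetical additional guard: every func in the chain must
		// return true for the downscale to be permitted.
		return true, nil
	},
)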