From 0efed733352bc7e1a4bcabb1188b7dd0afda4841 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Leszczy=C5=84ski?= <2000michal@wp.pl>
Date: Wed, 2 Oct 2024 20:45:53 +0200
Subject: [PATCH] s: add workload indexing - add logging

---
 pkg/service/restore/index.go | 51 ++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/pkg/service/restore/index.go b/pkg/service/restore/index.go
index eb238b1df..dd7b7b72c 100644
--- a/pkg/service/restore/index.go
+++ b/pkg/service/restore/index.go
@@ -79,7 +79,9 @@ func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location Locat
 			return LocationWorkload{}, errors.Wrap(err, "filter already restored sstables")
 		}
 	}
-	return aggregateLocationWorkload(rawWorkload), nil
+	workload := aggregateLocationWorkload(rawWorkload)
+	w.logWorkloadInfo(ctx, workload)
+	return workload, nil
 }
 
 func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Location) ([]RemoteDirWorkload, error) {
@@ -115,7 +117,9 @@ func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Lo
 				Size: size,
 				SSTables: remoteSSTables,
 			}
-			rawWorkload = append(rawWorkload, workload)
+			if size > 0 {
+				rawWorkload = append(rawWorkload, workload)
+			}
 			return nil
 		})
 	})
@@ -212,6 +216,49 @@ func (w *tablesWorker) initMetrics(workload []LocationWorkload) {
 	}, float64(totalSize-workloadSize)/float64(totalSize)*100)
 }
 
+func (w *tablesWorker) logWorkloadInfo(ctx context.Context, workload LocationWorkload) {
+	if workload.Size == 0 {
+		return
+	}
+	var locMax, locCnt int64
+	for _, twl := range workload.Tables {
+		if twl.Size == 0 {
+			continue
+		}
+		var tabMax, tabCnt int64
+		for _, rdwl := range twl.RemoteDirs {
+			if rdwl.Size == 0 {
+				continue
+			}
+			var dirMax int64
+			for _, sst := range rdwl.SSTables {
+				dirMax = max(dirMax, sst.Size)
+			}
+			dirCnt := int64(len(rdwl.SSTables))
+			w.logger.Info(ctx, "Remote sstable dir workload info",
+				"path", rdwl.RemoteSSTableDir,
+				"max size", dirMax,
+				"average size", rdwl.Size/dirCnt,
+				"count", dirCnt)
+			tabCnt += dirCnt
+			tabMax = max(tabMax, dirMax)
+		}
+		w.logger.Info(ctx, "Table workload info",
+			"keyspace", twl.Keyspace,
+			"table", twl.Table,
+			"max size", tabMax,
+			"average size", twl.Size/tabCnt,
+			"count", tabCnt)
+		locCnt += tabCnt
+		locMax = max(locMax, tabMax)
+	}
+	w.logger.Info(ctx, "Location workload info",
+		"location", workload.Location.String(),
+		"max size", locMax,
+		"average size", workload.Size/locCnt,
+		"count", locCnt)
+}
+
 func aggregateLocationWorkload(rawWorkload []RemoteDirWorkload) LocationWorkload {
 	remoteDirWorkloads := make(map[TableName][]RemoteDirWorkload)
 	for _, rw := range rawWorkload {