From 0efed733352bc7e1a4bcabb1188b7dd0afda4841 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Leszczy=C5=84ski?= <2000michal@wp.pl>
Date: Wed, 2 Oct 2024 20:45:53 +0200
Subject: [PATCH] s: add workload indexing - add logging

---
 pkg/service/restore/index.go | 51 ++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/pkg/service/restore/index.go b/pkg/service/restore/index.go
index eb238b1df..dd7b7b72c 100644
--- a/pkg/service/restore/index.go
+++ b/pkg/service/restore/index.go
@@ -79,7 +79,9 @@ func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location Locat
 			return LocationWorkload{}, errors.Wrap(err, "filter already restored sstables")
 		}
 	}
-	return aggregateLocationWorkload(rawWorkload), nil
+	workload := aggregateLocationWorkload(rawWorkload)
+	w.logWorkloadInfo(ctx, workload)
+	return workload, nil
 }
 
 func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Location) ([]RemoteDirWorkload, error) {
@@ -115,7 +117,9 @@ func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Lo
 				Size:             size,
 				SSTables:         remoteSSTables,
 			}
-			rawWorkload = append(rawWorkload, workload)
+			if size > 0 {
+				rawWorkload = append(rawWorkload, workload)
+			}
 			return nil
 		})
 	})
@@ -212,6 +216,63 @@ func (w *tablesWorker) initMetrics(workload []LocationWorkload) {
 	}, float64(totalSize-workloadSize)/float64(totalSize)*100)
 }
 
+// logWorkloadInfo logs the max sstable size, the average size (Size divided by
+// sstable count) and the sstable count of every non-empty remote sstable dir
+// and table in workload, followed by the same summary for the whole location.
+// Entries with zero size or without any sstables are skipped, so calculating
+// the average never divides by zero.
+func (w *tablesWorker) logWorkloadInfo(ctx context.Context, workload LocationWorkload) {
+	if workload.Size == 0 {
+		return
+	}
+	var locMax, locCnt int64
+	for _, twl := range workload.Tables {
+		if twl.Size == 0 {
+			continue
+		}
+		var tabMax, tabCnt int64
+		for _, rdwl := range twl.RemoteDirs {
+			dirCnt := int64(len(rdwl.SSTables))
+			// Non-zero Size alone doesn't guarantee a non-zero divisor.
+			if rdwl.Size == 0 || dirCnt == 0 {
+				continue
+			}
+			var dirMax int64
+			for _, sst := range rdwl.SSTables {
+				dirMax = max(dirMax, sst.Size)
+			}
+			w.logger.Info(ctx, "Remote sstable dir workload info",
+				"path", rdwl.RemoteSSTableDir,
+				"max size", dirMax,
+				"average size", rdwl.Size/dirCnt,
+				"count", dirCnt)
+			tabCnt += dirCnt
+			tabMax = max(tabMax, dirMax)
+		}
+		// Every remote dir of this table was skipped.
+		if tabCnt == 0 {
+			continue
+		}
+		w.logger.Info(ctx, "Table workload info",
+			"keyspace", twl.Keyspace,
+			"table", twl.Table,
+			"max size", tabMax,
+			"average size", twl.Size/tabCnt,
+			"count", tabCnt)
+		locCnt += tabCnt
+		locMax = max(locMax, tabMax)
+	}
+	// Every table of this location was skipped.
+	if locCnt == 0 {
+		return
+	}
+	w.logger.Info(ctx, "Location workload info",
+		"location", workload.Location.String(),
+		"max size", locMax,
+		"average size", workload.Size/locCnt,
+		"count", locCnt)
+}
+
 func aggregateLocationWorkload(rawWorkload []RemoteDirWorkload) LocationWorkload {
 	remoteDirWorkloads := make(map[TableName][]RemoteDirWorkload)
 	for _, rw := range rawWorkload {