Skip to content

Commit f9f9be6

Browse files
authored
*: clean up handling metrics process (#7370)
ref #5839, close #7391

Signed-off-by: Ryan Leung <[email protected]>
1 parent 89c8374 commit f9f9be6

File tree

8 files changed: 30 additions (+30) and 49 deletions (-49).

Diff for: pkg/mcs/scheduling/server/cluster.go

-16
Original file line number | Diff line number | Diff line change
@@ -485,10 +485,6 @@ func (c *Cluster) collectMetrics() {
485485

486486
c.coordinator.GetSchedulersController().CollectSchedulerMetrics()
487487
c.coordinator.CollectHotSpotMetrics()
488-
c.collectClusterMetrics()
489-
}
490-
491-
func (c *Cluster) collectClusterMetrics() {
492488
if c.regionStats == nil {
493489
return
494490
}
@@ -500,20 +496,8 @@ func (c *Cluster) collectClusterMetrics() {
500496

501497
func (c *Cluster) resetMetrics() {
502498
statistics.Reset()
503-
504499
schedulers.ResetSchedulerMetrics()
505500
schedule.ResetHotSpotMetrics()
506-
c.resetClusterMetrics()
507-
}
508-
509-
func (c *Cluster) resetClusterMetrics() {
510-
if c.regionStats == nil {
511-
return
512-
}
513-
c.regionStats.Reset()
514-
c.labelStats.Reset()
515-
// reset hot cache metrics
516-
c.hotStat.ResetMetrics()
517501
}
518502

519503
// StartBackgroundJobs starts background jobs.

Diff for: pkg/statistics/hot_cache.go

+2 -2
Original file line number | Diff line number | Diff line change
@@ -125,8 +125,8 @@ func (w *HotCache) CollectMetrics() {
125125
w.CheckReadAsync(newCollectMetricsTask())
126126
}
127127

128-
// ResetMetrics resets the hot cache metrics.
129-
func (w *HotCache) ResetMetrics() {
128+
// ResetHotCacheStatusMetrics resets the hot cache metrics.
129+
func ResetHotCacheStatusMetrics() {
130130
hotCacheStatusGauge.Reset()
131131
}
132132

Diff for: pkg/statistics/region_collection.go

+4 -4
Original file line number | Diff line number | Diff line change
@@ -272,8 +272,8 @@ func (r *RegionStatistics) Collect() {
272272
regionWitnessLeaderRegionCounter.Set(float64(len(r.stats[WitnessLeader])))
273273
}
274274

275-
// Reset resets the metrics of the regions' status.
276-
func (r *RegionStatistics) Reset() {
275+
// ResetRegionStatsMetrics resets the metrics of the regions' status.
276+
func ResetRegionStatsMetrics() {
277277
regionMissPeerRegionCounter.Set(0)
278278
regionExtraPeerRegionCounter.Set(0)
279279
regionDownPeerRegionCounter.Set(0)
@@ -326,8 +326,8 @@ func (l *LabelStatistics) Collect() {
326326
}
327327
}
328328

329-
// Reset resets the metrics of the label status.
330-
func (l *LabelStatistics) Reset() {
329+
// ResetLabelStatsMetrics resets the metrics of the label status.
330+
func ResetLabelStatsMetrics() {
331331
regionLabelLevelGauge.Reset()
332332
}
333333

Diff for: pkg/statistics/store_collection.go

+3
Original file line number | Diff line number | Diff line change
@@ -322,4 +322,7 @@ func Reset() {
322322
storeStatusGauge.Reset()
323323
clusterStatusGauge.Reset()
324324
placementStatusGauge.Reset()
325+
ResetRegionStatsMetrics()
326+
ResetLabelStatsMetrics()
327+
ResetHotCacheStatusMetrics()
325328
}

Diff for: server/cluster/cluster.go

+2 -2
Original file line number | Diff line number | Diff line change
@@ -654,7 +654,7 @@ func (c *RaftCluster) runMetricsCollectionJob() {
654654
ticker := time.NewTicker(metricsCollectionJobInterval)
655655
failpoint.Inject("highFrequencyClusterJobs", func() {
656656
ticker.Stop()
657-
ticker = time.NewTicker(time.Microsecond)
657+
ticker = time.NewTicker(time.Millisecond)
658658
})
659659
defer ticker.Stop()
660660

@@ -734,10 +734,10 @@ func (c *RaftCluster) Stop() {
734734
return
735735
}
736736
c.running = false
737+
c.cancel()
737738
if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) {
738739
c.stopSchedulingJobs()
739740
}
740-
c.cancel()
741741
c.Unlock()
742742

743743
c.wg.Wait()

Diff for: server/cluster/cluster_test.go

+14 -7
Original file line number | Diff line number | Diff line change
@@ -2485,7 +2485,10 @@ func TestCollectMetricsConcurrent(t *testing.T) {
24852485
nil)
24862486
}, func(co *schedule.Coordinator) { co.Run() }, re)
24872487
defer cleanup()
2488-
2488+
rc := co.GetCluster().(*RaftCluster)
2489+
rc.schedulingController = newSchedulingController(rc.serverCtx, rc.GetBasicCluster(), rc.GetOpts(), rc.GetRuleManager())
2490+
rc.schedulingController.coordinator = co
2491+
controller := co.GetSchedulersController()
24892492
// Make sure there are no problem when concurrent write and read
24902493
var wg sync.WaitGroup
24912494
count := 10
@@ -2498,15 +2501,14 @@ func TestCollectMetricsConcurrent(t *testing.T) {
24982501
}
24992502
}(i)
25002503
}
2501-
controller := co.GetSchedulersController()
25022504
for i := 0; i < 1000; i++ {
25032505
co.CollectHotSpotMetrics()
25042506
controller.CollectSchedulerMetrics()
2505-
co.GetCluster().(*RaftCluster).collectStatisticsMetrics()
2507+
rc.collectSchedulingMetrics()
25062508
}
25072509
schedule.ResetHotSpotMetrics()
25082510
schedulers.ResetSchedulerMetrics()
2509-
co.GetCluster().(*RaftCluster).resetStatisticsMetrics()
2511+
rc.resetSchedulingMetrics()
25102512
wg.Wait()
25112513
}
25122514

@@ -2520,6 +2522,11 @@ func TestCollectMetrics(t *testing.T) {
25202522
nil)
25212523
}, func(co *schedule.Coordinator) { co.Run() }, re)
25222524
defer cleanup()
2525+
2526+
rc := co.GetCluster().(*RaftCluster)
2527+
rc.schedulingController = newSchedulingController(rc.serverCtx, rc.GetBasicCluster(), rc.GetOpts(), rc.GetRuleManager())
2528+
rc.schedulingController.coordinator = co
2529+
controller := co.GetSchedulersController()
25232530
count := 10
25242531
for i := 0; i <= count; i++ {
25252532
for k := 0; k < 200; k++ {
@@ -2533,11 +2540,11 @@ func TestCollectMetrics(t *testing.T) {
25332540
tc.hotStat.HotCache.Update(item, utils.Write)
25342541
}
25352542
}
2536-
controller := co.GetSchedulersController()
2543+
25372544
for i := 0; i < 1000; i++ {
25382545
co.CollectHotSpotMetrics()
25392546
controller.CollectSchedulerMetrics()
2540-
co.GetCluster().(*RaftCluster).collectStatisticsMetrics()
2547+
rc.collectSchedulingMetrics()
25412548
}
25422549
stores := co.GetCluster().GetStores()
25432550
regionStats := co.GetCluster().RegionWriteStats()
@@ -2552,7 +2559,7 @@ func TestCollectMetrics(t *testing.T) {
25522559
re.Equal(status1, status2)
25532560
schedule.ResetHotSpotMetrics()
25542561
schedulers.ResetSchedulerMetrics()
2555-
co.GetCluster().(*RaftCluster).resetStatisticsMetrics()
2562+
rc.resetSchedulingMetrics()
25562563
}
25572564

25582565
func prepare(setCfg func(*sc.ScheduleConfig), setTc func(*testCluster), run func(*schedule.Coordinator), re *require.Assertions) (*testCluster, *schedule.Coordinator, func()) {

Diff for: server/cluster/scheduling_controller.go

+5 -16
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ func (sc *schedulingController) runSchedulingMetricsCollectionJob() {
149149
ticker := time.NewTicker(metricsCollectionJobInterval)
150150
failpoint.Inject("highFrequencyClusterJobs", func() {
151151
ticker.Stop()
152-
ticker = time.NewTicker(time.Microsecond)
152+
ticker = time.NewTicker(time.Millisecond)
153153
})
154154
defer ticker.Stop()
155155

@@ -170,7 +170,10 @@ func (sc *schedulingController) resetSchedulingMetrics() {
170170
statistics.Reset()
171171
schedulers.ResetSchedulerMetrics()
172172
schedule.ResetHotSpotMetrics()
173-
sc.resetStatisticsMetrics()
173+
statistics.ResetRegionStatsMetrics()
174+
statistics.ResetLabelStatsMetrics()
175+
// reset hot cache metrics
176+
statistics.ResetHotCacheStatusMetrics()
174177
}
175178

176179
func (sc *schedulingController) collectSchedulingMetrics() {
@@ -183,20 +186,6 @@ func (sc *schedulingController) collectSchedulingMetrics() {
183186
statsMap.Collect()
184187
sc.coordinator.GetSchedulersController().CollectSchedulerMetrics()
185188
sc.coordinator.CollectHotSpotMetrics()
186-
sc.collectStatisticsMetrics()
187-
}
188-
189-
func (sc *schedulingController) resetStatisticsMetrics() {
190-
if sc.regionStats == nil {
191-
return
192-
}
193-
sc.regionStats.Reset()
194-
sc.labelStats.Reset()
195-
// reset hot cache metrics
196-
sc.hotStat.ResetMetrics()
197-
}
198-
199-
func (sc *schedulingController) collectStatisticsMetrics() {
200189
if sc.regionStats == nil {
201190
return
202191
}

Diff for: tests/server/cluster/cluster_test.go

-2
Original file line number | Diff line number | Diff line change
@@ -518,8 +518,6 @@ func TestRaftClusterMultipleRestart(t *testing.T) {
518518
err = rc.Start(leaderServer.GetServer())
519519
re.NoError(err)
520520
time.Sleep(time.Millisecond)
521-
rc = leaderServer.GetRaftCluster()
522-
re.NotNil(rc)
523521
rc.Stop()
524522
}
525523
re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs"))

0 commit comments

Comments
 (0)