-
Notifications
You must be signed in to change notification settings - Fork 12
*: add metrics-min-interval to improve the chance of collecting top-heavy metrics #490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ import ( | |
| "fmt" | ||
| "io" | ||
| "maps" | ||
| "net" | ||
| "net/http" | ||
| "net/url" | ||
| "os" | ||
|
|
@@ -54,7 +55,7 @@ const ( | |
| subdirMetrics = "metrics" | ||
| subdirRaw = "raw" | ||
| maxQueryRange = 120 * 60 // 120min | ||
| minQueryRange = 5 * 60 // 5min | ||
| minQueryRange = 1 * 60 // 1min | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is not used anymore.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the minimum protective value: if the interval is lower than this, the proportion of metadata will be high, and segmentation that small would be meaningless. |
||
| ) | ||
|
|
||
| type collectMonitor struct { | ||
|
|
@@ -177,6 +178,7 @@ type MetricCollectOptions struct { | |
| filter []string | ||
| exclude []string | ||
| limit int // series*min per query | ||
| minInterval int // the minimum interval of a single request in seconds | ||
| compress bool | ||
| customHeader []string | ||
| endpoint string | ||
|
|
@@ -287,8 +289,11 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err | |
| mu := sync.Mutex{} | ||
|
|
||
| key := c.endpoint | ||
| if c.minInterval < minQueryRange { | ||
| c.minInterval = minQueryRange | ||
| } | ||
| if _, ok := bars[key]; !ok { | ||
| bars[key] = mb.AddBar(fmt.Sprintf(" - Querying server %s", key)) | ||
| bars[key] = mb.AddBar(fmt.Sprintf(" - Querying server %s, min interval %v", key, c.minInterval)) | ||
| } | ||
|
|
||
| if m.diagMode == DiagModeCmd { | ||
|
|
@@ -312,7 +317,19 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err | |
| return err | ||
| } | ||
|
|
||
| client := &http.Client{Timeout: time.Second * time.Duration(c.opt.APITimeout)} | ||
| client := &http.Client{ | ||
| Transport: &http.Transport{ | ||
| MaxIdleConns: qLimit * 2, | ||
| MaxIdleConnsPerHost: 10, | ||
| IdleConnTimeout: 30 * time.Second, | ||
| ExpectContinueTimeout: 1 * time.Second, | ||
| DialContext: (&net.Dialer{ | ||
| Timeout: 5 * time.Second, | ||
| KeepAlive: 30 * time.Second, | ||
| }).DialContext, | ||
| }, | ||
| Timeout: time.Second * time.Duration(c.opt.APITimeout), | ||
| } | ||
| for _, mtc := range c.metrics { | ||
| go func(tok *utils.Token, mtc string) { | ||
| bars[key].UpdateDisplay(&progress.DisplayProps{ | ||
|
|
@@ -322,7 +339,7 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err | |
|
|
||
| tsEnd, _ := utils.ParseTime(c.GetBaseOptions().ScrapeEnd) | ||
| tsStart, _ := utils.ParseTime(c.GetBaseOptions().ScrapeBegin) | ||
| collectMetric(m.logger, client, key, tsStart, tsEnd, mtc, c.label, c.resultDir, c.limit, c.compress, c.customHeader, "") | ||
| collectMetric(m.logger, client, key, tsStart, tsEnd, mtc, c.label, c.resultDir, c.limit, c.minInterval, c.compress, c.customHeader, "") | ||
|
|
||
| mu.Lock() | ||
| done++ | ||
|
|
@@ -337,6 +354,7 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err | |
| tl.Put(tok) | ||
| }(tl.Get(), mtc) | ||
| } | ||
| m.logger.Infof("Collected metrics ...") | ||
|
|
||
| tl.Wait() | ||
|
|
||
|
|
@@ -407,6 +425,7 @@ func collectMetric( | |
| label map[string]string, | ||
| resultDir string, | ||
| speedlimit int, | ||
| minInterval int, | ||
| compress bool, | ||
| customHeader []string, | ||
| instance string, | ||
|
|
@@ -466,7 +485,7 @@ func collectMetric( | |
| newLabel := make(map[string]string) | ||
| maps.Copy(newLabel, label) | ||
| newLabel["instance"] = instance | ||
| collectMetric(l, c, promAddr, beginTime, endTime, mtc, newLabel, resultDir, speedlimit, compress, customHeader, instance) | ||
| collectMetric(l, c, promAddr, beginTime, endTime, mtc, newLabel, resultDir, speedlimit, minInterval, compress, customHeader, instance) | ||
| } | ||
| } | ||
| return | ||
|
|
@@ -485,8 +504,8 @@ func collectMetric( | |
| if block > maxQueryRange { | ||
| block = maxQueryRange | ||
| } | ||
| if block < minQueryRange { | ||
| block = minQueryRange | ||
| if block < minInterval { | ||
| block = minInterval | ||
| } | ||
|
|
||
| l.Debugf("Dumping metric %s-%s-%s%s...", mtc, beginTime.Format(time.RFC3339), endTime.Format(time.RFC3339), nameSuffix) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is 120s the new default value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. If you think it's best to keep it the same as before, I can change it to 5 minutes.