From 0ed8e298f909a8976086d438e26059b6a3491822 Mon Sep 17 00:00:00 2001 From: Lynn Date: Wed, 26 Nov 2025 10:43:43 +0800 Subject: [PATCH 1/2] *: add metrics-min-interval and tiny update --- cmd/diag/command/collect.go | 1 + collector/collect.go | 46 +++++++++++++++++++------------------ collector/prometheus.go | 23 +++++++++++++++---- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/cmd/diag/command/collect.go b/cmd/diag/command/collect.go index f298b9c5..434ee93e 100644 --- a/cmd/diag/command/collect.go +++ b/cmd/diag/command/collect.go @@ -160,6 +160,7 @@ func newCollectCmd() *cobra.Command { cmd.Flags().IntVar(&cOpt.MetricsLimit, "metricslimit", 10000, "metric size limit of single request, specified in series*hour per request") cmd.Flags().StringVar(&metricsConf, "metricsconfig", "", "config file of metricsfilter") cmd.Flags().StringSliceVar(&labels, "metricslabel", nil, "only collect metrics that match labels") + cmd.Flags().IntVar(&cOpt.MetricsMinInterval, "metrics-min-interval", 120, "the minimum interval of a single request in seconds") cmd.Flags().StringVar(&promEndpoint, "overwrite-prometheus-endpoint", "", "Prometheus endpoint") cmd.Flags().StringSliceVarP(&cOpt.Header, "prometheus-header", "H", nil, "custom headers of http request when collect metrics") cmd.Flags().StringVarP(&cOpt.Dir, "output", "o", "", "output directory of collected data") diff --git a/collector/collect.go b/collector/collect.go index 5dc557a2..a4f148e4 100644 --- a/collector/collect.go +++ b/collector/collect.go @@ -108,28 +108,29 @@ type BaseOptions struct { // CollectOptions contains the options defining which type of data to collect type CollectOptions struct { - RawRequest interface{} // raw collect command or request - Mode string // the cluster is deployed with what type of tool - DiagMode string // run diag collect at command line mode or server mode - ProfileName string // the name of a pre-defined collecting profile - Collectors CollectTree // struct to show 
which collector is enabled - MetricsFilter []string // prefix of metrics to collect - MetricsExclude []string // prefix of metrics to exclude - MetricsLabel map[string]string // label to filte metrics - Dir string // target directory to store collected data - Limit int // rate limit of SCP - MetricsLimit int // query limit of one request - PerfDuration int // seconds: profile time(s), default is 30s. - CompressScp bool // compress of files during collecting - CompressMetrics bool // compress of files during collecting - RawMonitor bool // collect raw data for metrics - ExitOnError bool // break the process and exit when an error occur - ExtendedAttrs map[string]string // extended attributes used for manual collecting mode - ExplainSQLPath string // File path for explain sql - ExplainSqls []string // explain sqls - CurrDB string - Header []string - UsePortForward bool // use portforward when call api inside k8s cluster + RawRequest interface{} // raw collect command or request + Mode string // the cluster is deployed with what type of tool + DiagMode string // run diag collect at command line mode or server mode + ProfileName string // the name of a pre-defined collecting profile + Collectors CollectTree // struct to show which collector is enabled + MetricsFilter []string // prefix of metrics to collect + MetricsExclude []string // prefix of metrics to exclude + MetricsLabel map[string]string // label to filter metrics + Dir string // target directory to store collected data + Limit int // rate limit of SCP + MetricsLimit int // query limit of one request + MetricsMinInterval int // seconds: minimum query interval of one request; the CLI flag defaults to 120, values below 60 are raised to 60. + PerfDuration int // seconds: profile time(s), default is 30s. 
+ CompressScp bool // compress of files during collecting + CompressMetrics bool // compress of files during collecting + RawMonitor bool // collect raw data for metrics + ExitOnError bool // break the process and exit when an error occur + ExtendedAttrs map[string]string // extended attributes used for manual collecting mode + ExplainSQLPath string // File path for explain sql + ExplainSqls []string // explain sqls + CurrDB string + Header []string + UsePortForward bool // use portforward when call api inside k8s cluster } // CollectStat is estimated size stats of data to be collected @@ -301,6 +302,7 @@ func (m *Manager) CollectClusterInfo( filter: cOpt.MetricsFilter, exclude: cOpt.MetricsExclude, limit: cOpt.MetricsLimit, + minInterval: cOpt.MetricsMinInterval, compress: cOpt.CompressMetrics, customHeader: cOpt.Header, portForward: cOpt.UsePortForward, diff --git a/collector/prometheus.go b/collector/prometheus.go index 08cf20fd..00963447 100644 --- a/collector/prometheus.go +++ b/collector/prometheus.go @@ -18,6 +18,7 @@ import ( "fmt" "io" "maps" + "net" "net/http" "net/url" "os" @@ -54,7 +55,6 @@ const ( subdirMetrics = "metrics" subdirRaw = "raw" maxQueryRange = 120 * 60 // 120min - minQueryRange = 5 * 60 // 5min ) type collectMonitor struct { @@ -177,6 +177,7 @@ type MetricCollectOptions struct { filter []string exclude []string limit int // series*min per query + minInterval int // the minimum interval of a single request in seconds compress bool customHeader []string endpoint string @@ -312,7 +313,19 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err return err } - client := &http.Client{Timeout: time.Second * time.Duration(c.opt.APITimeout)} + client := &http.Client{ + Transport: &http.Transport{ + MaxIdleConns: qLimit * 2, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 30 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + DialContext: (&net.Dialer{ + Timeout: 5 * time.Second, + KeepAlive: 30 * time.Second, + 
}).DialContext, + }, + Timeout: time.Second * time.Duration(c.opt.APITimeout), + } for _, mtc := range c.metrics { go func(tok *utils.Token, mtc string) { bars[key].UpdateDisplay(&progress.DisplayProps{ @@ -322,7 +335,7 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err tsEnd, _ := utils.ParseTime(c.GetBaseOptions().ScrapeEnd) tsStart, _ := utils.ParseTime(c.GetBaseOptions().ScrapeBegin) - collectMetric(m.logger, client, key, tsStart, tsEnd, mtc, c.label, c.resultDir, c.limit, c.compress, c.customHeader, "") + collectMetric(m.logger, client, key, tsStart, tsEnd, mtc, c.label, c.resultDir, c.limit, c.minInterval, c.compress, c.customHeader, "") mu.Lock() done++ @@ -337,6 +350,7 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err tl.Put(tok) }(tl.Get(), mtc) } + m.logger.Infof("Collected metrics ...") tl.Wait() @@ -407,6 +421,7 @@ func collectMetric( label map[string]string, resultDir string, speedlimit int, + minQueryRange int, compress bool, customHeader []string, instance string, @@ -466,7 +481,7 @@ func collectMetric( newLabel := make(map[string]string) maps.Copy(newLabel, label) newLabel["instance"] = instance - collectMetric(l, c, promAddr, beginTime, endTime, mtc, newLabel, resultDir, speedlimit, compress, customHeader, instance) + collectMetric(l, c, promAddr, beginTime, endTime, mtc, newLabel, resultDir, speedlimit, minQueryRange, compress, customHeader, instance) } } return From 8bb4f6b1cb6bb56bcfe639cf69ad568b83a744cf Mon Sep 17 00:00:00 2001 From: Lynn Date: Wed, 26 Nov 2025 16:31:05 +0800 Subject: [PATCH 2/2] *: add a lower bound --- collector/prometheus.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/collector/prometheus.go b/collector/prometheus.go index 00963447..604b3689 100644 --- a/collector/prometheus.go +++ b/collector/prometheus.go @@ -55,6 +55,7 @@ const ( subdirMetrics = "metrics" subdirRaw = "raw" maxQueryRange = 120 * 60 // 120min + 
minQueryRange = 1 * 60 // 1min ) type collectMonitor struct { @@ -288,8 +289,11 @@ func (c *MetricCollectOptions) Collect(m *Manager, topo *models.TiDBCluster) err mu := sync.Mutex{} key := c.endpoint + if c.minInterval < minQueryRange { + c.minInterval = minQueryRange + } if _, ok := bars[key]; !ok { - bars[key] = mb.AddBar(fmt.Sprintf(" - Querying server %s", key)) + bars[key] = mb.AddBar(fmt.Sprintf(" - Querying server %s, min interval %v", key, c.minInterval)) } if m.diagMode == DiagModeCmd { @@ -421,7 +425,7 @@ func collectMetric( label map[string]string, resultDir string, speedlimit int, - minQueryRange int, + minInterval int, compress bool, customHeader []string, instance string, @@ -481,7 +485,7 @@ func collectMetric( newLabel := make(map[string]string) maps.Copy(newLabel, label) newLabel["instance"] = instance - collectMetric(l, c, promAddr, beginTime, endTime, mtc, newLabel, resultDir, speedlimit, minQueryRange, compress, customHeader, instance) + collectMetric(l, c, promAddr, beginTime, endTime, mtc, newLabel, resultDir, speedlimit, minInterval, compress, customHeader, instance) } } return @@ -500,8 +504,8 @@ func collectMetric( if block > maxQueryRange { block = maxQueryRange } - if block < minQueryRange { - block = minQueryRange + if block < minInterval { + block = minInterval } l.Debugf("Dumping metric %s-%s-%s%s...", mtc, beginTime.Format(time.RFC3339), endTime.Format(time.RFC3339), nameSuffix)