Skip to content

Commit c50aaa2

Browse files
authored
Merge pull request #59 from janboll/add-metrics-cache
Implementing metrics caching
2 parents 19c8a98 + 2f4b682 commit c50aaa2

11 files changed

+516
-305
lines changed

README.md

+10
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ vpc:
7373
- "us-east-1"
7474
- "eu-central-1"
7575
timeout: 30s
76+
interval: 300s
77+
cache_ttl: 500s
7678
ec2:
7779
enabled: true
7880
regions:
@@ -84,6 +86,7 @@ route53:
8486
enabled: true
8587
region: "us-east-1"
8688
timeout: 60s
89+
interval: 90s
8790
```
8891
8992
Some exporters might expose different configuration values; see the example files for the possible keys.
@@ -96,6 +99,13 @@ tweak this behavior.
9699
- `LOGS_METRICS_WORKERS`: Number of workers to request log metrics in parallel (default=10)
97100
- `LOGS_METRICS_TTL`: Cache TTL for RDS log-related metrics (default=300)
98101

102+
103+
Defaults (used when a key is not set in the configuration file):
104+
- interval: 15 seconds
105+
- cache_ttl: 35 seconds
106+
- timeout: 10 seconds
107+
108+
99109
To view all available command-line flags, run `./aws-resource-exporter -h`.
100110

101111
## License

aws-resource-exporter-config.yaml

+1-5
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,12 @@ vpc:
88
- "us-east-1"
99
- "eu-central-1"
1010
- "eu-west-1"
11-
timeout: 30s
1211
route53:
1312
enabled: true
1413
region: "us-east-1"
15-
timeout: 300s
16-
interval: 300s
1714
ec2:
1815
enabled: true
1916
regions:
2017
- "us-east-1"
2118
- "eu-central-1"
22-
- "us-west-1"
23-
timeout: 30s
19+
- "us-west-1"

ec2.go

+32-16
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"sync"
66
"time"
77

8+
"github.com/app-sre/aws-resource-exporter/pkg"
89
"github.com/aws/aws-sdk-go/aws"
910
"github.com/aws/aws-sdk-go/aws/session"
1011
"github.com/aws/aws-sdk-go/service/ec2"
@@ -24,35 +25,51 @@ var TransitGatewaysUsage *prometheus.Desc = prometheus.NewDesc(prometheus.BuildF
2425

2526
type EC2Exporter struct {
2627
sessions []*session.Session
28+
cache pkg.MetricsCache
2729

28-
logger log.Logger
29-
timeout time.Duration
30+
logger log.Logger
31+
timeout time.Duration
32+
interval time.Duration
3033
}
3134

32-
func NewEC2Exporter(sessions []*session.Session, logger log.Logger, timeout time.Duration) *EC2Exporter {
35+
func NewEC2Exporter(sessions []*session.Session, logger log.Logger, config EC2Config) *EC2Exporter {
3336

3437
level.Info(logger).Log("msg", "Initializing EC2 exporter")
3538
return &EC2Exporter{
3639
sessions: sessions,
40+
cache: *pkg.NewMetricsCache(*config.CacheTTL),
3741

38-
logger: logger,
39-
timeout: timeout,
42+
logger: logger,
43+
timeout: *config.Timeout,
44+
interval: *config.Interval,
4045
}
4146
}
4247

4348
func (e *EC2Exporter) Collect(ch chan<- prometheus.Metric) {
44-
ctx, ctxCancel := context.WithTimeout(context.Background(), e.timeout)
45-
defer ctxCancel()
46-
wg := &sync.WaitGroup{}
47-
wg.Add(len(e.sessions))
49+
for _, m := range e.cache.GetAllMetrics() {
50+
ch <- m
51+
}
52+
}
4853

49-
for _, sess := range e.sessions {
50-
go collectInRegion(sess, e.logger, wg, ch, ctx)
54+
func (e *EC2Exporter) CollectLoop() {
55+
for {
56+
ctx, ctxCancel := context.WithTimeout(context.Background(), e.timeout)
57+
defer ctxCancel()
58+
wg := &sync.WaitGroup{}
59+
wg.Add(len(e.sessions))
60+
61+
for _, sess := range e.sessions {
62+
go e.collectInRegion(sess, e.logger, wg, ctx)
63+
}
64+
wg.Wait()
65+
66+
level.Info(e.logger).Log("msg", "EC2 metrics Updated")
67+
68+
time.Sleep(e.interval)
5169
}
52-
wg.Wait()
5370
}
5471

55-
func collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGroup, ch chan<- prometheus.Metric, ctx context.Context) {
72+
func (e *EC2Exporter) collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGroup, ctx context.Context) {
5673
defer wg.Done()
5774
ec2Svc := ec2.New(sess)
5875
serviceQuotaSvc := servicequotas.New(sess)
@@ -71,9 +88,8 @@ func collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGrou
7188
return
7289
}
7390

74-
ch <- prometheus.MustNewConstMetric(TransitGatewaysUsage, prometheus.GaugeValue, float64(len(gateways)), *sess.Config.Region)
75-
ch <- prometheus.MustNewConstMetric(TransitGatewaysQuota, prometheus.GaugeValue, quota, *sess.Config.Region)
76-
91+
e.cache.AddMetric(prometheus.MustNewConstMetric(TransitGatewaysUsage, prometheus.GaugeValue, float64(len(gateways)), *sess.Config.Region))
92+
e.cache.AddMetric(prometheus.MustNewConstMetric(TransitGatewaysQuota, prometheus.GaugeValue, quota, *sess.Config.Region))
7793
}
7894

7995
func (e *EC2Exporter) Describe(ch chan<- *prometheus.Desc) {

go.mod

+4
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@ require (
1616
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4 // indirect
1717
github.com/beorn7/perks v1.0.1 // indirect
1818
github.com/cespare/xxhash/v2 v2.1.1 // indirect
19+
github.com/davecgh/go-spew v1.1.1 // indirect
1920
github.com/go-logfmt/logfmt v0.4.0 // indirect
2021
github.com/golang/protobuf v1.3.3 // indirect
2122
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af // indirect
2223
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515 // indirect
2324
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
2425
github.com/pkg/errors v0.9.1 // indirect
26+
github.com/pmezard/go-difflib v1.0.0 // indirect
2527
github.com/prometheus/client_model v0.2.0 // indirect
2628
github.com/prometheus/procfs v0.0.8 // indirect
29+
github.com/stretchr/testify v1.8.0 // indirect
2730
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 // indirect
31+
gopkg.in/yaml.v3 v3.0.1 // indirect
2832
)

go.sum

+7
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,14 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
7979
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
8080
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
8181
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
82+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
8283
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
8384
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
8485
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
8586
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
87+
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
88+
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
89+
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
8690
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
8791
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
8892
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -111,3 +115,6 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
111115
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
112116
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
113117
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
118+
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
119+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
120+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

main.go

+59-12
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ func main() {
4343
}
4444

4545
type BaseConfig struct {
46-
Enabled bool `yaml:"enabled"`
46+
Enabled bool `yaml:"enabled"`
47+
Interval *time.Duration `yaml:"interval"`
48+
CacheTTL *time.Duration `yaml:"cache_ttl"`
4749
}
4850

4951
type RDSConfig struct {
@@ -53,21 +55,20 @@ type RDSConfig struct {
5355

5456
type VPCConfig struct {
5557
BaseConfig `yaml:"base,inline"`
56-
Timeout time.Duration `yaml:"timeout"`
57-
Regions []string `yaml:"regions"`
58+
Timeout *time.Duration `yaml:"timeout"`
59+
Regions []string `yaml:"regions"`
5860
}
5961

6062
type Route53Config struct {
6163
BaseConfig `yaml:"base,inline"`
62-
Interval time.Duration `yaml:"interval"`
63-
Timeout time.Duration `yaml:"timeout"`
64-
Region string `yaml:"region"` // Use only a single Region for now, as the current metric is global
64+
Timeout *time.Duration `yaml:"timeout"`
65+
Region string `yaml:"region"` // Use only a single Region for now, as the current metric is global
6566
}
6667

6768
type EC2Config struct {
6869
BaseConfig `yaml:"base,inline"`
69-
Timeout time.Duration `yaml:"timeout"`
70-
Regions []string `yaml:"regions"`
70+
Timeout *time.Duration `yaml:"timeout"`
71+
Regions []string `yaml:"regions"`
7172
}
7273

7374
type Config struct {
@@ -77,6 +78,10 @@ type Config struct {
7778
EC2Config EC2Config `yaml:"ec2"`
7879
}
7980

81+
// durationPtr returns a pointer to a fresh copy of duration. It exists so
// that optional *time.Duration configuration fields can be given default
// values inline; every call yields a distinct pointer.
func durationPtr(duration time.Duration) *time.Duration {
	ptr := new(time.Duration)
	*ptr = duration
	return ptr
}
84+
8085
func loadExporterConfiguration(logger log.Logger, configFile string) (*Config, error) {
8186
var config Config
8287
file, err := ioutil.ReadFile(configFile)
@@ -85,6 +90,42 @@ func loadExporterConfiguration(logger log.Logger, configFile string) (*Config, e
8590
return nil, errors.New("Could not load configuration file: " + configFile)
8691
}
8792
yaml.Unmarshal(file, &config)
93+
94+
if config.RdsConfig.CacheTTL == nil {
95+
config.RdsConfig.CacheTTL = durationPtr(35 * time.Second)
96+
}
97+
if config.VpcConfig.CacheTTL == nil {
98+
config.VpcConfig.CacheTTL = durationPtr(35 * time.Second)
99+
}
100+
if config.Route53Config.CacheTTL == nil {
101+
config.Route53Config.CacheTTL = durationPtr(35 * time.Second)
102+
}
103+
if config.EC2Config.CacheTTL == nil {
104+
config.EC2Config.CacheTTL = durationPtr(35 * time.Second)
105+
}
106+
107+
if config.RdsConfig.Interval == nil {
108+
config.RdsConfig.Interval = durationPtr(15 * time.Second)
109+
}
110+
if config.VpcConfig.Interval == nil {
111+
config.VpcConfig.Interval = durationPtr(15 * time.Second)
112+
}
113+
if config.Route53Config.Interval == nil {
114+
config.Route53Config.Interval = durationPtr(15 * time.Second)
115+
}
116+
if config.EC2Config.Interval == nil {
117+
config.EC2Config.Interval = durationPtr(15 * time.Second)
118+
}
119+
120+
if config.VpcConfig.Timeout == nil {
121+
config.VpcConfig.Timeout = durationPtr(10 * time.Second)
122+
}
123+
if config.Route53Config.Timeout == nil {
124+
config.Route53Config.Timeout = durationPtr(10 * time.Second)
125+
}
126+
if config.EC2Config.Timeout == nil {
127+
config.EC2Config.Timeout = durationPtr(10 * time.Second)
128+
}
88129
return &config, nil
89130
}
90131

@@ -106,7 +147,9 @@ func setupCollectors(logger log.Logger, configFile string, creds *credentials.Cr
106147
sess := session.Must(session.NewSession(config))
107148
vpcSessions = append(vpcSessions, sess)
108149
}
109-
collectors = append(collectors, NewVPCExporter(vpcSessions, logger, config.VpcConfig.Timeout))
150+
vpcExporter := NewVPCExporter(vpcSessions, logger, config.VpcConfig)
151+
collectors = append(collectors, vpcExporter)
152+
go vpcExporter.CollectLoop()
110153
}
111154
level.Info(logger).Log("msg", "Will RDS metrics be gathered?", "rds-enabled", config.RdsConfig.Enabled)
112155
var rdsSessions []*session.Session
@@ -116,7 +159,9 @@ func setupCollectors(logger log.Logger, configFile string, creds *credentials.Cr
116159
sess := session.Must(session.NewSession(config))
117160
rdsSessions = append(rdsSessions, sess)
118161
}
119-
collectors = append(collectors, NewRDSExporter(rdsSessions, logger))
162+
rdsExporter := NewRDSExporter(rdsSessions, logger, config.RdsConfig)
163+
collectors = append(collectors, rdsExporter)
164+
go rdsExporter.CollectLoop()
120165
}
121166
level.Info(logger).Log("msg", "Will EC2 metrics be gathered?", "ec2-enabled", config.EC2Config.Enabled)
122167
var ec2Sessions []*session.Session
@@ -126,13 +171,15 @@ func setupCollectors(logger log.Logger, configFile string, creds *credentials.Cr
126171
sess := session.Must(session.NewSession(config))
127172
ec2Sessions = append(ec2Sessions, sess)
128173
}
129-
collectors = append(collectors, NewEC2Exporter(ec2Sessions, logger, config.EC2Config.Timeout))
174+
ec2Exporter := NewEC2Exporter(ec2Sessions, logger, config.EC2Config)
175+
collectors = append(collectors, ec2Exporter)
176+
go ec2Exporter.CollectLoop()
130177
}
131178
level.Info(logger).Log("msg", "Will Route53 metrics be gathered?", "route53-enabled", config.Route53Config.Enabled)
132179
if config.Route53Config.Enabled {
133180
awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(config.Route53Config.Region)
134181
sess := session.Must(session.NewSession(awsConfig))
135-
r53Exporter := NewRoute53Exporter(sess, logger, config.Route53Config.Interval, config.Route53Config.Timeout)
182+
r53Exporter := NewRoute53Exporter(sess, logger, config.Route53Config)
136183
collectors = append(collectors, r53Exporter)
137184
go r53Exporter.CollectLoop()
138185
}

pkg/cache.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package pkg
2+
3+
import (
4+
"crypto/sha256"
5+
"fmt"
6+
"sync"
7+
"time"
8+
9+
"github.com/prometheus/client_golang/prometheus"
10+
dto "github.com/prometheus/client_model/go"
11+
)
12+
13+
type MetricsCache struct {
14+
cacheMutex *sync.Mutex
15+
entries map[string]cacheEntry
16+
ttl time.Duration
17+
}
18+
19+
func NewMetricsCache(ttl time.Duration) *MetricsCache {
20+
return &MetricsCache{
21+
cacheMutex: &sync.Mutex{},
22+
entries: map[string]cacheEntry{},
23+
ttl: ttl,
24+
}
25+
}
26+
27+
func getMetricHash(metric prometheus.Metric) string {
28+
var dto dto.Metric
29+
metric.Write(&dto)
30+
labelString := metric.Desc().String()
31+
32+
for _, labelPair := range dto.GetLabel() {
33+
labelString = fmt.Sprintf("%s,%s,%s", labelString, labelPair.GetName(), labelPair.GetValue())
34+
}
35+
36+
checksum := sha256.Sum256([]byte(labelString))
37+
return fmt.Sprintf("%x", checksum[:])
38+
}
39+
40+
// AddMetric adds a metric to the cache
41+
func (mc *MetricsCache) AddMetric(metric prometheus.Metric) {
42+
mc.cacheMutex.Lock()
43+
mc.entries[getMetricHash(metric)] = cacheEntry{
44+
creation: time.Now(),
45+
metric: metric,
46+
}
47+
mc.cacheMutex.Unlock()
48+
}
49+
50+
// GetAllMetrics Iterates over all cached metrics and discards expired ones.
51+
func (mc *MetricsCache) GetAllMetrics() []prometheus.Metric {
52+
mc.cacheMutex.Lock()
53+
returnArr := make([]prometheus.Metric, 0)
54+
for k, v := range mc.entries {
55+
if time.Since(v.creation).Seconds() > mc.ttl.Seconds() {
56+
delete(mc.entries, k)
57+
} else {
58+
returnArr = append(returnArr, v.metric)
59+
}
60+
}
61+
mc.cacheMutex.Unlock()
62+
return returnArr
63+
}
64+
65+
type cacheEntry struct {
66+
creation time.Time
67+
metric prometheus.Metric
68+
}

0 commit comments

Comments
 (0)